From 0091cd7649dba602b735d6e269318ef5b71a43c5 Mon Sep 17 00:00:00 2001
From: "shengquan.nian"
Date: Mon, 22 Apr 2024 12:18:56 +0800
Subject: [PATCH] add model pointrcnn_iou

---
 .../pointrcnn-iou/pytorch/README.md | 69 +
 .../pointrcnn-iou/pytorch/numba/.coveragerc | 24 +
 .../pointrcnn-iou/pytorch/numba/.flake8 | 263 +
 .../pytorch/numba/.gitattributes | 1 +
 .../pytorch/numba/.github/CODEOWNERS | 98 +
 .../.github/ISSUE_TEMPLATE/Bug_report.md | 34 +
 .../.github/ISSUE_TEMPLATE/Feature_request.md | 23 +
 .../numba/.github/ISSUE_TEMPLATE/config.yml | 11 +
 .../ISSUE_TEMPLATE/first_rc_checklist.md | 41 +
 .../ISSUE_TEMPLATE/sub_rc_checklist.md | 37 +
 .../numba/.github/PULL_REQUEST_TEMPLATE.md | 39 +
 .../pytorch/numba/.github/workflows/stale.yml | 20 +
 .../pointrcnn-iou/pytorch/numba/.gitignore | 28 +
 .../pytorch/numba/.pre-commit-config.yaml | 5 +
 .../pytorch/numba/.readthedocs.yml | 15 +
 .../pointrcnn-iou/pytorch/numba/CHANGE_LOG | 5717 +++++++
 .../pytorch/numba/CONTRIBUTING.md | 53 +
 .../pointrcnn-iou/pytorch/numba/LICENSE | 24 +
 .../pytorch/numba/LICENSES.third-party | 493 +
 .../pointrcnn-iou/pytorch/numba/MANIFEST.in | 10 +
 .../pointrcnn-iou/pytorch/numba/README.rst | 61 +
 .../pytorch/numba/azure-pipelines.yml | 129 +
 .../pointrcnn-iou/pytorch/numba/bin/numba | 8 +
 .../pytorch/numba/build_numba.sh | 21 +
 .../buildscripts/appveyor/run_with_env.cmd | 90 +
 .../buildscripts/azure/azure-linux-macos.yml | 47 +
 .../buildscripts/azure/azure-windows.yml | 62 +
 .../buildscripts/condarecipe.local/bld.bat | 3 +
 .../buildscripts/condarecipe.local/build.sh | 16 +
 .../condarecipe.local/conda_build_config.yaml | 12 +
 .../condarecipe.local/license.txt | 24 +
 .../buildscripts/condarecipe.local/meta.yaml | 94 +
 .../condarecipe.local/run_test.bat | 19 +
 .../condarecipe.local/run_test.sh | 53 +
 .../condarecipe_clone_icc_rt/bld.bat | 3 +
 .../condarecipe_clone_icc_rt/build.sh | 5 +
 .../condarecipe_clone_icc_rt/meta.yaml | 38 +
 .../condarecipe_clone_icc_rt/scripts/build.py | 33 +
 .../numba/buildscripts/gpuci/axis.yaml | 19 +
 .../pytorch/numba/buildscripts/gpuci/build.sh | 74 +
 .../incremental/MacOSX10.10.sdk.checksum | 1 +
 .../buildscripts/incremental/after_success.sh | 11 +
 .../numba/buildscripts/incremental/build.cmd | 10 +
 .../numba/buildscripts/incremental/build.sh | 28 +
 .../incremental/install_miniconda.sh | 19 +
 .../incremental/setup_conda_environment.cmd | 45 +
 .../incremental/setup_conda_environment.sh | 117 +
 .../numba/buildscripts/incremental/test.cmd | 34 +
 .../numba/buildscripts/incremental/test.sh | 131 +
 .../pytorch/numba/clean_numba.sh | 9 +
 .../pointrcnn-iou/pytorch/numba/codecov.yml | 22 +
 .../pytorch/numba/contrib/valgrind-numba.supp | 21 +
 .../pointrcnn-iou/pytorch/numba/docs/Makefile | 177 +
 .../numba/docs/_static/js/modernizr.min.js | 7 +
 .../docs/_static/numba-blue-icon-rgb.svg | 27 +
 .../docs/_static/numba-white-icon-rgb.svg | 27 +
 .../numba/docs/_static/rtd-overrides.css | 3 +
 .../pytorch/numba/docs/_templates/EMPTY | 0
 .../pytorch/numba/docs/dagmap/README.md | 57 +
 .../pytorch/numba/docs/dagmap/dagmap.yaml | 195 +
 .../numba/docs/dagmap/jquery.graphviz.svg.js | 537 +
 .../pytorch/numba/docs/dagmap/render.py | 85 +
 .../pytorch/numba/docs/dagmap/template.html | 110 +
 .../pytorch/numba/docs/environment.yml | 16 +
 .../pytorch/numba/docs/gh-pages.py | 150 +
 .../pointrcnn-iou/pytorch/numba/docs/make.bat | 242 +
 .../pytorch/numba/docs/requirements.txt | 1 +
 .../pytorch/numba/docs/source/_ext/ghfiles.py | 75 +
 .../pytorch/numba/docs/source/conf.py | 343 +
 .../numba/docs/source/cuda-reference/host.rst | 232 +
 .../docs/source/cuda-reference/index.rst | 10 +
 .../docs/source/cuda-reference/kernel.rst | 586 +
 .../docs/source/cuda-reference/libdevice.rst | 16 +
 .../docs/source/cuda-reference/memory.rst | 24 +
 .../docs/source/cuda-reference/types.rst | 56 +
 .../numba/docs/source/cuda/bindings.rst | 43 +
 .../numba/docs/source/cuda/caching.rst | 35 +
 .../docs/source/cuda/cooperative_groups.rst | 111 +
 .../docs/source/cuda/cuda_array_interface.rst | 531 +
 .../numba/docs/source/cuda/cuda_ffi.rst | 158 +
 .../docs/source/cuda/cudapysupported.rst | 296 +
 .../docs/source/cuda/device-functions.rst | 15 +
 .../docs/source/cuda/device-management.rst | 92 +
 .../numba/docs/source/cuda/examples.rst | 527 +
 .../docs/source/cuda/external-memory.rst | 320 +
 .../pytorch/numba/docs/source/cuda/faq.rst | 20 +
 .../numba/docs/source/cuda/fastmath.rst | 36 +
 .../pytorch/numba/docs/source/cuda/index.rst | 29 +
 .../numba/docs/source/cuda/intrinsics.rst | 58 +
 .../pytorch/numba/docs/source/cuda/ipc.rst | 36 +
 .../numba/docs/source/cuda/kernels.rst | 233 +
 .../numba/docs/source/cuda/laplace_final.svg | 1953 +++
 .../docs/source/cuda/laplace_initial.svg | 1838 ++
 .../pytorch/numba/docs/source/cuda/memory.rst | 257 +
 .../numba/docs/source/cuda/overview.rst | 133 +
 .../pytorch/numba/docs/source/cuda/random.rst | 90 +
 .../numba/docs/source/cuda/reduction.rst | 33 +
 .../numba/docs/source/cuda/simulator.rst | 104 +
 .../pytorch/numba/docs/source/cuda/ufunc.rst | 154 +
 .../docs/source/developer/architecture.rst | 909 +
 .../numba/docs/source/developer/caching.rst | 111 +
 .../source/developer/compiler_pass_example.py | 78 +
 .../docs/source/developer/contributing.rst | 491 +
 .../docs/source/developer/custom_pipeline.rst | 173 +
 .../numba/docs/source/developer/debugging.rst | 138 +
 .../docs/source/developer/dispatching.rst | 267 +
 .../docs/source/developer/environment.rst | 59 +
 .../numba/docs/source/developer/event_api.rst | 5 +
 .../docs/source/developer/generators.rst | 307 +
 .../numba/docs/source/developer/hashing.rst | 54 +
 .../numba/docs/source/developer/index.rst | 32 +
 .../docs/source/developer/inline_example.py | 82 +
 .../developer/inline_overload_example.py | 61 +
 .../numba/docs/source/developer/inlining.rst | 281 +
 .../numba/docs/source/developer/listings.rst | 29 +
 .../numba/docs/source/developer/literal.rst | 82 +
 .../developer/live_variable_analysis.rst | 86 +
 .../docs/source/developer/llvm_timings.rst | 107 +
 .../numba/docs/source/developer/mission.rst | 82 +
 .../docs/source/developer/numba-runtime.rst | 200 +
 .../numba/docs/source/developer/release.rst | 49 +
 .../numba/docs/source/developer/repomap.rst | 582 +
 .../numba/docs/source/developer/rewrites.rst | 397 +
 .../numba/docs/source/developer/stencil.rst | 170 +
 .../source/developer/target_extension.rst | 61 +
 .../developer/threading_implementation.rst | 249 +
 .../docs/source/extending/entrypoints.rst | 65 +
 .../docs/source/extending/high-level.rst | 254 +
 .../numba/docs/source/extending/index.rst | 30 +
 .../source/extending/interval-example.rst | 345 +
 .../numba/docs/source/extending/low-level.rst | 194 +
 .../numba/docs/source/extending/mynorm.py | 72 +
 .../source/extending/overloading-guide.rst | 192 +
 .../numba/docs/source/extending/template.py | 21 +
 .../pytorch/numba/docs/source/glossary.rst | 106 +
 .../pytorch/numba/docs/source/index.rst | 37 +
 .../numba/docs/source/proposals/cfunc.rst | 147 +
 .../source/proposals/extension-points.rst | 414 +
 .../proposals/external-memory-management.rst | 922 +
 .../numba/docs/source/proposals/index.rst | 35 +
 .../docs/source/proposals/integer-typing.rst | 186 +
 .../docs/source/proposals/jit-classes.rst | 231 +
 .../source/proposals/np-where-override.py | 51 +
 .../source/proposals/recursion_callstack.svg | 4 +
 .../docs/source/proposals/type-inference.rst | 124 +
 .../source/proposals/typing_recursion.rst | 129 +
 .../docs/source/reference/aot-compilation.rst | 76 +
 .../docs/source/reference/deprecation.rst | 312 +
 .../numba/docs/source/reference/envvars.rst | 589 +
 .../docs/source/reference/fpsemantics.rst | 81 +
 .../numba/docs/source/reference/index.rst | 16 +
 .../docs/source/reference/jit-compilation.rst | 572 +
 .../docs/source/reference/numpysupported.rst | 925 +
 .../docs/source/reference/pysemantics.rst | 88 +
 .../docs/source/reference/pysupported.rst | 1284 ++
 .../numba/docs/source/reference/types.rst | 375 +
 .../numba/docs/source/reference/utils.rst | 34 +
 .../numba/docs/source/release-notes.rst | 5 +
 .../numba/docs/source/user/5minguide.rst | 223 +
 .../pytorch/numba/docs/source/user/cfunc.rst | 218 +
 .../pytorch/numba/docs/source/user/cli.rst | 165 +
 .../numba/docs/source/user/examples.rst | 52 +
 .../pytorch/numba/docs/source/user/faq.rst | 413 +
 .../numba/docs/source/user/generated-jit.rst | 71 +
 .../pytorch/numba/docs/source/user/index.rst | 26 +
 .../numba/docs/source/user/installing.rst | 373 +
 .../numba/docs/source/user/jit-module.rst | 103 +
 .../pytorch/numba/docs/source/user/jit.rst | 201 +
 .../numba/docs/source/user/jitclass.rst | 270 +
 .../numba/docs/source/user/overview.rst | 34 +
 .../numba/docs/source/user/parallel.rst | 719 +
 .../docs/source/user/performance-tips.rst | 248 +
 .../pytorch/numba/docs/source/user/pycc.rst | 140 +
 .../numba/docs/source/user/stencil.rst | 255 +
 .../pytorch/numba/docs/source/user/talks.rst | 36 +
 .../docs/source/user/threading-layer.rst | 313 +
 .../numba/docs/source/user/troubleshoot.rst | 1179 ++
 .../numba/docs/source/user/vectorize.rst | 423 +
 .../numba/docs/source/user/withobjmode.rst | 34 +
 .../pytorch/numba/install_numba.sh | 42 +
 .../pointrcnn-iou/pytorch/numba/mypy.ini | 52 +
 .../pytorch/numba/numba/__init__.py | 224 +
 .../pytorch/numba/numba/__main__.py | 6 +
 .../pytorch/numba/numba/_arraystruct.h | 21 +
 .../pytorch/numba/numba/_devicearray.cpp | 142 +
 .../pytorch/numba/numba/_devicearray.h | 25 +
 .../pytorch/numba/numba/_dispatcher.cpp | 1223 ++
 .../pytorch/numba/numba/_dynfunc.c | 507 +
 .../pytorch/numba/numba/_dynfuncmod.c | 93 +
 .../pytorch/numba/numba/_hashtable.c | 530 +
 .../pytorch/numba/numba/_hashtable.h | 132 +
 .../pytorch/numba/numba/_helperlib.c | 1186 ++
 .../pytorch/numba/numba/_helpermod.c | 307 +
 .../pytorch/numba/numba/_lapack.c | 1946 +++
 .../pytorch/numba/numba/_npymath_exports.c | 46 +
 .../pytorch/numba/numba/_numba_common.h | 39 +
 .../pytorch/numba/numba/_pymodule.h | 32 +
 .../pytorch/numba/numba/_random.c | 492 +
 .../pytorch/numba/numba/_typeof.c | 1133 ++
 .../pytorch/numba/numba/_typeof.h | 16 +
 .../pytorch/numba/numba/_unicodetype_db.h | 6091 +++++++
 .../pytorch/numba/numba/_version.py | 238 +
 .../pytorch/numba/numba/capsulethunk.h | 108 +
 .../pytorch/numba/numba/cext/__init__.py | 23 +
 .../pytorch/numba/numba/cext/cext.h | 18 +
 .../pytorch/numba/numba/cext/dictobject.c | 1191 ++
 .../pytorch/numba/numba/cext/dictobject.h | 222 +
 .../pytorch/numba/numba/cext/listobject.c | 977 ++
 .../pytorch/numba/numba/cext/listobject.h | 135 +
 .../pytorch/numba/numba/cext/utils.c | 8 +
 .../numba/numba/cloudpickle/__init__.py | 12 +
 .../numba/numba/cloudpickle/cloudpickle.py | 848 +
 .../numba/cloudpickle/cloudpickle_fast.py | 775 +
 .../pytorch/numba/numba/cloudpickle/compat.py | 13 +
 .../pytorch/numba/numba/core/__init__.py | 0
 .../pytorch/numba/numba/core/analysis.py | 722 +
 .../numba/numba/core/annotations/__init__.py | 0
 .../numba/core/annotations/pretty_annotate.py | 283 +
 .../numba/core/annotations/template.html | 144 +
 .../core/annotations/type_annotations.py | 283 +
 .../pytorch/numba/numba/core/base.py | 1255 ++
 .../pytorch/numba/numba/core/boxing.py | 1317 ++
 .../pytorch/numba/numba/core/bytecode.py | 369 +
 .../pytorch/numba/numba/core/byteflow.py | 1555 ++
 .../pytorch/numba/numba/core/caching.py | 731 +
 .../pytorch/numba/numba/core/callconv.py | 650 +
 .../pytorch/numba/numba/core/callwrapper.py | 226 +
 .../pytorch/numba/numba/core/ccallback.py | 134 +
 .../pytorch/numba/numba/core/cgutils.py | 1194 ++
 .../pytorch/numba/numba/core/codegen.py | 1437 ++
 .../pytorch/numba/numba/core/compiler.py | 790 +
 .../pytorch/numba/numba/core/compiler_lock.py | 56 +
 .../numba/numba/core/compiler_machinery.py | 463 +
 .../pytorch/numba/numba/core/config.py | 527 +
 .../pytorch/numba/numba/core/consts.py | 118 +
 .../pytorch/numba/numba/core/controlflow.py | 954 ++
 .../pytorch/numba/numba/core/cpu.py | 379 +
 .../pytorch/numba/numba/core/cpu_options.py | 181 +
 .../pytorch/numba/numba/core/dataflow.py | 914 +
 .../numba/numba/core/datamodel/__init__.py | 4 +
 .../numba/numba/core/datamodel/manager.py | 47 +
 .../numba/numba/core/datamodel/models.py | 1384 ++
 .../numba/numba/core/datamodel/packer.py | 213 +
 .../numba/numba/core/datamodel/registry.py | 18 +
 .../numba/numba/core/datamodel/testing.py | 150 +
 .../pytorch/numba/numba/core/debuginfo.py | 609 +
 .../pytorch/numba/numba/core/decorators.py | 309 +
 .../pytorch/numba/numba/core/descriptors.py | 21 +
 .../pytorch/numba/numba/core/dispatcher.py | 1322 ++
 .../pytorch/numba/numba/core/entrypoints.py | 58 +
 .../pytorch/numba/numba/core/environment.py | 64 +
 .../pytorch/numba/numba/core/errors.py | 848 +
 .../pytorch/numba/numba/core/event.py | 491 +
 .../pytorch/numba/numba/core/extending.py | 583 +
 .../pytorch/numba/numba/core/externals.py | 155 +
 .../pytorch/numba/numba/core/fastmathpass.py | 44 +
 .../pytorch/numba/numba/core/funcdesc.py | 230 +
 .../pytorch/numba/numba/core/generators.py | 356 +
 .../pytorch/numba/numba/core/imputils.py | 469 +
 .../numba/numba/core/inline_closurecall.py | 1557 ++
 .../pytorch/numba/numba/core/interpreter.py | 2876 ++++
 .../pytorch/numba/numba/core/intrinsics.py | 101 +
 .../pytorch/numba/numba/core/ir.py | 1630 ++
 .../pytorch/numba/numba/core/ir_utils.py | 2350 +++
 .../numba/numba/core/itanium_mangler.py | 205 +
 .../pytorch/numba/numba/core/llvm_bindings.py | 46 +
 .../pytorch/numba/numba/core/lowering.py | 1536 ++
 .../numba/numba/core/object_mode_passes.py | 169 +
 .../pytorch/numba/numba/core/optional.py | 121 +
 .../pytorch/numba/numba/core/options.py | 109 +
 .../pytorch/numba/numba/core/overload_glue.py | 308 +
 .../pytorch/numba/numba/core/postproc.py | 238 +
 .../pytorch/numba/numba/core/pylowering.py | 655 +
 .../pytorch/numba/numba/core/pythonapi.py | 1686 ++
 .../pytorch/numba/numba/core/registry.py | 112 +
 .../numba/numba/core/removerefctpass.py | 120 +
 .../pytorch/numba/numba/core/retarget.py | 135 +
 .../numba/numba/core/rewrites/__init__.py | 8 +
 .../numba/numba/core/rewrites/ir_print.py | 82 +
 .../numba/numba/core/rewrites/registry.py | 98 +
 .../numba/numba/core/rewrites/static_binop.py | 35 +
 .../numba/core/rewrites/static_getitem.py | 175 +
 .../numba/numba/core/rewrites/static_raise.py | 79 +
 .../numba/numba/core/runtime/__init__.py | 1 +
 .../numba/numba/core/runtime/_nrt_python.c | 459 +
 .../numba/numba/core/runtime/_nrt_pythonmod.c | 207 +
 .../numba/numba/core/runtime/context.py | 401 +
 .../pytorch/numba/numba/core/runtime/nrt.c | 595 +
 .../pytorch/numba/numba/core/runtime/nrt.h | 272 +
 .../pytorch/numba/numba/core/runtime/nrt.py | 136 +
 .../numba/numba/core/runtime/nrt_external.h | 65 +
 .../numba/numba/core/runtime/nrtdynmod.py | 215 +
 .../numba/numba/core/runtime/nrtopt.py | 182 +
 .../pytorch/numba/numba/core/serialize.py | 248 +
 .../pytorch/numba/numba/core/sigutils.py | 53 +
 .../pytorch/numba/numba/core/ssa.py | 457 +
 .../numba/numba/core/target_extension.py | 168 +
 .../pytorch/numba/numba/core/targetconfig.py | 321 +
 .../pytorch/numba/numba/core/tracing.py | 216 +
 .../pytorch/numba/numba/core/transforms.py | 918 +
 .../numba/numba/core/typeconv/__init__.py | 1 +
 .../numba/numba/core/typeconv/_typeconv.cpp | 205 +
 .../numba/numba/core/typeconv/castgraph.py | 133 +
 .../numba/numba/core/typeconv/rules.py | 60 +
 .../numba/numba/core/typeconv/test.cpp | 39 +
 .../numba/numba/core/typeconv/typeconv.cpp | 210 +
 .../numba/numba/core/typeconv/typeconv.hpp | 98 +
 .../numba/numba/core/typeconv/typeconv.py | 128 +
 .../pytorch/numba/numba/core/typed_passes.py | 864 +
 .../pytorch/numba/numba/core/typeinfer.py | 1780 ++
 .../numba/numba/core/types/__init__.py | 178 +
 .../numba/numba/core/types/abstract.py | 507 +
 .../pytorch/numba/numba/core/types/common.py | 104 +
 .../numba/numba/core/types/containers.py | 958 ++
 .../numba/numba/core/types/function_type.py | 211 +
 .../numba/numba/core/types/functions.py | 745 +
 .../numba/numba/core/types/iterators.py | 108 +
 .../pytorch/numba/numba/core/types/misc.py | 545 +
 .../numba/numba/core/types/npytypes.py | 604 +
 .../pytorch/numba/numba/core/types/scalars.py | 270 +
 .../numba/numba/core/typing/__init__.py | 3 +
 .../numba/numba/core/typing/arraydecl.py | 849 +
 .../numba/numba/core/typing/asnumbatype.py | 139 +
 .../numba/numba/core/typing/bufproto.py | 67 +
 .../numba/numba/core/typing/builtins.py | 1157 ++
 .../numba/numba/core/typing/cffi_utils.py | 229 +
 .../numba/numba/core/typing/cmathdecl.py | 72 +
 .../numba/numba/core/typing/collections.py | 121 +
 .../numba/numba/core/typing/context.py | 720 +
 .../numba/numba/core/typing/ctypes_utils.py | 128 +
 .../numba/numba/core/typing/dictdecl.py | 29 +
 .../numba/numba/core/typing/enumdecl.py | 64 +
 .../numba/numba/core/typing/listdecl.py | 198 +
 .../numba/numba/core/typing/mathdecl.py | 141 +
 .../numba/numba/core/typing/npdatetime.py | 285 +
 .../numba/numba/core/typing/npydecl.py | 1239 ++
 .../numba/numba/core/typing/randomdecl.py | 300 +
 .../numba/numba/core/typing/setdecl.py | 192 +
 .../numba/numba/core/typing/templates.py | 1322 ++
 .../pytorch/numba/numba/core/typing/typeof.py | 284 +
 .../numba/numba/core/unsafe/__init__.py | 0
 .../pytorch/numba/numba/core/unsafe/bytes.py | 49 +
 .../pytorch/numba/numba/core/unsafe/eh.py | 62 +
 .../pytorch/numba/numba/core/unsafe/nrt.py | 20 +
 .../numba/numba/core/unsafe/refcount.py | 80 +
 .../numba/numba/core/untyped_passes.py | 1722 ++
 .../pytorch/numba/numba/core/utils.py | 722 +
 .../pytorch/numba/numba/core/withcontexts.py | 535 +
 .../pytorch/numba/numba/cpython/__init__.py | 0
 .../pytorch/numba/numba/cpython/builtins.py | 825 +
 .../pytorch/numba/numba/cpython/charseq.py | 1020 ++
 .../pytorch/numba/numba/cpython/cmathimpl.py | 528 +
 .../pytorch/numba/numba/cpython/enumimpl.py | 89 +
 .../pytorch/numba/numba/cpython/hashing.py | 759 +
 .../pytorch/numba/numba/cpython/heapq.py | 266 +
 .../pytorch/numba/numba/cpython/iterators.py | 140 +
 .../pytorch/numba/numba/cpython/listobj.py | 1260 ++
 .../pytorch/numba/numba/cpython/mathimpl.py | 453 +
 .../pytorch/numba/numba/cpython/numbers.py | 1363 ++
 .../pytorch/numba/numba/cpython/printimpl.py | 82 +
 .../pytorch/numba/numba/cpython/randomimpl.py | 1644 ++
 .../pytorch/numba/numba/cpython/rangeobj.py | 299 +
 .../pytorch/numba/numba/cpython/setobj.py | 1543 ++
 .../pytorch/numba/numba/cpython/slicing.py | 302 +
 .../pytorch/numba/numba/cpython/tupleobj.py | 412 +
 .../pytorch/numba/numba/cpython/unicode.py | 2498 +++
 .../numba/numba/cpython/unicode_support.py | 765 +
 .../numba/numba/cpython/unsafe/__init__.py | 0
 .../numba/numba/cpython/unsafe/numbers.py | 53 +
 .../numba/numba/cpython/unsafe/tuple.py | 84 +
 .../pytorch/numba/numba/cuda/__init__.py | 16 +
 .../pytorch/numba/numba/cuda/api.py | 517 +
 .../pytorch/numba/numba/cuda/api_util.py | 30 +
 .../pytorch/numba/numba/cuda/args.py | 77 +
 .../pytorch/numba/numba/cuda/codegen.py | 443 +
 .../pytorch/numba/numba/cuda/compiler.py | 319 +
 .../pytorch/numba/numba/cuda/cuda_paths.py | 165 +
 .../pytorch/numba/numba/cuda/cudadecl.py | 689 +
 .../numba/numba/cuda/cudadrv/__init__.py | 9 +
 .../numba/numba/cuda/cudadrv/_extras.c | 45 +
 .../numba/numba/cuda/cudadrv/devicearray.py | 902 +
 .../numba/numba/cuda/cudadrv/devices.py | 248 +
 .../numba/numba/cuda/cudadrv/driver.py | 3191 ++++
 .../numba/numba/cuda/cudadrv/drvapi.py | 394 +
 .../pytorch/numba/numba/cuda/cudadrv/enums.py | 444 +
 .../pytorch/numba/numba/cuda/cudadrv/error.py | 19 +
 .../pytorch/numba/numba/cuda/cudadrv/libs.py | 127 +
 .../numba/numba/cuda/cudadrv/ndarray.py | 20 +
 .../pytorch/numba/numba/cuda/cudadrv/nvvm.py | 909 +
 .../pytorch/numba/numba/cuda/cudadrv/rtapi.py | 10 +
 .../numba/numba/cuda/cudadrv/runtime.py | 143 +
 .../pytorch/numba/numba/cuda/cudaimpl.py | 1116 ++
 .../pytorch/numba/numba/cuda/cudamath.py | 130 +
 .../pytorch/numba/numba/cuda/decorators.py | 170 +
 .../pytorch/numba/numba/cuda/descriptor.py | 33 +
 .../pytorch/numba/numba/cuda/device_init.py | 88 +
 .../pytorch/numba/numba/cuda/dispatcher.py | 902 +
 .../pytorch/numba/numba/cuda/errors.py | 59 +
 .../pytorch/numba/numba/cuda/initialize.py | 17 +
 .../numba/numba/cuda/intrinsic_wrapper.py | 77 +
 .../numba/numba/cuda/kernels/__init__.py | 0
 .../numba/numba/cuda/kernels/reduction.py | 262 +
 .../numba/numba/cuda/kernels/transpose.py | 65 +
 .../pytorch/numba/numba/cuda/libdevice.py | 3382 ++++
 .../pytorch/numba/numba/cuda/libdevicedecl.py | 17 +
 .../numba/numba/cuda/libdevicefuncs.py | 1057 ++
 .../pytorch/numba/numba/cuda/libdeviceimpl.py | 83 +
 .../pytorch/numba/numba/cuda/mathimpl.py | 287 +
 .../pytorch/numba/numba/cuda/models.py | 41 +
 .../pytorch/numba/numba/cuda/nvvmutils.py | 265 +
 .../pytorch/numba/numba/cuda/printimpl.py | 86 +
 .../pytorch/numba/numba/cuda/random.py | 291 +
 .../numba/numba/cuda/simulator/__init__.py | 38 +
 .../pytorch/numba/numba/cuda/simulator/api.py | 103 +
 .../numba/numba/cuda/simulator/compiler.py | 7 +
 .../numba/cuda/simulator/cudadrv/__init__.py | 2 +
 .../cuda/simulator/cudadrv/devicearray.py | 407 +
 .../numba/cuda/simulator/cudadrv/devices.py | 111 +
 .../numba/cuda/simulator/cudadrv/driver.py | 54 +
 .../numba/cuda/simulator/cudadrv/drvapi.py | 4 +
 .../numba/cuda/simulator/cudadrv/error.py | 2 +
 .../numba/cuda/simulator/cudadrv/libs.py | 2 +
 .../numba/cuda/simulator/cudadrv/nvvm.py | 30 +
 .../numba/cuda/simulator/cudadrv/runtime.py | 19 +
 .../numba/numba/cuda/simulator/kernel.py | 307 +
 .../numba/numba/cuda/simulator/kernelapi.py | 439 +
 .../numba/numba/cuda/simulator/reduction.py | 15 +
 .../numba/cuda/simulator/vector_types.py | 58 +
 .../numba/numba/cuda/simulator_init.py | 17 +
 .../pytorch/numba/numba/cuda/stubs.py | 827 +
 .../pytorch/numba/numba/cuda/target.py | 374 +
 .../pytorch/numba/numba/cuda/testing.py | 170 +
 .../numba/numba/cuda/tests/__init__.py | 24 +
 .../numba/cuda/tests/cudadrv/__init__.py | 8 +
 .../numba/cuda/tests/cudadrv/data/__init__.py | 0
 .../cuda/tests/cudadrv/data/cuda_include.cu | 5 +
 .../numba/cuda/tests/cudadrv/data/error.cu | 7 +
 .../numba/cuda/tests/cudadrv/data/jitlink.cu | 13 +
 .../numba/cuda/tests/cudadrv/data/jitlink.ptx | 30 +
 .../numba/cuda/tests/cudadrv/data/warn.cu | 7 +
 .../cuda/tests/cudadrv/test_array_attr.py | 92 +
 .../cuda/tests/cudadrv/test_context_stack.py | 145 +
 .../tests/cudadrv/test_cuda_array_slicing.py | 379 +
 .../tests/cudadrv/test_cuda_auto_context.py | 21 +
 .../tests/cudadrv/test_cuda_devicerecord.py | 179 +
 .../cuda/tests/cudadrv/test_cuda_driver.py | 235 +
 .../cuda/tests/cudadrv/test_cuda_libraries.py | 22 +
 .../cuda/tests/cudadrv/test_cuda_memory.py | 193 +
 .../cuda/tests/cudadrv/test_cuda_ndarray.py | 547 +
 .../cuda/tests/cudadrv/test_deallocations.py | 249 +
 .../numba/cuda/tests/cudadrv/test_detect.py | 81 +
 .../cuda/tests/cudadrv/test_emm_plugins.py | 192 +
 .../numba/cuda/tests/cudadrv/test_events.py | 38 +
 .../cuda/tests/cudadrv/test_host_alloc.py | 65 +
 .../numba/cuda/tests/cudadrv/test_init.py | 139 +
 .../cuda/tests/cudadrv/test_inline_ptx.py | 35 +
 .../numba/cuda/tests/cudadrv/test_ir_patch.py | 26 +
 .../numba/cuda/tests/cudadrv/test_linker.py | 230 +
 .../cuda/tests/cudadrv/test_managed_alloc.py | 127 +
 .../cuda/tests/cudadrv/test_nvvm_driver.py | 147 +
 .../numba/cuda/tests/cudadrv/test_pinned.py | 37 +
 .../numba/cuda/tests/cudadrv/test_profiler.py | 20 +
 .../numba/cuda/tests/cudadrv/test_ptds.py | 145 +
 .../cuda/tests/cudadrv/test_reset_device.py | 36 +
 .../numba/cuda/tests/cudadrv/test_runtime.py | 86 +
 .../cuda/tests/cudadrv/test_select_device.py | 41 +
 .../numba/cuda/tests/cudadrv/test_streams.py | 111 +
 .../numba/numba/cuda/tests/cudapy/__init__.py | 8 +
 .../numba/cuda/tests/cudapy/cache_usecases.py | 223 +
 .../tests/cudapy/cache_with_cpu_usecases.py | 41 +
 .../cuda/tests/cudapy/extensions_usecases.py | 58 +
 .../numba/numba/cuda/tests/cudapy/jitlink.ptx | 30 +
 .../cuda/tests/cudapy/recursion_usecases.py | 100 +
 .../numba/cuda/tests/cudapy/test_alignment.py | 42 +
 .../numba/cuda/tests/cudapy/test_array.py | 260 +
 .../cuda/tests/cudapy/test_array_args.py | 201 +
 .../cuda/tests/cudapy/test_array_methods.py | 35 +
 .../numba/cuda/tests/cudapy/test_atomics.py | 1559 ++
 .../cuda/tests/cudapy/test_blackscholes.py | 120 +
 .../numba/cuda/tests/cudapy/test_boolean.py | 24 +
 .../numba/cuda/tests/cudapy/test_caching.py | 493 +
 .../numba/cuda/tests/cudapy/test_casting.py | 239 +
 .../numba/cuda/tests/cudapy/test_compiler.py | 163 +
 .../numba/cuda/tests/cudapy/test_complex.py | 296 +
 .../cuda/tests/cudapy/test_complex_kernel.py | 20 +
 .../cuda/tests/cudapy/test_const_string.py | 216 +
 .../numba/cuda/tests/cudapy/test_constmem.py | 185 +
 .../tests/cudapy/test_cooperative_groups.py | 145 +
 .../tests/cudapy/test_cuda_array_interface.py | 435 +
 .../tests/cudapy/test_cuda_jit_no_types.py | 90 +
 .../numba/cuda/tests/cudapy/test_datetime.py | 94 +
 .../numba/cuda/tests/cudapy/test_debug.py | 93 +
 .../numba/cuda/tests/cudapy/test_debuginfo.py | 274 +
 .../cuda/tests/cudapy/test_device_func.py | 222 +
 .../cuda/tests/cudapy/test_dispatcher.py | 292 +
 .../numba/cuda/tests/cudapy/test_enums.py | 121 +
 .../numba/cuda/tests/cudapy/test_errors.py | 79 +
 .../numba/cuda/tests/cudapy/test_exception.py | 168 +
 .../numba/cuda/tests/cudapy/test_extending.py | 155 +
 .../numba/cuda/tests/cudapy/test_fastmath.py | 204 +
 .../numba/cuda/tests/cudapy/test_forall.py | 52 +
 .../numba/cuda/tests/cudapy/test_freevar.py | 29 +
 .../cuda/tests/cudapy/test_frexp_ldexp.py | 66 +
 .../numba/cuda/tests/cudapy/test_globals.py | 60 +
 .../numba/cuda/tests/cudapy/test_gufunc.py | 309 +
 .../cuda/tests/cudapy/test_gufunc_scalar.py | 159 +
 .../tests/cudapy/test_gufunc_scheduling.py | 95 +
 .../numba/cuda/tests/cudapy/test_idiv.py | 37 +
 .../numba/cuda/tests/cudapy/test_inspect.py | 133 +
 .../cuda/tests/cudapy/test_intrinsics.py | 870 +
 .../numba/numba/cuda/tests/cudapy/test_ipc.py | 314 +
 .../numba/cuda/tests/cudapy/test_iterators.py | 99 +
 .../numba/cuda/tests/cudapy/test_lang.py | 64 +
 .../numba/cuda/tests/cudapy/test_laplace.py | 119 +
 .../numba/cuda/tests/cudapy/test_libdevice.py | 186 +
 .../numba/cuda/tests/cudapy/test_lineinfo.py | 43 +
 .../numba/cuda/tests/cudapy/test_localmem.py | 164 +
 .../numba/cuda/tests/cudapy/test_mandel.py | 33 +
 .../numba/cuda/tests/cudapy/test_math.py | 739 +
 .../numba/cuda/tests/cudapy/test_matmul.py | 74 +
 .../numba/cuda/tests/cudapy/test_minmax.py | 113 +
 .../cuda/tests/cudapy/test_montecarlo.py | 22 +
 .../numba/cuda/tests/cudapy/test_multigpu.py | 140 +
 .../cuda/tests/cudapy/test_multiprocessing.py | 46 +
 .../cuda/tests/cudapy/test_multithreads.py | 101 +
 .../numba/cuda/tests/cudapy/test_nondet.py | 49 +
 .../numba/cuda/tests/cudapy/test_operator.py | 41 +
 .../cuda/tests/cudapy/test_optimization.py | 86 +
 .../numba/cuda/tests/cudapy/test_overload.py | 300 +
 .../numba/cuda/tests/cudapy/test_powi.py | 124 +
 .../numba/cuda/tests/cudapy/test_print.py | 128 +
 .../cuda/tests/cudapy/test_py2_div_issue.py | 33 +
 .../numba/cuda/tests/cudapy/test_random.py | 104 +
 .../cuda/tests/cudapy/test_record_dtype.py | 610 +
 .../numba/cuda/tests/cudapy/test_recursion.py | 125 +
 .../numba/cuda/tests/cudapy/test_reduction.py | 76 +
 .../test_retrieve_autoconverted_arrays.py | 83 +
 .../numba/cuda/tests/cudapy/test_serialize.py | 85 +
 .../numba/cuda/tests/cudapy/test_slicing.py | 37 +
 .../numba/numba/cuda/tests/cudapy/test_sm.py | 444 +
 .../cuda/tests/cudapy/test_sm_creation.py | 205 +
 .../numba/cuda/tests/cudapy/test_sync.py | 244 +
 .../numba/cuda/tests/cudapy/test_transpose.py | 80 +
 .../numba/cuda/tests/cudapy/test_userexc.py | 40 +
 .../cuda/tests/cudapy/test_vector_type.py | 307 +
 .../numba/cuda/tests/cudapy/test_vectorize.py | 226 +
 .../tests/cudapy/test_vectorize_complex.py | 20 +
 .../cuda/tests/cudapy/test_vectorize_decor.py | 69 +
 .../tests/cudapy/test_vectorize_device.py | 36 +
 .../tests/cudapy/test_vectorize_scalar_arg.py | 37 +
 .../numba/cuda/tests/cudapy/test_warning.py | 139 +
 .../numba/cuda/tests/cudapy/test_warp_ops.py | 276 +
 .../numba/cuda/tests/cudasim/__init__.py | 6 +
 .../numba/numba/cuda/tests/cudasim/support.py | 6 +
 .../cuda/tests/cudasim/test_cudasim_issues.py | 102 +
 .../numba/cuda/tests/doc_examples/__init__.py | 6 +
 .../cuda/tests/doc_examples/ffi/__init__.py | 0
 .../cuda/tests/doc_examples/ffi/functions.cu | 15 +
 .../numba/cuda/tests/doc_examples/test_cg.py | 75 +
 .../tests/doc_examples/test_cpu_gpu_compat.py | 76 +
 .../numba/cuda/tests/doc_examples/test_ffi.py | 50 +
 .../cuda/tests/doc_examples/test_laplace.py | 153 +
 .../cuda/tests/doc_examples/test_matmul.py | 173 +
 .../tests/doc_examples/test_montecarlo.py | 109 +
 .../cuda/tests/doc_examples/test_random.py | 59 +
 .../cuda/tests/doc_examples/test_reduction.py | 76 +
 .../tests/doc_examples/test_sessionize.py | 128 +
 .../cuda/tests/doc_examples/test_vecadd.py | 73 +
 .../numba/numba/cuda/tests/nocuda/__init__.py | 8 +
 .../numba/cuda/tests/nocuda/test_import.py | 49 +
 .../cuda/tests/nocuda/test_library_lookup.py | 238 +
 .../numba/cuda/tests/nocuda/test_nvvm.py | 80 +
 .../pytorch/numba/numba/cuda/types.py | 37 +
 .../pytorch/numba/numba/cuda/vector_types.py | 209 +
 .../pytorch/numba/numba/cuda/vectorizers.py | 253 +
 .../numba/numba/experimental/__init__.py | 1 +
 .../numba/numba/experimental/function_type.py | 265 +
 .../numba/experimental/jitclass/__init__.py | 3 +
 .../numba/numba/experimental/jitclass/_box.c | 179 +
 .../numba/numba/experimental/jitclass/base.py | 592 +
 .../numba/experimental/jitclass/boxing.py | 256 +
 .../numba/experimental/jitclass/decorators.py | 82 +
 .../numba/experimental/jitclass/overloads.py | 227 +
 .../numba/numba/experimental/structref.py | 384 +
 .../pytorch/numba/numba/extending.py | 3 +
 .../pytorch/numba/numba/mathnames.h | 77 +
 .../pytorch/numba/numba/misc/__init__.py | 0
 .../pytorch/numba/numba/misc/appdirs.py | 551 +
 .../pytorch/numba/numba/misc/cffiimpl.py | 22 +
 .../pytorch/numba/numba/misc/cmdlang.gdb | 5 +
 .../pytorch/numba/numba/misc/dummyarray.py | 456 +
 .../pytorch/numba/numba/misc/dump_style.py | 84 +
 .../pytorch/numba/numba/misc/findlib.py | 60 +
 .../numba/numba/misc/firstlinefinder.py | 96 +
 .../pytorch/numba/numba/misc/gdb_hook.py | 228 +
 .../numba/numba/misc/gdb_print_extension.py | 212 +
 .../pytorch/numba/numba/misc/help/__init__.py | 0
 .../numba/numba/misc/help/inspector.py | 433 +
 .../pytorch/numba/numba/misc/init_utils.py | 44 +
 .../pytorch/numba/numba/misc/inspection.py | 103 +
 .../pytorch/numba/numba/misc/literal.py | 24 +
 .../numba/numba/misc/llvm_pass_timings.py | 402 +
 .../pytorch/numba/numba/misc/mergesort.py | 126 +
 .../pytorch/numba/numba/misc/numba_entry.py | 72 +
 .../pytorch/numba/numba/misc/numba_gdbinfo.py | 161 +
 .../pytorch/numba/numba/misc/numba_sysinfo.py | 667 +
 .../pytorch/numba/numba/misc/quicksort.py | 258 +
 .../pytorch/numba/numba/misc/special.py | 104 +
 .../pytorch/numba/numba/misc/timsort.py | 943 ++
 .../pytorch/numba/numba/mviewbuf.c | 370 +
 .../pytorch/numba/numba/np/__init__.py | 0
 .../pytorch/numba/numba/np/arraymath.py | 4881 ++++++
 .../pytorch/numba/numba/np/arrayobj.py | 6075 +++++++
 .../pytorch/numba/numba/np/extensions.py | 10 +
 .../pytorch/numba/numba/np/linalg.py | 2763 +++
 .../pytorch/numba/numba/np/npdatetime.py | 855 +
 .../numba/numba/np/npdatetime_helpers.py | 212 +
 .../pytorch/numba/numba/np/npyfuncs.py | 1624 ++
 .../pytorch/numba/numba/np/npyimpl.py | 613 +
 .../pytorch/numba/numba/np/numpy_support.py | 720 +
 .../pytorch/numba/numba/np/polynomial.py | 59 +
 .../pytorch/numba/numba/np/random/__init__.py | 0
 .../numba/numba/np/random/generator_core.py | 124 +
 .../numba/np/random/generator_methods.py | 73 +
 .../pytorch/numba/numba/np/ufunc/__init__.py | 32 +
 .../pytorch/numba/numba/np/ufunc/_internal.c | 804 +
 .../pytorch/numba/numba/np/ufunc/_internal.h | 27 +
 .../numba/numba/np/ufunc/_num_threads.c | 36 +
 .../pytorch/numba/numba/np/ufunc/_ufunc.c | 222 +
 .../numba/numba/np/ufunc/array_exprs.py | 406 +
 .../numba/numba/np/ufunc/decorators.py | 199 +
 .../numba/numba/np/ufunc/deviceufunc.py | 841 +
 .../pytorch/numba/numba/np/ufunc/dufunc.py | 323 +
 .../pytorch/numba/numba/np/ufunc/gufunc.py | 192 +
 .../numba/numba/np/ufunc/gufunc_scheduler.cpp | 441 +
 .../numba/numba/np/ufunc/gufunc_scheduler.h | 51 +
 .../pytorch/numba/numba/np/ufunc/omppool.cpp | 265 +
 .../pytorch/numba/numba/np/ufunc/parallel.py | 758 +
 .../pytorch/numba/numba/np/ufunc/sigparse.py | 63 +
 .../pytorch/numba/numba/np/ufunc/tbbpool.cpp | 352 +
 .../numba/numba/np/ufunc/ufuncbuilder.py | 394 +
 .../pytorch/numba/numba/np/ufunc/workqueue.c | 656 +
 .../pytorch/numba/numba/np/ufunc/workqueue.h | 65 +
 .../pytorch/numba/numba/np/ufunc/wrappers.py | 743 +
 .../pytorch/numba/numba/np/ufunc_db.py | 1153 ++
 .../pytorch/numba/numba/np/unsafe/__init__.py | 0
 .../pytorch/numba/numba/np/unsafe/ndarray.py | 80 +
 .../pytorch/numba/numba/parfors/__init__.py | 1 +
 .../numba/numba/parfors/array_analysis.py | 3260 ++++
 .../pytorch/numba/numba/parfors/parfor.py | 5016 ++++++
 .../numba/numba/parfors/parfor_lowering.py | 1923 +++
 .../numba/parfors/parfor_lowering_utils.py | 213 +
 .../pytorch/numba/numba/pycc/__init__.py | 100 +
 .../pytorch/numba/numba/pycc/cc.py | 306 +
 .../pytorch/numba/numba/pycc/compiler.py | 471 +
 .../pytorch/numba/numba/pycc/decorators.py | 72 +
 .../pytorch/numba/numba/pycc/llvm_types.py | 37 +
 .../pytorch/numba/numba/pycc/modulemixin.c | 205 +
 .../pytorch/numba/numba/pycc/platform.py | 273 +
 .../pytorch/numba/numba/pycc/pycc | 3 +
 .../pytorch/numba/numba/runtests.py | 9 +
 .../pytorch/numba/numba/scripts/__init__.py | 0
 .../numba/scripts/generate_lower_listing.py | 169 +
 .../pytorch/numba/numba/stencils/__init__.py | 0
 .../pytorch/numba/numba/stencils/stencil.py | 837 +
 .../numba/numba/stencils/stencilparfor.py | 944 ++
 .../pytorch/numba/numba/testing/__init__.py | 61 +
 .../pytorch/numba/numba/testing/__main__.py | 4 +
 .../pytorch/numba/numba/testing/_runtests.py | 114 +
 .../pytorch/numba/numba/testing/loader.py | 26 +
 .../pytorch/numba/numba/testing/main.py | 796 +
 .../pytorch/numba/numba/testing/notebook.py | 171 +
 .../pytorch/numba/numba/tests/__init__.py | 33 +
 .../numba/numba/tests/annotation_usecases.py | 16 +
 .../numba/numba/tests/cache_usecases.py | 184 +
 .../numba/numba/tests/cffi_usecases.py | 197 +
 .../numba/numba/tests/cfunc_cache_usecases.py | 70 +
 .../numba/tests/cloudpickle_main_class.py | 6 +
 .../numba/numba/tests/compile_with_pycc.py | 134 +
 .../numba/numba/tests/complex_usecases.py | 93 +
 .../numba/numba/tests/ctypes_usecases.py | 114 +
 .../numba/tests/doc_examples/__init__.py | 10 +
 .../numba/tests/doc_examples/test_examples.py | 202 +
 .../numba/tests/doc_examples/test_jitclass.py | 97 +
 .../test_literal_container_usage.py | 161 +
 .../doc_examples/test_literally_usage.py | 59 +
 .../doc_examples/test_llvm_pass_timings.py | 31 +
 .../doc_examples/test_numpy_generators.py | 38 +
 .../doc_examples/test_parallel_chunksize.py | 122 +
 .../tests/doc_examples/test_rec_array.py | 46 +
 .../doc_examples/test_structref_usage.py | 149 +
 .../doc_examples/test_typed_dict_usage.py | 111 +
 .../doc_examples/test_typed_list_usage.py | 95 +
 .../pytorch/numba/numba/tests/dummy_module.py | 4 +
 .../numba/numba/tests/enum_usecases.py | 55 +
 .../numba/numba/tests/error_usecases.py | 6 +
 .../pytorch/numba/numba/tests/gdb/__init__.py | 10 +
 .../numba/numba/tests/gdb/test_array_arg.py | 51 +
 .../numba/numba/tests/gdb/test_basic.py | 39 +
 .../numba/tests/gdb/test_break_on_symbol.py | 34 +
 .../tests/gdb/test_break_on_symbol_version.py | 65 +
 .../tests/gdb/test_conditional_breakpoint.py | 45 +
 .../numba/tests/gdb/test_pretty_print.py | 69 +
 .../pytorch/numba/numba/tests/gdb_support.py | 197 +
 .../numba/numba/tests/inlining_usecases.py | 45 +
 .../numba/numba/tests/matmul_usecase.py | 24 +
 .../numba/numba/tests/npyufunc/__init__.py | 10 +
 .../numba/tests/npyufunc/cache_usecases.py | 76 +
 .../numba/tests/npyufunc/test_caching.py | 228 +
 .../numba/numba/tests/npyufunc/test_dufunc.py | 125 +
 .../numba/numba/tests/npyufunc/test_errors.py | 174 +
 .../numba/numba/tests/npyufunc/test_gufunc.py | 481 +
 .../npyufunc/test_parallel_env_variable.py | 38 +
 .../tests/npyufunc/test_parallel_low_work.py | 44 +
 .../npyufunc/test_parallel_ufunc_issues.py | 128 +
 .../numba/numba/tests/npyufunc/test_ufunc.py | 145 +
 .../tests/npyufunc/test_ufuncbuilding.py | 369 +
 .../tests/npyufunc/test_vectorize_decor.py | 151 +
 .../numba/tests/orphaned_semaphore_usecase.py | 24 +
 .../numba/numba/tests/overload_usecases.py | 28 +
 .../numba/tests/parfors_cache_usecases.py | 66 +
 .../numba/tests/parfors_max_label_error.py | 68 +
 .../numba/numba/tests/pdlike_usecase.py | 306 +
 .../tests/pycc_distutils_usecase/__init__.py | 0
 .../pycc_distutils_usecase/nested/__init__.py | 0
 .../nested/source_module.py | 20 +
 .../pycc_distutils_usecase/setup_distutils.py | 15 +
 .../setup_distutils_nested.py | 15 +
 .../setup_setuptools.py | 15 +
 .../setup_setuptools_nested.py | 15 +
 .../pycc_distutils_usecase/source_module.py | 18 +
 .../numba/numba/tests/recursion_usecases.py | 228 +
 .../numba/numba/tests/serialize_usecases.py | 114 +
 .../pytorch/numba/numba/tests/support.py | 1200 ++
 .../numba/numba/tests/test_alignment.py | 40 +
 .../numba/numba/tests/test_analysis.py | 1050 ++
 .../numba/numba/tests/test_annotations.py | 251 +
 .../pytorch/numba/numba/tests/test_api.py | 90 +
 .../numba/numba/tests/test_array_analysis.py | 1140 ++
 .../numba/numba/tests/test_array_attr.py | 389 +
 .../numba/numba/tests/test_array_constants.py | 193 +
 .../numba/numba/tests/test_array_exprs.py | 742 +
 .../numba/numba/tests/test_array_iterators.py | 549 +
 .../numba/tests/test_array_manipulation.py | 1256 ++
 .../numba/numba/tests/test_array_methods.py | 1646 ++
 .../numba/tests/test_array_reductions.py | 1214 ++
 .../numba/numba/tests/test_array_return.py | 41 +
 .../numba/numba/tests/test_asnumbatype.py | 170 +
 .../numba/numba/tests/test_auto_constants.py | 40 +
 .../numba/numba/tests/test_blackscholes.py | 125 +
 .../numba/numba/tests/test_boundscheck.py | 266 +
 .../numba/numba/tests/test_buffer_protocol.py | 289 +
 .../numba/numba/tests/test_builtins.py | 1404 ++
 .../numba/numba/tests/test_byteflow.py | 94 +
 .../pytorch/numba/numba/tests/test_caching.py | 1076 ++
 .../pytorch/numba/numba/tests/test_casting.py | 140 +
 .../pytorch/numba/numba/tests/test_cffi.py | 194 +
 .../pytorch/numba/numba/tests/test_cfunc.py | 412 +
 .../pytorch/numba/numba/tests/test_cgutils.py | 152 +
 .../numba/numba/tests/test_chained_assign.py | 145 +
 .../numba/numba/tests/test_chrome_trace.py | 51 +
 .../pytorch/numba/numba/tests/test_cli.py | 281 +
 .../pytorch/numba/numba/tests/test_closure.py | 575 +
 .../pytorch/numba/numba/tests/test_codegen.py | 261 +
 .../numba/numba/tests/test_compile_cache.py | 153 +
 .../numba/numba/tests/test_compiler_flags.py | 111 +
 .../numba/numba/tests/test_compiler_lock.py | 23 +
 .../pytorch/numba/numba/tests/test_complex.py | 315 +
 .../numba/numba/tests/test_comprehension.py | 527 +
 .../tests/test_conditions_as_predicates.py | 193 +
 .../pytorch/numba/numba/tests/test_config.py | 123 +
 .../numba/numba/tests/test_conversion.py | 233 +
 .../numba/numba/tests/test_copy_propagate.py | 171 +
 .../pytorch/numba/numba/tests/test_ctypes.py | 261 +
 .../numba/numba/tests/test_dataflow.py | 192 +
 .../numba/numba/tests/test_datamodel.py | 219 +
 .../pytorch/numba/numba/tests/test_debug.py | 314 +
 .../numba/numba/tests/test_debuginfo.py | 751 +
 .../numba/numba/tests/test_deprecations.py | 59 +
 .../numba/numba/tests/test_dictimpl.py | 651 +
 .../numba/numba/tests/test_dictobject.py | 2429 +++
 .../pytorch/numba/numba/tests/test_dicts.py | 172 +
 .../numba/numba/tests/test_dispatcher.py | 1227 ++
 .../numba/numba/tests/test_dummyarray.py | 350 +
 .../numba/numba/tests/test_dyn_array.py | 1805 ++
 .../numba/numba/tests/test_dyn_func.py | 43 +
 .../numba/numba/tests/test_entrypoints.py | 230 +
 .../pytorch/numba/numba/tests/test_enums.py | 180 +
 .../numba/numba/tests/test_errorhandling.py | 496 +
 .../numba/numba/tests/test_errormodels.py | 28 +
 .../pytorch/numba/numba/tests/test_event.py | 217 +
 .../numba/numba/tests/test_exceptions.py | 330 +
 .../numba/numba/tests/test_extended_arg.py | 42 +
 .../numba/numba/tests/test_extending.py | 2077 +++
 .../numba/numba/tests/test_extending_types.py | 170 +
 .../numba/numba/tests/test_fancy_indexing.py | 261 +
 .../numba/numba/tests/test_fastmath.py | 123 +
 .../numba/numba/tests/test_firstlinefinder.py | 119 +
 .../numba/numba/tests/test_flow_control.py | 1311 ++
 .../numba/numba/tests/test_func_interface.py | 43 +
 .../numba/numba/tests/test_func_lifetime.py | 164 +
 .../numba/numba/tests/test_funcdesc.py | 61 +
 .../numba/numba/tests/test_function_type.py | 1261 ++
 .../numba/numba/tests/test_gdb_bindings.py | 271 +
 .../numba/numba/tests/test_gdb_dwarf.py | 261 +
 .../numba/numba/tests/test_generators.py | 682 +
 .../pytorch/numba/numba/tests/test_gil.py | 183 +
 .../pytorch/numba/numba/tests/test_globals.py | 266 +
 .../pytorch/numba/numba/tests/test_hashing.py | 383 +
 .../pytorch/numba/numba/tests/test_heapq.py | 489 +
 .../pytorch/numba/numba/tests/test_help.py | 92 +
 .../pytorch/numba/numba/tests/test_import.py | 112 +
 .../numba/numba/tests/test_indexing.py | 1140 ++
 .../numba/numba/tests/test_init_utils.py | 42 +
 .../numba/numba/tests/test_inlining.py | 286 +
 .../numba/numba/tests/test_interpreter.py | 1150 ++
 .../numba/numba/tests/test_interproc.py | 47 +
 .../numba/numba/tests/test_intwidth.py | 90 +
 .../pytorch/numba/numba/tests/test_ir.py | 560 +
 .../numba/numba/tests/test_ir_inlining.py | 1544 ++
 .../numba/numba/tests/test_ir_utils.py | 273 +
 .../numba/numba/tests/test_itanium_mangler.py | 80 +
 .../numba/numba/tests/test_iteration.py | 254 +
 .../numba/numba/tests/test_jit_module.py | 145 +
 .../numba/numba/tests/test_jitclasses.py | 1878 +++
 .../numba/numba/tests/test_jitmethod.py | 72 +
 .../pytorch/numba/numba/tests/test_linalg.py | 2691 +++
 .../numba/numba/tests/test_listimpl.py | 527 +
 .../numba/numba/tests/test_listobject.py | 1638 ++
 .../pytorch/numba/numba/tests/test_lists.py | 1832 ++
 .../numba/tests/test_literal_dispatch.py | 400 +
 .../numba/tests/test_llvm_pass_timings.py | 124 +
 .../numba/tests/test_llvm_version_check.py | 42 +
 .../pytorch/numba/numba/tests/test_locals.py | 18 +
 .../numba/numba/tests/test_looplifting.py | 604 +
 .../test_make_function_to_jit_function.py | 284 +
 .../numba/numba/tests/test_mandelbrot.py | 35 +
 .../numba/numba/tests/test_mangling.py | 41 +
 .../numba/tests/test_map_filter_reduce.py | 91 +
 .../pytorch/numba/numba/tests/test_mathlib.py | 655 +
 .../pytorch/numba/numba/tests/test_maxmin.py | 41 +
 .../numba/tests/test_mixed_tuple_unroller.py | 1984 +++
 .../numba/numba/tests/test_moved_modules.py | 30 +
 .../pytorch/numba/numba/tests/test_multi3.py | 43 +
 .../pytorch/numba/numba/tests/test_nan.py | 40 +
 .../numba/tests/test_ndarray_subclasses.py | 346 +
 .../numba/numba/tests/test_nested_calls.py | 146 +
 .../numba/numba/tests/test_np_functions.py | 4841 ++++++
 .../numba/numba/tests/test_np_randomgen.py | 153 +
 .../numba/numba/tests/test_npdatetime.py | 1089 ++
 .../pytorch/numba/numba/tests/test_nrt.py | 696 +
 .../numba/numba/tests/test_nrt_refct.py | 114 +
 .../numba/numba/tests/test_num_threads.py | 630 +
 .../numba/numba/tests/test_numberctor.py | 250 +
 .../pytorch/numba/numba/tests/test_numbers.py | 90 +
 .../pytorch/numba/numba/tests/test_numconv.py | 38 +
 .../numba/numba/tests/test_numpy_support.py | 437 +
 .../numba/numba/tests/test_numpyadapt.py | 43 +
 .../numba/numba/tests/test_obj_lifetime.py | 503 +
 .../numba/numba/tests/test_object_mode.py | 192 +
 .../pytorch/numba/numba/tests/test_objects.py | 69 +
 .../numba/numba/tests/test_operators.py | 1612 ++
 .../numba/numba/tests/test_optional.py | 264 +
 .../pytorch/numba/numba/tests/test_overlap.py | 134 +
 .../numba/tests/test_parallel_backend.py | 1245 ++
 .../pytorch/numba/numba/tests/test_parfors.py | 4623 +++++
 .../numba/numba/tests/test_parfors_caching.py | 83 +
 .../numba/numba/tests/test_parfors_passes.py | 678 +
 .../numba/numba/tests/test_pipeline.py | 158 +
 .../numba/numba/tests/test_polynomial.py | 116 +
 .../tests/test_practical_lowering_issues.py | 212 +
 .../pytorch/numba/numba/tests/test_print.py | 197 +
 .../numba/numba/tests/test_profiler.py | 98 +
 .../pytorch/numba/numba/tests/test_pycc.py | 429 +
 .../numba/numba/tests/test_python_int.py | 54 +
 .../pytorch/numba/numba/tests/test_random.py | 1677 ++
 .../pytorch/numba/numba/tests/test_range.py | 196 +
 .../numba/tests/test_recarray_usecases.py | 148 +
 .../numba/numba/tests/test_record_dtype.py | 1737 ++
 .../numba/numba/tests/test_recursion.py | 124 +
 .../numba/numba/tests/test_refop_pruning.py | 160 +
 .../numba/numba/tests/test_remove_dead.py | 304 +
 .../numba/numba/tests/test_retargeting.py | 291 +
 .../numba/numba/tests/test_return_values.py | 78 +
 .../numba/numba/tests/test_runtests.py | 205 +
 .../numba/numba/tests/test_serialize.py | 325 +
 .../pytorch/numba/numba/tests/test_sets.py | 785 +
 .../pytorch/numba/numba/tests/test_slices.py | 259 +
 .../pytorch/numba/numba/tests/test_sort.py | 1199 ++
 .../pytorch/numba/numba/tests/test_ssa.py | 577 +
 .../numba/numba/tests/test_stencils.py | 2838 ++++
 .../numba/numba/tests/test_storeslice.py | 71 +
 .../numba/numba/tests/test_struct_ref.py | 429 +
 .../pytorch/numba/numba/tests/test_support.py | 346 +
 .../pytorch/numba/numba/tests/test_svml.py | 454 +
 .../numba/tests/test_sys_stdin_assignment.py | 67 +
 .../pytorch/numba/numba/tests/test_sysinfo.py | 189 +
 .../numba/tests/test_target_extension.py | 852 +
 .../tests/test_target_overloadselector.py | 148 +
 .../numba/numba/tests/test_threadsafety.py | 97 +
 .../pytorch/numba/numba/tests/test_tracing.py | 182 +
 .../numba/numba/tests/test_try_except.py | 851 +
 .../pytorch/numba/numba/tests/test_tuples.py | 777 +
 .../numba/numba/tests/test_typeconv.py | 295 +
 .../numba/numba/tests/test_typedlist.py | 1660 ++
 .../numba/tests/test_typedobjectutils.py | 68 +
 .../numba/numba/tests/test_typeguard.py | 35 +
 .../numba/numba/tests/test_typeinfer.py | 837 +
 .../numba/numba/tests/test_typenames.py | 17 +
 .../pytorch/numba/numba/tests/test_typeof.py | 569 +
 .../pytorch/numba/numba/tests/test_types.py | 879 +
 .../numba/numba/tests/test_typingerror.py | 237 +
 .../pytorch/numba/numba/tests/test_ufuncs.py | 1831 ++
 .../pytorch/numba/numba/tests/test_unicode.py | 2615 +++
 .../numba/numba/tests/test_unicode_array.py | 892 +
 .../numba/numba/tests/test_unicode_names.py | 61 +
 .../numba/numba/tests/test_unpack_sequence.py | 237 +
 .../tests/test_unpickle_without_module.py | 49 +
 .../numba/tests/test_unsafe_intrinsics.py | 230 +
 .../numba/numba/tests/test_usecases.py | 225 +
 .../numba/numba/tests/test_vectorization.py | 73 +
 .../test_vectorization_type_inference.py | 43 +
 .../numba/numba/tests/test_warnings.py | 222 +
 .../numba/numba/tests/test_withlifting.py | 1256 ++
 .../pytorch/numba/numba/tests/test_wrapper.py | 104 +
 .../numba/tests/threading_backend_usecases.py | 29 +
 .../pytorch/numba/numba/tests/usecases.py | 91 +
 .../pytorch/numba/numba/typed/__init__.py | 20 +
 .../pytorch/numba/numba/typed/dictimpl.py | 20 +
 .../pytorch/numba/numba/typed/dictobject.py | 1349 ++
 .../pytorch/numba/numba/typed/listobject.py | 1531 ++
 .../pytorch/numba/numba/typed/py.typed | 0
 .../pytorch/numba/numba/typed/typeddict.py | 355 +
 .../pytorch/numba/numba/typed/typedlist.py | 687 +
 .../numba/numba/typed/typedobjectutils.py | 199 +
 .../pytorch/numba/numba/types/__init__.py | 3 +
 .../pytorch/numba/requirements.txt | 1 +
 .../pointrcnn-iou/pytorch/numba/runtests.py | 16 +
 .../pointrcnn-iou/pytorch/numba/setup.py | 439 +
 .../pointrcnn-iou/pytorch/numba/version.txt | 1 +
 .../pointrcnn-iou/pytorch/numba/versioneer.py | 1046 ++
 .../.github/workflows/close_stale_issues.yml | 27 +
 .../pytorch/openpcdet/.gitignore | 31 +
 .../pointrcnn-iou/pytorch/openpcdet/LICENSE | 201 +
 .../pytorch/openpcdet/README-ILUVATAR.md | 6 +
 .../pointrcnn-iou/pytorch/openpcdet/README.md | 291 +
 .../pytorch/openpcdet/build_openpcdet.sh | 16 +
 .../pytorch/openpcdet/clean_openpcdet.sh | 10 +
 .../pytorch/openpcdet/docker/Dockerfile | 55 +
 .../pytorch/openpcdet/docker/README.md | 25 +
 .../pytorch/openpcdet/docker/cu116.Dockerfile | 83 +
 .../openpcdet/docs/CUSTOM_DATASET_TUTORIAL.md | 108 +
 .../pytorch/openpcdet/docs/DEMO.md | 51 +
 .../pytorch/openpcdet/docs/GETTING_STARTED.md | 273 +
 .../pytorch/openpcdet/docs/INSTALL.md | 38 +
 .../pytorch/openpcdet/docs/changelog.md | 40 +
 .../openpcdet/docs/dataset_vs_model.png | Bin 0 -> 126450 bytes
 .../pytorch/openpcdet/docs/demo.png | Bin 0 -> 620088 bytes
 .../guidelines_of_approaches/bevfusion.md | 35 +
 .../docs/guidelines_of_approaches/mppnet.md | 73 +
 .../openpcdet/docs/model_framework.png | Bin 0 -> 104083 bytes
 .../openpcdet/docs/multiple_models_demo.png | Bin 0 -> 235379 bytes
 .../pytorch/openpcdet/docs/open_mmlab.png | Bin 0 -> 262300 bytes
 .../pytorch/openpcdet/install_openpcdet.sh | 33 +
 .../pytorch/openpcdet/pcdet/__init__.py | 24 +
 .../pytorch/openpcdet/pcdet/config.py | 85 +
 .../openpcdet/pcdet/datasets/__init__.py | 82 +
 .../pcdet/datasets/argo2/__init__.py | 2 +
 .../pcdet/datasets/argo2/argo2_dataset.py | 537 +
 .../datasets/argo2/argo2_utils/constants.py | 12 +
 .../pcdet/datasets/argo2/argo2_utils/so3.py | 141 +
 .../pcdet/datasets/augmentor/__init__.py | 0
 .../datasets/augmentor/augmentor_utils.py | 658 +
 .../datasets/augmentor/data_augmentor.py | 319 +
 .../datasets/augmentor/database_sampler.py | 502 +
 .../pcdet/datasets/custom/__init__.py | 0
 .../pcdet/datasets/custom/custom_dataset.py | 283 +
 .../openpcdet/pcdet/datasets/dataset.py | 325 +
 .../pcdet/datasets/kitti/__init__.py | 0
 .../pcdet/datasets/kitti/kitti_dataset.py | 484 +
 .../kitti/kitti_object_eval_python/LICENSE | 21 +
 .../kitti/kitti_object_eval_python/README.md | 32 +
 .../kitti_object_eval_python/__init__.py | 0
 .../kitti/kitti_object_eval_python/eval.py | 808 +
 .../kitti_object_eval_python/evaluate.py | 33 +
 .../kitti_object_eval_python/kitti_common.py | 412 +
 .../kitti_object_eval_python/rotate_iou.py | 330 +
 .../pcdet/datasets/kitti/kitti_utils.py | 66 +
 .../openpcdet/pcdet/datasets/lyft/__init__.py | 0
 .../pcdet/datasets/lyft/lyft_dataset.py | 303 +
 .../datasets/lyft/lyft_mAP_eval/__init__.py | 0
 .../datasets/lyft/lyft_mAP_eval/lyft_eval.py | 435 +
 .../pcdet/datasets/lyft/lyft_utils.py | 332 +
 .../pcdet/datasets/nuscenes/__init__.py | 0
 .../datasets/nuscenes/nuscenes_dataset.py | 434 +
 .../pcdet/datasets/nuscenes/nuscenes_utils.py | 588 +
 .../openpcdet/pcdet/datasets/once/__init__.py | 0
 .../pcdet/datasets/once/once_dataset.py | 444 +
 .../datasets/once/once_eval/eval_utils.py | 53 +
 .../datasets/once/once_eval/evaluation.py | 420 +
 .../datasets/once/once_eval/iou_utils.py | 344 +
 .../pcdet/datasets/once/once_toolkits.py | 125 +
 .../pcdet/datasets/pandaset/__init__.py | 0
 .../datasets/pandaset/pandaset_dataset.py | 489 +
 .../pcdet/datasets/processor/__init__.py | 0
 .../datasets/processor/data_processor.py | 298 +
 .../processor/point_feature_encoder.py | 57 +
 .../pcdet/datasets/waymo/__init__.py | 0
 .../pcdet/datasets/waymo/waymo_dataset.py | 827 +
 .../pcdet/datasets/waymo/waymo_eval.py | 251 +
 .../pcdet/datasets/waymo/waymo_utils.py | 268 +
 .../openpcdet/pcdet/models/__init__.py | 54 +
 .../pcdet/models/backbones_2d/__init__.py | 7 +
 .../models/backbones_2d/base_bev_backbone.py | 351 +
 .../models/backbones_2d/fuser/__init__.py | 4 +
 .../models/backbones_2d/fuser/convfuser.py | 33 +
 .../backbones_2d/map_to_bev/__init__.py | 10 +
 .../map_to_bev/conv2d_collapse.py | 38 +
 .../map_to_bev/height_compression.py | 26 +
 .../map_to_bev/pointpillar_scatter.py | 73 +
 .../pcdet/models/backbones_3d/__init__.py | 22 +
 .../pcdet/models/backbones_3d/dsvt.py | 616 +
 .../focal_sparse_conv/SemanticSeg/__init__.py | 0
 .../SemanticSeg/basic_blocks.py | 65 +
 .../SemanticSeg/pyramid_ffn.py | 77 +
 .../SemanticSeg/sem_deeplabv3.py | 160 +
 .../focal_sparse_conv/__init__.py | 0
 .../focal_sparse_conv/focal_sparse_conv.py | 224 +
 .../focal_sparse_conv/focal_sparse_utils.py | 147 +
 .../pcdet/models/backbones_3d/pfe/__init__.py | 5 +
 .../backbones_3d/pfe/voxel_set_abstraction.py | 411 +
 .../models/backbones_3d/pointnet2_backbone.py | 206 +
 .../models/backbones_3d/spconv_backbone.py | 295 +
 .../models/backbones_3d/spconv_backbone_2d.py | 300 +
 .../backbones_3d/spconv_backbone_focal.py | 269 +
 .../backbones_3d/spconv_backbone_voxelnext.py | 225 +
 .../spconv_backbone_voxelnext2d.py | 219 +
 .../pcdet/models/backbones_3d/spconv_unet.py | 212 +
 .../pcdet/models/backbones_3d/vfe/__init__.py | 18 +
 .../backbones_3d/vfe/dynamic_mean_vfe.py | 76 +
 .../backbones_3d/vfe/dynamic_pillar_vfe.py | 240 +
 .../backbones_3d/vfe/dynamic_voxel_vfe.py | 106 +
 .../models/backbones_3d/vfe/image_vfe.py | 85 +
 .../vfe/image_vfe_modules/__init__.py | 0
 .../vfe/image_vfe_modules/f2v/__init__.py | 5 +
 .../f2v/frustum_grid_generator.py | 145 +
 .../image_vfe_modules/f2v/frustum_to_voxel.py | 54 +
 .../vfe/image_vfe_modules/f2v/sampler.py | 37 +
 .../vfe/image_vfe_modules/ffn/__init__.py | 5 +
 .../vfe/image_vfe_modules/ffn/ddn/__init__.py | 5 +
 .../ffn/ddn/ddn_deeplabv3.py | 24 +
 .../image_vfe_modules/ffn/ddn/ddn_template.py | 162 +
 .../ffn/ddn_loss/__init__.py | 5 +
 .../ffn/ddn_loss/balancer.py | 50 +
 .../ffn/ddn_loss/ddn_loss.py | 75 +
 .../vfe/image_vfe_modules/ffn/depth_ffn.py | 103 +
 .../pcdet/models/backbones_3d/vfe/mean_vfe.py | 31 +
 .../models/backbones_3d/vfe/pillar_vfe.py | 123 +
 .../models/backbones_3d/vfe/vfe_template.py | 22 +
 .../pcdet/models/backbones_image/__init__.py | 4 +
 .../backbones_image/img_neck/__init__.py | 4 +
 .../img_neck/generalized_lss.py | 76 +
 .../pcdet/models/backbones_image/swin.py | 736 +
 .../pcdet/models/dense_heads/__init__.py | 21 +
 .../models/dense_heads/anchor_head_multi.py | 373 +
 .../models/dense_heads/anchor_head_single.py | 75 +
 .../dense_heads/anchor_head_template.py | 275 +
 .../pcdet/models/dense_heads/center_head.py | 416 +
 .../models/dense_heads/point_head_box.py | 115 +
 .../models/dense_heads/point_head_simple.py | 91 +
 .../models/dense_heads/point_head_template.py | 210 +
 .../dense_heads/point_intra_part_head.py | 127 +
 .../dense_heads/target_assigner/__init__.py | 0
 .../target_assigner/anchor_generator.py | 79 +
 .../target_assigner/atss_target_assigner.py | 141 +
 .../axis_aligned_target_assigner.py | 210 +
 .../target_assigner/hungarian_assigner.py | 131 +
 .../models/dense_heads/transfusion_head.py | 479 +
 .../models/dense_heads/voxelnext_head.py | 559 +
 .../pcdet/models/detectors/PartA2_net.py | 31 +
 .../pcdet/models/detectors/__init__.py | 46 +
 .../pcdet/models/detectors/bevfusion.py | 101 +
 .../openpcdet/pcdet/models/detectors/caddn.py | 38 +
 .../pcdet/models/detectors/centerpoint.py | 50 +
 .../models/detectors/detector3d_template.py | 415 +
 .../pcdet/models/detectors/mppnet.py | 181 +
 .../pcdet/models/detectors/mppnet_e2e.py | 222 +
 .../pcdet/models/detectors/pillarnet.py | 50 +
 .../pcdet/models/detectors/point_rcnn.py | 30 +
 .../pcdet/models/detectors/pointpillar.py | 34 +
 .../pcdet/models/detectors/pv_rcnn.py | 36 +
 .../models/detectors/pv_rcnn_plusplus.py | 53 +
 .../pcdet/models/detectors/second_net.py | 34 +
 .../pcdet/models/detectors/second_net_iou.py | 177 +
 .../pcdet/models/detectors/transfusion.py | 50 +
 .../pcdet/models/detectors/voxel_rcnn.py | 37 +
 .../pcdet/models/detectors/voxelnext.py | 44 +
 .../pcdet/models/model_utils/__init__.py | 0
 .../models/model_utils/basic_block_2d.py | 34 +
 .../models/model_utils/centernet_utils.py | 385 +
 .../pcdet/models/model_utils/dsvt_utils.py | 150 +
 .../models/model_utils/model_nms_utils.py | 107 +
 .../pcdet/models/model_utils/mppnet_utils.py | 420 +
 .../pcdet/models/model_utils/swin_utils.py | 659 +
 .../models/model_utils/transfusion_utils.py | 102 +
 .../pcdet/models/roi_heads/__init__.py | 19 +
 .../pcdet/models/roi_heads/mppnet_head.py | 992 ++
 .../roi_heads/mppnet_memory_bank_e2e.py | 581 +
 .../pcdet/models/roi_heads/partA2_head.py | 224 +
 .../pcdet/models/roi_heads/pointrcnn_head.py | 179 +
 .../pcdet/models/roi_heads/pvrcnn_head.py | 175 +
 .../models/roi_heads/roi_head_template.py | 261 +
 .../pcdet/models/roi_heads/second_head.py | 188 +
 .../roi_heads/target_assigner/__init__.py | 0
 .../target_assigner/proposal_target_layer.py | 228 +
 .../pcdet/models/roi_heads/voxelrcnn_head.py | 262 +
 .../pcdet/models/view_transforms/__init__.py | 4 +
 .../pcdet/models/view_transforms/depth_lss.py | 258 +
 .../pytorch/openpcdet/pcdet/ops/__init__.py | 0
 .../openpcdet/pcdet/ops/bev_pool/__init__.py | 1 +
 .../openpcdet/pcdet/ops/bev_pool/bev_pool.py | 97 +
 .../pcdet/ops/bev_pool/src/bev_pool.cpp | 94 +
 .../pcdet/ops/bev_pool/src/bev_pool_cuda.cu | 98 +
 .../pcdet/ops/ingroup_inds/__init__.py | 0
 .../pcdet/ops/ingroup_inds/ingroup_inds_op.py | 31 +
 .../pcdet/ops/ingroup_inds/src/error.cuh | 18 +
 .../ops/ingroup_inds/src/ingroup_inds.cpp | 54 +
 .../ingroup_inds/src/ingroup_inds_kernel.cu | 77 +
 .../openpcdet/pcdet/ops/iou3d_nms/__init__.py | 0
 .../pcdet/ops/iou3d_nms/iou3d_nms_utils.py | 189 +
 .../pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp | 273 +
 .../pcdet/ops/iou3d_nms/src/iou3d_cpu.h | 11 +
 .../pcdet/ops/iou3d_nms/src/iou3d_nms.cpp | 235 +
 .../pcdet/ops/iou3d_nms/src/iou3d_nms.h | 17 +
 .../pcdet/ops/iou3d_nms/src/iou3d_nms_api.cpp | 20 +
 .../ops/iou3d_nms/src/iou3d_nms_kernel.cu | 464 +
 .../openpcdet/pcdet/ops/pointnet2/__init__.py | 0
 .../ops/pointnet2/pointnet2_batch/__init__.py | 0
 .../pointnet2_batch/pointnet2_modules.py | 174 +
 .../pointnet2_batch/pointnet2_utils.py | 290 +
 .../pointnet2_batch/src/ball_query.cpp | 39 +
 .../pointnet2_batch/src/ball_query_gpu.cu | 73 +
 .../pointnet2_batch/src/ball_query_gpu.h | 15 +
 .../pointnet2_batch/src/cuda_utils.h | 16 +
 .../pointnet2_batch/src/group_points.cpp | 36 +
 .../pointnet2_batch/src/group_points_gpu.cu | 92 +
 .../pointnet2_batch/src/group_points_gpu.h | 22 +
 .../pointnet2_batch/src/interpolate.cpp | 56 +
 .../pointnet2_batch/src/interpolate_gpu.cu | 169 +
 .../pointnet2_batch/src/interpolate_gpu.h | 30 +
 .../pointnet2_batch/src/pointnet2_api.cpp | 24 +
 .../pointnet2_batch/src/sampling.cpp | 46 +
 .../pointnet2_batch/src/sampling_gpu.cu | 260 +
 .../pointnet2_batch/src/sampling_gpu.h | 29 +
 .../ops/pointnet2/pointnet2_stack/__init__.py | 0
 .../pointnet2_stack/pointnet2_modules.py | 470 +
 .../pointnet2_stack/pointnet2_utils.py | 457 +
 .../pointnet2_stack/src/ball_query.cpp | 45 +
 .../pointnet2_stack/src/ball_query_gpu.cu | 90 +
 .../pointnet2_stack/src/ball_query_gpu.h | 25 +
 .../pointnet2_stack/src/cuda_utils.h | 9 +
 .../pointnet2_stack/src/group_points.cpp | 68 +
 .../pointnet2_stack/src/group_points_gpu.cu | 125 +
 .../pointnet2_stack/src/group_points_gpu.h | 31 +
 .../pointnet2_stack/src/interpolate.cpp | 107 +
 .../pointnet2_stack/src/interpolate_gpu.cu | 196 +
 .../pointnet2_stack/src/interpolate_gpu.h | 39 +
 .../pointnet2_stack/src/pointnet2_api.cpp | 31 +
 .../pointnet2_stack/src/sampling.cpp | 57 +
 .../pointnet2_stack/src/sampling_gpu.cu | 350 +
 .../pointnet2_stack/src/sampling_gpu.h | 23 +
 .../pointnet2_stack/src/vector_pool.cpp | 200 +
 .../pointnet2_stack/src/vector_pool_gpu.cu | 487 +
 .../pointnet2_stack/src/vector_pool_gpu.h | 71 +
 .../pointnet2_stack/src/voxel_query.cpp | 41 +
 .../pointnet2_stack/src/voxel_query_gpu.cu | 113 +
 .../pointnet2_stack/src/voxel_query_gpu.h | 19 +
 .../pointnet2_stack/voxel_pool_modules.py | 131 +
 .../pointnet2_stack/voxel_query_utils.py | 100 +
 .../pcdet/ops/roiaware_pool3d/__init__.py | 0
 .../roiaware_pool3d/roiaware_pool3d_utils.py | 111 +
 .../roiaware_pool3d/src/roiaware_pool3d.cpp | 177 +
 .../src/roiaware_pool3d_kernel.cu | 359 +
 .../pcdet/ops/roipoint_pool3d/__init__.py | 0
 .../roipoint_pool3d/roipoint_pool3d_utils.py | 67 +
 .../roipoint_pool3d/src/roipoint_pool3d.cpp | 60 +
 .../src/roipoint_pool3d_kernel.cu | 165 +
 .../pytorch/openpcdet/pcdet/utils/__init__.py | 0
 .../openpcdet/pcdet/utils/box_coder_utils.py | 222 +
 .../openpcdet/pcdet/utils/box_utils.py | 440 +
 .../pcdet/utils/calibration_kitti.py | 125 +
 .../openpcdet/pcdet/utils/common_utils.py | 295 +
 .../openpcdet/pcdet/utils/commu_utils.py | 182 +
 .../openpcdet/pcdet/utils/loss_utils.py | 649 +
 .../openpcdet/pcdet/utils/object3d_custom.py | 83 +
 .../openpcdet/pcdet/utils/object3d_kitti.py | 83 +
 .../openpcdet/pcdet/utils/spconv_utils.py | 38 +
 .../openpcdet/pcdet/utils/transform_utils.py | 91 +
 .../pytorch/openpcdet/requirements.txt | 4 +
 .../pointrcnn-iou/pytorch/openpcdet/setup.py | 137 +
 .../pytorch/openpcdet/tools/_init_path.py | 2 +
 .../pytorch/openpcdet/tools/demo.py | 112 +
 .../openpcdet/tools/eval_utils/eval_utils.py | 140 +
 .../create_integrated_database.py | 86 +
.../tools/scripts/slurm_test_mgpu.sh | 30 + .../tools/scripts/slurm_test_single.sh | 19 + .../openpcdet/tools/scripts/slurm_train.sh | 32 + .../openpcdet/tools/scripts/slurm_train_v2.sh | 30 + .../openpcdet/tools/scripts/torch_train.sh | 18 + .../pytorch/openpcdet/tools/test.py | 207 + .../pytorch/openpcdet/tools/train.py | 230 + .../train_utils/optimization/__init__.py | 68 + .../train_utils/optimization/fastai_optim.py | 264 + .../optimization/learning_schedules_fastai.py | 162 + .../tools/train_utils/train_utils.py | 272 + .../tools/visual_utils/open3d_vis_utils.py | 116 + .../tools/visual_utils/visualize_utils.py | 215 + .../pointrcnn-iou/pytorch/spconv/.gitignore | 109 + .../pointrcnn-iou/pytorch/spconv/.gitmodules | 3 + .../pytorch/spconv/CMakeLists.txt | 50 + .../pointrcnn-iou/pytorch/spconv/LICENSE | 201 + .../pytorch/spconv/README-ILUVATAR.md | 16 + .../pointrcnn-iou/pytorch/spconv/README.md | 142 + .../pytorch/spconv/build_spconv.sh | 33 + .../pytorch/spconv/clean_spconv.sh | 9 + .../pytorch/spconv/include/paramsgrid.h | 62 + .../pytorch/spconv/include/prettyprint.h | 445 + .../pytorch/spconv/include/pybind11_utils.h | 61 + .../pytorch/spconv/include/spconv/box_iou.h | 103 + .../pytorch/spconv/include/spconv/geometry.h | 297 + .../pytorch/spconv/include/spconv/indice.cu.h | 244 + .../pytorch/spconv/include/spconv/indice.h | 79 + .../pytorch/spconv/include/spconv/maxpool.h | 44 + .../pytorch/spconv/include/spconv/mp_helper.h | 47 + .../pytorch/spconv/include/spconv/nms.h | 201 + .../pytorch/spconv/include/spconv/nms_gpu.h | 18 + .../spconv/include/spconv/point2voxel.h | 94 + .../pytorch/spconv/include/spconv/pool_ops.h | 97 + .../spconv/include/spconv/reordering.cu.h | 161 + .../spconv/include/spconv/reordering.h | 40 + .../spconv/include/spconv/spconv_ops.h | 561 + .../include/tensorview/helper_kernel.cu.h | 81 + .../spconv/include/tensorview/helper_launch.h | 21 + .../spconv/include/tensorview/tensorview.h | 1146 ++ .../pytorch/spconv/include/torch_utils.h | 66 + .../pytorch/spconv/include/utility/timer.h | 54 + .../pytorch/spconv/install_spconv.sh | 35 + .../pointrcnn-iou/pytorch/spconv/setup.py | 89 + .../pytorch/spconv/spconv/__init__.py | 97 + .../pytorch/spconv/spconv/conv.py | 355 + .../pytorch/spconv/spconv/functional.py | 118 + .../pytorch/spconv/spconv/modules.py | 130 + .../pytorch/spconv/spconv/ops.py | 157 + .../pytorch/spconv/spconv/pool.py | 105 + .../pytorch/spconv/spconv/test_utils.py | 190 + .../pytorch/spconv/spconv/utils/__init__.py | 112 + .../pytorch/spconv/src/spconv/CMakeLists.txt | 9 + .../pytorch/spconv/src/spconv/all.cc | 34 + .../pytorch/spconv/src/spconv/indice.cc | 90 + .../pytorch/spconv/src/spconv/indice.cu | 158 + .../pytorch/spconv/src/spconv/maxpool.cc | 82 + .../pytorch/spconv/src/spconv/maxpool.cu | 472 + .../pytorch/spconv/src/spconv/reordering.cc | 70 + .../pytorch/spconv/src/spconv/reordering.cu | 155 + .../pytorch/spconv/src/utils/CMakeLists.txt | 22 + .../pytorch/spconv/src/utils/all.cc | 54 + .../pytorch/spconv/src/utils/nms.cu | 165 + .../pytorch/spconv/test/CMakeLists.txt | 27 + .../pytorch/spconv/test/src/catch_main.cpp | 15 + .../spconv/test/src/test_conv_rule.cpp | 127 + .../pytorch/spconv/test/test_SparseConv2d.py | 125 + .../pytorch/spconv/test/test_SparseConv3d.py | 160 + .../spconv/test/test_SparseConvTensor.py | 20 + .../spconv/test/test_SparseInverseConv2d.py | 90 + .../spconv/test/test_SparseInverseConv3d.py | 91 + .../pytorch/spconv/test/test_SubMConv2d.py | 126 + .../pytorch/spconv/test/test_SubMConv3d.py | 123 + 
.../pytorch/spconv/test/test_conv.py | 618 + .../spconv/third_party/catch2/catch.hpp | 14020 ++++++++++++++++ .../spconv/third_party/pybind11/.appveyor.yml | 35 + .../spconv/third_party/pybind11/.clang-format | 38 + .../spconv/third_party/pybind11/.clang-tidy | 75 + .../third_party/pybind11/.cmake-format.yaml | 73 + .../third_party/pybind11/.gitattributes | 1 + .../third_party/pybind11/.github/CODEOWNERS | 9 + .../pybind11/.github/CONTRIBUTING.md | 388 + .../.github/ISSUE_TEMPLATE/bug-report.yml | 45 + .../.github/ISSUE_TEMPLATE/config.yml | 8 + .../pybind11/.github/dependabot.yml | 7 + .../third_party/pybind11/.github/labeler.yml | 8 + .../pybind11/.github/labeler_merged.yml | 3 + .../pybind11/.github/matchers/pylint.json | 32 + .../pybind11/.github/pull_request_template.md | 19 + .../pybind11/.github/workflows/ci.yml | 956 ++ .../pybind11/.github/workflows/configure.yml | 80 + .../pybind11/.github/workflows/format.yml | 55 + .../pybind11/.github/workflows/labeler.yml | 16 + .../pybind11/.github/workflows/pip.yml | 110 + .../pybind11/.github/workflows/upstream.yml | 112 + .../spconv/third_party/pybind11/.gitignore | 45 + .../pybind11/.pre-commit-config.yaml | 170 + .../third_party/pybind11/.readthedocs.yml | 3 + .../third_party/pybind11/CMakeLists.txt | 299 + .../spconv/third_party/pybind11/LICENSE | 29 + .../spconv/third_party/pybind11/MANIFEST.in | 5 + .../spconv/third_party/pybind11/README.rst | 180 + .../spconv/third_party/pybind11/docs/Doxyfile | 21 + .../pybind11/docs/_static/css/custom.css | 3 + .../pybind11/docs/advanced/cast/chrono.rst | 81 + .../pybind11/docs/advanced/cast/custom.rst | 93 + .../pybind11/docs/advanced/cast/eigen.rst | 310 + .../docs/advanced/cast/functional.rst | 109 + .../pybind11/docs/advanced/cast/index.rst | 43 + .../pybind11/docs/advanced/cast/overview.rst | 170 + .../pybind11/docs/advanced/cast/stl.rst | 249 + .../pybind11/docs/advanced/cast/strings.rst | 292 + .../pybind11/docs/advanced/classes.rst | 1335 ++ .../pybind11/docs/advanced/embedding.rst | 262 + .../pybind11/docs/advanced/exceptions.rst | 398 + .../pybind11/docs/advanced/functions.rst | 614 + .../pybind11/docs/advanced/misc.rst | 337 + .../pybind11/docs/advanced/pycpp/index.rst | 13 + .../pybind11/docs/advanced/pycpp/numpy.rst | 455 + .../pybind11/docs/advanced/pycpp/object.rst | 286 + .../docs/advanced/pycpp/utilities.rst | 155 + .../pybind11/docs/advanced/smart_ptrs.rst | 174 + .../third_party/pybind11/docs/basics.rst | 307 + .../third_party/pybind11/docs/benchmark.py | 87 + .../third_party/pybind11/docs/benchmark.rst | 95 + .../third_party/pybind11/docs/changelog.rst | 2468 +++ .../third_party/pybind11/docs/classes.rst | 541 + .../third_party/pybind11/docs/cmake/index.rst | 8 + .../third_party/pybind11/docs/compiling.rst | 638 + .../spconv/third_party/pybind11/docs/conf.py | 369 + .../spconv/third_party/pybind11/docs/faq.rst | 307 + .../third_party/pybind11/docs/index.rst | 48 + .../third_party/pybind11/docs/installing.rst | 105 + .../third_party/pybind11/docs/limitations.rst | 72 + .../pybind11/docs/pybind11-logo.png | Bin 0 -> 61034 bytes .../docs/pybind11_vs_boost_python1.png | Bin 0 -> 44653 bytes .../docs/pybind11_vs_boost_python1.svg | 427 + .../docs/pybind11_vs_boost_python2.png | Bin 0 -> 41121 bytes .../docs/pybind11_vs_boost_python2.svg | 427 + .../third_party/pybind11/docs/reference.rst | 130 + .../third_party/pybind11/docs/release.rst | 97 + .../pybind11/docs/requirements.txt | 6 + .../third_party/pybind11/docs/upgrade.rst | 552 + .../pybind11/include/pybind11/attr.h | 678 + 
.../pybind11/include/pybind11/buffer_info.h | 193 + .../pybind11/include/pybind11/cast.h | 1665 ++ .../pybind11/include/pybind11/chrono.h | 225 + .../pybind11/include/pybind11/common.h | 2 + .../pybind11/include/pybind11/complex.h | 74 + .../pybind11/include/pybind11/detail/class.h | 742 + .../pybind11/include/pybind11/detail/common.h | 1169 ++ .../pybind11/include/pybind11/detail/descr.h | 158 + .../pybind11/include/pybind11/detail/init.h | 428 + .../include/pybind11/detail/internals.h | 562 + .../pybind11/detail/type_caster_base.h | 1010 ++ .../pybind11/include/pybind11/detail/typeid.h | 65 + .../pybind11/include/pybind11/eigen.h | 708 + .../pybind11/include/pybind11/embed.h | 277 + .../pybind11/include/pybind11/eval.h | 156 + .../pybind11/include/pybind11/functional.h | 130 + .../pybind11/include/pybind11/gil.h | 202 + .../pybind11/include/pybind11/iostream.h | 265 + .../pybind11/include/pybind11/numpy.h | 1984 +++ .../pybind11/include/pybind11/operators.h | 201 + .../pybind11/include/pybind11/options.h | 76 + .../pybind11/include/pybind11/pybind11.h | 2864 ++++ .../pybind11/include/pybind11/pytypes.h | 2392 +++ .../pybind11/include/pybind11/stl.h | 425 + .../include/pybind11/stl/filesystem.h | 116 + .../pybind11/include/pybind11/stl_bind.h | 785 + .../spconv/third_party/pybind11/noxfile.py | 97 + .../third_party/pybind11/pybind11/__init__.py | 16 + .../third_party/pybind11/pybind11/__main__.py | 49 + .../third_party/pybind11/pybind11/_version.py | 12 + .../third_party/pybind11/pybind11/commands.py | 25 + .../third_party/pybind11/pybind11/py.typed | 0 .../pybind11/pybind11/setup_helpers.py | 504 + .../third_party/pybind11/pyproject.toml | 61 + .../spconv/third_party/pybind11/setup.cfg | 50 + .../spconv/third_party/pybind11/setup.py | 149 + .../third_party/pybind11/tests/CMakeLists.txt | 558 + .../third_party/pybind11/tests/conftest.py | 213 + .../pybind11/tests/constructor_stats.h | 322 + .../pybind11/tests/cross_module_gil_utils.cpp | 45 + ...s_module_interleaved_error_already_set.cpp | 51 + .../spconv/third_party/pybind11/tests/env.py | 28 + .../tests/extra_python_package/pytest.ini | 0 .../tests/extra_python_package/test_files.py | 274 + .../tests/extra_setuptools/pytest.ini | 0 .../extra_setuptools/test_setuphelper.py | 151 + .../pybind11/tests/local_bindings.h | 92 + .../third_party/pybind11/tests/object.h | 205 + .../tests/pybind11_cross_module_tests.cpp | 149 + .../pybind11/tests/pybind11_tests.cpp | 117 + .../pybind11/tests/pybind11_tests.h | 85 + .../third_party/pybind11/tests/pytest.ini | 22 + .../pybind11/tests/requirements.txt | 9 + .../third_party/pybind11/tests/test_async.cpp | 25 + .../third_party/pybind11/tests/test_async.py | 24 + .../pybind11/tests/test_buffers.cpp | 224 + .../pybind11/tests/test_buffers.py | 163 + .../pybind11/tests/test_builtin_casters.cpp | 382 + .../pybind11/tests/test_builtin_casters.py | 526 + .../pybind11/tests/test_call_policies.cpp | 115 + .../pybind11/tests/test_call_policies.py | 247 + .../pybind11/tests/test_callbacks.cpp | 243 + .../pybind11/tests/test_callbacks.py | 195 + .../pybind11/tests/test_chrono.cpp | 81 + .../third_party/pybind11/tests/test_chrono.py | 209 + .../third_party/pybind11/tests/test_class.cpp | 619 + .../third_party/pybind11/tests/test_class.py | 471 + .../tests/test_cmake_build/CMakeLists.txt | 84 + .../pybind11/tests/test_cmake_build/embed.cpp | 23 + .../installed_embed/CMakeLists.txt | 28 + .../installed_function/CMakeLists.txt | 39 + .../installed_target/CMakeLists.txt | 46 + 
.../pybind11/tests/test_cmake_build/main.cpp | 6 + .../subdirectory_embed/CMakeLists.txt | 41 + .../subdirectory_function/CMakeLists.txt | 35 + .../subdirectory_target/CMakeLists.txt | 41 + .../pybind11/tests/test_cmake_build/test.py | 8 + .../pybind11/tests/test_const_name.cpp | 55 + .../pybind11/tests/test_const_name.py | 29 + .../tests/test_constants_and_functions.cpp | 159 + .../tests/test_constants_and_functions.py | 52 + .../pybind11/tests/test_copy_move.cpp | 295 + .../pybind11/tests/test_copy_move.py | 132 + .../tests/test_custom_type_casters.cpp | 209 + .../tests/test_custom_type_casters.py | 120 + .../pybind11/tests/test_custom_type_setup.cpp | 41 + .../pybind11/tests/test_custom_type_setup.py | 48 + .../pybind11/tests/test_docstring_options.cpp | 88 + .../pybind11/tests/test_docstring_options.py | 41 + .../third_party/pybind11/tests/test_eigen.cpp | 401 + .../third_party/pybind11/tests/test_eigen.py | 775 + .../pybind11/tests/test_embed/CMakeLists.txt | 47 + .../pybind11/tests/test_embed/catch.cpp | 27 + .../tests/test_embed/external_module.cpp | 20 + .../tests/test_embed/test_interpreter.cpp | 395 + .../tests/test_embed/test_interpreter.py | 14 + .../tests/test_embed/test_trampoline.py | 16 + .../third_party/pybind11/tests/test_enum.cpp | 133 + .../third_party/pybind11/tests/test_enum.py | 264 + .../third_party/pybind11/tests/test_eval.cpp | 118 + .../third_party/pybind11/tests/test_eval.py | 50 + .../pybind11/tests/test_eval_call.py | 4 + .../pybind11/tests/test_exceptions.cpp | 337 + .../pybind11/tests/test_exceptions.h | 13 + .../pybind11/tests/test_exceptions.py | 362 + .../tests/test_factory_constructors.cpp | 430 + .../tests/test_factory_constructors.py | 516 + .../pybind11/tests/test_gil_scoped.cpp | 47 + .../pybind11/tests/test_gil_scoped.py | 93 + .../pybind11/tests/test_iostream.cpp | 126 + .../pybind11/tests/test_iostream.py | 295 + .../tests/test_kwargs_and_defaults.cpp | 273 + .../tests/test_kwargs_and_defaults.py | 390 + .../pybind11/tests/test_local_bindings.cpp | 106 + .../pybind11/tests/test_local_bindings.py | 256 + .../tests/test_methods_and_attributes.cpp | 459 + .../tests/test_methods_and_attributes.py | 527 + .../pybind11/tests/test_modules.cpp | 125 + .../pybind11/tests/test_modules.py | 121 + .../tests/test_multiple_inheritance.cpp | 341 + .../tests/test_multiple_inheritance.py | 493 + .../pybind11/tests/test_numpy_array.cpp | 524 + .../pybind11/tests/test_numpy_array.py | 587 + .../pybind11/tests/test_numpy_dtypes.cpp | 614 + .../pybind11/tests/test_numpy_dtypes.py | 446 + .../pybind11/tests/test_numpy_vectorize.cpp | 107 + .../pybind11/tests/test_numpy_vectorize.py | 266 + .../pybind11/tests/test_opaque_types.cpp | 77 + .../pybind11/tests/test_opaque_types.py | 58 + .../tests/test_operator_overloading.cpp | 288 + .../tests/test_operator_overloading.py | 152 + .../pybind11/tests/test_pickling.cpp | 194 + .../pybind11/tests/test_pickling.py | 93 + .../pybind11/tests/test_pytypes.cpp | 759 + .../pybind11/tests/test_pytypes.py | 741 + .../tests/test_sequences_and_iterators.cpp | 562 + .../tests/test_sequences_and_iterators.py | 243 + .../pybind11/tests/test_smart_ptr.cpp | 470 + .../pybind11/tests/test_smart_ptr.py | 315 + .../third_party/pybind11/tests/test_stl.cpp | 545 + .../third_party/pybind11/tests/test_stl.py | 377 + .../pybind11/tests/test_stl_binders.cpp | 152 + .../pybind11/tests/test_stl_binders.py | 311 + .../tests/test_tagbased_polymorphic.cpp | 147 + .../tests/test_tagbased_polymorphic.py | 28 + .../pybind11/tests/test_thread.cpp | 66 + 
.../third_party/pybind11/tests/test_thread.py | 42 + .../third_party/pybind11/tests/test_union.cpp | 22 + .../third_party/pybind11/tests/test_union.py | 8 + .../pybind11/tests/test_virtual_functions.cpp | 591 + .../pybind11/tests/test_virtual_functions.py | 459 + .../pybind11/tests/valgrind-numpy-scipy.supp | 140 + .../pybind11/tests/valgrind-python.supp | 117 + .../pybind11/tools/FindCatch.cmake | 72 + .../pybind11/tools/FindEigen3.cmake | 86 + .../pybind11/tools/FindPythonLibsNew.cmake | 281 + .../third_party/pybind11/tools/check-style.sh | 44 + .../pybind11/tools/cmake_uninstall.cmake.in | 23 + .../third_party/pybind11/tools/libsize.py | 36 + .../pybind11/tools/make_changelog.py | 63 + .../pybind11/tools/pybind11Common.cmake | 385 + .../pybind11/tools/pybind11Config.cmake.in | 231 + .../pybind11/tools/pybind11NewTools.cmake | 254 + .../pybind11/tools/pybind11Tools.cmake | 227 + .../third_party/pybind11/tools/pyproject.toml | 3 + .../pybind11/tools/setup_global.py.in | 59 + .../pybind11/tools/setup_main.py.in | 40 +
 1515 files changed, 454893 insertions(+)

create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/README.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.coveragerc create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.flake8 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitattributes create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/CODEOWNERS create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Bug_report.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Feature_request.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/config.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/first_rc_checklist.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/sub_rc_checklist.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/PULL_REQUEST_TEMPLATE.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/workflows/stale.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitignore create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.pre-commit-config.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/.readthedocs.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/CHANGE_LOG create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/CONTRIBUTING.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSE create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSES.third-party create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/MANIFEST.in create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/README.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/azure-pipelines.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/bin/numba create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/build_numba.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/appveyor/run_with_env.cmd create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-linux-macos.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-windows.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/bld.bat create mode 100644
cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/build.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/conda_build_config.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/license.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/meta.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.bat create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/build.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/axis.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/build.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/MacOSX10.10.sdk.checksum create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/after_success.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.cmd create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/install_miniconda.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.cmd create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.cmd create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/clean_numba.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/codecov.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/contrib/valgrind-numba.supp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/Makefile create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/js/modernizr.min.js create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-blue-icon-rgb.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-white-icon-rgb.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/rtd-overrides.css create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_templates/EMPTY create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/README.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/dagmap.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/jquery.graphviz.svg.js create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/render.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/template.html create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/environment.yml create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/gh-pages.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/make.bat create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/requirements.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/_ext/ghfiles.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/conf.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/host.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/kernel.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/libdevice.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/memory.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/types.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/bindings.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/caching.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cooperative_groups.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_array_interface.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_ffi.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cudapysupported.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-functions.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-management.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/examples.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/external-memory.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/faq.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/fastmath.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/intrinsics.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ipc.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/kernels.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_final.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_initial.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/memory.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/overview.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/random.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/reduction.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/simulator.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ufunc.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/architecture.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/caching.rst create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/compiler_pass_example.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/contributing.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/custom_pipeline.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/debugging.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/dispatching.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/environment.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/event_api.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/generators.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/hashing.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_example.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_overload_example.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inlining.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/listings.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/literal.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/live_variable_analysis.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/llvm_timings.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/mission.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/numba-runtime.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/release.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/repomap.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/rewrites.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/stencil.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/target_extension.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/threading_implementation.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/entrypoints.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/high-level.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/interval-example.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/low-level.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/mynorm.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/overloading-guide.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/template.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/glossary.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/index.rst create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/cfunc.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/extension-points.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/external-memory-management.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/integer-typing.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/jit-classes.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/np-where-override.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/recursion_callstack.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/type-inference.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/typing_recursion.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/aot-compilation.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/deprecation.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/envvars.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/fpsemantics.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/jit-compilation.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/numpysupported.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysemantics.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysupported.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/types.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/utils.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/release-notes.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/5minguide.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cfunc.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cli.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/examples.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/faq.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/generated-jit.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/installing.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit-module.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jitclass.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/parallel.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/performance-tips.rst create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/pycc.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/stencil.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/talks.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/threading-layer.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/troubleshoot.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/vectorize.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/withobjmode.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/install_numba.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/mypy.ini create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__main__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_arraystruct.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dispatcher.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfunc.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfuncmod.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helperlib.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helpermod.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_lapack.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_npymath_exports.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_numba_common.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_pymodule.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_random.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_unicodetype_db.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_version.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/capsulethunk.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/cext.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/utils.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle_fast.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/compat.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/analysis.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/pretty_annotate.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/template.html create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/type_annotations.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/base.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/boxing.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/bytecode.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/byteflow.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/caching.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callconv.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callwrapper.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ccallback.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cgutils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/codegen.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_lock.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_machinery.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/config.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/consts.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/controlflow.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu_options.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dataflow.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/manager.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/models.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/packer.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/registry.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/testing.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/debuginfo.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/decorators.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/descriptors.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dispatcher.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/entrypoints.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/environment.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/errors.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/event.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/extending.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/externals.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/fastmathpass.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/funcdesc.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/generators.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/imputils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/inline_closurecall.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/interpreter.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/intrinsics.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/itanium_mangler.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/llvm_bindings.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/lowering.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/object_mode_passes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/optional.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/options.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/overload_glue.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/postproc.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pylowering.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pythonapi.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/registry.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/removerefctpass.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/retarget.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/ir_print.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/registry.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_binop.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_getitem.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_raise.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_python.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_pythonmod.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/context.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt_external.h create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtdynmod.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtopt.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/serialize.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/sigutils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ssa.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/target_extension.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/targetconfig.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/tracing.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/transforms.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/_typeconv.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/castgraph.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/rules.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/test.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/typeconv.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/typeconv.hpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeconv/typeconv.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typed_passes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typeinfer.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/abstract.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/common.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/containers.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/function_type.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/functions.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/iterators.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/misc.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/npytypes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/types/scalars.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/arraydecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/asnumbatype.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/bufproto.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/builtins.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/cffi_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/cmathdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/collections.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/context.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/ctypes_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/dictdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/enumdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/listdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/mathdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/npdatetime.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/npydecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/randomdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/setdecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/templates.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/typing/typeof.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/unsafe/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/unsafe/bytes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/unsafe/eh.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/unsafe/nrt.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/unsafe/refcount.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/untyped_passes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/withcontexts.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/builtins.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/charseq.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/cmathimpl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/enumimpl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/hashing.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/heapq.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/iterators.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/listobj.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/mathimpl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/numbers.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/printimpl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/randomimpl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/rangeobj.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/setobj.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/slicing.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/tupleobj.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/unicode.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/unicode_support.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/unsafe/__init__.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/unsafe/numbers.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cpython/unsafe/tuple.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/api.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/api_util.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/args.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/codegen.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/compiler.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cuda_paths.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/_extras.c create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/devicearray.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/devices.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/driver.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/drvapi.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/enums.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/error.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/libs.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/ndarray.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/nvvm.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/rtapi.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudadrv/runtime.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudaimpl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/cudamath.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/decorators.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/descriptor.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/device_init.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/dispatcher.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/errors.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/initialize.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/intrinsic_wrapper.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/kernels/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/kernels/reduction.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/kernels/transpose.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/libdevice.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/libdevicedecl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/libdevicefuncs.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/libdeviceimpl.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/mathimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/models.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/nvvmutils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/printimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/random.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/api.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/compiler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/devicearray.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/devices.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/driver.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/drvapi.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/error.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/libs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/nvvm.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/cudadrv/runtime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/kernel.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/kernelapi.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/reduction.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator/vector_types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/simulator_init.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/stubs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/target.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/testing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/data/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/data/cuda_include.cu
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/data/error.cu
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/data/jitlink.cu
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/data/jitlink.ptx
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/data/warn.cu
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_array_attr.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_context_stack.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_auto_context.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_driver.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_libraries.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_memory.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_cuda_ndarray.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_deallocations.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_detect.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_emm_plugins.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_events.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_host_alloc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_init.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_inline_ptx.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_ir_patch.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_linker.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_managed_alloc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_nvvm_driver.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_pinned.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_profiler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_ptds.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_reset_device.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_runtime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_select_device.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudadrv/test_streams.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/cache_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/extensions_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/jitlink.ptx
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/recursion_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_alignment.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_array.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_array_args.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_array_methods.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_atomics.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_blackscholes.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_boolean.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_caching.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_casting.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_compiler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_complex.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_complex_kernel.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_const_string.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_constmem.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_cooperative_groups.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_cuda_array_interface.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_datetime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_debug.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_debuginfo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_device_func.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_dispatcher.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_enums.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_errors.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_exception.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_extending.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_fastmath.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_forall.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_freevar.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_frexp_ldexp.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_globals.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_gufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_gufunc_scalar.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_gufunc_scheduling.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_idiv.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_inspect.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_intrinsics.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_ipc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_iterators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_lang.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_laplace.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_libdevice.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_lineinfo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_localmem.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_mandel.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_math.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_matmul.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_minmax.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_montecarlo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_multigpu.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_multiprocessing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_multithreads.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_nondet.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_operator.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_optimization.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_overload.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_powi.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_print.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_py2_div_issue.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_random.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_record_dtype.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_recursion.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_reduction.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_serialize.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_slicing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_sm.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_sm_creation.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_sync.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_transpose.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_userexc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_vector_type.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_vectorize.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_vectorize_complex.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_vectorize_decor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_vectorize_device.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_warning.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudapy/test_warp_ops.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudasim/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudasim/support.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/cudasim/test_cudasim_issues.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/ffi/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/ffi/functions.cu
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_cg.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_ffi.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_laplace.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_matmul.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_montecarlo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_random.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_reduction.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_sessionize.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/doc_examples/test_vecadd.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/nocuda/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/nocuda/test_import.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/nocuda/test_library_lookup.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/tests/nocuda/test_nvvm.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/vector_types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cuda/vectorizers.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/function_type.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/jitclass/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/jitclass/_box.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/jitclass/base.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/jitclass/boxing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/jitclass/decorators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/jitclass/overloads.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/experimental/structref.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/extending.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/mathnames.h
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/appdirs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/cffiimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/cmdlang.gdb
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/dummyarray.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/dump_style.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/findlib.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/firstlinefinder.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/gdb_hook.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/gdb_print_extension.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/help/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/help/inspector.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/init_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/inspection.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/literal.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/llvm_pass_timings.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/mergesort.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/numba_entry.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/numba_gdbinfo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/numba_sysinfo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/quicksort.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/special.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/misc/timsort.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/mviewbuf.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/arraymath.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/arrayobj.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/extensions.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/linalg.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/npdatetime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/npdatetime_helpers.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/npyfuncs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/npyimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/numpy_support.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/polynomial.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/random/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/random/generator_core.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/random/generator_methods.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/_internal.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/_internal.h
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/_num_threads.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/_ufunc.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/array_exprs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/decorators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/deviceufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/dufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/gufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/gufunc_scheduler.cpp
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/gufunc_scheduler.h
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/omppool.cpp
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/parallel.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/sigparse.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/tbbpool.cpp
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/ufuncbuilder.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/workqueue.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/workqueue.h
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc/wrappers.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/ufunc_db.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/unsafe/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/np/unsafe/ndarray.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/parfors/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/parfors/array_analysis.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/parfors/parfor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/parfors/parfor_lowering.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/parfors/parfor_lowering_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/cc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/compiler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/decorators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/llvm_types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/modulemixin.c
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/platform.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/pycc/pycc
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/runtests.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/scripts/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/scripts/generate_lower_listing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/stencils/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/stencils/stencil.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/stencils/stencilparfor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/testing/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/testing/__main__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/testing/_runtests.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/testing/loader.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/testing/main.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/testing/notebook.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/annotation_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/cache_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/cffi_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/cfunc_cache_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/cloudpickle_main_class.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/compile_with_pycc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/complex_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/ctypes_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_examples.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_jitclass.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_literal_container_usage.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_literally_usage.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_llvm_pass_timings.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_numpy_generators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_parallel_chunksize.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_rec_array.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_structref_usage.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_typed_dict_usage.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/doc_examples/test_typed_list_usage.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/dummy_module.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/enum_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/error_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/test_array_arg.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/test_basic.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/test_break_on_symbol.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/test_break_on_symbol_version.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/test_conditional_breakpoint.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb/test_pretty_print.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/gdb_support.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/inlining_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/matmul_usecase.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/cache_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_caching.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_dufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_errors.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_gufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_parallel_env_variable.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_parallel_low_work.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_parallel_ufunc_issues.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_ufunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_ufuncbuilding.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/npyufunc/test_vectorize_decor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/orphaned_semaphore_usecase.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/overload_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/parfors_cache_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/parfors_max_label_error.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pdlike_usecase.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/nested/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/nested/source_module.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/setup_distutils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/setup_distutils_nested.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/setup_setuptools.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/setup_setuptools_nested.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/pycc_distutils_usecase/source_module.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/recursion_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/serialize_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/support.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_alignment.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_analysis.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_annotations.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_api.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_analysis.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_attr.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_constants.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_exprs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_iterators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_manipulation.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_methods.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_reductions.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_array_return.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_asnumbatype.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_auto_constants.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_blackscholes.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_boundscheck.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_buffer_protocol.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_builtins.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_byteflow.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_caching.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_casting.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_cffi.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_cfunc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_cgutils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_chained_assign.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_chrome_trace.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_cli.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_closure.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_codegen.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_compile_cache.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_compiler_flags.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_compiler_lock.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_complex.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_comprehension.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_conditions_as_predicates.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_config.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_conversion.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_copy_propagate.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ctypes.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dataflow.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_datamodel.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_debug.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_debuginfo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_deprecations.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dictimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dictobject.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dicts.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dispatcher.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dummyarray.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dyn_array.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_dyn_func.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_entrypoints.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_enums.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_errorhandling.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_errormodels.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_event.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_exceptions.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_extended_arg.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_extending.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_extending_types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_fancy_indexing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_fastmath.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_firstlinefinder.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_flow_control.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_func_interface.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_func_lifetime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_funcdesc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_function_type.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_gdb_bindings.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_gdb_dwarf.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_generators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_gil.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_globals.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_hashing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_heapq.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_help.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_import.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_indexing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_init_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_inlining.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_interpreter.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_interproc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_intwidth.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ir.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ir_inlining.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ir_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_itanium_mangler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_iteration.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_jit_module.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_jitclasses.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_jitmethod.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_linalg.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_listimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_listobject.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_lists.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_literal_dispatch.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_llvm_pass_timings.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_llvm_version_check.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_locals.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_looplifting.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_make_function_to_jit_function.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_mandelbrot.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_mangling.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_map_filter_reduce.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_mathlib.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_maxmin.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_mixed_tuple_unroller.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_moved_modules.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_multi3.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_nan.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ndarray_subclasses.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_nested_calls.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_np_functions.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_np_randomgen.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_npdatetime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_nrt.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_nrt_refct.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_num_threads.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_numberctor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_numbers.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_numconv.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_numpy_support.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_numpyadapt.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_obj_lifetime.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_object_mode.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_objects.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_operators.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_optional.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_overlap.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_parallel_backend.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_parfors.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_parfors_caching.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_parfors_passes.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_pipeline.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_polynomial.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_practical_lowering_issues.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_print.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_profiler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_pycc.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_python_int.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_random.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_range.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_recarray_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_record_dtype.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_recursion.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_refop_pruning.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_remove_dead.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_retargeting.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_return_values.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_runtests.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_serialize.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_sets.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_slices.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_sort.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ssa.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_stencils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_storeslice.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_struct_ref.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_support.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_svml.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_sys_stdin_assignment.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_sysinfo.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_target_extension.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_target_overloadselector.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_threadsafety.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_tracing.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_try_except.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_tuples.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typeconv.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typedlist.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typedobjectutils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typeguard.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typeinfer.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typenames.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typeof.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_types.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_typingerror.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_ufuncs.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_unicode.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_unicode_array.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_unicode_names.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_unpack_sequence.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_unpickle_without_module.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_unsafe_intrinsics.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_vectorization.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_vectorization_type_inference.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_warnings.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_withlifting.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/test_wrapper.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/threading_backend_usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/tests/usecases.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/dictimpl.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/dictobject.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/listobject.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/py.typed
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/typeddict.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/typedlist.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/typed/typedobjectutils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/types/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/requirements.txt
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/runtests.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/setup.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/version.txt
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/numba/versioneer.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/.github/workflows/close_stale_issues.yml
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/.gitignore
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/LICENSE
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/README-ILUVATAR.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/README.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/build_openpcdet.sh
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/clean_openpcdet.sh
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docker/Dockerfile
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docker/README.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docker/cu116.Dockerfile
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/CUSTOM_DATASET_TUTORIAL.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/DEMO.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/GETTING_STARTED.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/INSTALL.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/changelog.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/dataset_vs_model.png
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/demo.png
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/guidelines_of_approaches/bevfusion.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/guidelines_of_approaches/mppnet.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/model_framework.png
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/multiple_models_demo.png
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/docs/open_mmlab.png
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/install_openpcdet.sh
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/config.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/argo2/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/argo2/argo2_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/argo2/argo2_utils/constants.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/argo2/argo2_utils/so3.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/augmentor/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/augmentor/augmentor_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/augmentor/data_augmentor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/augmentor/database_sampler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/custom/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/custom/custom_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/LICENSE
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/README.md
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/eval.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/evaluate.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/kitti_common.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_object_eval_python/rotate_iou.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/kitti/kitti_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/lyft/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/lyft/lyft_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/lyft/lyft_mAP_eval/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/lyft/lyft_mAP_eval/lyft_eval.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/lyft/lyft_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/nuscenes/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/nuscenes/nuscenes_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/nuscenes/nuscenes_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/once/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/once/once_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/once/once_eval/eval_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/once/once_eval/evaluation.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/once/once_eval/iou_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/once/once_toolkits.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/pandaset/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/pandaset/pandaset_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/processor/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/processor/data_processor.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/processor/point_feature_encoder.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/waymo/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/waymo/waymo_dataset.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/waymo/waymo_eval.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/datasets/waymo/waymo_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/base_bev_backbone.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/fuser/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/fuser/convfuser.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/map_to_bev/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/map_to_bev/conv2d_collapse.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/map_to_bev/height_compression.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_2d/map_to_bev/pointpillar_scatter.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/dsvt.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/basic_blocks.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/pyramid_ffn.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/SemanticSeg/sem_deeplabv3.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_conv.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/focal_sparse_conv/focal_sparse_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/pfe/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/pfe/voxel_set_abstraction.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/pointnet2_backbone.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/spconv_backbone.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/spconv_backbone_2d.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/spconv_backbone_focal.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/spconv_backbone_voxelnext.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/spconv_backbone_voxelnext2d.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/spconv_unet.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/dynamic_mean_vfe.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/dynamic_pillar_vfe.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/dynamic_voxel_vfe.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/f2v/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/f2v/frustum_grid_generator.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/f2v/frustum_to_voxel.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/f2v/sampler.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn/ddn_deeplabv3.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn/ddn_template.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn_loss/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn_loss/balancer.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/ddn_loss/ddn_loss.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/image_vfe_modules/ffn/depth_ffn.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/mean_vfe.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/pillar_vfe.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_3d/vfe/vfe_template.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_image/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_image/img_neck/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_image/img_neck/generalized_lss.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/backbones_image/swin.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/anchor_head_multi.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/anchor_head_single.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/anchor_head_template.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/center_head.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/point_head_box.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/point_head_simple.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/point_head_template.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/point_intra_part_head.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/target_assigner/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/target_assigner/anchor_generator.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/target_assigner/atss_target_assigner.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/target_assigner/axis_aligned_target_assigner.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/target_assigner/hungarian_assigner.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/transfusion_head.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/dense_heads/voxelnext_head.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/PartA2_net.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/bevfusion.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/caddn.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/centerpoint.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/detector3d_template.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/mppnet.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/mppnet_e2e.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/pillarnet.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/point_rcnn.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/pointpillar.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/pv_rcnn.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/pv_rcnn_plusplus.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/second_net.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/second_net_iou.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/transfusion.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/voxel_rcnn.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/detectors/voxelnext.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/basic_block_2d.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/centernet_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/dsvt_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/model_nms_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/mppnet_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/swin_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/model_utils/transfusion_utils.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/__init__.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/mppnet_head.py
 create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/mppnet_memory_bank_e2e.py
 create mode
100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/partA2_head.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/pointrcnn_head.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/pvrcnn_head.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/roi_head_template.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/second_head.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/target_assigner/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/target_assigner/proposal_target_layer.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/roi_heads/voxelrcnn_head.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/view_transforms/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/models/view_transforms/depth_lss.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/bev_pool/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/bev_pool/bev_pool.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/bev_pool/src/bev_pool.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/bev_pool/src/bev_pool_cuda.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/ingroup_inds/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/ingroup_inds/ingroup_inds_op.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/ingroup_inds/src/error.cuh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/ingroup_inds/src/ingroup_inds.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/ingroup_inds/src/ingroup_inds_kernel.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/iou3d_nms_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/src/iou3d_cpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/src/iou3d_nms.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/src/iou3d_nms_api.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/pointnet2_modules.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/pointnet2_utils.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/ball_query.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/ball_query_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/ball_query_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/cuda_utils.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/group_points.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/group_points_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/group_points_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/interpolate.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/interpolate_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/interpolate_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/pointnet2_api.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/sampling.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/sampling_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_batch/src/sampling_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/pointnet2_modules.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/pointnet2_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/ball_query.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/ball_query_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/ball_query_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/cuda_utils.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/group_points.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/group_points_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/group_points_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/interpolate.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/interpolate_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/interpolate_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/pointnet2_api.cpp create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/sampling.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/sampling_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/sampling_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/voxel_query.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/voxel_query_gpu.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/src/voxel_query_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/voxel_pool_modules.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/pointnet2/pointnet2_stack/voxel_query_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roiaware_pool3d/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roiaware_pool3d/roiaware_pool3d_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roipoint_pool3d/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roipoint_pool3d/roipoint_pool3d_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d_kernel.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/box_coder_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/box_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/calibration_kitti.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/common_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/commu_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/loss_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/object3d_custom.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/object3d_kitti.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/spconv_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/pcdet/utils/transform_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/requirements.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/setup.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/_init_path.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/demo.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/eval_utils/eval_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/process_tools/create_integrated_database.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/scripts/slurm_test_mgpu.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/scripts/slurm_test_single.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/scripts/slurm_train.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/scripts/slurm_train_v2.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/scripts/torch_train.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/test.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/train.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/train_utils/optimization/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/train_utils/optimization/fastai_optim.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/train_utils/optimization/learning_schedules_fastai.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/train_utils/train_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/visual_utils/open3d_vis_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/openpcdet/tools/visual_utils/visualize_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/.gitignore create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/.gitmodules create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/LICENSE create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/README-ILUVATAR.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/README.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/build_spconv.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/clean_spconv.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/paramsgrid.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/prettyprint.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/pybind11_utils.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/box_iou.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/geometry.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/indice.cu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/indice.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/maxpool.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/mp_helper.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/nms.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/nms_gpu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/point2voxel.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/pool_ops.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/reordering.cu.h 
create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/reordering.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/spconv/spconv_ops.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/tensorview/helper_kernel.cu.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/tensorview/helper_launch.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/tensorview/tensorview.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/torch_utils.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/include/utility/timer.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/install_spconv.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/setup.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/conv.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/functional.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/modules.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/ops.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/pool.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/test_utils.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/spconv/utils/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/all.cc create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/indice.cc create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/indice.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/maxpool.cc create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/maxpool.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/reordering.cc create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/spconv/reordering.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/utils/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/utils/all.cc create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/src/utils/nms.cu create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/src/catch_main.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/src/test_conv_rule.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SparseConv2d.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SparseConv3d.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SparseConvTensor.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SparseInverseConv2d.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SparseInverseConv3d.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SubMConv2d.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_SubMConv3d.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/test/test_conv.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/catch2/catch.hpp create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.appveyor.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.clang-format create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.clang-tidy create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.cmake-format.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.gitattributes create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/CODEOWNERS create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/CONTRIBUTING.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/ISSUE_TEMPLATE/config.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/dependabot.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/labeler.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/labeler_merged.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/matchers/pylint.json create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/pull_request_template.md create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/workflows/ci.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/workflows/configure.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/workflows/format.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/workflows/labeler.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/workflows/pip.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.github/workflows/upstream.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.gitignore create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.pre-commit-config.yaml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/.readthedocs.yml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/LICENSE create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/MANIFEST.in create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/README.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/Doxyfile create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/_static/css/custom.css create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/chrono.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/custom.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/eigen.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/functional.rst create mode 
100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/overview.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/stl.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/cast/strings.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/classes.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/embedding.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/exceptions.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/functions.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/misc.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/pycpp/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/pycpp/numpy.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/pycpp/object.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/pycpp/utilities.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/advanced/smart_ptrs.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/basics.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/benchmark.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/benchmark.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/changelog.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/classes.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/cmake/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/compiling.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/conf.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/faq.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/index.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/installing.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/limitations.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/pybind11-logo.png create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/pybind11_vs_boost_python1.png create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/pybind11_vs_boost_python1.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/pybind11_vs_boost_python2.png create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/pybind11_vs_boost_python2.svg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/reference.rst create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/release.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/requirements.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/docs/upgrade.rst create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/attr.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/buffer_info.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/cast.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/chrono.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/common.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/complex.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/class.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/common.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/descr.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/init.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/internals.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/type_caster_base.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/detail/typeid.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/eigen.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/embed.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/eval.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/functional.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/gil.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/iostream.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/numpy.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/operators.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/options.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/pybind11.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/pytypes.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/stl.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/stl/filesystem.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/include/pybind11/stl_bind.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/noxfile.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pybind11/__init__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pybind11/__main__.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pybind11/_version.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pybind11/commands.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pybind11/py.typed create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pybind11/setup_helpers.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/pyproject.toml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/setup.cfg create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/setup.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/conftest.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/constructor_stats.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/cross_module_gil_utils.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/cross_module_interleaved_error_already_set.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/env.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/extra_python_package/pytest.ini create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/extra_python_package/test_files.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/extra_setuptools/pytest.ini create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/extra_setuptools/test_setuphelper.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/local_bindings.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/object.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/pybind11_cross_module_tests.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/pybind11_tests.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/pybind11_tests.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/pytest.ini create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/requirements.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_async.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_async.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_buffers.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_buffers.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_builtin_casters.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_builtin_casters.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_call_policies.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_call_policies.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_callbacks.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_callbacks.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_chrono.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_chrono.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_class.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_class.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/embed.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/installed_embed/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/installed_function/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/installed_target/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/main.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/subdirectory_embed/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/subdirectory_function/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/subdirectory_target/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_cmake_build/test.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_const_name.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_const_name.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_constants_and_functions.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_constants_and_functions.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_copy_move.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_copy_move.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_custom_type_casters.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_custom_type_casters.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_custom_type_setup.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_custom_type_setup.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_docstring_options.cpp create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_docstring_options.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_eigen.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_eigen.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_embed/CMakeLists.txt create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_embed/catch.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_embed/external_module.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_embed/test_interpreter.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_embed/test_interpreter.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_embed/test_trampoline.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_enum.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_enum.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_eval.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_eval.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_eval_call.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_exceptions.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_exceptions.h create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_exceptions.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_factory_constructors.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_factory_constructors.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_gil_scoped.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_gil_scoped.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_iostream.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_iostream.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_kwargs_and_defaults.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_kwargs_and_defaults.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_local_bindings.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_local_bindings.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_methods_and_attributes.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_methods_and_attributes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_modules.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_modules.py create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_multiple_inheritance.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_multiple_inheritance.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_numpy_array.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_numpy_array.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_numpy_dtypes.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_numpy_dtypes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_numpy_vectorize.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_numpy_vectorize.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_opaque_types.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_opaque_types.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_operator_overloading.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_operator_overloading.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_pickling.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_pickling.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_pytypes.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_pytypes.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_sequences_and_iterators.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_sequences_and_iterators.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_smart_ptr.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_smart_ptr.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_stl.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_stl.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_stl_binders.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_stl_binders.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_tagbased_polymorphic.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_tagbased_polymorphic.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_thread.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_thread.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_union.cpp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_union.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_virtual_functions.cpp create mode 100644 
cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/test_virtual_functions.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/valgrind-numpy-scipy.supp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tests/valgrind-python.supp create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/FindCatch.cmake create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/FindEigen3.cmake create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/FindPythonLibsNew.cmake create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/check-style.sh create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/cmake_uninstall.cmake.in create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/libsize.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/make_changelog.py create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/pybind11Common.cmake create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/pybind11Config.cmake.in create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/pybind11NewTools.cmake create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/pybind11Tools.cmake create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/pyproject.toml create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/setup_global.py.in create mode 100644 cv/3d_detection/pointrcnn-iou/pytorch/spconv/third_party/pybind11/tools/setup_main.py.in diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/README.md b/cv/3d_detection/pointrcnn-iou/pytorch/README.md new file mode 100644 index 000000000..6f689f170 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/README.md @@ -0,0 +1,69 @@ +# PointRCNN-IoU + +## Model description +PointRCNN-IoU is an extension of the PointRCNN object detection framework that incorporates Intersection over Union (IoU) as a metric for evaluation. IoU is a common metric used in object detection tasks to measure the overlap between predicted bounding boxes and ground truth bounding boxes. + +## Step 1: Installation +``` +## install libGL and libboost +yum install mesa-libGL +yum install boost-devel + +# Install numba +cd numba +python3 setup.py bdist_wheel -d build_pip 2>&1 | tee compile.log +bash install_numba.sh +cd .. + +# Install spconv +cd spconv +bash clean_spconv.sh +bash build_spconv.sh +bash install_spconv.sh +cd .. + +# Install openpcdet +cd openpcdet +pip3 install -r requirements.txt +bash build_openpcdet.sh +bash install_openpcdet.sh +cd .. 
+```
+
+## Step 2: Preparing datasets
+Download the KITTI 3D object detection dataset from the official KITTI website.
+
+Download the optional "planes" sub-dataset (road plane data used for data augmentation during training), then organize the files as follows:
+```
+OpenPCDet
+├── data
+│   ├── kitti
+│   │   │── ImageSets
+│   │   │── training
+│   │   │   ├──calib & velodyne & label_2 & image_2 & (optional: planes) & (optional: depth_2)
+│   │   │── testing
+│   │   │   ├──calib & velodyne & image_2
+├── pcdet
+├── tools
+```
+
+```
+# Modify `DATA_PATH` in kitti_dataset.yaml to point to your own KITTI data
+# directory, then generate the dataset info files:
+cd openpcdet
+python3 -m pcdet.datasets.kitti.kitti_dataset create_kitti_infos tools/cfgs/dataset_configs/kitti_dataset.yaml
+```
+
+## Step 3: Training
+
+### Single GPU training
+
+```bash
+cd tools
+python3 train.py --cfg_file cfgs/kitti_models/pointrcnn_iou.yaml
+```
+
+### Multiple GPU training
+
+```bash
+# The first argument is the number of GPUs to train with (16 here).
+bash scripts/dist_train.sh 16 --cfg_file cfgs/kitti_models/pointrcnn_iou.yaml
+``` \ No newline at end of file
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.coveragerc b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.coveragerc
new file mode 100644
index 000000000..e9eda9644
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.coveragerc
@@ -0,0 +1,24 @@
+# configuration file used by run_coverage.py
+[run]
+branch = True
+source = numba
+concurrency = multiprocessing
+parallel = True
+
+[report]
+
+omit =
+    */__main__.py
+    # Vendored packages
+    numba/misc/appdirs.py
+    numba/cloudpickle/__init__.py
+    numba/cloudpickle/cloudpickle.py
+    numba/cloudpickle/cloudpickle_fast.py
+    numba/cloudpickle/compat.py
+    numba/_version.py
+
+exclude_lines =
+    pragma: no cover
+    if __name__ == .__main__.:
+
+[html]
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.flake8 b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.flake8
new file mode 100644
index 000000000..9d77a1d5e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.flake8
@@ -0,0 +1,263 @@
+[flake8]
+ignore =
+    E20, # Extra space in brackets
+    E231,E241, # Multiple spaces around ","
+    E26, # Comments
+    E731, # Assigning lambda expression
+    E741, # Ambiguous variable names
+    W503, # line break before binary operator
+    W504, # line break after binary operator
+max-line-length = 80
+
+exclude =
+    __pycache__
+    .git
+    *.pyc
+    *~
+    *.o
+    *.so
+    *.cpp
+    *.c
+    *.h
+    __init__.py
+    # Ignore vendored files
+    numba/cloudpickle/*
+    # Grandfather in existing failing files.
This list should shrink over time + numba/stencils/stencil.py + numba/core/transforms.py + numba/core/tracing.py + numba/core/withcontexts.py + numba/_version.py + numba/core/inline_closurecall.py + numba/core/ir_utils.py + numba/core/pylowering.py + numba/python_utils.py + numba/parfors/parfor.py + numba/misc/numba_entry.py + numba/stencils/stencilparfor.py + numba/core/ir.py + numba/core/generators.py + numba/misc/appdirs.py + numba/core/caching.py + numba/core/debuginfo.py + numba/core/annotations/pretty_annotate.py + numba/misc/dummyarray.py + numba/core/dataflow.py + numba/core/pythonapi.py + numba/core/decorators.py + numba/core/typeconv/rules.py + numba/core/typeconv/castgraph.py + numba/core/rewrites/registry.py + numba/core/rewrites/macros.py + numba/core/rewrites/static_binop.py + numba/core/rewrites/ir_print.py + numba/core/types/abstract.py + numba/core/types/misc.py + numba/core/types/npytypes.py + numba/core/types/common.py + numba/core/types/iterators.py + numba/core/types/scalars.py + numba/core/fastmathpass.py + numba/cpython/setobj.py + numba/core/options.py + numba/cpython/printimpl.py + numba/cpython/cmathimpl.py + numba/cpython/tupleobj.py + numba/cpython/mathimpl.py + numba/core/registry.py + numba/core/imputils.py + numba/cpython/builtins.py + numba/core/cpu.py + numba/misc/quicksort.py + numba/core/callconv.py + numba/cpython/randomimpl.py + numba/np/npyimpl.py + numba/cpython/slicing.py + numba/cpython/numbers.py + numba/cpython/listobj.py + numba/core/removerefctpass.py + numba/core/boxing.py + numba/misc/cffiimpl.py + numba/np/linalg.py + numba/cpython/rangeobj.py + numba/np/npyfuncs.py + numba/cpython/iterators.py + numba/core/codegen.py + numba/np/polynomial.py + numba/misc/mergesort.py + numba/core/base.py + numba/np/npdatetime.py + numba/pycc/cc.py + numba/pycc/compiler.py + numba/pycc/llvm_types.py + numba/pycc/platform.py + numba/pycc/decorators.py + numba/core/runtime/nrtdynmod.py + numba/core/runtime/context.py + numba/tests/test_support.py + numba/tests/test_llvm_version_check.py + numba/tests/test_builtins.py + numba/tests/test_jitmethod.py + numba/tests/test_inlining.py + numba/tests/test_array_manipulation.py + numba/tests/test_dummyarray.py + numba/tests/test_smart_array.py + numba/tests/test_linalg.py + numba/tests/test_threadsafety.py + numba/tests/test_utils.py + numba/tests/cfunc_cache_usecases.py + numba/tests/enum_usecases.py + numba/tests/test_func_lifetime.py + numba/tests/test_typeinfer.py + numba/tests/test_return_values.py + numba/tests/test_npdatetime.py + numba/tests/test_fancy_indexing.py + numba/tests/support.py + numba/tests/test_print.py + numba/tests/test_debug.py + numba/tests/test_interproc.py + numba/tests/test_typeconv.py + numba/tests/test_tracing.py + numba/tests/usecases.py + numba/tests/test_vectorization_type_inference.py + numba/tests/matmul_usecase.py + numba/tests/complex_usecases.py + numba/tests/test_array_exprs.py + numba/tests/test_polynomial.py + numba/tests/test_wrapper.py + numba/tests/test_obj_lifetime.py + numba/tests/test_intwidth.py + numba/tests/test_remove_dead.py + numba/tests/serialize_usecases.py + numba/tests/test_del.py + numba/tests/test_gil.py + numba/tests/cffi_usecases.py + numba/tests/test_slices.py + numba/tests/test_mandelbrot.py + numba/tests/compile_with_pycc.py + numba/tests/test_deprecations.py + numba/tests/test_looplifting.py + numba/tests/test_storeslice.py + numba/tests/recursion_usecases.py + numba/tests/dummy_module.py + numba/tests/test_operators.py + numba/tests/test_comprehension.py 
+ numba/tests/ctypes_usecases.py + numba/tests/test_locals.py + numba/tests/test_dicts.py + numba/tests/test_optional.py + numba/tests/test_mathlib.py + numba/tests/test_numberctor.py + numba/tests/test_globals.py + numba/tests/test_typingerror.py + numba/tests/test_copy_propagate.py + numba/tests/test_ctypes.py + numba/tests/test_typeof.py + numba/tests/test_usecases.py + numba/tests/test_auto_constants.py + numba/tests/test_cffi.py + numba/tests/test_sort.py + numba/tests/test_cfunc.py + numba/tests/test_conversion.py + numba/tests/test_indexing.py + numba/tests/test_pycc.py + numba/tests/annotation_usecases.py + numba/tests/test_extended_arg.py + numba/tests/test_alignment.py + numba/tests/test_multi3.py + numba/tests/test_overlap.py + numba/tests/test_array_attr.py + numba/tests/test_array_methods.py + numba/tests/test_enums.py + numba/tests/test_profiler.py + numba/tests/test_numpyadapt.py + numba/tests/test_stencils.py + numba/tests/cache_usecases.py + numba/tests/true_div_usecase.py + numba/tests/test_dataflow.py + numba/tests/test_tuples.py + numba/tests/test_svml.py + numba/tests/test_array_iterators.py + numba/tests/test_buffer_protocol.py + numba/tests/test_casting.py + numba/tests/test_lists.py + numba/tests/test_array_analysis.py + numba/tests/test_serialize.py + numba/tests/test_iteration.py + numba/tests/test_recarray_usecases.py + numba/tests/test_target_overloadselector.py + numba/tests/test_compile_cache.py + numba/tests/test_array_reductions.py + numba/tests/test_dyn_func.py + numba/tests/test_unpack_sequence.py + numba/tests/test_cgutils.py + numba/tests/test_complex.py + numba/tests/test_hashing.py + numba/tests/test_sys_stdin_assignment.py + numba/tests/test_ufuncs.py + numba/tests/pdlike_usecase.py + numba/tests/test_range.py + numba/tests/test_nrt_refct.py + numba/misc/timsort.py + numba/tests/test_nested_calls.py + numba/tests/test_chained_assign.py + numba/tests/test_withlifting.py + numba/tests/test_parfors.py + numba/tests/test_sets.py + numba/tests/test_dyn_array.py + numba/tests/test_objects.py + numba/tests/test_random.py + numba/tests/test_nan.py + numba/tests/pycc_distutils_usecase/source_module.py + numba/tests/npyufunc/test_ufuncbuilding.py + numba/tests/npyufunc/test_errors.py + numba/tests/npyufunc/test_vectorize_decor.py + numba/tests/npyufunc/test_parallel_ufunc_issues.py + numba/tests/npyufunc/test_parallel_env_variable.py + numba/tests/npyufunc/test_gufunc.py + numba/core/typing/cmathdecl.py + numba/core/typing/bufproto.py + numba/core/typing/mathdecl.py + numba/core/typing/listdecl.py + numba/core/typing/builtins.py + numba/core/typing/randomdecl.py + numba/core/typing/setdecl.py + numba/core/typing/npydecl.py + numba/core/typing/arraydecl.py + numba/core/typing/collections.py + numba/core/typing/ctypes_utils.py + numba/core/typing/enumdecl.py + numba/core/typing/cffi_utils.py + numba/core/typing/npdatetime.py + numba/core/annotations/type_annotations.py + numba/testing/ddt.py + numba/testing/loader.py + numba/testing/notebook.py + numba/testing/main.py + numba/np/unsafe/ndarray.py + numba/np/ufunc/deviceufunc.py + numba/np/ufunc/sigparse.py + numba/parfors/parfor_lowering.py + numba/np/ufunc/array_exprs.py + numba/np/ufunc/decorators.py + numba/core/datamodel/models.py + numba/core/datamodel/packer.py + numba/core/datamodel/testing.py + numba/core/datamodel/manager.py + +per-file-ignores = + # Ignore star imports, unused imports, and "may be defined by star imports" + # errors in device_init because its purpose is to bring together a lot of + # 
the public API to be star-imported in numba.cuda.__init__
+    numba/cuda/device_init.py:F401,F403,F405
+    # libdevice.py is an autogenerated file containing stubs for all the device
+    # functions. Some of the lines in docstrings are a little over-long, as they
+    # contain the URLs of the reference pages in the online libdevice
+    # documentation.
+    numba/cuda/libdevice.py:E501
+    # Ignore too-long lines in the CUDA doc examples, prioritising readability
+    # in the docs over line length in the example source (especially given that
+    # the test code is already indented by 8 spaces)
+    numba/cuda/tests/doc_examples/test_random.py:E501
+    numba/cuda/tests/doc_examples/test_cg.py:E501
+    numba/cuda/tests/doc_examples/test_matmul.py:E501
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitattributes b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitattributes
new file mode 100644
index 000000000..972ba2b7f
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitattributes
@@ -0,0 +1 @@
+numba/_version.py export-subst
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/CODEOWNERS b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/CODEOWNERS
new file mode 100644
index 000000000..a9d8b4265
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/CODEOWNERS
@@ -0,0 +1,98 @@
+# Numba's codeowners file is dual purpose; it:
+#
+# 1. Provides information to GitHub about who should be requested to review a PR
+# 2. Provides contributors/czars general information about who to contact
+#    first about various parts of the code base. A lot of concepts in Numba are
+#    necessarily spread throughout the code base; consequently some of the
+#    "code ownership"/first contact is concept based as opposed to
+#    file/directory based.
+#
+# ------------------------------------------------------------------------------
+# Information for GitHub
+# ------------------------------------------------------------------------------
+# These people are the default "owners" for everything in the repo unless a
+# later match is made; they will automatically be requested to review PRs.
+* @sklam @stuartarchibald @esc
+
+# Owners of specific parts of the code will be requested to review if a PR
+# touches code in the matched pattern
+/numba/cuda/ @gmarkall
+/numba/parfors/ @DrTodd13
+/numba/stencils/ @DrTodd13
+
+# ------------------------------------------------------------------------------
+# Information for contributors
+# ------------------------------------------------------------------------------
+# This section provides a rough list of who to contact first for help with
+# various parts/concepts in the code base; first contact does not imply
+# ownership!
+#
+# Parts of the code base:
+#
+# * Parfors/Parallel Accelerator (@DrTodd13)
+#   - Array Analysis (@DrTodd13)
+#   - Parfors transforms (@DrTodd13)
+# * Stencils (@DrTodd13)
+# * Experimental:
+#   - Jitclasses (@sklam)
+#   - StructRef (@sklam)
+# * Typed containers:
+#   - Typed.List (@esc)
+#   - Typed.Dict (@sklam)
+# * Documentation (Needs first contact/owner)
+# * NumPy (Needs first contact/owner)
+#   - ufuncs (Needs first contact/owner)
+#   - linalg (@stuartarchibald)
+#   - Implementation of specific functions (Needs first contact/owner)
+#   - Parallel backends/threading layers (@stuartarchibald)
+# * CPython implementation (Needs first contact/owner)
+# * Extension API (Needs first contact/owner)
+# * AOT (Needs first contact/owner)
+# * Compiler:
+#   - Type inference (@sklam)
+#   - Bytecode analysis/CFA/DFA (@sklam)
+#   - Compiler Pipeline infrastructure (@stuartarchibald)
+#   - Compiler passes:
+#     - Rewrites (Needs first contact/owner)
+#     - Branch pruning (@stuartarchibald)
+#     - Literal unroll (@stuartarchibald)
+#     - Rewrite Semantic Constants (@stuartarchibald)
+#     - MakeFunction To Jit function (@stuartarchibald)
+#     - Overload and function inlining (@stuartarchibald)
+#     - With Lifting (@sklam)
+#     - Exception handling (@sklam)
+#     - Literally (@sklam)
+#     - SSA (@sklam)
+#   - lowering.py, codegen.py (@sklam)
+#   - Datamodels/call conventions (@sklam)
+#   - Inlining in general (@stuartarchibald)
+#
+# Additional Concepts:
+#
+# * Reference counting and NRT (@sklam)
+# * Testing (Needs first contact/owner)
+# * CI:
+#   - public CI (azure) (Needs first contact/owner)
+#   - Numba build farm (@esc)
+# * Integration testing (https://github.com/numba/numba-integration-testing)
+#   (@esc)
+# * ASV profiling (@esc)
+# * Type Annotations (@luk-f-a and @EPronovost)
+# * Ufunc/GUfunc (Needs first contact/owner)
+# * Profiling (Needs first contact/owner (and code!))
+# * Debugging:
+#   - DWARF (@sklam)
+#   - gdb support (@stuartarchibald)
+# * Hardware targets:
+#   - The CUDA target (@gmarkall)
+#   - The ROCm target (@stuartarchibald)
+#   - ARM* (@stuartarchibald)
+#   - POWER (Needs first contact/owner)
+#   - X86* (Needs first contact/owner)
+# * OS:
+#   - Linux (@stuartarchibald)
+#   - OSX
+#   - Windows
+#   - BSD (@stuartarchibald)
+#
+# Anything not covered by someone else... ping @sklam and @stuartarchibald
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Bug_report.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Bug_report.md
new file mode 100644
index 000000000..db91bced7
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Bug_report.md
@@ -0,0 +1,34 @@
+---
+name: Bug Report
+about: Report a bug. Not for asking general questions - see below.
+
+---
+
+## Reporting a bug
+
+- [ ] I have tried using the latest released version of Numba (most recent is
+  visible in the change log: https://github.com/numba/numba/blob/main/CHANGE_LOG).
+- [ ] I have included a self-contained code sample to reproduce the problem,
+  i.e. it's possible to run as 'python bug.py'.
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Feature_request.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Feature_request.md
new file mode 100644
index 000000000..398c277a9
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/Feature_request.md
@@ -0,0 +1,23 @@
+---
+name: Feature Request
+about: Tell us about something in the Python language/NumPy you'd like Numba to support. Not for asking general questions - see below.
+
+---
+
+## Feature request
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/config.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 000000000..08310d750
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,11 @@
+blank_issues_enabled: false
+contact_links:
+  - name: General Question
+    url: https://numba.discourse.group/c/numba/community-support/
+    about: "If you have a general question (not a bug report or feature request) then please ask on Numba's discourse instance."
+  - name: Quick Question/Just want to say Hi!
+    url: https://gitter.im/numba/numba
+    about: "If you have a quick question or want to chat to users/developers in real time then please use gitter.im/numba/numba"
+  - name: Discuss an involved feature
+    url: https://numba.discourse.group/c/numba/development/
+    about: "If you would like to suggest a more involved feature like *Can a new compiler pass be added to do X* then please start a discussion on Numba's discourse instance."
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/first_rc_checklist.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/first_rc_checklist.md
new file mode 100644
index 000000000..be572dfec
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/first_rc_checklist.md
@@ -0,0 +1,41 @@
+---
+name: First Release Candidate Checklist (maintainer only)
+about: Checklist template for the first release of every series
+title: Numba X.Y.Zrc1 Checklist (FIXME)
+labels: task
+
+---
+
+## Numba X.Y.Z
+
+* [ ] Merge to main.
+  - [ ] "remaining Pull-Requests from milestone".
+* [ ] Check Numba's version support table documentation. Update via PR if
+  needed.
+* [ ] Review deprecation schedule and notices. Make PRs if need be.
+* [ ] Merge change log changes.
+  - [ ] "PR with changelog entries".
+* [ ] Create X.Y release branch.
+* [ ] Dependency version pinning on release branch (see the sketch after this
+  checklist for how such pins behave):
+  * [ ] Pin llvmlite to `>=0.A.0rc1,<0.A+1.0`.
+  * [ ] Pin NumPy if needed
+  * [ ] Pin TBB if needed
+* [ ] Annotated tag X.Y.Zrc1 on release branch (no `v` prefix).
+* [ ] Build and upload conda packages on buildfarm (check "upload").
+* [ ] Build wheels and sdist on the buildfarm (check "upload").
+* [ ] Verify packages uploaded to Anaconda Cloud and move to `numba/label/main`.
+* [ ] Upload wheels and sdist to PyPI (upload from `ci_artifacts`).
+* [ ] Verify wheels for all platforms arrived on PyPI.
+* [ ] Initialize and verify ReadTheDocs build.
+* [ ] Send RC announcement email / post announcement to discourse group.
+* [ ] Post link to Twitter.
+
+### Post Release:
+
+* [ ] Clean up `ci_artifacts` by moving files to sub-directories.
+* [ ] Tag X.Y+1.0dev0 to start new development cycle on `main`.
+* [ ] Update llvmlite dependency spec to match next version via PR to `main`.
+* [ ] Update release checklist template with any additional bullet points that
+  may have arisen during the release.
+* [ ] Close milestone (and then close this release issue).
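The llvmlite pin in the checklist above is a standard PEP 440 version specifier. As a rough illustration of what such a pin admits, here is a small Python sketch using the third-party `packaging` library; the `0.39`/`0.40` series numbers are invented for the example and are not tied to any real release.

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# Hypothetical series A=39: the checklist pin ">=0.A.0rc1,<0.A+1.0"
# becomes ">=0.39.0rc1,<0.40.0".
pin = SpecifierSet(">=0.39.0rc1,<0.40.0")

for candidate in ("0.39.0rc1", "0.39.2", "0.40.0"):
    # Prerelease versions are admitted here because the lower bound
    # itself names a prerelease (0.39.0rc1).
    print(candidate, Version(candidate) in pin)
# -> 0.39.0rc1 True / 0.39.2 True / 0.40.0 False
```

The lower bound deliberately starts at the first release candidate so that the RC itself satisfies the pin.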
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/sub_rc_checklist.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/sub_rc_checklist.md
new file mode 100644
index 000000000..5874922a6
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/ISSUE_TEMPLATE/sub_rc_checklist.md
@@ -0,0 +1,37 @@
+---
+name: Subsequent Release Candidate Checklist (maintainer only)
+about: Checklist template for all subsequent releases (RC 2-N, FINAL and PATCH) of every series
+title: Numba X.Y.Zrc1 Checklist (FIXME)
+labels: task
+
+---
+
+## numba X.Y.Z
+
+* [ ] Cherry-pick items from the X.Y.Z milestone into a PR.
+* [ ] Approve change log modifications and cherry-pick.
+* [ ] Merge change log modifications and cherry-picks to X.Y release branch.
+  * [ ] https://github.com/numba/numba/pull/XXXX
+* [ ] Review, merge and check execution of release notebook. (FINAL ONLY)
+* [ ] Annotated tag X.Y.Z on release branch (no `v` prefix).
+* [ ] Build and upload conda packages on buildfarm (check `upload`).
+* [ ] Build wheels and sdist on the buildfarm (check "upload").
+* [ ] Verify packages uploaded to Anaconda Cloud and move to
+  `numba/label/main`.
+* [ ] Upload wheels and sdist to PyPI (upload from `ci_artifacts`).
+* [ ] Verify wheels for all platforms arrived on PyPI.
+* [ ] Verify ReadTheDocs build.
+* [ ] Send RC/FINAL announcement email / post announcement to discourse group.
+* [ ] Post link to Twitter.
+* [ ] Post link to python-announce-list@python.org.
+
+### Post release
+
+* [ ] Snapshot Build Farm config.
+* [ ] Clean up `ci_artifacts` by moving files to subdirectories.
+* [ ] Update release checklist template with any additional bullet points that
+  may have arisen during the release.
+* [ ] Ping Anaconda Distro team to trigger a build for `defaults` (FINAL ONLY).
+* [ ] Create a release on GitHub at https://github.com/numba/numba/releases (FINAL ONLY).
+* [ ] Close milestone (and then close this release issue).
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/PULL_REQUEST_TEMPLATE.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 000000000..18d4b105f
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,39 @@
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/workflows/stale.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/workflows/stale.yml
new file mode 100644
index 000000000..6415ada98
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.github/workflows/stale.yml
@@ -0,0 +1,20 @@
+name: 'Mark stale issues'
+on:
+  schedule:
+    - cron: '30 1 * * *'
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v3
+        with:
+          stale-issue-message: >
+            This issue is marked as stale as it has had no activity in the past
+            30 days. Please close this issue if no further response or action is
+            needed. Otherwise, please respond with any updates and confirm that
+            this issue still needs to be addressed.
+ stale-issue-label: 'stale' + any-of-labels: 'question,needtriage,more info needed' + days-before-issue-stale: 30 + days-before-issue-close: -1 diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitignore b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitignore new file mode 100644 index 000000000..f4a687d88 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.gitignore @@ -0,0 +1,28 @@ +*.pyc +*.o +*.so +*.dylib +*.pyd +*.pdb +*.egg-info +*.sw[po] +*.out +*.ll +.coverage +.nfs* +tags +MANIFEST + +build/ +docs/_build/ +docs/gh-pages/ +dist/ +htmlcov/ +.idea/ +.vscode/ +.ycm_extra_conf.py +.mypy_cache/ +.ipynb_checkpoints/ +__pycache__/ + +docs/source/developer/autogen* diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.pre-commit-config.yaml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.pre-commit-config.yaml new file mode 100644 index 000000000..bf2fb6423 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.8 + hooks: + - id: flake8 diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/.readthedocs.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.readthedocs.yml new file mode 100644 index 000000000..f74302f7e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/.readthedocs.yml @@ -0,0 +1,15 @@ +version: 2 +build: + os: ubuntu-20.04 + tools: + python: mambaforge-4.10 +sphinx: + configuration: docs/source/conf.py +python: + install: + - method: setuptools + path: . +conda: + environment: docs/environment.yml +formats: +- pdf diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/CHANGE_LOG b/cv/3d_detection/pointrcnn-iou/pytorch/numba/CHANGE_LOG new file mode 100644 index 000000000..e94fa6673 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/CHANGE_LOG @@ -0,0 +1,5717 @@ +Version 0.56.4 (3 November, 2022) +--------------------------------- + +This is a bugfix release to fix a regression in the CUDA target in relation to +the ``.view()`` method on CUDA device arrays that is present when using NumPy +version 1.23.0 or later. + +Pull-Requests: + +* PR `#8537 `_: Make ol_compatible_view accessible on all targets (`gmarkall `_) +* PR `#8552 `_: Update version support table for 0.56.4. (`stuartarchibald `_) +* PR `#8553 `_: Update CHANGE_LOG for 0.56.4 (`stuartarchibald `_) +* PR `#8570 `_: Release 0.56 branch: Fix overloads with ``target="generic"`` for CUDA (`gmarkall `_) +* PR `#8571 `_: Additional update to CHANGE_LOG for 0.56.4 (`stuartarchibald `_) + +Authors: + +* `gmarkall `_ +* `stuartarchibald `_ + +Version 0.56.3 (13 October, 2022) +--------------------------------- + +This is a bugfix release to remove the version restriction applied to the +``setuptools`` package and to fix a bug in the CUDA target in relation to +copying zero length device arrays to zero length host arrays. + +Pull-Requests: + +* PR `#8475 `_: Remove setuptools version pin (`gmarkall `_) +* PR `#8482 `_: Fix #8477: Allow copies with different strides for 0-length data (`gmarkall `_) +* PR `#8486 `_: Restrict the TBB development package to supported version in Azure. (`stuartarchibald `_) +* PR `#8503 `_: Update version support table for 0.56.3 (`stuartarchibald `_) +* PR `#8504 `_: Update CHANGE_LOG for 0.56.3 (`stuartarchibald `_) + +Authors: + +* `gmarkall `_ +* `stuartarchibald `_ + +Version 0.56.2 (1 September, 2022) +---------------------------------- + +This is a bugfix release that supports NumPy 1.23 and fixes CUDA function +caching. 
+
+Pull-Requests:
+
+* PR `#8239 `_: Add decorator to run a test in a subprocess (`stuartarchibald `_)
+* PR `#8276 `_: Move Azure to use macos-11 (`stuartarchibald `_)
+* PR `#8310 `_: CUDA: Fix Issue #8309 - atomics don't work on complex components (`Graham Markall `_)
+* PR `#8342 `_: Upgrade to ubuntu-20.04 for azure pipeline CI (`jamesobutler `_)
+* PR `#8356 `_: Update setup.py, buildscripts, CI and docs to require setuptools<60 (`stuartarchibald `_)
+* PR `#8374 `_: Don't pickle LLVM IR for CUDA code libraries (`Graham Markall `_)
+* PR `#8377 `_: Add support for NumPy 1.23 (`stuartarchibald `_)
+* PR `#8384 `_: Move strace() check into tests that actually need it (`stuartarchibald `_)
+* PR `#8386 `_: Fix the docs for numba.get_thread_id (`stuartarchibald `_)
+* PR `#8407 `_: Pin NumPy version to 1.18-1.24 (`Andre Masella `_)
+* PR `#8411 `_: update version support table for 0.56.1 (`esc `_)
+* PR `#8412 `_: Create changelog for 0.56.1 (`Andre Masella `_)
+* PR `#8413 `_: Fix Azure CI for NumPy 1.23 and use conda-forge scipy (`Siu Kwan Lam `_)
+* PR `#8414 `_: Hotfix for 0.56.2 (`Siu Kwan Lam `_)
+
+Version 0.56.1 (1 September, 2022)
+----------------------------------
+
+The release was skipped due to issues during the release process.
+
+Version 0.56.0 (25 July, 2022)
+------------------------------
+
+This release continues to add new features, bug fixes and stability improvements
+to Numba. Please note that this will be the last release that has support for
+Python 3.7 as the next release series (Numba 0.57) will support Python 3.11!
+Also note that this will be the last release to support linux-32 packages
+produced by the Numba team.
+
+Python language support enhancements:
+
+* Previously missing support for large, in-line dictionaries and internal calls
+  to functions with large numbers of keyword arguments in Python 3.10 has been
+  added.
+* ``operator.mul`` now works for ``list`` s.
+* Literal slices, e.g. ``slice(1, 10, 2)``, can be returned from ``nopython``
+  mode functions.
+* The ``len`` function now works on ``dict_keys``, ``dict_values`` and
+  ``dict_items``.
+* Numba's ``set`` implementation now supports reference counted items, e.g.
+  strings.
+
+Numba specific feature enhancements:
+
+* The experimental ``jitclass`` feature gains support for a large number of
+  ``builtin`` methods, e.g. declaring ``__hash__`` or ``__getitem__`` for a
+  ``jitclass`` type.
+* It's now possible to use ``@vectorize`` on an already ``@jit`` family
+  decorated function (see the sketch below).
+* Name mangling has been updated to emit compiled function names that exactly
+  match the function name in Python. This means debuggers, like GDB, can be set
+  to break directly on Python function names.
+* A GDB "pretty printing" support module has been added; when loaded into GDB,
+  Numba's internal representations of Python/NumPy types are rendered inside GDB
+  as they would be in Python.
+* An experimental option is added to the ``@jit`` family decorators to entirely
+  turn off LLVM's optimisation passes for a given function (see the
+  ``_dbg_optnone`` kwarg in the ``@jit`` decorator family).
+* A new environment variable, ``NUMBA_EXTEND_VARIABLE_LIFETIMES``, is added; if
+  set it will extend the lifetime of variables to the end of their basic block,
+  so as to permit a debugging experience in GDB similar to that found in
+  compiled C/C++/Fortran code.
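As a short illustration of the ``@vectorize``-on-``@jit`` item above, here is a minimal sketch; the function names are invented for the example, and the exact calling convention should be checked against the official documentation::

    import numpy as np
    from numba import njit, vectorize

    @njit
    def plus_one(x):
        return x + 1.0

    # Per the 0.56 notes above, an already @jit-decorated function can be
    # handed to @vectorize to build a NumPy ufunc from it.
    plus_one_ufunc = vectorize(["float64(float64)"])(plus_one)

    print(plus_one_ufunc(np.arange(4.0)))  # [1. 2. 3. 4.]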
+
+NumPy features/enhancements:
+
+* Initial support for passing, using and returning ``numpy.random.Generator``
+  instances has been added; this currently includes support for the ``random``
+  distribution.
+* The broadcasting functions ``np.broadcast_shapes`` and ``np.broadcast_arrays``
+  are now supported.
+* The ``min`` and ``max`` functions now work with ``np.timedelta64`` and
+  ``np.datetime64`` types.
+* Sorting multi-dimensional arrays along the last axis is now supported in
+  ``np.sort()``.
+* The ``np.clip`` function is updated to accept NumPy arrays for the ``a_min``
+  and ``a_max`` arguments.
+* The NumPy allocation routines (``np.empty``, ``np.ones``, etc.) support shape
+  arguments specified using members of ``enum.IntEnum`` s.
+* The function ``np.random.noncentral_chisquare`` is now supported.
+* The performance of the functions ``np.full`` and ``np.ones`` has been
+  improved.
+
+Parallel Accelerator enhancements:
+
+* The ``parallel=True`` functionality is enhanced through the addition of the
+  functions ``numba.set_parallel_chunksize`` and
+  ``numba.get_parallel_chunksize`` to permit more fine-grained scheduling of
+  work defined in a parallel region (see the sketch below). There is also
+  support for adjusting the ``chunksize`` via a context manager.
+* The ``ID`` of a thread is now defined to be predictable and within a known
+  range; it is available through calling the function ``numba.get_thread_id``.
+* The performance of ``@stencil`` s has been improved in both serial and
+  parallel execution.
+
+CUDA enhancements:
+
+* New functionality:
+
+  * Self-recursive device functions.
+  * Vector type support (``float4``, ``int2``, etc.).
+  * Shared / local arrays of extension types can now be created.
+  * Support for linking CUDA C / C++ device functions into Python kernels.
+  * PTX generation for Compute Capabilities 8.6 and 8.7 - e.g. RTX A series,
+    GTX 3000 series.
+  * Comparison operations for ``float16`` types.
+
+* Performance improvements:
+
+  * Context queries are no longer made during launch configuration.
+  * Launch configurations are now LRU cached.
+  * On-disk caching of CUDA kernels is now supported.
+
+* Documentation: many new examples added.
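To make the chunksize scheduling hooks above concrete, here is a minimal sketch; it assumes that ``set_parallel_chunksize`` returns the previous chunk size so it can be restored (treat that return-value behaviour as an assumption to verify against the Numba docs)::

    import numpy as np
    from numba import njit, prange, set_parallel_chunksize

    @njit(parallel=True)
    def chunked_sum(a):
        # Schedule the prange in chunks of 4 iterations rather than the
        # default partitioning; keep the old value so it can be restored.
        old = set_parallel_chunksize(4)
        acc = 0.0
        for i in prange(a.size):
            acc += a[i]
        set_parallel_chunksize(old)
        return acc

    print(chunked_sum(np.arange(100.0)))  # 4950.0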
(`Tobias Sargeant `_) +* PR `#7537 `_: Add chrome tracing (`Hadia Ahmed `_ `Siu Kwan Lam `_) +* PR `#7556 `_: Testhound/fp16 comparison (`Michael Collison `_ `Graham Markall `_) +* PR `#7586 `_: Support for len on dict.keys, dict.values, and dict.items (`Nick Riasanovsky `_) +* PR `#7617 `_: Numba gdb-python extension for printing (`stuartarchibald `_) +* PR `#7619 `_: CUDA: Fix linking with PTX when compiling lazily (`Graham Markall `_) +* PR `#7621 `_: Add support for linking CUDA C / C++ with `@cuda.jit` kernels (`Graham Markall `_) +* PR `#7625 `_: Combined parfor chunking and caching PRs. (`stuartarchibald `_ `Todd A. Anderson `_ `Siu Kwan Lam `_) +* PR `#7651 `_: DOC: pypi and conda-forge badges (`Ray Bell `_) +* PR `#7660 `_: Add support for np.broadcast_arrays (`Guilherme Leobas `_) +* PR `#7664 `_: Flatten mangling dicts into a single dict (`Graham Markall `_) +* PR `#7680 `_: CUDA Docs: include example calling slow matmul (`Graham Markall `_) +* PR `#7682 `_: performance improvements to np.full and np.ones (`Rishi Kulkarni `_) +* PR `#7684 `_: DOC: remove incorrect warning in np.random reference (`Rishi Kulkarni `_) +* PR `#7685 `_: Don't convert setitems that have dimension mismatches to parfors. (`Todd A. Anderson `_) +* PR `#7690 `_: Implemented np.random.noncentral_chisquare for all size arguments (`Rishi Kulkarni `_) +* PR `#7695 `_: `IntEnumMember` support for `np.empty`, `np.zeros`, and `np.ones` (`Benjamin Graham `_) +* PR `#7699 `_: CUDA: Provide helpful error if the return type is missing for `declare_device` (`Graham Markall `_) +* PR `#7700 `_: Support for scalar arguments in Np.ascontiguousarray (`Dhruv Patel `_) +* PR `#7703 `_: Ignore unsupported types in `ShapeEquivSet._getnames()` (`Benjamin Graham `_) +* PR `#7704 `_: Move the type annotation pass to post legalization. (`stuartarchibald `_) +* PR `#7709 `_: CUDA: Fixes missing type annotation pass following #7704 (`stuartarchibald `_) +* PR `#7712 `_: Fixing issue 7693 (`stuartarchibald `_ `Graham Markall `_ `luk-f-a `_) +* PR `#7714 `_: Support for boxing SliceLiteral type (`Nick Riasanovsky `_) +* PR `#7718 `_: Bump llvmlite dependency to 0.39.0dev0 for Numba 0.56.0dev0 (`stuartarchibald `_) +* PR `#7724 `_: Update URLs in error messages to refer to RTD docs. (`stuartarchibald `_) +* PR `#7728 `_: Document that AOT-compiled functions do not check arg types (`Graham Markall `_) +* PR `#7729 `_: Handle Omitted/OmittedArgDataModel in DI generation. (`stuartarchibald `_) +* PR `#7732 `_: update release checklist following 0.55.0 RC1 (`esc `_) +* PR `#7736 `_: Update CHANGE_LOG for 0.55.0 final. (`stuartarchibald `_) +* PR `#7740 `_: CUDA Python 11.6 support (`Graham Markall `_) +* PR `#7744 `_: Fix issues with locating/parsing source during DebugInfo emission. (`stuartarchibald `_) +* PR `#7745 `_: Fix the release year for Numba 0.55 change log entry. 
(`stuartarchibald `_) +* PR `#7748 `_: Fix #7713: Ensure _prng_random_hash return has correct bitwidth (`Graham Markall `_) +* PR `#7749 `_: Refactor threading layer priority tests to not use stdout/stderr (`stuartarchibald `_) +* PR `#7752 `_: Fix #7751: Use original filename for array exprs (`Graham Markall `_) +* PR `#7755 `_: CUDA: Deprecate support for CC < 5.3 and CTK < 10.2 (`Graham Markall `_) +* PR `#7763 `_: Update Read the Docs configuration (automatic) (`readthedocs-assistant `_) +* PR `#7764 `_: Add dbg_optnone and dbg_extend_lifetimes flags (`Siu Kwan Lam `_) +* PR `#7771 `_: Move function unique ID to abi-tags (`stuartarchibald `_ `Siu Kwan Lam `_) +* PR `#7772 `_: CUDA: Add Support to Creating `StructModel` Array (`Michael Wang `_) +* PR `#7776 `_: Updates coverage.py config (`stuartarchibald `_) +* PR `#7777 `_: Remove reference existing issue from GH template. (`stuartarchibald `_) +* PR `#7778 `_: Remove long deprecated flags from the CLI. (`stuartarchibald `_) +* PR `#7780 `_: Fix sets with reference counted items (`Benjamin Graham `_) +* PR `#7782 `_: adding reminder to check on deprecations (`esc `_) +* PR `#7783 `_: remove upper limit on Python version (`esc `_) +* PR `#7786 `_: Remove dependency on intel-openmp for OSX (`stuartarchibald `_) +* PR `#7788 `_: Avoid issue with DI gen for arrayexprs. (`stuartarchibald `_) +* PR `#7796 `_: update change-log for 0.55.1 (`esc `_) +* PR `#7797 `_: prune README (`esc `_) +* PR `#7799 `_: update the release checklist post 0.55.1 (`esc `_) +* PR `#7801 `_: add sdist command and umask reminder (`esc `_) +* PR `#7804 `_: update local references from master -> main (`esc `_) +* PR `#7805 `_: Enhance source line finding logic for debuginfo (`Siu Kwan Lam `_) +* PR `#7809 `_: Updates the gdb configuration to accept a binary name or a path. (`stuartarchibald `_) +* PR `#7813 `_: Extend parfors test timeout for aarch64. (`stuartarchibald `_) +* PR `#7814 `_: CUDA Dispatcher refactor (`Graham Markall `_) +* PR `#7815 `_: CUDA Dispatcher refactor 2: inherit from `dispatcher.Dispatcher` (`Graham Markall `_) +* PR `#7817 `_: Update intersphinx URLs for NumPy and llvmlite. (`stuartarchibald `_) +* PR `#7823 `_: Add renamed vars to callee scope such that it is self consistent. (`stuartarchibald `_) +* PR `#7829 `_: CUDA: Support `Enum/IntEnum` in Kernel (`Michael Wang `_) +* PR `#7833 `_: Add version support information table to docs. (`stuartarchibald `_) +* PR `#7835 `_: Fix pickling error when module cannot be imported (`idorrington `_) +* PR `#7836 `_: min() and max() support for np.datetime and np.timedelta (`Benjamin Graham `_) +* PR `#7837 `_: Initial refactoring of parfor reduction lowering (`Siu Kwan Lam `_) +* PR `#7845 `_: change time.time() to time.perf_counter() in docs (`Nopileos2 `_) +* PR `#7846 `_: Fix CUDA enum vectorize test on Windows (`Graham Markall `_) +* PR `#7848 `_: Support for int * list (`Nick Riasanovsky `_) +* PR `#7850 `_: CUDA: Pass `fastmath` compiler flag down to `compile_ptx` and `compile_device`; Improve `fastmath` tests (`Michael Wang `_) +* PR `#7855 `_: Ensure np.argmin/no.argmax return type is intp (`stuartarchibald `_) +* PR `#7858 `_: CUDA: Deprecate `ptx` Attribute and Update Tests (`Graham Markall `_ `Michael Wang `_) +* PR `#7861 `_: Fix a spelling mistake in README (`Zizheng Guo `_) +* PR `#7864 `_: Fix cross_iter_dep check. (`Todd A. 
Anderson `_) +* PR `#7865 `_: Remove add_user_function (`Graham Markall `_) +* PR `#7866 `_: Support for large numbers of args/kws with Python 3.10 (`Nick Riasanovsky `_) +* PR `#7878 `_: CUDA: Remove some deprecated support, add CC 8.6 and 8.7 (`Graham Markall `_) +* PR `#7893 `_: Use uuid.uuid4() as the key in serialization. (`stuartarchibald `_) +* PR `#7895 `_: Remove use of `llvmlite.llvmpy` (`Andre Masella `_) +* PR `#7898 `_: Skip test_ptds under cuda-memcheck (`Graham Markall `_) +* PR `#7901 `_: Pyston compatibility for the test suite (`Kevin Modzelewski `_) +* PR `#7904 `_: Support m1 (`esc `_) +* PR `#7911 `_: added sys import (`Nightfurex `_) +* PR `#7915 `_: CUDA: Fix test checking debug info rendering. (`stuartarchibald `_) +* PR `#7918 `_: Add JIT examples to CUDA docs (`brandon-b-miller `_ `Graham Markall `_) +* PR `#7919 `_: Disallow //= reductions in pranges. (`Todd A. Anderson `_) +* PR `#7924 `_: Retain non-modified index tuple components. (`Todd A. Anderson `_) +* PR `#7939 `_: Fix rendering in feature request template. (`stuartarchibald `_) +* PR `#7940 `_: Implemented `np.allclose` in `numba/np/arraymath.py` (`Gagandeep Singh `_) +* PR `#7941 `_: Remove debug dump output from closure inlining pass. (`stuartarchibald `_) +* PR `#7946 `_: instructions for creating a build environment were outdated (`esc `_) +* PR `#7949 `_: Add Cuda Vector Types (`Michael Wang `_) +* PR `#7950 `_: mission statement (`esc `_) +* PR `#7956 `_: Stop using pip for 3.10 on public ci (Revert "start testing Python 3.10 on public CI") (`esc `_) +* PR `#7957 `_: Use cloudpickle for disk caches (`Siu Kwan Lam `_) +* PR `#7958 `_: `numpy.clip` accept `numpy.array` for `a_min`, `a_max` (`Gagandeep Singh `_) +* PR `#7959 `_: Permit a new array model to have a super set of array model fields. (`stuartarchibald `_) +* PR `#7961 `_: `numba.typed.typeddict.Dict.get` uses `castedkey` to avoid returning default value even if the key is present (`Gagandeep Singh `_) +* PR `#7963 `_: remove the roadmap from the sphinx based docs (`esc `_) +* PR `#7964 `_: Support for large constant dictionaries in Python 3.10 (`Nick Riasanovsky `_) +* PR `#7965 `_: Use uuid4 instead of PID in cache temp name to prevent collisions. (`stuartarchibald `_) +* PR `#7971 `_: lru cache for configure call (`Tingkai Liu `_) +* PR `#7972 `_: Fix fp16 support for cuda shared array (`Michael Collison `_ `Graham Markall `_) +* PR `#7986 `_: Small caching refactor to support target cache implementations (`Graham Markall `_) +* PR `#7994 `_: Supporting multidimensional arrays in quick sort (`Gagandeep Singh `_ `Siu Kwan Lam `_) +* PR `#7996 `_: Fix binding logic in `@overload_glue`. (`stuartarchibald `_) +* PR `#7999 `_: Remove `@overload_glue` for NumPy allocators. (`stuartarchibald `_) +* PR `#8003 `_: Add np.broadcast_shapes (`Guilherme Leobas `_) +* PR `#8004 `_: CUDA fixes for Windows (`Graham Markall `_) +* PR `#8014 `_: Fix support for {real,imag} array attrs in Parfors. 
(`stuartarchibald `_) +* PR `#8016 `_: [Docs] [Very Minor] Make `numba.jit` boundscheck doc line consistent (`Kyle Martin `_) +* PR `#8017 `_: Update FAQ to include details about using debug-only option (`Guilherme Leobas `_) +* PR `#8027 `_: Support for NumPy 1.22 (`stuartarchibald `_) +* PR `#8031 `_: Support for Numpy BitGenerators PR#1 - Core Generator Support (`Kaustubh `_) +* PR `#8035 `_: Fix a couple of typos RE implementation (`stuartarchibald `_) +* PR `#8037 `_: CUDA self-recursion tests (`Graham Markall `_) +* PR `#8044 `_: Make Python 3.10 kwarg peephole less restrictive (`Nick Riasanovsky `_) +* PR `#8046 `_: Fix caching test failures (`Siu Kwan Lam `_) +* PR `#8049 `_: support str(bool) syntax (`LI Da `_) +* PR `#8052 `_: Ensure pthread is linked in when building for ppc64le. (`Siu Kwan Lam `_) +* PR `#8056 `_: Move caching tests from test_dispatcher to test_caching (`Graham Markall `_) +* PR `#8057 `_: Fix coverage checking (`Graham Markall `_) +* PR `#8064 `_: Rename "nb:run_pass" to "numba:run_pass" and document it. (`Siu Kwan Lam `_) +* PR `#8065 `_: Fix PyLowering mishandling starargs (`Siu Kwan Lam `_) +* PR `#8068 `_: update changelog for 0.55.2 (`esc `_) +* PR `#8077 `_: change return type of np.broadcast_shapes to a tuple (`Guilherme Leobas `_) +* PR `#8080 `_: Fix windows test failure due to timeout when the machine is slow poss… (`Siu Kwan Lam `_) +* PR `#8081 `_: Fix erroneous array count in parallel gufunc kernel generation. (`stuartarchibald `_) +* PR `#8089 `_: Support on-disk caching in the CUDA target (`Graham Markall `_) +* PR `#8097 `_: Exclude libopenblas 0.3.20 on osx-arm64 (`esc `_) +* PR `#8099 `_: Fix Py_DECREF use in case of error state (for devicearray). (`stuartarchibald `_) +* PR `#8102 `_: Combine numpy run_constrained in meta.yaml to the run requirements (`Siu Kwan Lam `_) +* PR `#8109 `_: Pin TBB support with respect to incompatible 2021.6 API. (`stuartarchibald `_) +* PR `#8118 `_: Update release checklists post 0.55.2 (`esc `_) +* PR `#8123 `_: Fix CUDA print tests on Windows (`Graham Markall `_) +* PR `#8124 `_: Add explicit checks to all allocators in the NRT. (`stuartarchibald `_) +* PR `#8126 `_: Mark gufuncs as having mutable outputs (`Andre Masella `_) +* PR `#8133 `_: Fix #8132. Regression in Record.make_c_struct for handling nestedarray (`Siu Kwan Lam `_) +* PR `#8137 `_: CUDA: Fix #7806, Division by zero stops the kernel (`Graham Markall `_) +* PR `#8142 `_: CUDA: Fix some missed changes from dropping 9.2 (`Graham Markall `_) +* PR `#8144 `_: Fix NumPy capitalisation in docs. (`stuartarchibald `_) +* PR `#8145 `_: Allow ufunc builder to use previously JITed function (`Andre Masella `_) +* PR `#8151 `_: pin NumPy to build 0 of 1.19.2 on public CI (`esc `_) +* PR `#8163 `_: CUDA: Remove context query in launch config (`Graham Markall `_) +* PR `#8165 `_: Restrict strace based tests to be linux only via support feature. (`stuartarchibald `_) +* PR `#8170 `_: CUDA: Fix missing space in low occupancy warning (`Graham Markall `_) +* PR `#8175 `_: make build and upload order consistent (`esc `_) +* PR `#8181 `_: Fix various typos (`luzpaz `_) +* PR `#8187 `_: Update CHANGE_LOG for 0.55.2 (`stuartarchibald `_ `esc `_) +* PR `#8189 `_: updated version support information for 0.55.2/0.57 (`esc `_) +* PR `#8191 `_: CUDA: Update deprecation notes for 0.56. (`Graham Markall `_) +* PR `#8192 `_: Update CHANGE_LOG for 0.56.0 (`stuartarchibald `_ `esc `_ `Siu Kwan Lam `_) +* PR `#8195 `_: Make the workqueue threading backend once again fork safe. 
(`stuartarchibald `_) +* PR `#8196 `_: Fix numerical tolerance in parfors caching test. (`stuartarchibald `_) +* PR `#8197 `_: Fix `isinstance` warning check test. (`stuartarchibald `_) +* PR `#8203 `_: pin llvmlite 0.39 for public CI builds (`esc `_) +* PR `#8205 `_: Pin llvmlite and NumPy on release branch.(`esc `_ `Siu Kwan Lam `_) +* PR `#8255 `_: CUDA: Make numba.cuda.tests.doc_examples.ffi a module to fix #8252 (`Graham Markall `_) +* PR `#8274 `_: Update version support table doc for 0.56. (`stuartarchibald `_) +* PR `#8275 `_: Update CHANGE_LOG for 0.56.0 final (`stuartarchibald `_) + +Authors: + +* `Andre Masella `_ +* `Benjamin Graham `_ +* `brandon-b-miller `_ +* `Brandon T. Willard `_ +* `Gagandeep Singh `_ +* `Dhruv Patel `_ +* `LI Da `_ +* `Todd A. Anderson `_ +* `Ethan Pronovost `_ +* `esc `_ +* `Tobias Sargeant `_ +* `Graham Markall `_ +* `Guilherme Leobas `_ +* `Zizheng Guo `_ +* `Hadia Ahmed `_ +* `idorrington `_ +* `Michael Wang `_ +* `Kaustubh `_ +* `Kevin Modzelewski `_ +* `luk-f-a `_ +* `luzpaz `_ +* `Kyle Martin `_ +* `Nightfurex `_ +* `Nick Riasanovsky `_ +* `Nopileos2 `_ +* `Ray Bell `_ +* `readthedocs-assistant `_ +* `Rishi Kulkarni `_ +* `Sterling Baird `_ +* `Siu Kwan Lam `_ +* `stuartarchibald `_ +* `Stepan Rakitin `_ +* `Michael Collison `_ +* `Tingkai Liu `_ + +Version 0.55.2 (25 May, 2022) +----------------------------- + +This is a maintenance release to support NumPy 1.22 and Apple M1. + +Pull-Requests: + +* PR `#8067 `_: Backport #8027: Support for NumPy 1.22 (`stuartarchibald `_) +* PR `#8069 `_: Install llvmlite 0.38 for Numba 0.55.* (`esc `_) +* PR `#8075 `_: update max NumPy for 0.55.2 (`esc `_) +* PR `#8078 `_: Backport #7804: update local references from master -> main (`esc `_) +* PR `#8082 `_: Backport #8080: fix windows failure due to timeout (`Siu Kwan Lam `_) +* PR `#8084 `_: Pin meta.yaml to llvmlite 0.38 series (`Siu Kwan Lam `_) +* PR `#8093 `_: Backport #7904: Support m1 (`esc `_) +* PR `#8094 `_: Backport #8052 Ensure pthread is linked in when building for ppc64le. (`Siu Kwan Lam `_) +* PR `#8098 `_: Backport #8097: Exclude libopenblas 0.3.20 on osx-arm64 (`esc `_) +* PR `#8100 `_: Backport #7786 for 0.55.2: Remove dependency on intel-openmp for OSX (`stuartarchibald `_) +* PR `#8103 `_: Backport #8102 to fix numpy requirements (`Siu Kwan Lam `_) +* PR `#8114 `_: Backport #8109 Pin TBB support with respect to incompatible 2021.6 API. (`stuartarchibald `_) + +Total PRs: 12 + +Authors: + +* `esc `_ +* `Siu Kwan Lam `_ +* `stuartarchibald `_ + +Total authors: 3 + +Version 0.55.1 (27 January, 2022) +--------------------------------- + +This is a bugfix release that closes all the remaining issues from the +accelerated release of 0.55.0 and also any release critical regressions +discovered since then. + +CUDA target deprecation notices: + +* Support for CUDA toolkits < 10.2 is deprecated and will be removed in Numba + 0.56. +* Support for devices with Compute Capability < 5.3 is deprecated and will be + removed in Numba 0.56. + + +Pull-Requests: + +* PR `#7755 `_: CUDA: Deprecate support for CC < 5.3 and CTK < 10.2 (`Graham Markall `_) +* PR `#7749 `_: Refactor threading layer priority tests to not use stdout/stderr (`stuartarchibald `_) +* PR `#7744 `_: Fix issues with locating/parsing source during DebugInfo emission. (`stuartarchibald `_) +* PR `#7712 `_: Fixing issue 7693 (`Graham Markall `_ `luk-f-a `_ `stuartarchibald `_) +* PR `#7729 `_: Handle Omitted/OmittedArgDataModel in DI generation. 
(`stuartarchibald `_)
+* PR `#7788 `_: Avoid issue with DI gen for arrayexprs. (`stuartarchibald `_)
+* PR `#7752 `_: Fix #7751: Use original filename for array exprs (`Graham Markall `_)
+* PR `#7748 `_: Fix #7713: Ensure _prng_random_hash return has correct bitwidth (`Graham Markall `_)
+* PR `#7745 `_: Fix the release year for Numba 0.55 change log entry. (`stuartarchibald `_)
+* PR `#7740 `_: CUDA Python 11.6 support (`Graham Markall `_)
+* PR `#7724 `_: Update URLs in error messages to refer to RTD docs. (`stuartarchibald `_)
+* PR `#7709 `_: CUDA: Fixes missing type annotation pass following #7704 (`stuartarchibald `_)
+* PR `#7704 `_: Move the type annotation pass to post legalization. (`stuartarchibald `_)
+* PR `#7619 `_: CUDA: Fix linking with PTX when compiling lazily (`Graham Markall `_)
+
+Authors:
+
+* `Graham Markall `_
+* `luk-f-a `_
+* `stuartarchibald `_
+
+Version 0.55.0 (13 January, 2022)
+---------------------------------
+
+This release includes a significant number of important dependency upgrades
+along with a number of new features and bug fixes.
+
+NOTE: Due to NumPy CVE-2021-33430 this release has bypassed the usual release
+process so as to promptly provide a Numba release that supports NumPy 1.21. A
+single release candidate (RC1) was made and a few issues were reported; these
+are summarised as follows and will be fixed in a subsequent 0.55.1 release.
+
+Known issues with this release:
+
+* Incorrect result copying array-typed field of structured array (`#7693 `_)
+* Two issues in DebugInfo generation (`#7726 `_, `#7730 `_)
+* Compilation failure for ``hash`` of floating point values on 32 bit Windows
+  when using Python 3.10 (`#7713 `_).
+
+Highlights of core dependency upgrades:
+
+* Support for Python 3.10
+* Support for NumPy 1.21
+
+Python language support enhancements:
+
+* Experimental support for ``isinstance``.
+
+NumPy features/enhancements:
+
+The following functions are now supported (several are exercised in the sketch
+below):
+
+* ``np.broadcast_to``
+* ``np.float_power``
+* ``np.cbrt``
+* ``np.logspace``
+* ``np.take_along_axis``
+* ``np.average``
+* ``np.argmin`` gains support for the ``axis`` kwarg.
+* ``np.ndarray.astype`` gains support for types expressed as literal strings.
+
+Highlights of core changes:
+
+* For users of the Numba extension API, Numba now has a new error handling mode
+  whereby it will treat all exceptions that do not inherit from
+  ``numba.errors.NumbaException`` as a "hard error" and immediately unwind the
+  stack. This makes it much easier to debug when writing ``@overload``\s etc
+  from the extension API as there's now no confusion between Python errors and
+  Numba errors. This feature can be enabled by setting the environment
+  variable: ``NUMBA_CAPTURED_ERRORS='new_style'``.
+* The threading layer selection priority can now be changed via the environment
+  variable ``NUMBA_THREADING_LAYER_PRIORITY``.
+
+Highlights of changes for the CUDA target:
+
+* Support for NVIDIA's CUDA Python bindings.
+* Support for 16-bit floating point numbers and their basic operations via
+  intrinsics.
+* Streams are provided in the ``Stream.async_done`` result, making it easier to
+  implement asynchronous work queues.
+* Support for structured types in device arrays, character sequences in NumPy
+  arrays, and some array operations on nested arrays.
+* Much underlying refactoring to align the CUDA target more closely with the
+  CPU target, which lays the groundwork for supporting the high level extension
+  API in CUDA in future releases.
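Several of the newly supported NumPy functions listed above compose inside a single jitted function; the following is an illustrative sketch only (the input values are arbitrary)::

    import numpy as np
    from numba import njit

    @njit
    def demo(a):
        b = np.broadcast_to(a, (3, a.size))  # newly supported in 0.55
        lo = np.logspace(0.0, 2.0, 5)        # newly supported in 0.55
        # np.argmin gains the axis kwarg in this release
        return np.argmin(b, axis=1), lo

    print(demo(np.array([3.0, 1.0, 2.0])))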
+
+Intel also kindly sponsored research and development into native debug (DWARF)
+support and handling per-function compilation flags:
+
+* Line number/location tracking is much improved.
+* Numba's internal representations of containers (e.g. tuples, arrays) are now
+  encoded as structures.
+* Numba's per-function compilation flags are encoded into the ABI field of the
+  mangled name of the function such that it's possible to compile and
+  differentiate between versions of the same function with different flags set.
+
+General deprecation notices:
+
+* There are no new general deprecations.
+
+CUDA target deprecation notices:
+
+* There are no new CUDA target deprecations.
+
+Version support/dependency changes:
+
+* Python 3.10 is supported.
+* NumPy version 1.21 is supported.
+* The minimum supported NumPy version is raised to 1.18 for runtime (compilation
+  however remains compatible with NumPy 1.11).
+
+Pull-Requests:
+
+* PR `#6075 `_: add np.float_power and np.cbrt (`Guilherme Leobas `_)
+* PR `#7047 `_: Support __hash__ for numpy.datetime64 (`Guilherme Leobas `_ `stuartarchibald `_)
+* PR `#7057 `_: Fix #7041: Add charseq registry to CUDA target (`Graham Markall `_ `stuartarchibald `_)
+* PR `#7082 `_: Added Add/Sub between datetime64 array and timedelta64 scalar (`Nick Riasanovsky `_ `stuartarchibald `_)
+* PR `#7119 `_: Add support for `np.broadcast_to` (`Guilherme Leobas `_)
+* PR `#7129 `_: Add support for axis keyword argument to np.argmin() (`Itamar Turner-Trauring `_)
+* PR `#7132 `_: gh #7131 Support for astype with literal strings (`Nick Riasanovsky `_)
+* PR `#7177 `_: Add debug infomation support based on datamodel. (`stuartarchibald `_)
+* PR `#7185 `_: Add get_impl_key as abstract method to types.Callable (`Alexey Kozlov `_)
+* PR `#7186 `_: Add support for np.logspace. (`Guoqiang QI `_)
+* PR `#7189 `_: CUDA: Skip IPC tests on ARM (`Graham Markall `_)
+* PR `#7190 `_: CUDA: Fix test_pinned on Jetson (`Graham Markall `_)
+* PR `#7192 `_: Fix missing import in array.argsort impl and add more tests. (`stuartarchibald `_)
+* PR `#7196 `_: Fixes for lineinfo emission (`stuartarchibald `_)
+* PR `#7197 `_: don't post to python announce on the first RC (`esc `_)
+* PR `#7202 `_: Initial implementation of np.take_along_axis (`Itamar Turner-Trauring `_)
+* PR `#7203 `_: remove duplicate changelog entries (`esc `_)
+* PR `#7216 `_: Update CHANGE_LOG for 0.54.0rc2 (`stuartarchibald `_)
+* PR `#7219 `_: bump llvmlite dependency to 0.38.0dev0 for Numba 0.55.0dev0 (`esc `_)
+* PR `#7220 `_: update release checklist post 0.54rc1+2 (`esc `_)
+* PR `#7221 `_: Show GPU UUIDs in cuda.detect() output (`Graham Markall `_)
+* PR `#7222 `_: CUDA: Warn when debug=True and opt=True (`Graham Markall `_)
+* PR `#7223 `_: Replace assertion errors on IR assumption violation (`Siu Kwan Lam `_)
+* PR `#7226 `_: Add support for structured types in Device Arrays (`Michael Collison `_)
+* PR `#7227 `_: FIX: Typo (`Srinath Kailasa `_)
+* PR `#7230 `_: PR #7171 bugfix only (`stuartarchibald `_ `Todd A. Anderson `_)
+* PR `#7234 `_: add THREADING_LAYER_PRIORITY & NUMBA_THREADING_LAYER_PRIORITY (`Kolen Cheung `_)
+* PR `#7235 `_: replace wordings of WIP by draft PR (`Kolen Cheung `_)
+* PR `#7236 `_: CUDA: Skip managed alloc tests on ARM (`Graham Markall `_)
+* PR `#7237 `_: fix a typo in a string (`Kolen Cheung `_)
+* PR `#7241 `_: Set aliasing information for inplace_binops.. (`Todd A.
Anderson `_) +* PR `#7242 `_: FIX: typo (`Srinath Kailasa `_) +* PR `#7244 `_: Implement partial literal propagation pass (support 'isinstance') (`Guilherme Leobas `_ `stuartarchibald `_) +* PR `#7247 `_: Solve memory leak to fix issue #7210 (`Siu Kwan Lam `_ `Graham Markall `_ `ysheffer `_) +* PR `#7251 `_: Fix #6001: typed.List ignores ctor arguments with JIT disabled (`Graham Markall `_) +* PR `#7256 `_: Fix link to the discourse forum in README (`Kenichi Maehashi `_) +* PR `#7257 `_: Use normal list constructor in List.__new__() (`Graham Markall `_) +* PR `#7260 `_: Support typed lists in `heapq` (`Graham Markall `_) +* PR `#7263 `_: Updated issue URL for error messages #7261 (`DeviousLab `_) +* PR `#7265 `_: Fix linspace to use np.divide and clamp to stop. (`stuartarchibald `_) +* PR `#7266 `_: CUDA: Skip multi-GPU copy test with peer access disabled (`Graham Markall `_) +* PR `#7267 `_: Fix #7258. Bug in SROA optimization (`Siu Kwan Lam `_) +* PR `#7271 `_: Update 3rd party license text. (`stuartarchibald `_) +* PR `#7272 `_: Allow annotations in njit-ed functions (`LunarLanding `_) +* PR `#7273 `_: Update CHANGE_LOG for 0.54.0rc3. (`stuartarchibald `_) +* PR `#7283 `_: Added NPM to Glossary and linked to mentions (`Nihal Shetty `_) +* PR `#7285 `_: CUDA: Fix OOB in test_kernel_arg (`Graham Markall `_) +* PR `#7288 `_: Handle cval as a np attr in stencil generation. (`stuartarchibald `_) +* PR `#7294 `_: Continuation of PR #7280, fixing lifetime of TBB task_scheduler_handle (`Sergey Pokhodenko `_ `stuartarchibald `_) +* PR `#7296 `_: Fix generator lowering not casting to the actual yielded type (`Siu Kwan Lam `_) +* PR `#7298 `_: Use CBC to pin GCC to 7 on most linux and 9 on aarch64. (`stuartarchibald `_) +* PR `#7304 `_: Continue PR#3655: add support for np.average (`Hadia Ahmed `_ `slnguyen `_) +* PR `#7307 `_: Prevent mutation of arrays in global tuples. (`stuartarchibald `_) +* PR `#7309 `_: Update MapConstraint to handle type coercion for typed.Dict correctly. (`stuartarchibald `_) +* PR `#7312 `_: Fix #7302. Workaround missing pthread problem on ppc64le (`Siu Kwan Lam `_) +* PR `#7315 `_: Link ELF obj as DSO for radare2 disassembly CFG (`stuartarchibald `_) +* PR `#7316 `_: Use float64 for consistent typing in heapq tests. (`stuartarchibald `_) +* PR `#7317 `_: In TBB tsh test switch os.fork for mp fork ctx (`stuartarchibald `_) +* PR `#7319 `_: Update CHANGE_LOG for 0.54.0 final. (`stuartarchibald `_) +* PR `#7329 `_: Improve documentation in reference to CUDA local memory (`Sterling Baird `_) +* PR `#7330 `_: Cuda matmul docs (`Sterling Baird `_) +* PR `#7340 `_: Add size_t and ssize_t types (`Bruce Merry `_) +* PR `#7345 `_: Add check for ipykernel file in IPython cache locator (`Sahil Gupta `_) +* PR `#7347 `_: fix:updated url for error report and feature rquest using issue template (`DEBARGHA SAHA `_) +* PR `#7349 `_: Allow arbitrary walk-back in reduction nodes to find inplace_binop. (`Todd A. Anderson `_) +* PR `#7359 `_: Extend support for nested arrays inside numpy records (`Graham Markall `_ `luk-f-a `_) +* PR `#7375 `_: CUDA: Run doctests as part of numba.cuda.tests and fix test_cg (`Graham Markall `_) +* PR `#7395 `_: Fix #7394 and #6550 & Added test & improved error message (`MegaIng `_) +* PR `#7397 `_: Add option to catch only Numba `numba.core.errors` derived exceptions. (`stuartarchibald `_) +* PR `#7398 `_: Add support for arrayanalysis of tuple args. (`Todd A. 
Anderson `_) +* PR `#7403 `_: Fix for issue 7402: implement missing numpy ufunc interface (`Guilherme Leobas `_) +* PR `#7404 `_: fix typo in literal_unroll docs (`esc `_) +* PR `#7419 `_: insert missing backtick in comment (`esc `_) +* PR `#7422 `_: Update Omitted Type to use Hashable Values as Keys for Caching (`Nick Riasanovsky `_) +* PR `#7429 `_: Update CHANGE_LOG for 0.54.1 (`stuartarchibald `_) +* PR `#7432 `_: add github release task to checklist (`esc `_) +* PR `#7440 `_: Refactor TargetConfig naming. (`stuartarchibald `_) +* PR `#7441 `_: Permit any string as a key in literalstrkeydict type. (`stuartarchibald `_) +* PR `#7442 `_: Add some diagnostics to SVML test failures. (`stuartarchibald `_) +* PR `#7443 `_: Refactor template selection logic for targets. (`stuartarchibald `_) +* PR `#7444 `_: use correct variable name in closure (`esc `_) +* PR `#7447 `_: cleanup Numba metadata (`esc `_) +* PR `#7453 `_: CUDA: Provide stream in async_done result (`Graham Markall `_) +* PR `#7456 `_: Fix invalid codegen for #7451. (`stuartarchibald `_) +* PR `#7457 `_: Factor out target registry selection logic (`stuartarchibald `_) +* PR `#7459 `_: Include compiler flags in symbol mangling (`Siu Kwan Lam `_) +* PR `#7460 `_: Add FP16 support for CUDA (`Michael Collison `_ `Graham Markall `_) +* PR `#7461 `_: Support NVIDIA's CUDA Python bindings (`Graham Markall `_) +* PR `#7465 `_: Update changelog for 0.54.1 release (`Siu Kwan Lam `_) +* PR `#7477 `_: Fix unicode operator.eq handling of Optional types. (`stuartarchibald `_) +* PR `#7479 `_: CUDA: Print format string and warn for > 32 print() args (`Graham Markall `_) +* PR `#7483 `_: NumPy 1.21 support (`Sebastian Berg `_ `stuartarchibald `_) +* PR `#7484 `_: Fixed outgoing link to nvidia documentation. (`Dhruv Patel `_) +* PR `#7493 `_: Consolidate TLS stacks in target configuration (`Siu Kwan Lam `_) +* PR `#7496 `_: CUDA: Use a single dispatcher class for all kinds of functions (`Graham Markall `_) +* PR `#7498 `_: refactor with-detection logic (`stuartarchibald `_ `esc `_) +* PR `#7499 `_: Add build scripts for CUDA testing on gpuCI (`Charles Blackmon-Luca `_ `Graham Markall `_) +* PR `#7500 `_: Update parallel.rst (`Julius Bier Kirkegaard `_) +* PR `#7506 `_: Enhance Flags mangling/demangling (`Siu Kwan Lam `_) +* PR `#7514 `_: Fixup cuda debuginfo emission for 7177 (`Siu Kwan Lam `_) +* PR `#7525 `_: Make sure` demangle()` returns `str` type. (`Siu Kwan Lam `_) +* PR `#7538 `_: Fix `@overload_glue` performance regression. (`stuartarchibald `_) +* PR `#7539 `_: Fix str decode issue from merge #7525/#7506 (`stuartarchibald `_) +* PR `#7546 `_: Fix handling of missing const key in LiteralStrKeyDict (`Siu Kwan Lam `_ `stuartarchibald `_) +* PR `#7547 `_: Remove 32bit linux scipy installation. (`stuartarchibald `_) +* PR `#7548 `_: Correct evaluation order in assert statement (`Graham Markall `_) +* PR `#7552 `_: Prepend the inlined function name to inlined variables. (`stuartarchibald `_) +* PR `#7557 `_: Python3.10 v2 (`stuartarchibald `_ `esc `_) +* PR `#7560 `_: Refactor with detection py310 (`Siu Kwan Lam `_ `esc `_) +* PR `#7561 `_: fix a typo (`Kolen Cheung `_) +* PR `#7567 `_: Update docs to note meetings are public. (`stuartarchibald `_) +* PR `#7570 `_: Update the docs and error message for errors when importing Numba. (`stuartarchibald `_) +* PR `#7580 `_: Fix #7507. 
catch `NotImplementedError` in `.get_function()` (`Siu Kwan Lam `_) +* PR `#7581 `_: Add support for casting from int enums (`Michael Collison `_) +* PR `#7583 `_: Make numba.types.Optional __str__ less verbose. (`stuartarchibald `_) +* PR `#7588 `_: Fix casting of start/stop in linspace (`stuartarchibald `_) +* PR `#7591 `_: Remove deprecations (`Graham Markall `_) +* PR `#7596 `_: Fix max symbol match length for r2 (`stuartarchibald `_) +* PR `#7597 `_: Update gdb docs for new DWARF enhancements. (`stuartarchibald `_) +* PR `#7603 `_: Fix list.insert() for refcounted values (`Ehsan Totoni `_) +* PR `#7605 `_: Fix TBB 2021 DSO names on OSX/Win and make TBB reporting consistent (`stuartarchibald `_) +* PR `#7606 `_: Ensure a prescribed threading layer can load in CI. (`stuartarchibald `_) +* PR `#7610 `_: Fix #7609. Type should not be mutated. (`Siu Kwan Lam `_) +* PR `#7618 `_: Fix the doc build: docutils 0.18 not compatible with pinned sphinx (`stuartarchibald `_) +* PR `#7626 `_: Fix issues with package dependencies. (`stuartarchibald `_ `esc `_) +* PR `#7627 `_: PR 7321 continued (`stuartarchibald `_ `Eric Wieser `_) +* PR `#7628 `_: Move to using windows-2019 images in Azure (`stuartarchibald `_) +* PR `#7632 `_: Capture output in CUDA matmul doctest (`Graham Markall `_) +* PR `#7636 `_: Copy prange loop header to after the parfor. (`Todd A. Anderson `_) +* PR `#7637 `_: Increase the timeout on the SVML tests for loaded machines. (`stuartarchibald `_) +* PR `#7645 `_: In debuginfo, do not add noinline to functions marked alwaysinline (`stuartarchibald `_) +* PR `#7650 `_: Move Azure builds to OSX 10.15 (`stuartarchibald `_ `esc `_ `Siu Kwan Lam `_) + +Authors: + +* `Bruce Merry `_ +* `Charles Blackmon-Luca `_ +* `DeviousLab `_ +* `Dhruv Patel `_ +* `Todd A. Anderson `_ +* `Ehsan Totoni `_ +* `Eric Wieser `_ +* `esc `_ +* `Graham Markall `_ +* `Guilherme Leobas `_ +* `Guoqiang QI `_ +* `Hadia Ahmed `_ +* `Kolen Cheung `_ +* `Itamar Turner-Trauring `_ +* `Julius Bier Kirkegaard `_ +* `Kenichi Maehashi `_ +* `Alexey Kozlov `_ +* `luk-f-a `_ +* `LunarLanding `_ +* `MegaIng `_ +* `Nihal Shetty `_ +* `Nick Riasanovsky `_ +* `Sergey Pokhodenko `_ +* `Sahil Gupta `_ +* `Sebastian Berg `_ +* `Sterling Baird `_ +* `Srinath Kailasa `_ +* `Siu Kwan Lam `_ +* `slnguyen `_ +* `DEBARGHA SAHA `_ +* `stuartarchibald `_ +* `Michael Collison `_ +* `ysheffer `_ + +Version 0.54.1 (7 October, 2021) +-------------------------------- + +This is a bugfix release for 0.54.0. It fixes a regression in structured array +type handling, a potential leak on initialization failure in the CUDA target, a +regression caused by Numba's vendored cloudpickle module resetting dynamic +classes and a few minor testing/infrastructure related problems. + +* PR `#7348 `_: test_inspect_cli: Decode exception with default (utf-8) codec (`Graham Markall `_) +* PR `#7360 `_: CUDA: Fix potential leaks when initialization fails (`Graham Markall `_) +* PR `#7386 `_: Ensure the NRT is initialized prior to use in external NRT tests. 
(`stuartarchibald `_)
+* PR `#7388 `_: Patch cloudpickle to not reset dynamic class each time it is unpickled (`Siu Kwan Lam `_)
+* PR `#7393 `_: skip azure pipeline test if file not present (`esc `_)
+* PR `#7428 `_: Fix regression #7355: cannot set items in structured array data types (`Siu Kwan Lam `_)
+
+Authors:
+
+* `esc `_
+* `Graham Markall `_
+* `Siu Kwan Lam `_
+* `stuartarchibald `_
+
+
+Version 0.54.0 (19 August, 2021)
+--------------------------------
+
+This release includes a significant number of new features, important
+refactoring, critical bug fixes and a number of dependency upgrades.
+
+Python language support enhancements:
+
+* Basic support for ``f-strings``.
+* ``dict`` comprehensions are now supported.
+* The ``sum`` built-in function is implemented.
+
+NumPy features/enhancements:
+
+The following functions are now supported:
+
+* ``np.clip``
+* ``np.iscomplex``
+* ``np.iscomplexobj``
+* ``np.isneginf``
+* ``np.isposinf``
+* ``np.isreal``
+* ``np.isrealobj``
+* ``np.isscalar``
+* ``np.random.dirichlet``
+* ``np.rot90``
+* ``np.swapaxes``
+
+Also ``np.argmax`` has gained support for the ``axis`` keyword argument and it's
+now possible to use ``0d`` NumPy arrays as scalars in ``__setitem__`` calls.
+
+Internal changes:
+
+* Debugging support through DWARF has been fixed and enhanced.
+* Numba now optimises the way in which locals are emitted to help reduce time
+  spent in LLVM's SROA passes.
+
+CUDA target changes:
+
+* Support for emitting ``lineinfo`` to be consumed by profiling tools such as
+  Nsight Compute
+* Improved fastmath code generation for various trig, division, and other
+  functions
+* Faster compilation using lazy addition of libdevice to compiled units
+* Support for IPC on Windows
+* Support for passing tuples to CUDA ufuncs
+* Performance warnings:
+
+  * When making implicit copies by calling a kernel on arrays in host memory
+  * When occupancy is poor due to kernel or ufunc/gufunc configuration
+
+* Support for implementing warp-aggregated intrinsics:
+
+  * Using support for more CUDA functions: ``activemask()``, ``lanemask_lt()``
+  * The ``ffs()`` function now works correctly!
+
+* Support for ``@overload`` in the CUDA target
+
+Intel kindly sponsored research and development that led to a number of new
+features and internal support changes:
+
+* Dispatchers can now be retargeted to a new target via a user-defined context
+  manager.
+* Support for custom NumPy array subclasses has been added (including an
+  overloadable memory allocator).
+* An inheritance based model for targets that permits targets to share
+  ``@overload`` implementations.
+* Per function compiler flags with inheritance behaviours.
+* The extension API now has support for overloading class methods via the
+  ``@overload_classmethod`` decorator.
+
+Deprecations:
+
+* The ``ROCm`` target (for AMD ROC GPUs) has been moved to an "unmaintained"
+  status and a separate repository stub has been created for it at:
+  https://github.com/numba/numba-rocm
+
+CUDA target deprecations and breaking changes:
+
+* Relaxed strides checking is now the default when computing the contiguity of
+  device arrays.
+* The ``inspect_ptx()`` method is deprecated. For use cases that obtain PTX for
+  further compilation outside of Numba, use ``compile_ptx()`` instead.
+* Eager compilation of device functions (the case when ``device=True`` and a
+  signature is provided) is deprecated.
+
+Version support/dependency changes:
+
+* LLVM 11 is now supported on all platforms via llvmlite.
+* The minimum supported Python version is raised to 3.7. +* NumPy version 1.20 is supported. +* The minimum supported NumPy version is raised to 1.17 for runtime (compilation, + however, remains compatible with NumPy 1.11). +* Vendor `cloudpickle `_ `v1.6.0` -- + now used for all ``pickle`` operations. +* TBB >= 2021 is now supported and all prior versions are unsupported (it is not + easily possible to maintain compatibility across the ABI-breaking changes). + +Pull-Requests: + +* PR `#4516 `_: Make setitem accept 0d np-arrays (`Guilherme Leobas `_) +* PR `#4610 `_: Implement np.is* functions (`Guilherme Leobas `_) +* PR `#5984 `_: Handle idx and size unification in wrap_index manually. (`Todd A. Anderson `_) +* PR `#6468 `_: Access ``replace_functions_map`` via PreParforPass instance (`Sergey Pokhodenko `_ `Reazul Hoque `_) +* PR `#6469 `_: Add address space in pointer type (`Sergey Pokhodenko `_ `Reazul Hoque `_) +* PR `#6608 `_: Support f-strings for common cases (`Ehsan Totoni `_) +* PR `#6619 `_: Improved fastmath code generation for trig, log, and exp/pow. (`Graham Markall `_ `Michael Collison `_) +* PR `#6681 `_: Explicitly catch ``with..as`` and raise error. (`stuartarchibald `_) +* PR `#6689 `_: Fix setup.py build command detection (`Hannes Pahl `_) +* PR `#6695 `_: Enable negative indexing for cuda atomic operations (`Ashutosh Varma `_) +* PR `#6696 `_: flake8: made more files flake8 compliant (`Ashutosh Varma `_) +* PR `#6698 `_: Fix #6697: Wrong dtype when using np.asarray on DeviceNDArray (`Ashutosh Varma `_) +* PR `#6700 `_: Add UUID to CUDA devices (`Graham Markall `_) +* PR `#6709 `_: Block matplotlib in test examples (`Graham Markall `_) +* PR `#6718 `_: doc: fix typo in rewrites.rst (extra iterates) (`Alexander-Makaryev `_) +* PR `#6720 `_: Faster compile (`Siu Kwan Lam `_) +* PR `#6730 `_: Fix Typeguard error (`Graham Markall `_) +* PR `#6731 `_: Add CUDA-specific pipeline (`Graham Markall `_) +* PR `#6735 `_: CUDA: Don't parse IR for modules with llvmlite (`Graham Markall `_) +* PR `#6736 `_: Support for dict comprehension (`stuartarchibald `_) +* PR `#6742 `_: Do not add overload function definitions to index. (`stuartarchibald `_) +* PR `#6750 `_: Bump to llvmlite 0.37 series (`Siu Kwan Lam `_) +* PR `#6751 `_: Suppress typeguard warnings that affect testing. (`Siu Kwan Lam `_) +* PR `#6753 `_: The check for internal types in RewriteArrayExprs (`Alexander-Makaryev `_) +* PR `#6755 `_: install llvmlite from numba/label/dev (`esc `_) +* PR `#6758 `_: patch to compile _devicearray.cpp with c++11 (`esc `_) +* PR `#6760 `_: Fix scheduler bug where it rounds to 0 divisions for a chunk. (`Todd A. Anderson `_) +* PR `#6762 `_: Glue wrappers to create @overload from split typing and lowering. (`stuartarchibald `_ `Siu Kwan Lam `_) +* PR `#6766 `_: Fix DeviceNDArray null shape issue (`Michael Collison `_) +* PR `#6769 `_: CUDA: Replace ``CachedPTX`` and ``CachedCUFunction`` with ``CUDACodeLibrary`` functionality (`Graham Markall `_) +* PR `#6776 `_: Fix issue with TBB interface causing warnings and parfors counting them (`stuartarchibald `_) +* PR `#6779 `_: Fix wrap_index type unification. (`Todd A.
Anderson `_) +* PR `#6786 `_: Fix gufunc kwargs support (`Siu Kwan Lam `_) +* PR `#6788 `_: Add support for fastmath 32-bit floating point divide (`Michael Collison `_) +* PR `#6789 `_: Fix warnings struct ref typeguard (`stuartarchibald `_ `Siu Kwan Lam `_ `esc `_) +* PR `#6794 `_: refactor and move create_temp_module into numba.tests.support (`Alexander-Makaryev `_) +* PR `#6795 `_: CUDA: Lazily add libdevice to compilation units (`Graham Markall `_) +* PR `#6798 `_: CUDA: Add optional Driver API argument logging (`Graham Markall `_) +* PR `#6799 `_: Print Numba and llvmlite versions in sysinfo (`Graham Markall `_) +* PR `#6800 `_: Make a common standard API for querying ufunc impl (`Sergey Pokhodenko `_ `Siu Kwan Lam `_) +* PR `#6801 `_: ParallelAccelerator no long will convert StaticSetItem to SetItem because record arrays require StaticSetItems. (`Todd A. Anderson `_) +* PR `#6802 `_: Add lineinfo flag to PTX and SASS compilation (`Graham Markall `_ `Max Katz `_) +* PR `#6804 `_: added runtime version to ``numba -s`` (`Kalyan `_) +* PR `#6808 `_: #3468 continued: Add support for ``np.clip`` (`Graham Markall `_ `Aaron Russell Voelker `_) +* PR `#6809 `_: #3203 additional info in cuda detect (`Kalyan `_) +* PR `#6810 `_: Fix tiny formatting error in ROC kernel docs (`Felix Divo `_) +* PR `#6811 `_: CUDA: Remove test of runtime being a supported version (`Graham Markall `_) +* PR `#6813 `_: Mostly CUDA: Replace llvmpy API usage with llvmlite APIs (`Graham Markall `_) +* PR `#6814 `_: Improving context stack (`stuartarchibald `_ `Siu Kwan Lam `_) +* PR `#6818 `_: CUDA: Support IPC on Windows (`Graham Markall `_) +* PR `#6822 `_: Add support for np.rot90 (`stuartarchibald `_ `Daniel Nagel `_) +* PR `#6829 `_: Fix accuracy of np.arange and np.linspace (`stuartarchibald `_) +* PR `#6830 `_: CUDA: Use relaxed strides checking to compute contiguity (`Graham Markall `_) +* PR `#6833 `_: Raise TypeError exception if numpy array is cast to scalar (`Michael Collison `_) +* PR `#6834 `_: Remove illegal "debug" kw argument (`Shaun Cutts `_) +* PR `#6836 `_: CUDA: Documentation updates (`Graham Markall `_) +* PR `#6840 `_: CUDA: Remove items deprecated in 0.53 + simulator test fixes (`Graham Markall `_) +* PR `#6841 `_: CUDA: Fix source location on kernel entry and enable breakpoints to be set on kernels by mangled name (`Graham Markall `_) +* PR `#6843 `_: cross-referenced Array type in docs (`Kalyan `_) +* PR `#6844 `_: CUDA: Remove NUMBAPRO env var warnings, envvars.py + other small tidy-ups (`Graham Markall `_) +* PR `#6848 `_: Ignore .ycm_extra_conf.py (`Graham Markall `_) +* PR `#6849 `_: Add __hash__ for IntEnum (`Hannes Pahl `_) +* PR `#6850 `_: Fix up more internal warnings (`stuartarchibald `_) +* PR `#6854 `_: PR 6096 continued (`stuartarchibald `_ `Ivan Butygin `_) +* PR `#6861 `_: updated reference to hsa with roc (`Kalyan `_) +* PR `#6867 `_: Update changelog for 0.53.1 (`esc `_) +* PR `#6869 `_: Implement builtin sum() (`stuartarchibald `_) +* PR `#6870 `_: Add support for dispatcher retargeting using with-context (`stuartarchibald `_ `Siu Kwan Lam `_) +* PR `#6871 `_: Force text-align:left when using Annotate (`Guilherme Leobas `_) +* PR `#6873 `_: docs: Update reference to @jitclass location (`David Nadlinger `_) +* PR `#6876 `_: Add trailing slashes to dir paths in CODEOWNERS (`Graham Markall `_) +* PR `#6877 `_: Add doc for recent target extension features (`Siu Kwan Lam `_) +* PR `#6878 `_: CUDA: Support passing tuples to ufuncs (`Graham Markall `_) +* PR `#6879 `_: CUDA: NumPy 
and string dtypes for local and shared arrays (`Graham Markall `_) +* PR `#6880 `_: Add attribute lower_extension to CPUContext (`Reazul Hoque `_) +* PR `#6883 `_: Add support of np.swapaxes #4074 (`Daniel Nagel `_) +* PR `#6885 `_: CUDA: Explicitly specify objmode + looplifting for jit functions in cuda.random (`Graham Markall `_) +* PR `#6886 `_: CUDA: Fix parallel testing for all testsuite submodules (`Graham Markall `_) +* PR `#6888 `_: Get overload to consider compiler flags in cache lookup (`Siu Kwan Lam `_) +* PR `#6889 `_: Address guvectorize too slow for cuda target (`Michael Collison `_) +* PR `#6890 `_: fixes #6884 (`Kalyan `_) +* PR `#6898 `_: Work on overloading by hardware target. (`stuartarchibald `_) +* PR `#6911 `_: CUDA: Add support for activemask(), lanemask_lt(), and nanosleep() (`Graham Markall `_) +* PR `#6912 `_: Prevent use of varargs in closure calls. (`stuartarchibald `_) +* PR `#6913 `_: Add runtests option to gitdiff on the common ancestor (`Siu Kwan Lam `_) +* PR `#6915 `_: Update _Intrinsic for sphinx to capture the inner docstring (`Guilherme Leobas `_) +* PR `#6917 `_: Add type conversion for StringLiteral to unicode_type and test. (`stuartarchibald `_) +* PR `#6918 `_: Start section on commonly encounted unsupported parfors code. (`stuartarchibald `_) +* PR `#6924 `_: CUDA: Fix ``ffs`` (`Graham Markall `_) +* PR `#6928 `_: Add support for axis keyword arg to numpy.argmax() (`stuartarchibald `_ `Itamar Turner-Trauring `_) +* PR `#6929 `_: Fix CI failure when gitpython is missing. (`Siu Kwan Lam `_) +* PR `#6935 `_: fixes broken link in numba-runtime.rst (`Kalyan `_) +* PR `#6936 `_: CUDA: Implement support for PTDS globally (`Graham Markall `_) +* PR `#6937 `_: Fix memory leak in bytes boxing (`stuartarchibald `_) +* PR `#6940 `_: Fix function resolution for intrinsics across hardware. (`stuartarchibald `_) +* PR `#6941 `_: ABC the target descriptor and make consistent throughout. (`stuartarchibald `_) +* PR `#6944 `_: CUDA: Support for ``@overload`` (`Graham Markall `_) +* PR `#6945 `_: Fix issue with array analysis tests needing scipy. (`stuartarchibald `_) +* PR `#6948 `_: Refactor registry init. (`stuartarchibald `_ `Graham Markall `_ `Siu Kwan Lam `_) +* PR `#6953 `_: CUDA: Fix and deprecate ``inspect_ptx()``, fix NVVM option setup for device functions (`Graham Markall `_) +* PR `#6958 `_: Inconsistent behavior of reshape between numpy and numba/cuda device array (`Lauren Arnett `_) +* PR `#6961 `_: Update overload glue to deal with typing_key (`stuartarchibald `_) +* PR `#6964 `_: Move minimum supported Python version to 3.7 (`stuartarchibald `_) +* PR `#6966 `_: Fix issue with TBB test detecting forks from incorrect state. (`stuartarchibald `_) +* PR `#6971 `_: Fix CUDA ``@intrinsic`` use (`stuartarchibald `_) +* PR `#6977 `_: Vendor cloudpickle (`Siu Kwan Lam `_) +* PR `#6978 `_: Implement operator.contains for empty Tuples (`Brandon T. Willard `_) +* PR `#6981 `_: Fix LLVM IR parsing error on use of ``np.bool_`` in globals (`stuartarchibald `_) +* PR `#6983 `_: Support Optional types in ufuncs. (`stuartarchibald `_) +* PR `#6985 `_: Implement static set/get items on records with integer index (`stuartarchibald `_) +* PR `#6986 `_: document release checklist (`esc `_) +* PR `#6989 `_: update threading docs for function loading (`esc `_) +* PR `#6990 `_: Refactor hardware extension API to refer to "target" instead. (`stuartarchibald `_) +* PR `#6991 `_: Move ROCm target status to "unmaintained". 
(`stuartarchibald `_) +* PR `#6995 `_: Resolve issue where nan was being assigned to int type numpy array (`Michael Collison `_) +* PR `#6996 `_: Add constant lowering support for `SliceType`s (`Brandon T. Willard `_) +* PR `#6997 `_: CUDA: Remove catch of NotImplementedError in target.py (`Graham Markall `_) +* PR `#6999 `_: Fix errors introduced by the cloudpickle patch (`Siu Kwan Lam `_) +* PR `#7003 `_: More mainline fixes (`stuartarchibald `_ `Graham Markall `_ `Siu Kwan Lam `_) +* PR `#7004 `_: Test extending the CUDA target (`Graham Markall `_) +* PR `#7007 `_: Made stencil compilation not fail for arrays of conflicting types. (`MegaIng `_) +* PR `#7008 `_: Added support for np.random.dirichlet with all size arguments (`Rishi Kulkarni `_) +* PR `#7016 `_: Docs: Add DALI to list of CAI-supporting libraries (`Graham Markall `_) +* PR `#7018 `_: Remove cu{blas,sparse,rand,fft} from library checks (`Graham Markall `_) +* PR `#7019 `_: Support NumPy 1.20 (`stuartarchibald `_) +* PR `#7020 `_: Fix #7017. Adds util class PickleCallableByPath (`Siu Kwan Lam `_) +* PR `#7024 `_: fixed llvmir usage in create_module method (`stuartarchibald `_ `Kalyan `_) +* PR `#7027 `_: Fix nrt debug print (`MegaIng `_) +* PR `#7031 `_: Fix inliner to use a single scope for all blocks (`Alexey Kozlov `_ `Siu Kwan Lam `_) +* PR `#7040 `_: Add Github action to mark issues as stale (`Graham Markall `_) +* PR `#7044 `_: Fixes for LLVM 11 (`stuartarchibald `_) +* PR `#7049 `_: Make NumPy random module use @overload_glue (`stuartarchibald `_) +* PR `#7050 `_: Add overload_classmethod (`Siu Kwan Lam `_) +* PR `#7052 `_: Fix string support in CUDA target (`Graham Markall `_) +* PR `#7056 `_: Change prange conversion approach to reuse header block. (`Todd A. Anderson `_) +* PR `#7061 `_: Add ndarray allocator classmethod (`stuartarchibald `_ `Siu Kwan Lam `_) +* PR `#7064 `_: Testhound/host array performance warning (`Michael Collison `_) +* PR `#7066 `_: Fix #7065: Add expected exception messages for NumPy 1.20 to tests (`Graham Markall `_) +* PR `#7068 `_: Enhancing docs about PRNG seeding (`Jérome Eertmans `_) +* PR `#7070 `_: Improve the issue templates and pull request template. (`Guoqiang QI `_) +* PR `#7080 `_: Fix ``__eq__`` for Flags and cpu_options classes (`Siu Kwan Lam `_) +* PR `#7087 `_: Add note to docs about zero-initialization of variables. (`stuartarchibald `_) +* PR `#7088 `_: Initialize NUMBA_DEFAULT_NUM_THREADS with a batch scheduler aware value (`Thomas VINCENT `_) +* PR `#7100 `_: Replace deprecated call to cuDeviceComputeCapability (`Graham Markall `_) +* PR `#7113 `_: Temporarily disable debug env export. (`stuartarchibald `_) +* PR `#7114 `_: CUDA: Deprecate eager compilation of device functions (`Graham Markall `_) +* PR `#7116 `_: Fix various issues with dwarf emission: (`stuartarchibald `_ `vlad-perevezentsev `_) +* PR `#7118 `_: Remove print to stdout (`stuartarchibald `_) +* PR `#7121 `_: Continue work on numpy subclasses (`Todd A. Anderson `_ `Siu Kwan Lam `_) +* PR `#7122 `_: Rtd/sphinx compat (`esc `_) +* PR `#7134 `_: Move minimum LLVM version to 11. (`stuartarchibald `_) +* PR `#7137 `_: skip pycc test on Python 3.7 + macOS because of distutils issue (`esc `_) +* PR `#7138 `_: Update the Azure default linux image to Ubuntu 18.04 (`stuartarchibald `_) +* PR `#7141 `_: Require llvmlite 0.37 as minimum supported. 
(`stuartarchibald `_) +* PR `#7143 `_: Update version checks in __init__ for np 1.17 (`stuartarchibald `_) +* PR `#7145 `_: Fix mainline (`stuartarchibald `_) +* PR `#7146 `_: Fix ``inline_closurecall`` may not be imported (`Siu Kwan Lam `_) +* PR `#7147 `_: Revert "Workaround gitpython 3.1.18 dependency issue" (`stuartarchibald `_) +* PR `#7149 `_: Fix issue in bytecode analysis where target and next are same. (`stuartarchibald `_) +* PR `#7152 `_: Fix iterators in CUDA (`Graham Markall `_) +* PR `#7156 `_: Fix ``ir_utils._max_label`` being updated incorrectly (`Siu Kwan Lam `_) +* PR `#7160 `_: Split parfors tests (`stuartarchibald `_) +* PR `#7161 `_: Update README for 0.54 (`stuartarchibald `_) +* PR `#7162 `_: CUDA: Fix linkage of device functions when compiling for debug (`Graham Markall `_) +* PR `#7163 `_: Split legalization pass to consider IR and features separately. (`stuartarchibald `_) +* PR `#7165 `_: Fix use of np.clip where out is not provided. (`stuartarchibald `_) +* PR `#7189 `_: CUDA: Skip IPC tests on ARM (`Graham Markall `_) +* PR `#7190 `_: CUDA: Fix test_pinned on Jetson (`Graham Markall `_) +* PR `#7192 `_: Fix missing import in array.argsort impl and add more tests. (`stuartarchibald `_) +* PR `#7196 `_: Fixes for lineinfo emission. (`stuartarchibald `_) +* PR `#7203 `_: remove duplicate changelog entries (`esc `_) +* PR `#7209 `_: Clamp numpy (`esc `_) +* PR `#7216 `_: Update CHANGE_LOG for 0.54.0rc2. (`stuartarchibald `_) +* PR `#7223 `_: Replace assertion errors on IR assumption violation (`Siu Kwan Lam `_) +* PR `#7230 `_: PR #7171 bugfix only (`Todd A. Anderson `_ `stuartarchibald `_) +* PR `#7236 `_: CUDA: Skip managed alloc tests on ARM (`Graham Markall `_) +* PR `#7267 `_: Fix #7258. Bug in SROA optimization (`Siu Kwan Lam `_) +* PR `#7271 `_: Update 3rd party license text. (`stuartarchibald `_) +* PR `#7272 `_: Allow annotations in njit-ed functions (`LunarLanding `_) +* PR `#7273 `_: Update CHANGE_LOG for 0.54.0rc3. (`stuartarchibald `_) +* PR `#7285 `_: CUDA: Fix OOB in test_kernel_arg (`Graham Markall `_) +* PR `#7294 `_: Continuation of PR #7280, fixing lifetime of TBB task_scheduler_handle (`Sergey Pokhodenko `_ `stuartarchibald `_) +* PR `#7298 `_: Use CBC to pin GCC to 7 on most linux and 9 on aarch64. (`stuartarchibald `_) +* PR `#7312 `_: Fix #7302. Workaround missing pthread problem on ppc64le (`Siu Kwan Lam `_) +* PR `#7317 `_: In TBB tsh test switch os.fork for mp fork ctx (`stuartarchibald `_) +* PR `#7319 `_: Update CHANGE_LOG for 0.54.0 final. (`stuartarchibald `_) + +Authors: + +* `Alexander-Makaryev `_ +* `Todd A. Anderson `_ +* `Hannes Pahl `_ +* `Ivan Butygin `_ +* `MegaIng `_ +* `Sergey Pokhodenko `_ +* `Aaron Russell Voelker `_ +* `Ashutosh Varma `_ +* `Ben Greiner `_ +* `Brandon T. Willard `_ +* `Daniel Nagel `_ +* `David Nadlinger `_ +* `Ehsan Totoni `_ +* `esc `_ +* `Felix Divo `_ +* `Graham Markall `_ +* `Guilherme Leobas `_ +* `Guoqiang QI `_ +* `Itamar Turner-Trauring `_ +* `Jérome Eertmans `_ +* `Alexey Kozlov `_ +* `Lauren Arnett `_ +* `LunarLanding `_ +* `Max Katz `_ +* `Kalyan `_ +* `Reazul Hoque `_ +* `Rishi Kulkarni `_ +* `Shaun Cutts `_ +* `Siu Kwan Lam `_ +* `stuartarchibald `_ +* `Thomas VINCENT `_ +* `Michael Collison `_ +* `vlad-perevezentsev `_ + + +Version 0.53.1 (25 March, 2021) +------------------------------- + +This is a bugfix release for 0.53.0. 
It contains the following four +pull-requests which fix two critical regressions and two build failures +reported by the openSUSE team: + +* PR #6826 Fix regression on gufunc serialization +* PR #6828 Fix regression in CUDA: Set stream in mapped and managed array + device_setup +* PR #6837 Ignore warnings from packaging module when testing import behaviour. +* PR #6851 set non-reported llvm timing values to 0.0 + +Authors: + +* Ben Greiner +* Graham Markall +* Siu Kwan Lam +* Stuart Archibald + +Version 0.53.0 (11 March, 2021) +------------------------------- + +This release continues to add new features, bug fixes and stability improvements +to Numba. + +Highlights of core changes: + +* Support for Python 3.9 (Stuart Archibald). +* Function sub-typing (Lucio Fernandez-Arjona). +* Initial support for dynamic ``gufuncs`` (i.e. from ``@guvectorize``) + (Guilherme Leobas). +* Parallel Accelerator (``@njit(parallel=True)``) now supports Fortran-ordered + arrays (Todd A. Anderson and Siu Kwan Lam). + +Intel also kindly sponsored research and development that led to two new +features: + + * Exposing LLVM compilation pass timings for diagnostic purposes (Siu Kwan + Lam). + * An event system for broadcasting compiler events (Siu Kwan Lam). + +Highlights of changes for the CUDA target: + +* CUDA 11.2 onwards (versions of the toolkit using NVVM IR 1.6 / LLVM IR 7.0.1) + are now supported (Graham Markall). +* A fast cube root function is added (Michael Collison). +* Support for atomic ``xor``, increment, decrement and exchange is added, and + compare-and-swap is extended to support 64-bit integers (Michael Collison). +* Addition of ``cuda.is_supported_version()`` to check if the CUDA runtime + version is supported (Graham Markall). +* The CUDA dispatcher now shares infrastructure with the CPU dispatcher, + improving launch times for lazily-compiled kernels (Graham Markall). +* The CUDA Array Interface is updated to version 3, with support for streams + added (Graham Markall). +* Tuples and ``namedtuples`` can now be passed to kernels (Graham Markall). +* Initial support for Cooperative Groups is added, with support for Grid Groups + and Grid Sync (Graham Markall and Nick White). +* Support for ``math.log2`` and ``math.remainder`` is added (Guilherme Leobas). + +General deprecation notices: + +* There are no new general deprecations. + +CUDA target deprecation notices: + +* CUDA support on macOS is deprecated with this release (it still works, it is + just unsupported). +* The ``argtypes``, ``restypes``, and ``bind`` keyword arguments to the + ``cuda.jit`` decorator, deprecated since 0.51.0, are removed. +* The ``Device.COMPUTE_CAPABILITY`` property, deprecated since 2014, has been + removed (use ``compute_capability`` instead). +* The ``to_host`` method of device arrays is removed (use ``copy_to_host`` + instead). + +General Enhancements: + +* PR #4769: objmode complex type spelling (Siu Kwan Lam) +* PR #5579: Function subtyping (Lucio Fernandez-Arjona) +* PR #5659: Add support for parfors creating 'F'ortran layout Numpy arrays. + (Todd A. Anderson) +* PR #5936: Improve array analysis for user-defined data types. (Todd A.
+ Anderson) +* PR #5938: Initial support for dynamic gufuncs (Guilherme Leobas) +* PR #5958: Making typed.List a typing Generic (Lucio Fernandez-Arjona) +* PR #6334: Support attribute access from other modules (Farah Hariri) +* PR #6373: Allow Dispatchers to be cached (Eric Wieser) +* PR #6519: Avoid unnecessary ir.Del generation and removal (Ehsan Totoni) +* PR #6545: Refactoring ParforDiagnostics (Elena Totmenina) +* PR #6560: Add LLVM pass timer (Siu Kwan Lam) +* PR #6573: Improve ``__str__`` for typed.List when invoked from IPython shell + (Amin Sadeghi) +* PR #6575: Avoid temp variable assignments (Ehsan Totoni) +* PR #6578: Add support for numpy ``intersect1d`` and basic test cases + (``@caljrobe``) +* PR #6579: Python 3.9 support. (Stuart Archibald) +* PR #6580: Store partial typing errors in compiler state (Ehsan Totoni) +* PR #6626: A simple event system to broadcast compiler events (Siu Kwan Lam) +* PR #6635: Try to resolve dynamic getitems as static post unroll transform. + (Stuart Archibald) +* PR #6636: Adds llvm_lock event (Siu Kwan Lam) +* PR #6664: Adds tests for PR 5659 (Siu Kwan Lam) +* PR #6680: Allow getattr to work in objmode output type spec (Siu Kwan Lam) + +Fixes: + +* PR #6176: Remove references to deprecated numpy globals (Eric Wieser) +* PR #6374: Use Python 3 style OSError handling (Eric Wieser) +* PR #6402: Fix ``typed.Dict`` and ``typed.List`` crashing on parametrized types + (Andreas Sodeur) +* PR #6403: Add ``types.ListType.key`` (Andreas Sodeur) +* PR #6410: Fixes issue #6386 (Danny Weitekamp) +* PR #6425: Fix unicode join for issue #6405 (Teugea Ioan-Teodor) +* PR #6437: Don't pass reduction variables known in an outer parfor to inner + parfors when analyzing reductions. (Todd A. Anderson) +* PR #6453: Keep original variable names in metadata to improve diagnostics + (Ehsan Totoni) +* PR #6454: FIX: Fixes for literals (Eric Larson) +* PR #6463: Bump llvmlite to 0.36 series (Stuart Archibald) +* PR #6466: Remove the misspelling of finalize_dynamic_globals (Sergey + Pokhodenko) +* PR #6489: Improve the error message for unsupported Buffer in Buffer + situation. (Stuart Archibald) +* PR #6503: Add test to ensure Numba imports without warnings. (Stuart + Archibald) +* PR #6508: Defer requirements to setup.py (Siu Kwan Lam) +* PR #6521: Skip annotated jitclass test if typeguard is running. (Stuart + Archibald) +* PR #6524: Fix typed.List return value (Lucio Fernandez-Arjona) +* PR #6562: Correcting typo in numba sysinfo output (Nick Sutcliffe) +* PR #6574: Run parfor fusion if 2 or more parfors (Ehsan Totoni) +* PR #6582: Fix typed dict error with uninitialized padding bytes (Siu Kwan + Lam) +* PR #6584: Remove jitclass from ``__init__`` ``__all__``. (Stuart Archibald) +* PR #6586: Run closure inlining ahead of branch pruning in case of nonlocal + (Stuart Archibald) +* PR #6591: Fix inlineasm test failure. (Siu Kwan Lam) +* PR #6622: Fix 6534, handle unpack of assign-like tuples. (Stuart Archibald) +* PR #6652: Simplify PR-6334 (Siu Kwan Lam) +* PR #6653: Fix get_numba_envvar (Siu Kwan Lam) +* PR #6654: Fix #6632 support alternative dtype string spellings (Stuart + Archibald) +* PR #6685: Add Python 3.9 to classifiers. 
(Stuart Archibald) +* PR #6693: patch to compile _devicearray.cpp with c++11 (Valentin Haenel) +* PR #6716: Consider assignment lhs live if used in rhs (Fixes #6715) (Ehsan + Totoni) +* PR #6727: Avoid errors in array analysis for global tuples with non-int + (Ehsan Totoni) +* PR #6733: Fix segfault and errors in #6668 (Siu Kwan Lam) +* PR #6741: Enable SSA in IR inliner (Ehsan Totoni) +* PR #6763: use an alternative constraint for the conda packages (Valentin + Haenel) +* PR #6786: Fix gufunc kwargs support (Siu Kwan Lam) + +CUDA Enhancements/Fixes: + +* PR #5162: Specify synchronization semantics of CUDA Array Interface (Graham + Markall) +* PR #6245: CUDA Cooperative grid groups (Graham Markall and Nick White) +* PR #6333: Remove dead ``_Kernel.__call__`` (Graham Markall) +* PR #6343: CUDA: Add support for passing tuples and namedtuples to kernels + (Graham Markall) +* PR #6349: Refactor Dispatcher to remove unnecessary indirection (Graham + Markall) +* PR #6358: Add log2 and remainder implementations for cuda (Guilherme Leobas) +* PR #6376: Added a fixed seed in test_atomics.py for issue #6370 (Teugea + Ioan-Teodor) +* PR #6377: CUDA: Fix various issues in test suite (Graham Markall) +* PR #6409: Implement cuda atomic xor (Michael Collison) +* PR #6422: CUDA: Remove deprecated items, expect CUDA 11.1 (Graham Markall) +* PR #6427: Remove duplicate repeated definition of gufunc (Amit Kumar) +* PR #6432: CUDA: Use ``_dispatcher.Dispatcher`` as base Dispatcher class + (Graham Markall) +* PR #6447: CUDA: Add get_regs_per_thread method to Dispatcher (Graham Markall) +* PR #6499: CUDA atomic increment, decrement, exchange and compare and swap + (Michael Collison) +* PR #6510: CUDA: Make device array assignment synchronous where necessary + (Graham Markall) +* PR #6517: CUDA: Add NVVM test of all 8-bit characters (Graham Markall) +* PR #6567: Refactor llvm replacement code into separate function (Michael + Collison) +* PR #6642: Testhound/cuda cuberoot (Michael Collison) +* PR #6661: CUDA: Support NVVM70 / CUDA 11.2 (Graham Markall) +* PR #6663: Fix error caused by missing "-static" libraries defined for some + platforms (Siu Kwan Lam) +* PR #6666: CUDA: Add a function to query whether the runtime version is + supported. (Graham Markall) +* PR #6725: CUDA: Fix compile to PTX with debug for CUDA 11.2 (Graham Markall) + +Documentation Updates: + +* PR #5740: Add FAQ entry on how to create a MWR. (Stuart Archibald) +* PR #6346: DOC: add where to get dev builds from to FAQ (Eyal Trabelsi) +* PR #6418: docs: use https for homepage (``@imba-tjd``) +* PR #6430: CUDA docs: Add RNG example with 3D grid and strided loops (Graham + Markall) +* PR #6436: docs: remove typo in Deprecation Notices (Thibault Ballier) +* PR #6440: Add note about performance of typed containers from the interpreter. + (Stuart Archibald) +* PR #6457: Link to read the docs instead of numba homepage (Hannes Pahl) +* PR #6470: Adding PyCon Sweden 2020 talk on numba (Ankit Mahato) +* PR #6472: Document ``numba.extending.is_jitted`` (Stuart Archibald) +* PR #6495: Fix typo in literal list docs. (Stuart Archibald) +* PR #6501: Add doc entry on Numba's limited resources and how to help. (Stuart + Archibald) +* PR #6502: Add CODEOWNERS file. (Stuart Archibald) +* PR #6531: Update canonical URL. 
(Stuart Archibald) +* PR #6544: Minor typo / grammar fixes to 5 minute guide (Ollin Boer Bohan) +* PR #6599: docs: fix simple typo, consevatively -> conservatively (Tim Gates) +* PR #6609: Recommend miniforge instead of c4aarch64 (Isuru Fernando) +* PR #6671: Update environment creation example to python 3.8 (Lucio + Fernandez-Arjona) +* PR #6676: Update hardware and software versions in various docs. (Stuart + Archibald) +* PR #6682: Update deprecation notices for 0.53 (Stuart Archibald) + +CI/Infrastructure Updates: + +* PR #6458: Enable typeguard in CI (Siu Kwan Lam) +* PR #6500: Update bug and feature request templates. (Stuart Archibald) +* PR #6516: Fix RTD build by using conda. (Stuart Archibald) +* PR #6587: Add zenodo badge (Siu Kwan Lam) + +Authors: + +* Amin Sadeghi +* Amit Kumar +* Andreas Sodeur +* Ankit Mahato +* Chris Barnes +* Danny Weitekamp +* Ehsan Totoni (core dev) +* Eric Larson +* Eric Wieser +* Eyal Trabelsi +* Farah Hariri +* Graham Markall +* Guilherme Leobas +* Hannes Pahl +* Isuru Fernando +* Lucio Fernandez-Arjona +* Michael Collison +* Nick Sutcliffe +* Nick White +* Ollin Boer Bohan +* Sergey Pokhodenko +* Siu Kwan Lam (core dev) +* Stuart Archibald (core dev) +* Teugea Ioan-Teodor +* Thibault Ballier +* Tim Gates +* Todd A. Anderson (core dev) +* Valentin Haenel (core dev) +* ``@caljrobe`` +* ``@imba-tjd`` + + +Version 0.52.0 (30 November, 2020) +---------------------------------- + +This release focuses on performance improvements, but also adds some new +features and contains numerous bug fixes and stability improvements. + +Highlights of core performance improvements include: + +* Intel kindly sponsored research and development into producing a new reference + count pruning pass. This pass operates at the LLVM level and can prune a + number of common reference counting patterns. This will improve performance + for two primary reasons: + + * There will be less pressure on the atomic locks used to do the reference + counting. + * Removal of reference counting operations permits more inlining and the + optimisation passes can in general do more with what is present. + + (Siu Kwan Lam). +* Intel also sponsored work to improve the performance of the + ``numba.typed.List`` container, particularly in the case of ``__getitem__`` + and iteration (Stuart Archibald). +* Superword-level parallelism vectorization is now switched on and the + optimisation pipeline has been lightly analysed and tuned so as to be able to + vectorize more and more often (Stuart Archibald). + +Highlights of core feature changes include: + +* The ``inspect_cfg`` method on the JIT dispatcher object has been + significantly enhanced and now includes highlighted output and interleaved + line markers and Python source (Stuart Archibald). +* The BSD operating system is now unofficially supported (Stuart Archibald). +* Numerous features/functionality improvements to NumPy support, including + support for: + + * ``np.asfarray`` (Guilherme Leobas) + * "subtyping" in record arrays (Lucio Fernandez-Arjona) + * ``np.split`` and ``np.array_split`` (Isaac Virshup) + * ``operator.contains`` with ``ndarray`` (``@mugoh``). + * ``np.asarray_chkfinite`` (Rishabh Varshney). + * NumPy 1.19 (Stuart Archibald). + * the ``ndarray`` allocators, ``empty``, ``ones`` and ``zeros``, accepting a + ``dtype`` specified as a string literal (Stuart Archibald). + +* Booleans are now supported as literal types (Alexey Kozlov). +* On the CUDA target: + + * CUDA 9.0 is now the minimum supported version (Graham Markall). 
+ * Support for Unified Memory has been added (Max Katz). + * Kernel launch overhead is reduced (Graham Markall). + * Cudasim support for mapped array, memcopies and memset has been added (Mike + Williams). + * Access has been wired in to all libdevice functions (Graham Markall). + * Additional CUDA atomic operations have been added (Michael Collison). + * Additional math library functions (``frexp``, ``ldexp``, ``isfinite``) + (Zhihao Yuan). + * Support for ``power`` on complex numbers (Graham Markall). + +Deprecations to note: + +There are no new deprecations. However, note that "compatibility" mode, which +was added some 40 releases ago to help transition from 0.11 to 0.12+, has been +removed! Also, the shim to permit the import of ``jitclass`` from Numba's top +level namespace has now been removed as per the deprecation schedule. + +General Enhancements: + +* PR #5418: Add np.asfarray impl (Guilherme Leobas) +* PR #5560: Record subtyping (Lucio Fernandez-Arjona) +* PR #5609: Jitclass Infer Spec from Type Annotations (Ethan Pronovost) +* PR #5699: Implement np.split and np.array_split (Isaac Virshup) +* PR #6015: Adding BooleanLiteral type (Alexey Kozlov) +* PR #6027: Support operators inlining in InlineOverloads (Alexey Kozlov) +* PR #6038: Closes #6037, fixing FreeBSD compilation (László Károlyi) +* PR #6086: Add more accessible version information (Stuart Archibald) +* PR #6157: Add pipeline_class argument to @cfunc as supported by @jit. (Arthur + Peters) +* PR #6262: Support dtype from str literal. (Stuart Archibald) +* PR #6271: Support ``ndarray`` contains (``@mugoh``) +* PR #6295: Enhance inspect_cfg (Stuart Archibald) +* PR #6304: Support NumPy 1.19 (Stuart Archibald) +* PR #6309: Add suitable file search path for BSDs. (Stuart Archibald) +* PR #6341: Re roll 6279 (Rishabh Varshney and Valentin Haenel) + +Performance Enhancements: + +* PR #6145: Patch to fingerprint namedtuples. (Stuart Archibald) +* PR #6202: Speed up str(int) (Stuart Archibald) +* PR #6261: Add np.ndarray.ptp() support. (Stuart Archibald) +* PR #6266: Use custom LLVM refcount pruning pass (Siu Kwan Lam) +* PR #6275: Switch on SLP vectorize. (Stuart Archibald) +* PR #6278: Improve typed list performance. (Stuart Archibald) +* PR #6335: Split optimisation passes. (Stuart Archibald) +* PR #6455: Fix refprune on obfuscated refs and stabilize optimisation WRT + wrappers. (Stuart Archibald) + +Fixes: + +* PR #5639: Make UnicodeType inherit from Hashable (Stuart Archibald) +* PR #6006: Resolves incorrectly hoisted list in parfor. (Todd A. Anderson) +* PR #6126: fix version_info if version can not be determined (Valentin Haenel) +* PR #6137: Remove references to Python 2's long (Eric Wieser) +* PR #6139: Use direct syntax instead of the ``add_metaclass`` decorator (Eric + Wieser) +* PR #6140: Replace calls to utils.iteritems(d) with d.items() (Eric Wieser) +* PR #6141: Fix #6130 objmode cache segfault (Siu Kwan Lam) +* PR #6156: Remove callers of ``reraise`` in favor of using ``with_traceback`` + directly (Eric Wieser) +* PR #6162: Move charseq support out of init (Stuart Archibald) +* PR #6165: #5425 continued (Amos Bird and Stuart Archibald) +* PR #6166: Remove Python 2 compatibility from numba.core.utils (Eric Wieser) +* PR #6185: Better error message on NotDefinedError (Luiz Almeida) +* PR #6194: Remove recursion from traverse_types (Radu Popovici) +* PR #6200: Workaround #5973 (Stuart Archibald) +* PR #6203: Make find_callname only lookup functions that are likely part of + NumPy. 
(Stuart Archibald) +* PR #6204: Fix unicode kind selection for getitem. (Stuart Archibald) +* PR #6206: Build all extension modules with -g -Wall -Werror on Linux x86, + provide -O0 flag option (Graham Markall) +* PR #6212: Fix for objmode recompilation issue (Alexey Kozlov) +* PR #6213: Fix #6177. Remove AOT dependency on the Numba package (Siu Kwan Lam) +* PR #6224: Add support for tuple concatenation to array analysis. (#5396 + continued) (Todd A. Anderson) +* PR #6231: Remove compatibility mode (Graham Markall) +* PR #6254: Fix win-32 hashing bug (from Stuart Archibald) (Ray Donnelly) +* PR #6265: Fix #6260 (Stuart Archibald) +* PR #6267: speed up a couple of really slow unittests (Stuart Archibald) +* PR #6281: Remove numba.jitclass shim as per deprecation schedule. (Stuart + Archibald) +* PR #6294: Make return type propagate to all return variables (Andreas Sodeur) +* PR #6300: Un-skip tests that were skipped because of #4026. (Owen Anderson) +* PR #6307: Remove restrictions on SVML version due to bug in LLVM SVML CC + (Stuart Archibald) +* PR #6316: Make IR inliner tests not self mutating. (Stuart Archibald) +* PR #6318: PR #5892 continued (Todd A. Anderson, via Stuart Archibald) +* PR #6319: Permit switching off boundschecking when debug is on. (Stuart + Archibald) +* PR #6324: PR 6208 continued (Ivan Butygin and Stuart Archibald) +* PR #6337: Implements ``key`` on ``types.TypeRef`` (Andreas Sodeur) +* PR #6354: Bump llvmlite to 0.35. series. (Stuart Archibald) +* PR #6357: Fix enumerate invalid decref (Siu Kwan Lam) +* PR #6359: Fixes typed list indexing on 32bit (Stuart Archibald) +* PR #6378: Fix incorrect CPU override in vectorization test. (Stuart Archibald) +* PR #6379: Use O0 to enable inline and not affect loop-vectorization by later + O3... (Siu Kwan Lam) +* PR #6384: Fix failing tests to match on platform invariant int spelling. + (Stuart Archibald) +* PR #6390: Updates inspect_cfg (Stuart Archibald) +* PR #6396: Remove hard dependency on tbb package. (Stuart Archibald) +* PR #6408: Don't do array analysis for tuples that contain arrays. (Todd A. + Anderson) +* PR #6441: Fix ASCII flag in Unicode slicing (0.52.0rc2 regression) (Ehsan + Totoni) +* PR #6442: Fix array analysis regression in 0.52 RC2 for tuple of 1D arrays + (Ehsan Totoni) +* PR #6446: Fix #6444: pruner issues with reference stealing functions (Siu + Kwan Lam) +* PR #6450: Fix asfarray kwarg default handling. (Stuart Archibald) +* PR #6486: fix abstract base class import (Valentin Haenel) +* PR #6487: Restrict maximum version of python (Siu Kwan Lam) +* PR #6527: setup.py: fix py version guard (Chris Barnes) + +CUDA Enhancements/Fixes: + +* PR #5465: Remove macro expansion and replace uses with FE typing + BE lowering + (Graham Markall) +* PR #5741: CUDA: Add two-argument implementation of round() (Graham Markall) +* PR #5900: Enable CUDA Unified Memory (Max Katz) +* PR #6042: CUDA: Lower launch overhead by launching kernel directly (Graham + Markall) +* PR #6064: Lower math.frexp and math.ldexp in numba.cuda (Zhihao Yuan) +* PR #6066: Lower math.isfinite in numba.cuda (Zhihao Yuan) +* PR #6092: CUDA: Add mapped_array_like and pinned_array_like (Graham Markall) +* PR #6127: Fix race in reduction kernels on Volta, require CUDA 9, add syncwarp + with default mask (Graham Markall) +* PR #6129: Extend Cudasim to support most of the memory functionality. 
(Mike + Williams) +* PR #6150: CUDA: Turn on flake8 for cudadrv and fix errors (Graham Markall) +* PR #6152: CUDA: Provide wrappers for all libdevice functions, and fix typing + of math function (#4618) (Graham Markall) +* PR #6227: Raise exception when no supported architectures are found (Jacob + Tomlinson) +* PR #6244: CUDA Docs: Make workflow using simulator more explicit (Graham + Markall) +* PR #6248: Add support for CUDA atomic subtract operations (Michael Collison) +* PR #6289: Refactor atomic test cases to reduce code duplication (Michael + Collison) +* PR #6290: CUDA: Add support for complex power (Graham Markall) +* PR #6296: Fix flake8 violations in numba.cuda module (Graham Markall) +* PR #6297: Fix flake8 violations in numba.cuda.tests.cudapy module (Graham + Markall) +* PR #6298: Fix flake8 violations in numba.cuda.tests.cudadrv (Graham Markall) +* PR #6299: Fix flake8 violations in numba.cuda.simulator (Graham Markall) +* PR #6306: Fix flake8 in cuda atomic test from merge. (Stuart Archibald) +* PR #6325: Refactor code for atomic operations (Michael Collison) +* PR #6329: Flake8 fix for a CUDA test (Stuart Archibald) +* PR #6331: Explicitly state that NUMBA_ENABLE_CUDASIM needs to be set before + import (Graham Markall) +* PR #6340: CUDA: Fix #6339, performance regression launching specialized + kernels (Graham Markall) +* PR #6380: Only test managed allocations on Linux (Graham Markall) + +Documentation Updates: + +* PR #6090: doc: Add doc on direct creation of Numba typed-list (``@rht``) +* PR #6110: Update CONTRIBUTING.md (Stuart Archibald) +* PR #6128: CUDA Docs: Restore Dispatcher.forall() docs (Graham Markall) +* PR #6277: fix: cross2d wrong doc. reference (issue #6276) (``@jeertmans``) +* PR #6282: Remove docs on Python 2(.7) EOL. (Stuart Archibald) +* PR #6283: Add note on how public CI is impl and what users can do to help. + (Stuart Archibald) +* PR #6292: Document support for structured array attribute access + (Graham Markall) +* PR #6310: Declare unofficial \*BSD support (Stuart Archibald) +* PR #6342: Fix docs on literally usage. (Stuart Archibald) +* PR #6348: doc: fix typo in jitclass.rst ("initilising" -> "initialising") + (``@muxator``) +* PR #6362: Move llvmlite support in README to 0.35 (Stuart Archibald) +* PR #6363: Note that reference counted types are not permitted in set(). + (Stuart Archibald) +* PR #6364: Move deprecation schedules for 0.52 (Stuart Archibald) + +CI/Infrastructure Updates: + +* PR #6252: Show channel URLs (Siu Kwan Lam) +* PR #6338: Direct user questions to Discourse instead of the Google Group. + (Stan Seibert) +* PR #6474: Add skip on PPC64LE for tests causing SIGABRT in LLVM. (Stuart + Archibald) + +Authors: + +* Alexey Kozlov +* Amos Bird +* Andreas Sodeur +* Arthur Peters +* Chris Barnes +* Ehsan Totoni (core dev) +* Eric Wieser +* Ethan Pronovost +* Graham Markall +* Guilherme Leobas +* Isaac Virshup +* Ivan Butygin +* Jacob Tomlinson +* Luiz Almeida +* László Károlyi +* Lucio Fernandez-Arjona +* Max Katz +* Michael Collison +* Mike Williams +* Owen Anderson +* Radu Popovici +* Ray Donnelly +* Rishabh Varshney +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) +* Valentin Haenel (core dev) +* Zhihao Yuan +* ``@jeertmans`` +* ``@mugoh`` +* ``@muxator`` +* ``@rht`` + + + +Version 0.51.2 (September 2, 2020) +---------------------------------- + +This is a bugfix release for 0.51.1. 
It fixes a critical performance bug in the +CFG back edge computation algorithm that leads to exponential time complexity +arising in compilation for use cases with certain pathological properties. + +* PR #6195: PR 6187 Continue. Don't visit already checked successors + +Authors: + +* Graham Markall +* Siu Kwan Lam (core dev) + + +Version 0.51.1 (August 26, 2020) +-------------------------------- + +This is a bugfix release for 0.51.0; it fixes a critical bug in caching, another +critical bug in the CUDA target initialisation sequence and also fixes some +compile time performance regressions: + +* PR #6141: Fix #6130 objmode cache segfault +* PR #6146: Fix compilation slowdown due to controlflow analysis +* PR #6147: CUDA: Don't make a runtime call on import +* PR #6153: Fix for #6151. Make UnicodeCharSeq into str for comparison. +* PR #6168: Fix Issue #6167: Failure in test_cuda_submodules + +Authors: + +* Graham Markall +* Siu Kwan Lam (core dev) +* Stuart Archibald (core dev) + + +Version 0.51.0 (August 12, 2020) +-------------------------------- + +This release continues to add new features to Numba and also contains a +significant number of bug fixes and stability improvements. + +Highlights of core feature changes include: + +* The compilation chain is now based on LLVM 10 (Valentin Haenel). +* Numba has internally switched to prefer non-literal types over literal ones so + as to reduce function over-specialisation, with a view to speeding up + compile times (Siu Kwan Lam). +* On the CUDA target: Support for CUDA Toolkit 11, Ampere, and Compute + Capability 8.0; Printing of ``SASS`` code for kernels; Callbacks to Python + functions can be inserted into CUDA streams, and streams are async awaitable; + Atomic ``nanmin`` and ``nanmax`` functions are added; Fixes for various + miscompilations and segfaults. (mostly Graham Markall; callbacks on + streams by Peter Würtz). + +Intel also kindly sponsored research and development that led to some exciting +new features: + +* Support for heterogeneous immutable lists and heterogeneous immutable string + key dictionaries. Also optional initial/construction value capturing for all + lists and dictionaries containing literal values (Stuart Archibald). +* A new pass-by-reference mutable structure extension type ``StructRef`` (Siu + Kwan Lam). +* Object mode blocks are now cacheable, with the side effect of numerous bug + fixes and performance improvements in caching. This also permits caching of + functions defined in closures (Siu Kwan Lam). + +Deprecations to note: + +To align with other targets, the ``argtypes`` and ``restypes`` kwargs to +``@cuda.jit`` are now deprecated, and the ``bind`` kwarg is also deprecated. +Further, the ``target`` kwarg to the ``numba.jit`` decorator family is +deprecated. + +General Enhancements: + +* PR #5463: Add str(int) impl +* PR #5526: Impl. np.asarray(literal) +* PR #5619: Add support for multi-output ufuncs +* PR #5711: Division with timedelta input +* PR #5763: Support minlength argument to np.bincount +* PR #5779: Return zero array from np.dot when the arguments are empty. +* PR #5796: Add implementation for np.positive +* PR #5849: Setitem for records when index is StringLiteral, including literal + unroll +* PR #5856: Add support for conversion of inplace_binop to parfor. +* PR #5893: Allocate 1D iteration space one at a time for more even + distribution. +* PR #5922: Reduce objmode and unpickling overhead +* PR #5944: re-enable OpenMP in wheels +* PR #5946: Implement literal dictionaries and lists.
+* PR #5956: Update numba_sysinfo.py +* PR #5978: Add structref as a mutable struct that is pass-by-ref +* PR #5980: Deprecate target kwarg for numba.jit. +* PR #6058: Add prefer_literal option to overload API + +Fixes: + +* PR #5674: Fix #3955. Allow `with objmode` to be cached +* PR #5724: Initialize process lock lazily to prevent multiprocessing issue +* PR #5783: Make np.divide and np.remainder code more similar +* PR #5808: Fix 5665 Block jit(nopython=True, forceobj=True) and suppress + njit(forceobj=True) +* PR #5834: Fix the is operator on Ellipsis +* PR #5838: Ensure ``Dispatcher.__eq__`` always returns a bool +* PR #5841: cleanup: Use PythonAPI.bool_from_bool in more places +* PR #5862: Do not leak loop iteration variables into the numba.np.npyimpl + namespace +* PR #5869: Update repomap +* PR #5879: Fix erroneous input mutation in linalg routines +* PR #5882: Type check function in jit decorator +* PR #5925: Use np.inf and -np.inf for max and min float values respectively. +* PR #5935: Fix default arguments with multiprocessing +* PR #5952: Fix "Internal error ... local variable 'errstr' referenced before + assignment during BoundFunction(...)" +* PR #5962: Fix SVML tests with LLVM 10 and AVX512 +* PR #5972: fix flake8 for numba/runtests.py +* PR #5995: Update setup.py with new llvmlite versions +* PR #5996: Set lower bound for llvmlite to 0.33 +* PR #6004: Fix problem in branch pruning with LiteralStrKeyDict +* PR #6017: Fixing up numba_do_raise +* PR #6028: Fix #6023 +* PR #6031: Continue 5821 +* PR #6035: Fix overspecialize of literal +* PR #6046: Fixes statement reordering bug in maximize fusion step. +* PR #6056: Fix issue on invalid inlining of non-empty build_list by + inline_arraycall +* PR #6057: fix aarch64/python_3.8 failure on master +* PR #6070: Fix overspecialized containers +* PR #6071: Remove f-strings in setup.py +* PR #6072: Fix for #6005 +* PR #6073: Fixes invalid C prototype in helper function. +* PR #6078: Duplicate NumPy's PyArray_DescrCheck macro +* PR #6081: Fix issue with cross drive use and relpath. +* PR #6083: Fix bug in initial value unify. +* PR #6087: remove invalid sanity check from randrange tests +* PR #6089: Fix invalid reference to TypingError +* PR #6097: Add function code and closure bytes into cache key +* PR #6099: Restrict upper limit of TBB version due to ABI changes. +* PR #6101: Restrict lower limit of icc_rt version due to assumed SVML bug. +* PR #6107: Fix and test #6095 +* PR #6109: Fixes an issue reported in #6094 +* PR #6111: Decouple LiteralList and LiteralStrKeyDict from tuple +* PR #6116: Fix #6102. Problem with non-unique label. + +CUDA Enhancements/Fixes: + +* PR #5359: Remove special-casing of 0d arrays +* PR #5709: CUDA: Refactoring of cuda.jit and kernel / dispatcher abstractions +* PR #5732: CUDA Docs: document ``forall`` method of kernels +* PR #5745: CUDA stream callbacks and async awaitable streams +* PR #5761: Add implmentation for int types for isnan and isinf for CUDA +* PR #5819: Add support for CUDA 11 and Ampere / CC 8.0 +* PR #5826: CUDA: Add function to get SASS for kernels +* PR #5846: CUDA: Allow disabling NVVM optimizations, and fix debug issues +* PR #5851: CUDA EMM enhancements - add default get_ipc_handle implementation, + skip a test conditionally +* PR #5852: CUDA: Fix ``cuda.test()`` +* PR #5857: CUDA docs: Add notes on resetting the EMM plugin +* PR #5859: CUDA: Fix reduce docs and style improvements +* PR #6016: Fixes change of list spelling in a cuda test. 
+* PR #6020: CUDA: Fix #5820, adding atomic nanmin / nanmax +* PR #6030: CUDA: Don't optimize IR before sending it to NVVM +* PR #6052: Fix dtype for atomic_add_double testsuite +* PR #6080: CUDA: Prevent auto-upgrade of atomic intrinsics +* PR #6123: Fix #6121 + +Documentation Updates: + +* PR #5782: Host docs on Read the Docs +* PR #5830: doc: Mention that caching uses pickle +* PR #5963: Fix broken link to numpy ufunc signature docs +* PR #5975: restructure communication section +* PR #5981: Document bounds-checking behavior in python deviations page +* PR #5993: Docs for structref +* PR #6008: Small fix so bullet points are rendered by sphinx +* PR #6013: emphasize cuda kernel functions are asynchronous +* PR #6036: Update deprecation doc from numba.errors to numba.core.errors +* PR #6062: Change references to numba.pydata.org to https + +CI updates: + +* PR #5850: Updates the "New Issue" behaviour to better redirect users. +* PR #5940: Add discourse badge +* PR #5960: Setting mypy on CI + +Enhancements from user-contributed PRs (with thanks!): + +* Aisha Tammy added the ability to switch off TBB support at compile time in + #5821 (continued in #6031 by Stuart Archibald). +* Alexander Stiebing fixed a reference-before-assignment bug in #5952. +* Alexey Kozlov fixed a bug in tuple getitem for literals in #6028. +* Andrew Eckart updated the repomap in #5869, added support for Read the Docs + in #5782, fixed a bug in the ``np.dot`` implementation to correctly handle + empty arrays in #5779 and added support for ``minlength`` to ``np.bincount`` + in #5763. +* ``@bitsisbits`` updated ``numba_sysinfo.py`` to handle HSA agents correctly in + #5956. +* Daichi Suzuo fixed a bug in the threading backend initialisation sequence such + that it is now correctly a lazy lock in #5724. +* Eric Wieser contributed a number of patches, particularly in enhancing and + improving the ``ufunc`` capabilities: + + * #5359: Remove special-casing of 0d arrays + * #5834: Fix the is operator on Ellipsis + * #5619: Add support for multi-output ufuncs + * #5841: cleanup: Use PythonAPI.bool_from_bool in more places + * #5862: Do not leak loop iteration variables into the numba.np.npyimpl + namespace + * #5838: Ensure ``Dispatcher.__eq__`` always returns a bool + * #5830: doc: Mention that caching uses pickle + * #5783: Make np.divide and np.remainder code more similar + +* Ethan Pronovost added a guard to prevent the common mistake of applying a jit + decorator to the same function twice in #5881. +* Graham Markall contributed many patches to the CUDA target, as follows: + + * #6052: Fix dtype for atomic_add_double tests + * #6030: CUDA: Don't optimize IR before sending it to NVVM + * #5846: CUDA: Allow disabling NVVM optimizations, and fix debug issues + * #5826: CUDA: Add function to get SASS for kernels + * #5851: CUDA EMM enhancements - add default get_ipc_handle implementation, + skip a test conditionally + * #5709: CUDA: Refactoring of cuda.jit and kernel / dispatcher abstractions + * #5819: Add support for CUDA 11 and Ampere / CC 8.0 + * #6020: CUDA: Fix #5820, adding atomic nanmin / nanmax + * #5857: CUDA docs: Add notes on resetting the EMM plugin + * #5859: CUDA: Fix reduce docs and style improvements + * #5852: CUDA: Fix ``cuda.test()`` + * #5732: CUDA Docs: document ``forall`` method of kernels + +* Guilherme Leobas added support for ``str(int)`` in #5463 and + ``np.asarray(literal value)`` in #5526. +* Hameer Abbasi deprecated the ``target`` kwarg for ``numba.jit`` in #5980.
+* Hannes Pahl added a badge to the Numba GitHub page linking to the new + discourse forum in #5940 and also fixed a bug that permitted illegal + combinations of flags to be passed into ``@jit`` in #5808. +* Kayran Schmidt emphasized that CUDA kernel functions are asynchronous in the + documentation in #6013. +* Leonardo Uieda fixed a broken link to the NumPy ufunc signature docs in #5963. +* Lucio Fernandez-Arjona added mypy to CI and started adding type annotations to + the code base in #5960, also fixed a (de)serialization problem on the + dispatcher in #5935, improved the undefined variable error message in #5876, + added support for division with timedelta input in #5711 and implemented + ``setitem`` for records when the index is a ``StringLiteral`` in #5849. +* Ludovic Tiako documented Numba's bounds-checking behavior in the Python + deviations page in #5981. +* Matt Roeschke changed all ``http`` references to ``https`` in #6062. +* ``@niteya-shah`` implemented ``isnan`` and ``isinf`` for integer types on the + CUDA target in #5761 and implemented ``np.positive`` in #5796. +* Peter Würtz added CUDA stream callbacks and async awaitable streams in #5745. +* ``@rht`` fixed an invalid import referred to in the deprecation documentation + in #6036. +* Sergey Pokhodenko updated the SVML tests for LLVM 10 in #5962. +* Shyam Saladi fixed a Sphinx rendering bug in #6008. + +Authors: + +* Aisha Tammy +* Alexander Stiebing +* Alexey Kozlov +* Andrew Eckart +* ``@bitsisbits`` +* Daichi Suzuo +* Eric Wieser +* Ethan Pronovost +* Graham Markall +* Guilherme Leobas +* Hameer Abbasi +* Hannes Pahl +* Kayran Schmidt +* Kozlov, Alexey +* Leonardo Uieda +* Lucio Fernandez-Arjona +* Ludovic Tiako +* Matt Roeschke +* ``@niteya-shah`` +* Peter Würtz +* Sergey Pokhodenko +* Shyam Saladi +* ``@rht`` +* Siu Kwan Lam (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) +* Valentin Haenel (core dev) + + +Version 0.50.1 (Jun 24, 2020) +----------------------------- + +This is a bugfix release for 0.50.0; it fixes a critical bug in error reporting +and a number of other smaller issues: + +* PR #5861: Added except for possible Windows get_terminal_size exception +* PR #5876: Improve undefined variable error message +* PR #5884: Update the deprecation notices for 0.50.1 +* PR #5889: Fixes literally not forcing re-dispatch for inline='always' +* PR #5912: Fix bad attr access on certain typing templates breaking exceptions. +* PR #5918: Fix cuda test due to #5876 + +Authors: + +* ``@pepping_dore`` +* Lucio Fernandez-Arjona +* Siu Kwan Lam (core dev) +* Stuart Archibald (core dev) + + +Version 0.50.0 (Jun 10, 2020) +----------------------------- + +This is a more usual release in comparison to the others that have been made in +the last six months. It comprises the result of a number of maintenance tasks +along with some new features and a lot of bug fixes. + +Highlights of core feature changes include: + +* The compilation chain is now based on LLVM 9. +* The error handling and reporting system has been improved to reduce the size + of error messages, and also improve quality and specificity. +* The CUDA target has more stream constructors available and a new function for + compiling to PTX without linking and loading the code to a device. Further, + the macro-based system for describing CUDA threads and blocks has been + replaced with standard typing and lowering implementations, for improved + debugging and extensibility.
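+
+As a rough illustration of the compile-to-PTX workflow described in the last
+highlight above, a minimal sketch (the ``add_one`` kernel and its argument
+types are invented for this example; it assumes ``cuda.compile_ptx`` accepts
+a Python function plus a tuple of argument types and returns the PTX source
+together with the inferred return type)::
+
+    from numba import cuda, float32
+
+    def add_one(x):
+        # Trivial elementwise kernel, used here only as something to compile.
+        i = cuda.grid(1)
+        if i < x.size:
+            x[i] += 1.0
+
+    # Compile to PTX for inspection; nothing is linked or loaded onto a
+    # device.
+    ptx, resty = cuda.compile_ptx(add_one, (float32[:],))
+    print(ptx)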
+ +IMPORTANT: The backwards compatibility shim, which was present in 0.49.x to +accommodate the refactoring of Numba's internals, has been removed. If a module +is imported from a moved location, an ``ImportError`` will occur. + +General Enhancements: + +* PR #5060: Enables np.sum for timedelta64 +* PR #5225: Adjust interpreter to make conditionals predicates via bool() call. +* PR #5506: Jitclass static methods +* PR #5580: Revert shim +* PR #5591: Fix #5525 Add figure for total memory to ``numba -s`` output. +* PR #5616: Simplify the ufunc kernel registration +* PR #5617: Remove /examples from the Numba repo. +* PR #5673: Fix inliners to run all passes on IR and clean up correctly. +* PR #5700: Make it easier to understand type inference: add SSA dump, use for + ``DEBUG_TYPEINFER`` +* PR #5702: Fixes for LLVM 9 +* PR #5722: Improve error messages. +* PR #5758: Support NumPy 1.18 + +Fixes: + +* PR #5390: add error handling for lookup_module +* PR #5464: Jitclass drops annotations to avoid error +* PR #5478: Fix #5471. Issue with omitted type not recognized as literal value. +* PR #5517: Fix numba.typed.List extend for singleton and empty iterable +* PR #5549: Check type getitem +* PR #5568: Add skip to entrypoint test on windows +* PR #5581: Revert #5568 +* PR #5602: Fix segfault caused by pop from numba.typed.List +* PR #5645: Fix SSA redundant CFG computation +* PR #5686: Fix issue with SSA not minimal +* PR #5689: Fix bug in unified_function_type (issue 5685) +* PR #5694: Skip part of slice array analysis if any part is not analyzable. +* PR #5697: Fix usedef issue with parfor loopnest variables. +* PR #5705: A fix for cases where SSA looks like a reduction variable. +* PR #5714: Fix bug in test +* PR #5717: Initialise Numba extensions ahead of any compilation starting. +* PR #5721: Fix array iterator layout. +* PR #5738: Unbreak master on buildfarm +* PR #5757: Force LLVM to use ZMM registers for vectorization. +* PR #5764: fix flake8 errors +* PR #5768: Interval example: fix import +* PR #5781: Moving record array examples to a test module +* PR #5791: Fix up no cgroups problem +* PR #5795: Restore refct removal pass and make it strict +* PR #5807: Skip failing test on POWER8 due to PPC CTR Loop problem. +* PR #5812: Fix side issue from #5792, @overload inliner cached IR being + mutated. +* PR #5815: Pin llvmlite to 0.33 +* PR #5833: Fixes the source location appearing incorrectly in error messages. + +CUDA Enhancements/Fixes: + +* PR #5347: CUDA: Provide more stream constructors +* PR #5388: CUDA: Fix OOB write in test_round{f4,f8} +* PR #5437: Fix #5429: Exception using ``.get_ipc_handle(...)`` on array from + ``as_cuda_array(...)`` +* PR #5481: CUDA: Replace macros with typing and lowering implementations +* PR #5556: CUDA: Make atomic semantics match Python / NumPy, and fix #5458 +* PR #5558: CUDA: Only release primary ctx if retained +* PR #5561: CUDA: Add function for compiling to PTX (+ other small fixes) +* PR #5573: CUDA: Skip tests under cuda-memcheck that hang it +* PR #5578: Implement math.modf for CUDA target +* PR #5704: CUDA Eager compilation: Fix max_registers kwarg +* PR #5718: CUDA lib path tests: unset CUDA_PATH when CUDA_HOME unset +* PR #5800: Fix LLVM 9 IR for NVVM +* PR #5803: CUDA Update expected error messages to fix #5797 + +Documentation Updates: + +* PR #5546: DOC: Add documentation about cost model to inlining notes.
+* PR #5653: Update doc with respect to try-finally case
+
+Enhancements from user contributed PRs (with thanks!):
+
+* Elias Kuthe fixed an issue with imports in the Interval example in #5768
+* Eric Wieser simplified the ufunc kernel registration mechanism in #5616
+* Ethan Pronovost patched a problem with ``__annotations__`` in ``jitclass`` in
+  #5464, fixed a bug that led to infinite loops in Numba's ``Type.__getitem__``
+  in #5549, fixed a bug in ``np.arange`` testing in #5714 and added support for
+  ``@staticmethod`` to ``jitclass`` in #5506.
+* Gabriele Gemmi implemented ``math.modf`` for the CUDA target in #5578
+* Graham Markall contributed many patches, largely to the CUDA target, as
+  follows:
+
+  * #5347: CUDA: Provide more stream constructors
+  * #5388: CUDA: Fix OOB write in test_round{f4,f8}
+  * #5437: Fix #5429: Exception using ``.get_ipc_handle(...)`` on array from
+    ``as_cuda_array(...)``
+  * #5481: CUDA: Replace macros with typing and lowering implementations
+  * #5556: CUDA: Make atomic semantics match Python / NumPy, and fix #5458
+  * #5558: CUDA: Only release primary ctx if retained
+  * #5561: CUDA: Add function for compiling to PTX (+ other small fixes)
+  * #5573: CUDA: Skip tests under cuda-memcheck that hang it
+  * #5648: Unset the memory manager after EMM Plugin tests
+  * #5700: Make it easier to understand type inference: add SSA dump, use for
+    ``DEBUG_TYPEINFER``
+  * #5704: CUDA Eager compilation: Fix max_registers kwarg
+  * #5718: CUDA lib path tests: unset CUDA_PATH when CUDA_HOME unset
+  * #5800: Fix LLVM 9 IR for NVVM
+  * #5803: CUDA Update expected error messages to fix #5797
+
+* Guilherme Leobas updated the documentation surrounding try-finally in #5653
+* Hameer Abbasi added documentation about the cost model to the notes on
+  inlining in #5546
+* Jacques Gaudin rewrote ``numba -s`` to produce and consume a dictionary of
+  output about the current system in #5591
+* James Bourbeau updated min/argmin and max/argmax to handle non-leading nans
+  (via #5758)
+* Lucio Fernandez-Arjona moved the record array examples to a test module in
+  #5781 and added ``np.timedelta64`` handling to ``np.sum`` in #5060
+* Pearu Peterson fixed a bug in unified_function_type in #5689
+* Sergey Pokhodenko fixed an issue impacting LLVM 10 regarding vectorization
+  widths on Intel SkyLake processors in #5757
+* Shan Sikdar added error handling for ``lookup_module`` in #5390
+* ``@toddrme2178`` added CI testing for NumPy 1.18 (via #5758)
+
+Authors:
+
+* Elias Kuthe
+* Eric Wieser
+* Ethan Pronovost
+* Gabriele Gemmi
+* Graham Markall
+* Guilherme Leobas
+* Hameer Abbasi
+* Jacques Gaudin
+* James Bourbeau
+* Lucio Fernandez-Arjona
+* Pearu Peterson
+* Sergey Pokhodenko
+* Shan Sikdar
+* Siu Kwan Lam (core dev)
+* Stuart Archibald (core dev)
+* Todd A. Anderson (core dev)
+* ``@toddrme2178``
+* Valentin Haenel (core dev)
+
+
+Version 0.49.1 (May 7, 2020)
+----------------------------
+
+This is a bugfix release for 0.49.0; it fixes some residual issues with SSA
+form, a critical bug in the branch pruning logic and a number of other smaller
+issues:
+
+* PR #5587: Fixed #5586 Threading Implementation Typos
+* PR #5592: Fixes #5583 Remove references to cffi_support from docs and examples
+* PR #5614: Fix invalid type in resolve for comparison expr in parfors.
+* PR #5624: Fix erroneous rewrite of predicate to bit const on prune.
+* PR #5627: Fixes #5623, SSA local def scan based on invalid equality
+  assumption.
+* PR #5629: Fixes naming error in array_exprs
+* PR #5630: Fix #5570. Incorrect race variable detection due to SSA naming.
+* PR #5638: Make literal_unroll function work as a freevar.
+* PR #5648: Unset the memory manager after EMM Plugin tests
+* PR #5651: Fix some SSA issues
+* PR #5652: Pin to sphinx=2.4.4 to avoid problem with C declaration
+* PR #5658: Fix unifying undefined first class function types issue
+* PR #5669: Update example in 5m guide WRT SSA type stability.
+* PR #5676: Restore ``numba.types`` as public API
+
+Authors:
+
+* Graham Markall
+* Juan Manuel Cruz Martinez
+* Pearu Peterson
+* Sean Law
+* Stuart Archibald (core dev)
+* Siu Kwan Lam (core dev)
+
+
+Version 0.49.0 (Apr 16, 2020)
+-----------------------------
+
+This release is very large in terms of code changes. Large scale removal of
+unsupported Python and NumPy versions has taken place along with a significant
+amount of refactoring to simplify the Numba code base to make it easier for
+contributors. Numba's intermediate representation has also undergone some
+important changes to solve a number of long standing issues. In addition some
+new features have been added and a large number of bugs have been fixed!
+
+IMPORTANT: In this release Numba's internals have moved about a lot. A backwards
+compatibility "shim" is provided for this release so as to not immediately break
+projects using Numba's internals. If a module is imported from a moved location
+the shim will issue a deprecation warning and suggest how to update the import
+statement for the new location. The shim will be removed in 0.50.0!
+
+Highlights of core feature changes include:
+
+* Removal of all Python 2 related code and also updating the minimum supported
+  Python version to 3.6, the minimum supported NumPy version to 1.15 and the
+  minimum supported SciPy version to 1.0. (Stuart Archibald).
+* Refactoring of the Numba code base. The code is now organised into submodules
+  by functionality. This cleans up Numba's top level namespace.
+  (Stuart Archibald).
+* Introduction of an ``ir.Del`` free static single assignment form for Numba's
+  intermediate representation (Siu Kwan Lam and Stuart Archibald).
+* An OpenMP-like thread masking API has been added for use with code using the
+  parallel CPU backends (Aaron Meurer and Stuart Archibald); a short usage
+  sketch follows the CI updates list below.
+* For the CUDA target, all kernel launches now require a configuration,
+  preventing accidental launches of kernels with the old default of a single
+  thread in a single block. The hard-coded autotuner has also been removed;
+  such tuning is deferred to CUDA API calls that provide the same functionality
+  (Graham Markall).
+* The CUDA target also gained an External Memory Management plugin interface to
+  allow Numba to use another CUDA-aware library for all memory allocations and
+  deallocations (Graham Markall).
+* The Numba Typed List container gained support for construction from iterables
+  (Valentin Haenel).
+* Experimental support was added for first-class function types
+  (Pearu Peterson).
+
+Enhancements from user contributed PRs (with thanks!):
+
+* Aaron Meurer added support for thread masking at runtime in #4615.
+* Andreas Sodeur fixed a long standing bug that was preventing ``cProfile`` from
+  working with Numba JIT compiled functions in #4476.
+* Arik Funke fixed error messages in ``test_array_reductions`` (#5278), fixed
+  an issue with test discovery (#5239), made it so the documentation would build
+  again on Windows (#5453) and fixed a nested list problem in the docs in #5489.
+* Antonio Russo fixed a SyntaxWarning in #5252.
+* Eric Wieser added support for inferring the types of object arrays (#5348) and
+  iterating over 2D arrays (#5115), also fixed some compiler warnings due to
+  missing (void) in #5222. Also helped improve the "shim" and associated
+  warnings in #5485, #5488, #5498 and partly #5532.
+* Ethan Pronovost fixed a problem with the shim erroneously warning for jitclass
+  use in #5454 and also prevented illegal return values in jitclass ``__init__``
+  in #5505.
+* Gabriel Majeri added SciPy 2019 talks to the docs in #5106.
+* Graham Markall changed the Numba HTML documentation theme to resolve a number
+  of long standing issues in #5346. Also contributed were a large number of CUDA
+  enhancements and fixes, namely:
+
+  * #5519: CUDA: Silence the test suite - Fix #4809, remove autojit, delete
+    prints
+  * #5443: Fix #5196: Docs: assert in CUDA only enabled for debug
+  * #5436: Fix #5408: test_set_registers_57 fails on Maxwell
+  * #5423: Fix #5421: Add notes on printing in CUDA kernels
+  * #5400: Fix #4954, and some other small CUDA testsuite fixes
+  * #5328: NBEP 7: External Memory Management Plugin Interface
+  * #5144: Fix #4875: Make #2655 test with debug expect to pass
+  * #5323: Document lifetime semantics of CUDA Array Interface
+  * #5061: Prevent kernel launch with no configuration, remove autotuner
+  * #5099: Fix #5073: Slices of dynamic shared memory all alias
+  * #5136: CUDA: Enable asynchronous operations on the default stream
+  * #5085: Support other itemsizes with view
+  * #5059: Docs: Explain how to use Memcheck with Numba, fixups in CUDA
+    documentation
+  * #4957: Add notes on overwriting gufunc inputs to docs
+
+* Greg Jennings fixed an issue with ``np.random.choice`` not acknowledging the
+  RNG seed correctly in #3897/#5310.
+* Guilherme Leobas added support for ``np.isnat`` in #5293.
+* Henry Schreiner made the llvmlite requirements more explicit in
+  requirements.txt in #5150.
+* Ivan Butygin helped fix an issue with parfors sequential lowering in
+  #5114/#5250.
+* Jacques Gaudin fixed a bug for Python >= 3.8 in ``numba -s`` in #5548.
+* Jim Pivarski added some hints for debugging entry points in #5280.
+* John Kirkham added ``numpy.dtype`` coercion for the ``dtype`` argument to CUDA
+  device arrays in #5253.
+* Leo Fang added a list of libraries that support ``__cuda_array_interface__``
+  in #5104.
+* Lucio Fernandez-Arjona added ``getitem`` for the NumPy record type when the
+  index is a ``StringLiteral`` type in #5182 and improved the documentation
+  rendering via additions to the TOC and removal of numbering in #5450.
+* Mads R. B. Kristensen fixed an issue with ``__cuda_array_interface__`` not
+  requiring the context in #5189.
+* Marcin Tolysz added support for nested modules in AOT compilation in #5174.
+* Mike Williams fixed some issues with NumPy records and ``getitem`` in the CUDA
+  simulator in #5343.
+* Pearu Peterson added experimental support for first-class function types in
+  #5287 (and fixes in #5459, #5473/#5429, and #5557).
+* Ravi Teja Gutta added support for ``np.flip`` in #4376/#5313.
+* Rohit Sanjay fixed an issue with type refinement for unicode input supplied to
+  typed-list ``extend()`` (#5295) and fixed unicode ``.strip()`` to strip all
+  whitespace characters in #5213.
+* Vladimir Lukyanov fixed an awkward bug in ``typed.dict`` in #5361, added a fix
+  to ensure the LLVM and assembly dumps are highlighted correctly in #5357 and
+  implemented a Numba IR Lexer and added highlighting to Numba IR dumps in
+  #5333.
+* hdf fixed an issue with the ``boundscheck`` flag in the CUDA jit target in + #5257. + +General Enhancements: + +* PR #4615: Allow masking threads out at runtime +* PR #4798: Add branch pruning based on raw predicates. +* PR #5115: Add support for iterating over 2D arrays +* PR #5117: Implement ord()/chr() +* PR #5122: Remove Python 2. +* PR #5127: Calling convention adaptor for boxer/unboxer to call jitcode +* PR #5151: implement None-typed typed-list +* PR #5174: Nested modules https://github.com/numba/numba/issues/4739 +* PR #5182: Add getitem for Record type when index is StringLiteral +* PR #5185: extract code-gen utilities from closures +* PR #5197: Refactor Numba, part I +* PR #5210: Remove more unsupported Python versions from build tooling. +* PR #5212: Adds support for viewing the CFG of the ELF disassembly. +* PR #5227: Immutable typed-list +* PR #5231: Added support for ``np.asarray`` to be used with + ``numba.typed.List`` +* PR #5235: Added property ``dtype`` to ``numba.typed.List`` +* PR #5272: Refactor parfor: split up ParforPass +* PR #5281: Make IR ir.Del free until legalized. +* PR #5287: First-class function type +* PR #5293: np.isnat +* PR #5294: Create typed-list from iterable +* PR #5295: refine typed-list on unicode input to extend +* PR #5296: Refactor parfor: better exception from passes +* PR #5308: Provide ``numba.extending.is_jitted`` +* PR #5320: refactor array_analysis +* PR #5325: Let literal_unroll accept types.Named*Tuple +* PR #5330: refactor common operation in parfor lowering into a new util +* PR #5333: Add: highlight Numba IR dump +* PR #5342: Support for tuples passed to parfors. +* PR #5348: Add support for inferring the types of object arrays +* PR #5351: SSA again +* PR #5352: Add shim to accommodate refactoring. +* PR #5356: implement allocated parameter in njit +* PR #5369: Make test ordering more consistent across feature availability +* PR #5428: Wip/deprecate jitclass location +* PR #5441: Additional changes to first class function +* PR #5455: Move to llvmlite 0.32.* +* PR #5457: implement repr for untyped lists + +Fixes: + +* PR #4476: Another attempt at fixing frame injection in the dispatcher tracing + path +* PR #4942: Prevent some parfor aliasing. Rename copied function var to prevent + recursive type locking. +* PR #5092: Fix 5087 +* PR #5150: More explicit llvmlite requirement in requirements.txt +* PR #5172: fix version spec for llvmlite +* PR #5176: Normalize kws going into fold_arguments. +* PR #5183: pass 'inline' explicitly to overload +* PR #5193: Fix CI failure due to missing files when installed +* PR #5213: Fix ``.strip()`` to strip all whitespace characters +* PR #5216: Fix namedtuple mistreated by dispatcher as simple tuple +* PR #5222: Fix compiler warnings due to missing (void) +* PR #5232: Fixes a bad import that breaks master +* PR #5239: fix test discovery for unittest +* PR #5247: Continue PR #5126 +* PR #5250: Part fix/5098 +* PR #5252: Trivially fix SyntaxWarning +* PR #5276: Add prange variant to has_no_side_effect. +* PR #5278: fix error messages in test_array_reductions +* PR #5310: PR #3897 continued +* PR #5313: Continues PR #4376 +* PR #5318: Remove AUTHORS file reference from MANIFEST.in +* PR #5327: Add warning if FNV hashing is found as the default for CPython. +* PR #5338: Remove refcount pruning pass +* PR #5345: Disable test failing due to removed pass. 
+* PR #5357: Small fix to have llvm and asm highlighted properly +* PR #5361: 5081 typed.dict +* PR #5431: Add tolerance to numba extension module entrypoints. +* PR #5432: Fix code causing compiler warnings. +* PR #5445: Remove undefined variable +* PR #5454: Don't warn for numba.experimental.jitclass +* PR #5459: Fixes issue 5448 +* PR #5480: Fix for #5477, literal_unroll KeyError searching for getitems +* PR #5485: Show the offending module in "no direct replacement" error message +* PR #5488: Add missing ``numba.config`` shim +* PR #5495: Fix missing null initializer for variable after phi strip +* PR #5498: Make the shim deprecation warnings work on python 3.6 too +* PR #5505: Better error message if __init__ returns value +* PR #5527: Attempt to fix #5518 +* PR #5529: PR #5473 continued +* PR #5532: Make ``numba.`` available without an import +* PR #5542: Fixes RC2 module shim bug +* PR #5548: Fix #5537 Removed reference to ``platform.linux_distribution`` +* PR #5555: Fix #5515 by reverting changes to ArrayAnalysis +* PR #5557: First-class function call cannot use keyword arguments +* PR #5569: Fix RewriteConstGetitems not registering calltype for new expr +* PR #5571: Pin down llvmlite requirement + +CUDA Enhancements/Fixes: + +* PR #5061: Prevent kernel launch with no configuration, remove autotuner +* PR #5085: Support other itemsizes with view +* PR #5099: Fix #5073: Slices of dynamic shared memory all alias +* PR #5104: Add a list of libraries that support __cuda_array_interface__ +* PR #5136: CUDA: Enable asynchronous operations on the default stream +* PR #5144: Fix #4875: Make #2655 test with debug expect to pass +* PR #5189: __cuda_array_interface__ not requiring context +* PR #5253: Coerce ``dtype`` to ``numpy.dtype`` +* PR #5257: boundscheck fix +* PR #5319: Make user facing error string use abs path not rel. +* PR #5323: Document lifetime semantics of CUDA Array Interface +* PR #5328: NBEP 7: External Memory Management Plugin Interface +* PR #5343: Fix cuda spoof +* PR #5400: Fix #4954, and some other small CUDA testsuite fixes +* PR #5436: Fix #5408: test_set_registers_57 fails on Maxwell +* PR #5519: CUDA: Silence the test suite - Fix #4809, remove autojit, delete + prints + +Documentation Updates: + +* PR #4957: Add notes on overwriting gufunc inputs to docs +* PR #5059: Docs: Explain how to use Memcheck with Numba, fixups in CUDA + documentation +* PR #5106: Add SciPy 2019 talks to docs +* PR #5147: Update master for 0.48.0 updates +* PR #5155: Explain what inlining at Numba IR level will do +* PR #5161: Fix README.rst formatting +* PR #5207: Remove AUTHORS list +* PR #5249: fix target path for See also +* PR #5262: fix typo in inlining docs +* PR #5270: fix 'see also' in typeddict docs +* PR #5280: Added some hints for debugging entry points. +* PR #5297: Update docs with intro to {g,}ufuncs. +* PR #5326: Update installation docs with OpenMP requirements. +* PR #5346: Docs: use sphinx_rtd_theme +* PR #5366: Remove reference to Python 2.7 in install check output +* PR #5423: Fix #5421: Add notes on printing in CUDA kernels +* PR #5438: Update package deps for doc building. +* PR #5440: Bump deprecation notices. +* PR #5443: Fix #5196: Docs: assert in CUDA only enabled for debug +* PR #5450: Docs: remove numbers and add titles to TOC +* PR #5453: fix building docs on windows +* PR #5489: docs: fix rendering of nested bulleted list + +CI updates: + +* PR #5314: Update the image used in Azure CI for OSX. +* PR #5360: Remove Travis CI badge. 
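+
+As referenced in the highlights above, here is a minimal sketch of the thread
+masking API added in #4615 (assuming ``set_num_threads``/``get_num_threads``
+are importable from the top-level ``numba`` namespace, as they are in current
+releases)::
+
+    import numpy as np
+    from numba import njit, prange, set_num_threads, get_num_threads
+
+    @njit(parallel=True)
+    def vector_sum(x):
+        acc = 0.0
+        for i in prange(x.size):
+            acc += x[i]
+        return acc
+
+    set_num_threads(2)        # mask the thread pool down to 2 threads
+    print(get_num_threads())  # -> 2
+    print(vector_sum(np.ones(10)))  # -> 10.0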
+
+Authors:
+
+* Aaron Meurer
+* Andreas Sodeur
+* Antonio Russo
+* Arik Funke
+* Eric Wieser
+* Ethan Pronovost
+* Gabriel Majeri
+* Graham Markall
+* Greg Jennings
+* Guilherme Leobas
+* hdf
+* Henry Schreiner
+* Ivan Butygin
+* Jacques Gaudin
+* Jim Pivarski
+* John Kirkham
+* Leo Fang
+* Lucio Fernandez-Arjona
+* Mads R. B. Kristensen
+* Marcin Tolysz
+* Mike Williams
+* Pearu Peterson
+* Ravi Teja Gutta
+* Rohit Sanjay
+* Siu Kwan Lam (core dev)
+* Stan Seibert (core dev)
+* Stuart Archibald (core dev)
+* Todd A. Anderson (core dev)
+* Valentin Haenel (core dev)
+* Vladimir Lukyanov
+
+
+Version 0.48.0 (Jan 27, 2020)
+-----------------------------
+
+This release is particularly small, as its main purpose was to catch anything
+that missed the 0.47.0 deadline (the deadline deliberately coincided with the
+end of support for Python 2.7). The next release will be considerably larger.
+
+The core changes in this release are dominated by the start of the clean up
+needed for the end of Python 2.7 support, improvements to the CUDA target and
+support for numerous additional unicode string methods.
+
+Enhancements from user contributed PRs (with thanks!):
+
+* Brian Wignall fixed more spelling typos in #4998.
+* Denis Smirnov added support for string methods ``capitalize`` (#4823),
+  ``casefold`` (#4824), ``swapcase`` (#4825), ``rsplit`` (#4834), ``partition``
+  (#4845) and ``splitlines`` (#4849).
+* Elena Totmenina extended support for string methods ``startswith`` (#4867) and
+  added ``endswith`` (#4868).
+* Eric Wieser made ``type_callable`` return the decorated function itself in
+  #4760
+* Ethan Pronovost added support for ``np.argwhere`` in #4617
+* Graham Markall contributed a large number of CUDA enhancements and fixes,
+  namely:
+
+  * #5068: Remove Python 3.4 backports from utils
+  * #4975: Make ``device_array_like`` create contiguous arrays (Fixes #4832)
+  * #5023: Don't launch ForAll kernels with 0 elements (Fixes #5017)
+  * #5016: Fix various issues in CUDA library search (Fixes #4979)
+  * #5014: Enable use of records and bools for shared memory, remove ddt, add
+    additional transpose tests
+  * #4964: Fix #4628: Add more appropriate typing for CUDA device arrays
+  * #5007: test_consuming_strides: Keep dev array alive
+  * #4997: State that CUDA Toolkit 8.0 required in docs
+
+* James Bourbeau added the Python 3.8 classifier to setup.py in #5027.
+* John Kirkham added a clarification to the ``__cuda_array_interface__``
+  documentation in #5049.
+* Leo Fang fixed an indexing problem in ``dummyarray`` in #5012.
+* Marcel Bargull fixed a build and test issue for Python 3.8 in #5029.
+* Maria Rubtsov added support for string methods ``isdecimal`` (#4842),
+  ``isdigit`` (#4843), ``isnumeric`` (#4844) and ``replace`` (#4865).
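+
+A minimal sketch of the string methods listed above in use inside
+nopython-mode code (behaviour is intended to match CPython, per the PRs
+above)::
+
+    from numba import njit
+
+    @njit
+    def demo(s):
+        parts = s.rsplit("m")  # #4834
+        return s.capitalize(), s.swapcase(), s.startswith("nu"), len(parts)
+
+    print(demo("numba"))  # -> ('Numba', 'NUMBA', True, 2)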
+
+General Enhancements:
+
+* PR #4760: Make type_callable return the decorated function
+* PR #5010: merge string prs
+
+  This merge PR included the following:
+
+  * PR #4823: Implement str.capitalize() based on CPython
+  * PR #4824: Implement str.casefold() based on CPython
+  * PR #4825: Implement str.swapcase() based on CPython
+  * PR #4834: Implement str.rsplit() based on CPython
+  * PR #4842: Implement str.isdecimal
+  * PR #4843: Implement str.isdigit
+  * PR #4844: Implement str.isnumeric
+  * PR #4845: Implement str.partition() based on CPython
+  * PR #4849: Implement str.splitlines() based on CPython
+  * PR #4865: Implement str.replace
+  * PR #4867: Functionality extension str.startswith() based on CPython
+  * PR #4868: Add functionality for str.endswith()
+
+* PR #5039: Disable help messages.
+* PR #4617: Add coverage for ``np.argwhere``
+
+Fixes:
+
+* PR #4724: Only use lives (and not aliases) to create post parfor live set.
+* PR #4998: Fix more spelling typos
+* PR #5024: Propagate semantic constants ahead of static rewrites.
+* PR #5027: Add Python 3.8 classifier to setup.py
+* PR #5046: Update setup.py and buildscripts for dependency requirements
+* PR #5053: Convert from arrays to names in define() and don't invalidate for
+  multiple consistent defines.
+* PR #5058: Permit mixed int types in wrap_index
+* PR #5078: Catch the use of global typed-list in JITed functions
+* PR #5092: Fix #5087, bug in bytecode analysis.
+
+CUDA Enhancements/Fixes:
+
+* PR #4964: Fix #4628: Add more appropriate typing for CUDA device arrays
+* PR #4975: Make ``device_array_like`` create contiguous arrays (Fixes #4832)
+* PR #4997: State that CUDA Toolkit 8.0 required in docs
+* PR #5007: test_consuming_strides: Keep dev array alive
+* PR #5012: Fix IndexError when accessing the "-1" element of dummyarray
+* PR #5014: Enable use of records and bools for shared memory, remove ddt, add
+  additional transpose tests
+* PR #5016: Fix various issues in CUDA library search (Fixes #4979)
+* PR #5023: Don't launch ForAll kernels with 0 elements (Fixes #5017)
+* PR #5068: Remove Python 3.4 backports from utils
+
+Documentation Updates:
+
+* PR #5049: Clarify what dictionary means
+* PR #5062: Update docs for updated version requirements
+* PR #5090: Update deprecation notices for 0.48.0
+
+CI updates:
+
+* PR #5029: Install optional dependencies for Python 3.8 tests
+* PR #5040: Drop Py2.7 and Py3.5 from public CI
+* PR #5048: Fix CI py38
+
+Authors:
+
+* Brian Wignall
+* Denis Smirnov
+* Elena Totmenina
+* Eric Wieser
+* Ethan Pronovost
+* Graham Markall
+* James Bourbeau
+* John Kirkham
+* Leo Fang
+* Marcel Bargull
+* Maria Rubtsov
+* Siu Kwan Lam (core dev)
+* Stan Seibert (core dev)
+* Stuart Archibald (core dev)
+* Todd A. Anderson (core dev)
+* Valentin Haenel (core dev)
+
+
+Version 0.47.0 (Jan 2, 2020)
+-----------------------------
+
+This release expands the capability of Numba in a number of important areas and
+is also significant as it is the last major point release with support for
+Python 2 and Python 3.5 included. The next release (0.48.0) will be for Python
+3.6+ only! (This follows NumPy's deprecation schedule as specified in
+`NEP 29 <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_.)
+
+Highlights of core feature changes include:
+
+* Full support for Python 3.8 (Siu Kwan Lam)
+* Opt-in bounds checking (Aaron Meurer)
+* Support for ``map``, ``filter`` and ``reduce`` (Stuart Archibald)
+
+Intel also kindly sponsored research and development that led to some exciting
+new features:
+
+* Initial support for basic ``try``/``except`` use (Siu Kwan Lam)
+* The ability to pass functions created from closures/lambdas as arguments
+  (Stuart Archibald)
+* ``sorted`` and ``list.sort()`` now accept the ``key`` argument (Stuart
+  Archibald and Siu Kwan Lam)
+* A new compiler pass triggered through the use of the function
+  ``numba.literal_unroll`` which permits iteration over heterogeneous tuples
+  and constant lists of constants. (Stuart Archibald)
+
+Enhancements from user contributed PRs (with thanks!):
+
+* Ankit Mahato added a reference to a new talk on Numba at PyCon India 2019 in
+  #4862
+* Brian Wignall kindly fixed some spelling mistakes and typos in #4909
+* Denis Smirnov wrote numerous methods to considerably enhance string support
+  including:
+
+  * ``str.rindex()`` in #4861
+  * ``str.isprintable()`` in #4836
+  * ``str.index()`` in #4860
+  * ``start/end`` parameters for ``str.find()`` in #4866
+  * ``str.isspace()`` in #4835
+  * ``str.isidentifier()`` in #4837
+  * ``str.rpartition()`` in #4841
+  * ``str.lower()`` and ``str.islower()`` in #4651
+
+* Elena Totmenina implemented ``str.isalnum()``, ``str.isalpha()`` and
+  ``str.isascii()`` in #4839, #4840 and #4847 respectively.
+* Eric Larson fixed a bug in literal comparison in #4710
+* Ethan Pronovost updated the ``np.arange`` implementation in #4770 to allow
+  the use of the ``dtype`` key word argument and also added ``bool``
+  implementations for several types in #4715.
+* Graham Markall fixed some issues with the CUDA target, namely:
+
+  * #4931: Added physical limits for CC 7.0 / 7.5 to CUDA autotune
+  * #4934: Fixed bugs in TestCudaWarpOperations
+  * #4938: Improved errors / warnings for the CUDA vectorize decorator
+
+* Guilherme Leobas fixed a typo in the ``urem`` implementation in #4667
+* Isaac Virshup contributed a number of patches that fixed bugs, added support
+  for more NumPy functions and enhanced Python feature support. These
+  contributions included:
+
+  * #4729: Allow array construction with mixed type shape tuples
+  * #4904: Implementing ``np.lcm``
+  * #4780: Implement np.gcd and math.gcd
+  * #4779: Make slice constructor more similar to python.
+  * #4707: Added support for slice.indices
+  * #4578: Clarify numba ufunc supported features
+
+* James Bourbeau fixed some issues with tooling: #4794 added ``setuptools`` as a
+  dependency and #4501 added pre-commit hooks for ``flake8`` compliance.
+* Leo Fang made ``numba.dummyarray.Array`` iterable in #4629
+* Marc Garcia fixed the ``numba.jit`` parameter name signature_or_function in
+  #4703
+* Marcelo Duarte Trevisani patched the llvmlite requirement to ``>=0.30.0`` in
+  #4725
+* Matt Cooper fixed a long standing CI problem in #4737 by removing
+  ``maxParallel`` from Azure Pipelines.
+* Matti Picus fixed an issue with ``collections.abc`` in #4734.
+* Rob Ennis patched a bug in ``np.interp`` ``float32`` handling in #4911
+* VDimir fixed a bug in array transposition layouts in #4777 and re-enabled and
+  fixed some idle tests in #4776.
+* Vyacheslav Smirnov enabled support for ``str.istitle()`` in #4645
+
+General Enhancements:
+
+* PR #4432: Bounds checking
+* PR #4501: Add pre-commit hooks
+* PR #4536: Handle kw args in inliner when callee is a function
+* PR #4599: Permits closures to become functions, enables map(), filter()
+* PR #4611: Implement method title() for unicode based on CPython
+* PR #4645: Enable support for istitle() method for unicode string
+* PR #4651: Implement str.lower() and str.islower()
+* PR #4652: Implement str.rfind()
+* PR #4695: Refactor `overload*` and support `jit_options` and `inline`
+* PR #4707: Added support for slice.indices
+* PR #4715: Add `bool` overload for several types
+* PR #4729: Allow array construction with mixed type shape tuples
+* PR #4755: Python3.8 support
+* PR #4756: Add parfor support for ndarray.fill.
+* PR #4768: Update typeconv error message to ask for sys.executable.
+* PR #4770: Update `np.arange` implementation with `@overload`
+* PR #4779: Make slice constructor more similar to python.
+* PR #4780: Implement np.gcd and math.gcd
+* PR #4794: Add setuptools as a dependency
+* PR #4802: put git hash into build string
+* PR #4803: Better compiler error messages for improperly used reduction
+  variables.
+* PR #4817: Typed list implement and expose allocation
+* PR #4818: Typed list faster copy
+* PR #4835: Implement str.isspace() based on CPython
+* PR #4836: Implement str.isprintable() based on CPython
+* PR #4837: Implement str.isidentifier() based on CPython
+* PR #4839: Implement str.isalnum() based on CPython
+* PR #4840: Implement str.isalpha() based on CPython
+* PR #4841: Implement str.rpartition() based on CPython
+* PR #4847: Implement str.isascii() based on CPython
+* PR #4851: Add graphviz output for FunctionIR
+* PR #4854: Python3.8 looplifting
+* PR #4858: Implement str.expandtabs() based on CPython
+* PR #4860: Implement str.index() based on CPython
+* PR #4861: Implement str.rindex() based on CPython
+* PR #4866: Support params start/end for str.find()
+* PR #4874: Bump to llvmlite 0.31
+* PR #4896: Specialise arange dtype on arch + python version.
+* PR #4902: basic support for try except
+* PR #4904: Implement np.lcm
+* PR #4910: loop canonicalisation and type aware tuple unroller/loop body
+  versioning passes
+* PR #4961: Update hash(tuple) for Python 3.8.
+* PR #4977: Implement sort/sorted with key.
+* PR #4987: Add `is_internal` property to all Type classes.
+
+Fixes:
+
+* PR #4090: Update to LLVM8 memset/memcpy intrinsic
+* PR #4582: Convert sub to add and div to mul when doing the reduction across
+  the per-thread reduction array.
+* PR #4648: Handle 0 correctly as slice parameter.
+* PR #4660: Remove multiply defined variables from all blocks' equivalence sets.
+* PR #4672: Fix pickling of dufunc
+* PR #4710: BUG: Comparison for literal
+* PR #4718: Change get_call_table to support intermediate Vars.
+* PR #4725: Requires llvmlite >=0.30.0
+* PR #4734: prefer to import from collections.abc
+* PR #4736: fix flake8 errors
+* PR #4776: Fix and enable idle tests from test_array_manipulation
+* PR #4777: Fix transpose output array layout
+* PR #4782: Fix issue with SVML (and knock-on function resolution effects).
+* PR #4785: Treat 0d arrays like scalars.
+* PR #4787: fix missing incref on flags +* PR #4789: fix typos in numba/targets/base.py +* PR #4791: fix typos +* PR #4811: fix spelling in now-failing tests +* PR #4852: windowing test should check equality only up to double precision + errors +* PR #4881: fix refining list by using extend on an iterator +* PR #4882: Fix return type in arange and zero step size handling. +* PR #4885: suppress spurious RuntimeWarning about ufunc sizes +* PR #4891: skip the xfail test for now. Py3.8 CFG refactor seems to have + changed the test case +* PR #4892: regex needs to accept singular form of "argument" +* PR #4901: fix typed list equals +* PR #4909: Fix some spelling typos +* PR #4911: np.interp bugfix for float32 handling +* PR #4920: fix creating list with JIT disabled +* PR #4921: fix creating dict with JIT disabled +* PR #4935: Better handling of prange with multiple reductions on the same + variable. +* PR #4946: Improve the error message for `raise `. +* PR #4955: Move overload of literal_unroll to avoid circular dependency that + breaks Python 2.7 +* PR #4962: Fix test error on windows +* PR #4973: Fixes a bug in the relabelling logic in literal_unroll. +* PR #4978: Fix overload_method problem with stararg +* PR #4981: Add ind_to_const to enable fewer equivalence classes. +* PR #4991: Continuation of #4588 (Let dead code removal handle removing more of + the unneeded code after prange conversion to parfor) +* PR #4994: Remove xfail for test which has since had underlying issue fixed. +* PR #5018: Fix #5011. +* PR #5019: skip pycc test on Python 3.8 + macOS because of distutils issue + +CUDA Enhancements/Fixes: + +* PR #4629: Make numba.dummyarray.Array iterable +* PR #4675: Bump cuda array interface to version 2 +* PR #4741: Update choosing the "CUDA_PATH" for windows +* PR #4838: Permit ravel('A') for contig device arrays in CUDA target +* PR #4931: Add physical limits for CC 7.0 / 7.5 to autotune +* PR #4934: Fix fails in TestCudaWarpOperations +* PR #4938: Improve errors / warnings for cuda vectorize decorator + +Documentation Updates: + +* PR #4418: Directed graph task roadmap +* PR #4578: Clarify numba ufunc supported features +* PR #4655: fix sphinx build warning +* PR #4667: Fix typo on urem implementation +* PR #4669: Add link to ParallelAccelerator paper. +* PR #4703: Fix numba.jit parameter name signature_or_function +* PR #4862: Addition of PyCon India 2019 talk on Numba +* PR #4947: Document jitclass with numba.typed use. +* PR #4958: Add docs for `try..except` +* PR #4993: Update deprecations for 0.47 + +CI Updates: + +* PR #4737: remove maxParallel from Azure Pipelines +* PR #4767: pin to 2.7.16 for py27 on osx +* PR #4781: WIP/runtest cf pytest + +Authors: + +* Aaron Meurer +* Ankit Mahato +* Brian Wignall +* Denis Smirnov +* Ehsan Totoni (core dev) +* Elena Totmenina +* Eric Larson +* Ethan Pronovost +* Giovanni Cavallin +* Graham Markall +* Guilherme Leobas +* Isaac Virshup +* James Bourbeau +* Leo Fang +* Marc Garcia +* Marcelo Duarte Trevisani +* Matt Cooper +* Matti Picus +* Rob Ennis +* Rujal Desai +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) +* VDimir +* Valentin Haenel (core dev) +* Vyacheslav Smirnov + + +Version 0.46.0 +-------------- + +This release significantly reworked one of the main parts of Numba, the compiler +pipeline, to make it more extensible and easier to use. The purpose of this was +to continue enhancing Numba's ability for use as a compiler toolkit. 
In a
+similar vein, Numba now has an extension registration mechanism to allow other
+Numba-using projects to automatically have their Numba JIT compilable functions
+discovered. There were also a number of other related compiler toolkit
+enhancements added along with some more NumPy features and a lot of bug fixes.
+
+This release has updated the CUDA Array Interface specification to version 2,
+which clarifies the `strides` attribute for C-contiguous arrays and specifies
+the treatment for zero-size arrays. The implementation in Numba has been
+changed and may affect downstream packages relying on the old behavior
+(see issue #4661).
+
+Enhancements from user contributed PRs (with thanks!):
+
+* Aaron Meurer fixed some Python issues in the code base in #4345 and #4341.
+* Ashwin Srinath fixed a CUDA performance bug via #4576.
+* Ethan Pronovost added support for triangular indices functions in #4601 (the
+  NumPy functions ``tril_indices``, ``tril_indices_from``, ``triu_indices``, and
+  ``triu_indices_from``).
+* Gerald Dalley fixed a tear down race occurring in Python 2.
+* Gregory R. Lee fixed the use of deprecated ``inspect.getargspec``.
+* Guilherme Leobas contributed five PRs, adding support for ``np.append`` and
+  ``np.count_nonzero`` in #4518 and #4386 (a short usage sketch follows the
+  documentation updates list below). The typed List was fixed to accept
+  unsigned integers in #4510. #4463 made a fix to NamedTuple internals and #4397
+  updated the docs for ``np.sum``.
+* James Bourbeau added a new feature to permit the automatic application of the
+  `jit` decorator to a whole module in #4331. Also some small fixes to the docs
+  and the code base were made in #4447 and #4433, and a fix to an inplace array
+  operation in #4228.
+* Jim Crist fixed a bug in the rendering of patched errors in #4464.
+* Leo Fang updated the CUDA Array Interface contract in #4609.
+* Pearu Peterson added support for Unicode based NumPy arrays in #4425.
+* Peter Andreas Entschev fixed a CUDA concurrency bug in #4581.
+* Lucio Fernandez-Arjona extended Numba's ``np.sum`` support to now accept the
+  ``dtype`` kwarg in #4472.
+* Pedro A. Morales Maries added support for ``np.cross`` in #4128 and also added
+  the necessary extension ``numba.numpy_extensions.cross2d`` in #4595.
+* David Hoese, Eric Firing, Joshua Adelman, and Juan Nunez-Iglesias all made
+  documentation fixes in #4565, #4482, #4455, #4375 respectively.
+* Vyacheslav Smirnov and Rujal Desai enabled support for ``count()`` on unicode
+  strings in #4606.
+
+General Enhancements:
+
+* PR #4113: Add rewrite for semantic constants.
+* PR #4128: Add np.cross support
+* PR #4162: Make IR comparable and legalize it.
+* PR #4208: R&D inlining, jitted and overloaded.
+* PR #4331: Automatic JIT of called functions
+* PR #4353: Inspection tool to check what numba supports
+* PR #4386: Implement np.count_nonzero
+* PR #4425: Unicode array support
+* PR #4427: Entrypoints for numba extensions
+* PR #4467: Literal dispatch
+* PR #4472: Allow dtype input argument in np.sum
+* PR #4513: New compiler.
+* PR #4518: add support for np.append
+* PR #4554: Refactor NRT C-API
+* PR #4556: 0.46 scheduled deprecations
+* PR #4567: Add env var to disable performance warnings.
+* PR #4568: add np.array_equal support +* PR #4595: Implement numba.cross2d +* PR #4601: Add triangular indices functions +* PR #4606: Enable support for count() method for unicode string + +Fixes: + +* PR #4228: Fix inplace operator error for arrays +* PR #4282: Detect and raise unsupported on generator expressions +* PR #4305: Don't allow the allocation of mutable objects written into a + container to be hoisted. +* PR #4311: Avoid deprecated use of inspect.getargspec +* PR #4328: Replace GC macro with function call +* PR #4330: Loosen up typed container casting checks +* PR #4341: Fix some coding lines at the top of some files (utf8 -> utf-8) +* PR #4345: Replace "import \*" with explicit imports in numba/types +* PR #4346: Fix incorrect alg in isupper for ascii strings. +* PR #4349: test using jitclass in typed-list +* PR #4361: Add allocation hoisting info to LICM section at diagnostic L4 +* PR #4366: Offset search box to avoid wrapping on some pages with Safari. + Fixes #4365. +* PR #4372: Replace all "except BaseException" with "except Exception". +* PR #4407: Restore the "free" conda channel for NumPy 1.10 support. +* PR #4408: Add lowering for constant bytes. +* PR #4409: Add exception chaining for better error context +* PR #4411: Name of type should not contain user facing description for debug. +* PR #4412: Fix #4387. Limit the number of return types for recursive functions +* PR #4426: Fixed two module teardown races in py2. +* PR #4431: Fix and test numpy.random.random_sample(n) for np117 +* PR #4463: NamedTuple - Raises an error on non-iterable elements +* PR #4464: Add a newline in patched errors +* PR #4474: Fix liveness for remove dead of parfors (and other IR extensions) +* PR #4510: Make List.__getitem__ accept unsigned parameters +* PR #4512: Raise specific error at typing time for iteration on >1D array. +* PR #4532: Fix static_getitem with Literal type as index +* PR #4547: Update to inliner cost model information. +* PR #4557: Use specific random number seed when generating arbitrary test data +* PR #4559: Adjust test timeouts +* PR #4564: Skip unicode array tests on ppc64le that trigger an LLVM bug +* PR #4621: Fix packaging issue due to missing numba/cext +* PR #4623: Fix issue 4520 due to storage model mismatch +* PR #4644: Updates for llvmlite 0.30.0 + +CUDA Enhancements/Fixes: + +* PR #4410: Fix #4111. cudasim mishandling recarray +* PR #4576: Replace use of `np.prod` with `functools.reduce` for computing size + from shape +* PR #4581: Prevent taking the GIL in ForAll +* PR #4592: Fix #4589. Just pass NULL for b2d_func for constant dynamic + sharedmem +* PR #4609: Update CUDA Array Interface & Enforce Numba compliance +* PR #4619: Implement math.{degrees, radians} for the CUDA target. +* PR #4675: Bump cuda array interface to version 2 + +Documentation Updates: + +* PR #4317: Add docs for ARMv8/AArch64 +* PR #4318: Add supported platforms to the docs. Closes #4316 +* PR #4375: Add docstrings to inspect methods +* PR #4388: Update Python 2.7 EOL statement +* PR #4397: Add note about np.sum +* PR #4447: Minor parallel performance tips edits +* PR #4455: Clarify docs for typed dict with regard to arrays +* PR #4482: Fix example in guvectorize docstring. +* PR #4541: fix two typos in architecture.rst +* PR #4548: Document numba.extending.intrinsic and inlining. +* PR #4565: Fix typo in jit-compilation docs +* PR #4607: add dependency list to docs +* PR #4614: Add documentation for implementing new compiler passes. 
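+
+As noted in the contribution list above, here is a minimal sketch of the NumPy
+additions from this release (``np.append`` via #4518 and ``np.count_nonzero``
+via #4386) in nopython mode::
+
+    import numpy as np
+    from numba import njit
+
+    @njit
+    def demo(a, b):
+        joined = np.append(a, b)          # #4518
+        return np.count_nonzero(joined)   # #4386
+
+    print(demo(np.array([0, 1, 2]), np.array([3, 0])))  # -> 3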
+
+CI Updates:
+
+* PR #4415: Make 32bit incremental builds on linux not use free channel
+* PR #4433: Removes stale azure comment
+* PR #4493: Fix Overload Inliner wrt CUDA Intrinsics
+* PR #4593: Enable Azure CI batching
+
+Contributors:
+
+* Aaron Meurer
+* Ashwin Srinath
+* David Hoese
+* Ehsan Totoni (core dev)
+* Eric Firing
+* Ethan Pronovost
+* Gerald Dalley
+* Gregory R. Lee
+* Guilherme Leobas
+* James Bourbeau
+* Jim Crist
+* Joshua Adelman
+* Juan Nunez-Iglesias
+* Leo Fang
+* Lucio Fernandez-Arjona
+* Pearu Peterson
+* Pedro A. Morales Marie
+* Peter Andreas Entschev
+* Rujal Desai
+* Siu Kwan Lam (core dev)
+* Stan Seibert (core dev)
+* Stuart Archibald (core dev)
+* Todd A. Anderson (core dev)
+* Valentin Haenel (core dev)
+* Vyacheslav Smirnov
+
+
+Version 0.45.1
+--------------
+
+This patch release addresses some regressions reported in the 0.45.0 release and
+adds support for NumPy 1.17:
+
+* PR #4325: accept scalar/0d-arrays
+* PR #4338: Fix #4299. Parfors reduction vars not deleted.
+* PR #4350: Use process level locks for fork() only.
+* PR #4354: Try to fix #4352.
+* PR #4357: Fix np1.17 isnan, isinf, isfinite ufuncs
+* PR #4363: Fix np.interp for np1.17 nan handling
+* PR #4371: Fix np1.17 random function non-aliasing
+
+Contributors:
+
+* Siu Kwan Lam (core dev)
+* Stuart Archibald (core dev)
+* Valentin Haenel (core dev)
+
+
+Version 0.45.0
+--------------
+
+In this release, Numba gained an experimental :ref:`numba.typed.List
+` container as a future replacement of the :ref:`reflected
+list `. In addition, functions decorated with
+``parallel=True`` can now be cached to reduce compilation overhead associated
+with the auto-parallelization.
+
+
+Enhancements from user contributed PRs (with thanks!):
+
+* James Bourbeau added the Numba version to reportable error messages in #4227,
+  added the ``signature`` parameter to ``inspect_types`` in #4200, improved the
+  docstring of ``normalize_signature`` in #4205, and fixed #3658 by adding
+  reference counting to ``register_dispatcher`` in #4254
+
+* Guilherme Leobas implemented the dominator tree and dominance frontier
+  algorithms in #4216 and #4149, respectively.
+
+* Nick White fixed the issue with ``round`` in the CUDA target in #4137.
+
+* Joshua Adelman added support for determining if a value is in a `range`
+  (i.e. ``x in range(...)``) in #4129, and added windowing functions
+  (``np.bartlett``, ``np.hamming``, ``np.blackman``, ``np.hanning``,
+  ``np.kaiser``) from NumPy in #4076.
+
+* Lucio Fernandez-Arjona added support for ``np.select`` in #4077
+
+* Rob Ennis added support for ``np.flatnonzero`` in #4157
+
+* Keith Kraus extended the ``__cuda_array_interface__`` with an optional mask
+  attribute in #4199.
+
+* Gregory R. Lee replaced deprecated use of ``inspect.getargspec`` in #4311.
+
+
+General Enhancements:
+
+* PR #4328: Replace GC macro with function call
+* PR #4311: Avoid deprecated use of inspect.getargspec
+* PR #4296: Slacken window function testing tol on ppc64le
+* PR #4254: Add reference counting to register_dispatcher
+* PR #4239: Support len() of multi-dim arrays in array analysis
+* PR #4234: Raise informative error for np.kron array order
+* PR #4232: Add unicodetype db, low level str functions and examples.
+* PR #4229: Make hashing cacheable
+* PR #4227: Include numba version in reportable error message
+* PR #4216: Add dominator tree
+* PR #4200: Add signature parameter to inspect_types
+* PR #4196: Catch missing imports of internal functions.
+* PR #4180: Update use of unlowerable global message. +* PR #4166: Add tests for PR #4149 +* PR #4157: Support for np.flatnonzero +* PR #4149: Implement dominance frontier for SSA for the Numba IR +* PR #4148: Call branch pruning in inline_closure_call() +* PR #4132: Reduce usage of inttoptr +* PR #4129: Support contains for range +* PR #4112: better error messages for np.transpose and tuples +* PR #4110: Add range attrs, start, stop, step +* PR #4077: Add np select +* PR #4076: Add numpy windowing functions support (np.bartlett, np.hamming, + np.blackman, np.hanning, np.kaiser) +* PR #4095: Support ir.Global/FreeVar in find_const() +* PR #3691: Make TypingError abort compiling earlier +* PR #3646: Log internal errors encountered in typeinfer + +Fixes: + +* PR #4303: Work around scipy bug 10206 +* PR #4302: Fix flake8 issue on master +* PR #4301: Fix integer literal bug in np.select impl +* PR #4291: Fix pickling of jitclass type +* PR #4262: Resolves #4251 - Fix bug in reshape analysis. +* PR #4233: Fixes issue revealed by #4215 +* PR #4224: Fix #4223. Looplifting error due to StaticSetItem in objectmode +* PR #4222: Fix bad python path. +* PR #4178: Fix unary operator overload, check with unicode impl +* PR #4173: Fix return type in np.bincount with weights +* PR #4153: Fix slice shape assignment in array analysis +* PR #4152: fix status check in dict lookup +* PR #4145: Use callable instead of checking __module__ +* PR #4118: Fix inline assembly support on CPU. +* PR #4088: Resolves #4075 - parfors array_analysis bug. +* PR #4085: Resolves #3314 - parfors array_analysis bug with reshape. + +CUDA Enhancements/Fixes: + +* PR #4199: Extend `__cuda_array_interface__` with optional mask attribute, + bump version to 1 +* PR #4137: CUDA - Fix round Builtin +* PR #4114: Support 3rd party activated CUDA context + +Documentation Updates: + +* PR #4317: Add docs for ARMv8/AArch64 +* PR #4318: Add supported platforms to the docs. Closes #4316 +* PR #4295: Alter deprecation schedules +* PR #4253: fix typo in pysupported docs +* PR #4252: fix typo on repomap +* PR #4241: remove unused import +* PR #4240: fix typo in jitclass docs +* PR #4205: Update return value order in normalize_signature docstring +* PR #4237: Update doc links to point to latest not dev docs. +* PR #4197: hyperlink repomap +* PR #4170: Clarify docs on accumulating into arrays in prange +* PR #4147: fix docstring for DictType iterables +* PR #3951: A guide to overloading + +CI Updates: + +* PR #4300: AArch64 has no faulthandler package +* PR #4273: pin to MKL BLAS for testing to get consistent results +* PR #4209: Revert previous network tol patch and try with conda config +* PR #4138: Remove tbb before Azure test only on Python 3, since it was already + removed for Python 2 + +Contributors: + +* Ehsan Totoni (core dev) +* Gregory R. Lee +* Guilherme Leobas +* James Bourbeau +* Joshua L. Adelman +* Keith Kraus +* Lucio Fernandez-Arjona +* Nick White +* Rob Ennis +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) +* Valentin Haenel (core dev) + + +Version 0.44.1 +-------------- + +This patch release addresses some regressions reported in the 0.44.0 release: + +- PR #4165: Fix #4164 issue with NUMBAPRO_NVVM. +- PR #4172: Abandon branch pruning if an arg name is redefined. (Fixes #4163) +- PR #4183: Fix #4156. Problem with defining in-loop variables. 
+
+
+Version 0.44.0
+--------------
+
+IMPORTANT: In this release a few significant deprecations (and some less
+significant ones) are being made; users are encouraged to read the related
+documentation.
+
+General enhancements in this release include:
+
+- Numba is backed by LLVM 8 on all platforms apart from ppc64le, which, due to
+  bugs, remains on the LLVM 7.x series.
+- Numba's dictionary support now includes type inference for keys and values.
+- The .view() method now works for NumPy scalar types.
+- Newly supported NumPy functions added: np.delete, np.nanquantile, np.quantile,
+  np.repeat, np.shape.
+
+In addition considerable effort has been made to fix some long standing bugs
+and a large number of other bugs; the "Fixes" section is very large this time!
+
+Enhancements from user contributed PRs (with thanks!):
+
+- Max Bolingbroke added support for the selective use of ``fastmath`` flags in
+  #3847.
+- Rob Ennis made min() and max() work on iterables in #3820 and added
+  np.quantile and np.nanquantile in #3899.
+- Sergey Shalnov added numerous unicode string related features, zfill in #3978,
+  ljust in #4001, rjust and center in #4044 and strip, lstrip and rstrip in
+  #4048.
+- Guilherme Leobas added support for np.delete in #3890
+- Christoph Deil exposed the Numba CLI via ``python -m numba`` in #4066 and made
+  numerous documentation fixes.
+- Leo Schwarz wrote the bulk of the code for jitclass default constructor
+  arguments in #3852.
+- Nick White enhanced the CUDA backend to use min/max PTX instructions where
+  possible in #4054.
+- Lucio Fernandez-Arjona implemented the unicode string ``__mul__`` function in
+  #3952.
+- Dimitri Vorona wrote the bulk of the code to implement getitem and setitem for
+  jitclass in #3861.
+
+General Enhancements:
+
+* PR #3820: Min max on iterables
+* PR #3842: Unicode type iteration
+* PR #3847: Allow fine-grained control of fastmath flags to partially address #2923
+* PR #3852: Continuation of PR #2894
+* PR #3861: Continuation of PR #3730
+* PR #3890: Add support for np.delete
+* PR #3899: Support for np.quantile and np.nanquantile
+* PR #3900: Fix 3457 :: Implements np.repeat
+* PR #3928: Add .view() method for NumPy scalars
+* PR #3939: Update icc_rt clone recipe.
+* PR #3952: __mul__ for strings, initial implementation and tests
+* PR #3956: Type-inferred dictionary
+* PR #3959: Create a view for string slicing to avoid extra allocations
+* PR #3978: zfill operation implementation
+* PR #4001: ljust operation implementation
+* PR #4010: Support `dict()` and `{}`
+* PR #4022: Support for llvm 8
+* PR #4034: Make type.Optional str more representative
+* PR #4041: Deprecation warnings
+* PR #4044: rjust and center operations implementation
+* PR #4048: strip, lstrip and rstrip operations implementation
+* PR #4066: Expose numba CLI via python -m numba
+* PR #4081: Impl `np.shape` and support function for `asarray`.
+* PR #4091: Deprecate the use of iternext_impl without RefType
+
+CUDA Enhancements/Fixes:
+
+* PR #3933: Adds `.nbytes` property to CUDA device array objects.
+* PR #4011: Add .inspect_ptx() to cuda device function
+* PR #4054: CUDA: Use min/max PTX Instructions
+* PR #4096: Update env-vars for CUDA libraries lookup
+
+Documentation Updates:
+
+* PR #3867: Code repository map
+* PR #3918: adding Joris' Fosdem 2019 presentation
+* PR #3926: order talks on applications of Numba by date
+* PR #3943: fix two small typos in vectorize docs
+* PR #3944: Fixup jitclass docs
+* PR #3990: mention preprint repo in FAQ.
Fixes #3981 +* PR #4012: Correct runtests command in contributing.rst +* PR #4043: fix typo +* PR #4047: Ambiguous Documentation fix for guvectorize. +* PR #4060: Remove remaining mentions of autojit in docs +* PR #4063: Fix annotate example in docstring +* PR #4065: Add FAQ entry explaining Numba project name +* PR #4079: Add Documentation for atomicity of typed.Dict +* PR #4105: Remove info about CUDA ENVVAR potential replacement + +Fixes: + +* PR #3719: Resolves issue #3528. Adds support for slices when not using parallel=True. +* PR #3727: Remove dels for known dead vars. +* PR #3845: Fix mutable flag transmission in .astype +* PR #3853: Fix some minor issues in the C source. +* PR #3862: Correct boolean reinterpretation of data +* PR #3863: Comments out the appveyor badge +* PR #3869: fixes flake8 after merge +* PR #3871: Add assert to ir.py to help enforce correct structuring +* PR #3881: fix preparfor dtype transform for datetime64 +* PR #3884: Prevent mutation of objmode fallback IR. +* PR #3885: Updates for llvmlite 0.29 +* PR #3886: Use `safe_load` from pyyaml. +* PR #3887: Add tolerance to network errors by permitting conda to retry +* PR #3893: Fix casting in namedtuple ctor. +* PR #3894: Fix array inliner for multiple array definition. +* PR #3905: Cherrypick #3903 to main +* PR #3920: Raise better error if unsupported jump opcode found. +* PR #3927: Apply flake8 to the numpy related files +* PR #3935: Silence DeprecationWarning +* PR #3938: Better error message for unknown opcode +* PR #3941: Fix typing of ufuncs in parfor conversion +* PR #3946: Return variable renaming dict from inline_closurecall +* PR #3962: Fix bug in alignment computation of `Record.make_c_struct` +* PR #3967: Fix error with pickling unicode +* PR #3964: Unicode split algo versioning +* PR #3975: Add handler for unknown locale to numba -s +* PR #3991: Permit Optionals in ufunc machinery +* PR #3995: Remove assert in type inference causing poor error message. +* PR #3996: add is_ascii flag to UnicodeType +* PR #4009: Prevent zero division error in np.linalg.cond +* PR #4014: Resolves #4007. +* PR #4021: Add a more specific error message for invalid write to a global. +* PR #4023: Fix handling of titles in record dtype +* PR #4024: Do a check if a call is const before saying that an object is multiply defined. +* PR #4027: Fix issue #4020. Turn off no_cpython_wrapper flag when compiling for… +* PR #4033: [WIP] Fixing wrong dtype of array inside reflected list #4028 +* PR #4061: Change IPython cache dir name to numba_cache +* PR #4067: Delete examples/notebooks/LinearRegr.py +* PR #4070: Catch writes to global typed.Dict and raise. +* PR #4078: Check tuple length +* PR #4084: Fix missing incref on optional return None +* PR #4089: Make the warnings fixer flush work for warning comparing on type. +* PR #4094: Fix function definition finding logic for commented def +* PR #4100: Fix alignment check on 32-bit. +* PR #4104: Use PEP 508 compliant env markers for install deps + +Contributors: + +* Benjamin Zaitlen +* Christoph Deil +* David Hirschfeld +* Dimitri Vorona +* Ehsan Totoni (core dev) +* Guilherme Leobas +* Leo Schwarz +* Lucio Fernandez-Arjona +* Max Bolingbroke +* NanduTej +* Nick White +* Ravi Teja Gutta +* Rob Ennis +* Sergey Shalnov +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. 
Anderson (core dev)
+* Valentin Haenel (core dev)
+
+
+Version 0.43.1
+--------------
+
+This is a bugfix release that provides minor changes to fix: a bug in branch
+pruning, bugs in `np.interp` functionality, and also fully accommodate the
+NumPy 1.16 release series.
+
+* PR #3826: NumPy 1.16 support
+* PR #3850: Refactor np.interp
+* PR #3883: Rewrite pruned conditionals as their evaluated constants.
+
+Contributors:
+
+* Rob Ennis
+* Siu Kwan Lam (core dev)
+* Stuart Archibald (core dev)
+
+
+Version 0.43.0
+--------------
+
+In this release, the major new features are:
+
+- Initial support for statically typed dictionaries
+- Improvements to `hash()` to match Python 3 behavior
+- Support for the heapq module
+- Ability to pass C structs to Numba
+- More NumPy functions: asarray, trapz, roll, ptp, extract
+
+
+NOTE:
+
+The vast majority of NumPy 1.16 behaviour is supported; however,
+``datetime`` and ``timedelta`` use involving ``NaT`` matches the behaviour
+present in earlier releases. The ufunc suite has not been extended to
+accommodate the two new time computation related additions present in NumPy
+1.16. In addition the functions ``ediff1d`` and ``interp`` have known minor
+issues in replicating outputs exactly when ``NaN``'s occur in certain input
+patterns.
+
+General Enhancements:
+
+* PR #3563: Support for np.roll
+* PR #3572: Support for np.ptp
+* PR #3592: Add dead branch prune before type inference.
+* PR #3598: Implement np.asarray()
+* PR #3604: Support for np.interp
+* PR #3607: Some simplification to lowering
+* PR #3612: Exact match flag in dispatcher
+* PR #3627: Support for np.trapz
+* PR #3630: np.where with broadcasting
+* PR #3633: Support for np.extract
+* PR #3657: np.max, np.min, np.nanmax, np.nanmin - support for complex dtypes
+* PR #3661: Access C Struct as Numpy Structured Array
+* PR #3678: Support for str.split and str.join
+* PR #3684: Support C array in C struct
+* PR #3696: Add intrinsic to help debug refcount
+* PR #3703: Implementations of type hashing.
+* PR #3715: Port CPython3.7 dictionary for numba internal use
+* PR #3716: Support inplace concat of strings
+* PR #3718: Add location to ConstantInferenceError exceptions.
+* PR #3720: improve error msg about invalid signature
+* PR #3731: Support for heapq
+* PR #3754: Updates for llvmlite 0.28
+* PR #3760: Overloadable operator.setitem
+* PR #3775: Support overloading operator.delitem
+* PR #3777: Implement compiler support for dictionary
+* PR #3791: Implement interpreter-side interface for numba dict
+* PR #3799: Support refcount'ed types in numba dict
+
+CUDA Enhancements/Fixes:
+
+* PR #3713: Fix the NvvmSupportError message when CC too low
+* PR #3722: Fix #3705: slicing error with negative strides
+* PR #3755: Make cuda.to_device accept readonly host array
+* PR #3773: Adapt library search to accommodate multiple locations
+
+Documentation Updates:
+
+* PR #3651: fix link to berryconda in docs
+* PR #3668: Add Azure Pipelines build badge
+* PR #3749: DOC: Clarify when prange is different from range
+* PR #3771: fix a few typos
+* PR #3785: Clarify use of range as function only.
+* PR #3829: Add docs for typed-dict
+
+Fixes:
+
+* PR #3614: Resolve #3586
+* PR #3618: Skip gdb tests on ARM.
+* PR #3643: Remove support_literals usage
+* PR #3645: Enforce and fix that AbstractTemplate.generic must be returning a Signature
+* PR #3648: Fail on @overload signature mismatch.
+* PR #3660: Added Ignore message to test numba.tests.test_lists.TestLists.test_mul_error +* PR #3662: Replace six with numba.six +* PR #3663: Removes coverage computation from travisci builds +* PR #3672: Avoid leaking memory when iterating over uniform tuple +* PR #3676: Fixes constant string lowering inside tuples +* PR #3677: Ensure all referenced compiled functions are linked properly +* PR #3692: Fix test failure due to overly strict test on floating point values. +* PR #3693: Intercept failed import to help users. +* PR #3694: Fix memory leak in enumerate iterator +* PR #3695: Convert return of None from intrinsic implementation to dummy value +* PR #3697: Fix for issue #3687 +* PR #3701: Fix array.T analysis (fixes #3700) +* PR #3704: Fixes for overload_method +* PR #3706: Don't push call vars recursively into nested parfors. Resolves #3686. +* PR #3710: Set as non-hoistable if a mutable variable is passed to a function in a loop. Resolves #3699. +* PR #3712: parallel=True to use better builtin mechanism to resolve call types. Resolves issue #3671 +* PR #3725: Fix invalid removal of dead empty list +* PR #3740: add uintp as a valid type to the tuple operator.getitem +* PR #3758: Fix target definition update in inlining +* PR #3782: Raise typing error on yield optional. +* PR #3792: Fix non-module object used as the module of a function. +* PR #3800: Bugfix for np.interp +* PR #3808: Bump macro to include VS2014 to fix py3.5 build +* PR #3809: Add debug guard to debug only C function. +* PR #3816: Fix array.sum(axis) 1d input return type. +* PR #3821: Replace PySys_WriteStdout with PySys_FormatStdout to ensure no truncation. +* PR #3830: Getitem should not return optional type +* PR #3832: Handle single string as path in find_file() + +Contributors: + +* Ehsan Totoni +* Gryllos Prokopis +* Jonathan J. Helmus +* Kayla Ngan +* lalitparate +* luk-f-a +* Matyt +* Max Bolingbroke +* Michael Seifert +* Rob Ennis +* Siu Kwan Lam +* Stan Seibert +* Stuart Archibald +* Todd A. Anderson +* Tao He +* Valentin Haenel + + +Version 0.42.1 +-------------- + +Bugfix release to fix the incorrect hash in OSX wheel packages. +No change in source code. + + +Version 0.42.0 +-------------- + +In this release the major features are: + +- The capability to launch and attach the GDB debugger from within a jitted + function. +- The upgrading of LLVM to version 7.0.0. + +We added a draft of the project roadmap to the developer manual. The roadmap is +for informational purposes only as priorities and resources may change. + +Here are some enhancements from contributed PRs: + +- #3532. Daniel Wennberg improved the ``cuda.{pinned, mapped}`` API so that + the associated memory is released immediately at the exit of the context + manager. +- #3531. Dimitri Vorona enabled the inlining of jitclass methods. +- #3516. Simon Perkins added the support for passing numpy dtypes (i.e. + ``np.dtype("int32")``) and their type constructor (i.e. ``np.int32``) into + a jitted function. +- #3509. Rob Ennis added support for ``np.corrcoef``. + +A regression issue (#3554, #3461) relating to making an empty slice in parallel +mode is resolved by #3558. + +General Enhancements: + +* PR #3392: Launch and attach gdb directly from Numba. +* PR #3437: Changes to accommodate LLVM 7.0.x +* PR #3509: Support for np.corrcoef +* PR #3516: Typeof dtype values +* PR #3520: Fix @stencil ignoring cval if out kwarg supplied. 
+* PR #3531: Fix jitclass method inlining and avoid unnecessary increfs +* PR #3538: Avoid future C-level assertion error due to invalid visibility +* PR #3543: Avoid implementation error being hidden by the try-except +* PR #3544: Add `long_running` test flag and feature to exclude tests. +* PR #3549: ParallelAccelerator caching improvements +* PR #3558: Fixes array analysis for inplace binary operators. +* PR #3566: Skip alignment tests on armv7l. +* PR #3567: Fix unifying literal types in namedtuple +* PR #3576: Add special copy routine for NumPy out arrays +* PR #3577: Fix example and docs typos for `objmode` context manager. +* PR #3580: Use alias information when determining whether it is safe to + reorder statements. +* PR #3583: Use `ir.unknown_loc` for unknown `Loc`, as #3390 with tests +* PR #3587: Fix llvm.memset usage changes in llvm7 +* PR #3596: Fix Array Analysis for Global Namedtuples +* PR #3597: Warn users if threading backend init unsafe. +* PR #3605: Add guard for writing to read only arrays from ufunc calls +* PR #3606: Improve the accuracy of error message wording for undefined type. +* PR #3611: gdb test guard needs to ack ptrace permissions +* PR #3616: Skip gdb tests on ARM. + +CUDA Enhancements: + +* PR #3532: Unregister temporarily pinned host arrays at once +* PR #3552: Handle broadcast arrays correctly in host->device transfer. +* PR #3578: Align cuda and cuda simulator kwarg names. + +Documentation Updates: + +* PR #3545: Fix @njit description in 5 min guide +* PR #3570: Minor documentation fixes for numba.cuda +* PR #3581: Fixing minor typo in `reference/types.rst` +* PR #3594: Changing `@stencil` docs to correctly reflect `func_or_mode` param +* PR #3617: Draft roadmap as of Dec 2018 + +Contributors: + +* Aaron Critchley +* Daniel Wennberg +* Dimitri Vorona +* Dominik Stańczak +* Ehsan Totoni (core dev) +* Iskander Sharipov +* Rob Ennis +* Simon Muller +* Simon Perkins +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) + + +Version 0.41.0 +-------------- + +This release adds the following major features: + +* Diagnostics showing the optimizations done by ParallelAccelerator +* Support for profiling Numba-compiled functions in Intel VTune +* Additional NumPy functions: partition, nancumsum, nancumprod, ediff1d, cov, + conj, conjugate, tri, tril, triu +* Initial support for Python 3 Unicode strings + +General Enhancements: + +* PR #1968: armv7 support +* PR #2983: invert mapping b/w binop operators and the operator module #2297 +* PR #3160: First attempt at parallel diagnostics +* PR #3307: Adding NUMBA_ENABLE_PROFILING envvar, enabling jit event +* PR #3320: Support for np.partition +* PR #3324: Support for np.nancumsum and np.nancumprod +* PR #3325: Add location information to exceptions. +* PR #3337: Support for np.ediff1d +* PR #3345: Support for np.cov +* PR #3348: Support user pipeline class in with lifting +* PR #3363: string support +* PR #3373: Improve error message for empty imprecise lists. +* PR #3375: Enable overload(operator.getitem) +* PR #3402: Support negative indexing in tuple. +* PR #3414: Refactor Const type +* PR #3416: Optimized usage of alloca out of the loop +* PR #3424: Updates for llvmlite 0.26 +* PR #3462: Add support for `np.conj/np.conjugate`.
+* PR #3480: np.tri, np.tril, np.triu - default optional args +* PR #3481: Permit dtype argument as sole kwarg in np.eye + +CUDA Enhancements: + +* PR #3399: Add max_registers Option to cuda.jit + +Continuous Integration / Testing: + +* PR #3303: CI with Azure Pipelines +* PR #3309: Workaround race condition with apt +* PR #3371: Fix issues with Azure Pipelines +* PR #3362: Fix #3360: `RuntimeWarning: 'numba.runtests' found in sys.modules` +* PR #3374: Disable openmp in wheel building +* PR #3404: Azure Pipelines templates +* PR #3419: Fix cuda tests and error reporting in test discovery +* PR #3491: Prevent faulthandler installation on armv7l +* PR #3493: Fix CUDA test that used negative indexing behaviour that's fixed. +* PR #3495: Start Flake8 checking of Numba source + +Fixes: + +* PR #2950: Fix dispatcher to only consider contiguous-ness. +* PR #3124: Fix 3119, raise for 0d arrays in reductions +* PR #3228: Reduce redundant module linking +* PR #3329: Fix AOT on windows. +* PR #3335: Fix memory management of __cuda_array_interface__ views. +* PR #3340: Fix typo in error name. +* PR #3365: Fix the default unboxing logic +* PR #3367: Allow non-global reference to objmode() context-manager +* PR #3381: Fix global reference in objmode for dynamically created function +* PR #3382: CUDA_ERROR_MISALIGNED_ADDRESS Using Multiple Const Arrays +* PR #3384: Correctly handle very old versions of colorama +* PR #3394: Add 32bit package guard for non-32bit installs +* PR #3397: Fix with-objmode warning +* PR #3403: Fix label offset in call inline after parfor pass +* PR #3429: Fixes raising of user defined exceptions for exec(). +* PR #3432: Fix error due to function naming in CI in py2.7 +* PR #3444: Fixed TBB's single thread execution and test added for #3440 +* PR #3449: Allow matching non-array objects in find_callname() +* PR #3455: Change getiter and iternext to not be pure. Resolves #3425 +* PR #3467: Make ir.UndefinedType singleton class. +* PR #3478: Fix np.random.shuffle sideeffect +* PR #3487: Raise unsupported for kwargs given to `print()` +* PR #3488: Remove dead script. +* PR #3498: Fix stencil support for boolean as return type +* PR #3511: Fix handling make_function literals (regression of #3414) +* PR #3514: Add missing unicode != unicode +* PR #3527: Fix complex math sqrt implementation for large -ve values +* PR #3530: This adds an arg check for the pattern supplied to Parfors. +* PR #3536: Sets list dtor linkage to `linkonce_odr` to fix visibility in AOT + +Documentation Updates: + +* PR #3316: Update 0.40 changelog with additional PRs +* PR #3318: Tweak spacing to avoid search box wrapping onto second line +* PR #3321: Add note about memory leaks with exceptions to docs. Fixes #3263 +* PR #3322: Add FAQ on CUDA + fork issue. Fixes #3315. +* PR #3343: Update docs for argsort, kind kwarg partially supported. +* PR #3357: Added mention of njit in 5minguide.rst +* PR #3434: Fix parallel reduction example in docs. +* PR #3452: Fix broken link and mark up problem. +* PR #3484: Size Numba logo in docs in em units. Fixes #3313 +* PR #3502: just two typos +* PR #3506: Document string support +* PR #3513: Documentation for parallel diagnostics.
+* PR #3526: Fix 5 min guide with respect to @njit decl + +Contributors: + +* Alex Ford +* Andreas Sodeur +* Anton Malakhov +* Daniel Stender +* Ehsan Totoni (core dev) +* Henry Schreiner +* Marcel Bargull +* Matt Cooper +* Nick White +* Nicolas Hug +* rjenc29 +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) + + +Version 0.40.1 +-------------- + +This is a PyPI-only patch release to ensure that PyPI wheels can enable the +TBB threading backend, and to disable the OpenMP backend in the wheels. +Limitations of manylinux1 and variation in user environments can cause +segfaults when OpenMP is enabled on wheel builds. Note that this release has +no functional changes for users who obtained Numba 0.40.0 via conda. + +Patches: + +* PR #3338: Accidentally left Anton off contributor list for 0.40.0 +* PR #3374: Disable OpenMP in wheel building +* PR #3376: Update 0.40.1 changelog and docs on OpenMP backend + + +Version 0.40.0 +-------------- + +This release adds a number of major features: + +* A new GPU backend: kernels for AMD GPUs can now be compiled using the ROCm + driver on Linux. +* The thread pool implementation used by Numba for automatic multithreading + is configurable to use TBB, OpenMP, or the old "workqueue" implementation. + (TBB is likely to become the preferred default in a future release.) +* New documentation on thread and fork-safety with Numba, along with overall + improvements in thread-safety. +* Experimental support for executing a block of code inside a nopython mode + function in object mode. +* Parallel loops now allow arrays as reduction variables +* CUDA improvements: FMA, faster float64 atomics on supporting hardware, + records in const memory, and improved datetime dtype support +* More NumPy functions: vander, tri, triu, tril, fill_diagonal + + +General Enhancements: + +* PR #3017: Add facility to support with-contexts +* PR #3033: Add support for multidimensional CFFI arrays +* PR #3122: Add inliner to object mode pipeline +* PR #3127: Support for reductions on arrays. +* PR #3145: Support for np.fill_diagonal +* PR #3151: Keep a queue of references to last N deserialized functions. Fixes #3026 +* PR #3154: Support use of list() if typeable. +* PR #3166: Objmode with-block +* PR #3179: Updates for llvmlite 0.25 +* PR #3181: Support function extension in alias analysis +* PR #3189: Support literal constants in typing of object methods +* PR #3190: Support passing closures as literal values in typing +* PR #3199: Support inferring stencil index as constant in simple unary expressions +* PR #3202: Threading layer backend refactor/rewrite/reinvention! +* PR #3209: Support for np.tri, np.tril and np.triu +* PR #3211: Handle unpacking in building tuple (BUILD_TUPLE_UNPACK opcode) +* PR #3212: Support for np.vander +* PR #3227: Add NumPy 1.15 support +* PR #3272: Add MemInfo_data to runtime._nrt_python.c_helpers +* PR #3273: Refactor. Removing thread-local-storage based context nesting. +* PR #3278: compiler threadsafety lockdown +* PR #3291: Add CPU count and CFS restrictions info to numba -s. + +CUDA Enhancements: + +* PR #3152: Use cuda driver api to get best blocksize for best occupancy +* PR #3165: Add FMA intrinsic support +* PR #3172: Use float64 add Atomics, Where Available +* PR #3186: Support Records in CUDA Const Memory +* PR #3191: CUDA: fix log size +* PR #3198: Fix GPU datetime timedelta types usage +* PR #3221: Support datetime/timedelta scalar argument to a CUDA kernel.
+* PR #3259: Add DeviceNDArray.view method to reinterpret data as a different type. +* PR #3310: Fix IPC handling of sliced cuda array. + +ROCm Enhancements: + +* PR #3023: Support for AMDGCN/ROCm. +* PR #3108: Add ROC info to `numba -s` output. +* PR #3176: Move ROC vectorize init to npyufunc +* PR #3177: Add auto_synchronize support to ROC stream +* PR #3178: Update ROC target documentation. +* PR #3294: Add compiler lock to ROC compilation path. +* PR #3280: Add wavebits property to the HSA Agent. +* PR #3281: Fix ds_permute types and add tests + +Continuous Integration / Testing: + +* PR #3091: Remove old recipes, switch to test config based on env var. +* PR #3094: Add higher ULP tolerance for products in complex space. +* PR #3096: Set exit on error in incremental scripts +* PR #3109: Add skip to test needing jinja2 if no jinja2. +* PR #3125: Skip cudasim only tests +* PR #3126: add slack, drop flowdock +* PR #3147: Improve error message for arg type unsupported during typing. +* PR #3128: Fix recipe/build for jetson tx2/ARM +* PR #3167: In build script activate env before installing. +* PR #3180: Add skip to broken test. +* PR #3216: Fix libcuda.so loading in some container setup +* PR #3224: Switch to new Gitter notification webhook URL and encrypt it +* PR #3235: Add 32bit Travis CI jobs +* PR #3257: This adds scipy/ipython back into windows conda test phase. + +Fixes: + +* PR #3038: Fix random integer generation to match results from NumPy. +* PR #3045: Fix #3027 - Numba reassigns sys.stdout +* PR #3059: Handler for known LoweringErrors. +* PR #3060: Adjust attribute error for NumPy functions. +* PR #3067: Abort simulator threads on exception in thread block. +* PR #3079: Implement +/-(types.boolean) Fix #2624 +* PR #3080: Compute np.var and np.std correctly for complex types. +* PR #3088: Fix #3066 (array.dtype.type in prange) +* PR #3089: Fix invalid ParallelAccelerator hoisting issue. +* PR #3136: Fix #3135 (lowering error) +* PR #3137: Fix for issue3103 (race condition detection) +* PR #3142: Fix Issue #3139 (parfors reuse of reduction variable across prange blocks) +* PR #3148: Remove dead array equal @infer code +* PR #3153: Fix canonicalize_array_math typing for calls with kw args +* PR #3156: Fixes issue with missing pygments in testing and adds guards. +* PR #3168: Py37 bytes output fix. +* PR #3171: Fix #3146. Fix CFUNCTYPE void* return-type handling +* PR #3193: Fix setitem/getitem resolvers +* PR #3222: Fix #3214. Mishandling of POP_BLOCK in while True loop. +* PR #3230: Fixes liveness analysis issue in looplifting +* PR #3233: Fix return type difference for 32bit ctypes.c_void_p +* PR #3234: Fix types and layout for `np.where`. +* PR #3237: Fix DeprecationWarning about imp module +* PR #3241: Fix #3225. Normalize 0nd array to scalar in typing of indexing code. +* PR #3256: Fix #3251: Move imports of ABCs to collections.abc for Python >= 3.3 +* PR #3292: Fix issue3279. +* PR #3302: Fix error due to mismatching dtype + +Documentation Updates: + +* PR #3104: Workaround for #3098 (test_optional_unpack Heisenbug) +* PR #3132: Adds an ~5 minute guide to Numba. +* PR #3194: Fix docs RE: np.random generator fork/thread safety +* PR #3242: Page with Numba talks and tutorial links +* PR #3258: Allow users to choose the type of issue they are reporting. 
+* PR #3260: Fixed broken link +* PR #3266: Fix cuda pointer ownership problem with user/externally allocated pointer +* PR #3269: Tweak typography with CSS +* PR #3270: Update FAQ for functions passed as arguments +* PR #3274: Update installation instructions +* PR #3275: Note pyobject and voidptr are types in docs +* PR #3288: Do not need to call parallel optimizations "experimental" anymore +* PR #3318: Tweak spacing to avoid search box wrapping onto second line + +Contributors: + +* Anton Malakhov +* Alex Ford +* Anthony Bisulco +* Ehsan Totoni (core dev) +* Leonard Lausen +* Matthew Petroff +* Nick White +* Ray Donnelly +* rjenc29 +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Stuart Reynolds +* Todd A. Anderson (core dev) + + +Version 0.39.0 +-------------- + +Here are the highlights for the Numba 0.39.0 release. + +* This is the first version that supports Python 3.7. +* With help from Intel, we have fixed the issues with SVML support (related + issues #2938, #2998, #3006). +* List has gained support for containing reference-counted types like NumPy + arrays and `list`. Note, list still cannot hold heterogeneous types. +* We have made a significant change to the internal calling-convention, + which should be transparent to most users, to allow for a future feature that + will permit jumping back into python-mode from a nopython-mode function. + This also fixes a limitation of `print` that disabled its use from nopython + functions that were deep in the call-stack. +* For CUDA GPU support, we added a `__cuda_array_interface__` following the + NumPy array interface specification to allow Numba to consume externally + defined device arrays. We have opened a corresponding pull request to CuPy to + test out the concept and be able to use a CuPy GPU array. +* The Numba dispatcher `inspect_types()` method now supports the kwarg `pretty`, + which, if set to `True`, will produce ANSI/HTML output showing the annotated + types when invoked from ipython/jupyter-notebook respectively. +* The NumPy functions `ndarray.dot`, `np.percentile` and `np.nanpercentile`, and + `np.unique` are now supported. +* Numba now supports the use of a per-project configuration file to permanently + set behaviours typically set via `NUMBA_*` family environment variables. +* Support for the `ppc64le` architecture has been added. + +Enhancements: + +* PR #2793: Simplify and remove javascript from html_annotate templates. +* PR #2840: Support list of refcounted types +* PR #2902: Support for np.unique +* PR #2926: Enable fence for all architectures and add developer notes +* PR #2928: Making error about untyped list more informative. +* PR #2930: Add configuration file and color schemes. +* PR #2932: Fix encoding to 'UTF-8' in `check_output` decode. +* PR #2938: Python 3.7 compat: _Py_Finalizing becomes _Py_IsFinalizing() +* PR #2939: Comprehensive SVML unit test +* PR #2946: Add support for `ndarray.dot` method and tests. +* PR #2953: percentile and nanpercentile +* PR #2957: Add new 3.7 opcode support. +* PR #2963: Improve alias analysis to be more comprehensive +* PR #2984: Support for namedtuples in array analysis +* PR #2986: Fix environment propagation +* PR #2990: Improve function call matching for intrinsics +* PR #3002: Second pass at error rewrites (interpreter errors). +* PR #3004: Add numpy.empty to the list of pure functions. +* PR #3008: Augment SVML detection with llvmlite SVML patch detection.
+* PR #3012: Make use of the common spelling of heterogeneous/homogeneous. +* PR #3032: Fix pycc ctypes test due to mismatch in calling-convention +* PR #3039: Add SVML detection to Numba environment diagnostic tool. +* PR #3041: This adds @needs_blas to tests that use BLAS +* PR #3056: Require llvmlite>=0.24.0 + +CUDA Enhancements: + +* PR #2860: __cuda_array_interface__ +* PR #2910: More CUDA intrinsics +* PR #2929: Add Flag To Prevent Unnecessary D->H Copies +* PR #3037: Add CUDA IPC support on non-peer-accessible devices + +CI Enhancements: + +* PR #3021: Update appveyor config. +* PR #3040: Add fault handler to all builds +* PR #3042: Add catchsegv +* PR #3077: Adds optional number of processes for `-m` in testing + +Fixes: + +* PR #2897: Fix line position of delete statement in numba ir +* PR #2905: Fix for #2862 +* PR #3009: Fix optional type returning in recursive call +* PR #3019: workaround and unittest for issue #3016 +* PR #3035: [TESTING] Attempt delayed removal of Env +* PR #3048: [WIP] Fix cuda tests failure on buildfarm +* PR #3054: Make test work on 32-bit +* PR #3062: Fix cuda.In freeing devary before the kernel launch +* PR #3073: Workaround #3072 +* PR #3076: Avoid ignored exception due to missing globals at interpreter teardown + +Documentation Updates: + +* PR #2966: Fix syntax in env var docs. +* PR #2967: Fix typo in CUDA kernel layout example. +* PR #2970: Fix docstring copy paste error. + +Contributors: + +The following people contributed to this release. + +* Anton Malakhov +* Ehsan Totoni (core dev) +* Julia Tatz +* Matthias Bussonnier +* Nick White +* Ray Donnelly +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Todd A. Anderson (core dev) +* Rik-de-Kort +* rjenc29 + + +Version 0.38.1 +-------------- + +This is a critical bug fix release addressing: +https://github.com/numba/numba/issues/3006 + +The bug does not impact users using conda packages from Anaconda or Intel Python +Distribution (but it does impact conda-forge). It does not impact users of pip +using wheels from PyPI. + +This only impacts a small number of users where: + + * The ICC runtime (specifically libsvml) is present in the user's environment. + * The user is using an llvmlite statically linked against a version of LLVM + that has not been patched with SVML support. + * The platform is 64-bit. + +The release fixes a code generation path that could lead to the production of +incorrect results under the above situation. + +Fixes: + +* PR #3007: Augment SVML detection with llvmlite SVML patch detection. + +Contributors: + +The following people contributed to this release. + +* Stuart Archibald (core dev) + + +Version 0.38.0 +-------------- + +Following on from the bug fix focus of the last release, this release swings +back towards the addition of new features and usability improvements based on +community feedback. This release is comparatively large! Three key features/ +changes to note are: + + * Numba (via llvmlite) is now backed by LLVM 6.0; general vectorization is + improved as a result. A significant long-standing LLVM bug that was causing + corruption was also found and fixed. + * Further considerable improvements in vectorization are made available as + Numba now supports Intel's short vector math library (SVML). + Try it out with `conda install -c numba icc_rt`. + * CUDA 8.0 is now the minimum supported CUDA version.
+ +Other highlights include: + + * Bug fixes to `parallel=True` have enabled more vectorization opportunities + when using the ParallelAccelerator technology. + * Much effort has gone into improving error reporting and the general usability + of Numba. This includes highlighted error messages and performance tips + documentation. Try it out with `conda install colorama`. + * A number of new NumPy functions are supported: `np.convolve`, `np.correlate`, + `np.reshape`, `np.transpose`, `np.permutation`, `np.real` and `np.imag`. + Also, `np.searchsorted` now supports the `side` kwarg. Further, `np.argsort` now + supports the `kind` kwarg with `quicksort` and `mergesort` available. + * The Numba extension API has gained the ability to operate more easily with + functions from Cython modules through the use of + `numba.extending.get_cython_function_address` to obtain function addresses + for direct use in `ctypes.CFUNCTYPE`. + * Numba now allows the passing of jitted functions (and containers of jitted + functions) as arguments to other jitted functions. + * The CUDA functionality has gained support for a larger selection of bit + manipulation intrinsics, also SELP, and has had a number of bugs fixed. + * Initial work to support the PPC64LE platform has been added; full support is, + however, waiting on the LLVM 6.0.1 release as it contains critical patches + not present in 6.0.0. + It is hoped that any remaining issues will be fixed in the next release. + * The capacity for advanced users/compiler engineers to define their own + compilation pipelines. + +Enhancements: + +* PR #2660: Support bools from cffi in nopython. +* PR #2741: Enhance error message for undefined variables. +* PR #2744: Add diagnostic error message to test suite discovery failure. +* PR #2748: Added Intel SVML optimizations as opt-out choice working by default +* PR #2762: Support transpose with axes arguments. +* PR #2777: Add support for np.correlate and np.convolve +* PR #2779: Implement np.random.permutation +* PR #2801: Passing jitted functions as args +* PR #2802: Support np.real() and np.imag() +* PR #2807: Expose `import_cython_function` +* PR #2821: Add kwarg 'side' to np.searchsorted +* PR #2822: Adds stable argsort +* PR #2832: Fixups for llvmlite 0.23/llvm 6 +* PR #2836: Support `index` method on tuples +* PR #2839: Support for np.transpose and np.reshape. +* PR #2843: Custom pipeline +* PR #2847: Replace signed array access indices in unsigned prange loop body +* PR #2859: Add support for improved error reporting. +* PR #2880: This adds a github issue template. +* PR #2881: Build recipe to clone Intel ICC runtime. +* PR #2882: Update TravisCI to test SVML +* PR #2893: Add reference to the data buffer in array.ctypes object +* PR #2895: Move to CUDA 8.0 + +Fixes: + +* PR #2737: Fix #2007 (part 1). Empty array handling in np.linalg. +* PR #2738: Fix install_requires to allow pip getting pre-release version +* PR #2740: Fix 2208. Generate better error message. +* PR #2765: Fix Bit-ness +* PR #2780: PowerPC reference counting memory fences +* PR #2805: Fix six imports. +* PR #2813: Fix #2812: gufunc scalar output bug. +* PR #2814: Fix the build post #2727 +* PR #2831: Attempt to fix #2473 +* PR #2842: Fix issue with test discovery and broken CUDA drivers. +* PR #2850: Add rtsys init guard and test. +* PR #2852: Skip vectorization test with targets that are not x86 +* PR #2856: Prevent printing to stdout in `test_extending.py` +* PR #2864: Correct C code to prevent compiler warnings. +* PR #2889: Attempt to fix #2386.
+* PR #2891: Removed test skipping for inspect_cfg +* PR #2898: Add guard to parallel test on unsupported platforms +* PR #2907: Update change log for PPC64LE LLVM dependency. +* PR #2911: Move build requirement to llvmlite>=0.23.0dev0 +* PR #2912: Fix random permutation test. +* PR #2914: Fix MD list syntax in issue template. + +Documentation Updates: + +* PR #2739: Explicitly state default value of error_model in docstring +* PR #2803: DOC: parallel vectorize requires signatures +* PR #2829: Add Python 2.7 EOL plan to docs +* PR #2838: Use automatic numbering syntax in list. +* PR #2877: Add performance tips documentation. +* PR #2883: Fix #2872: update rng doc about thread/fork-safety +* PR #2908: Add missing link and ref to docs. +* PR #2909: Tiny typo correction + +ParallelAccelerator enhancements/fixes: + +* PR #2727: Changes to enable vectorization in ParallelAccelerator. +* PR #2816: Array analysis for transpose with arbitrary arguments +* PR #2874: Fix dead code eliminator not to remove a call with side-effect +* PR #2886: Fix ParallelAccelerator arrayexpr repr + +CUDA enhancements: + +* PR #2734: More Constants From cuda.h +* PR #2767: Add len(..) Support to DeviceNDArray +* PR #2778: Add More Device Array API Functions to CUDA Simulator +* PR #2824: Add CUDA Primitives for Population Count +* PR #2835: Emit selp Instructions to Avoid Branching +* PR #2867: Full support for CUDA device attributes + +CUDA fixes: +* PR #2768: Don't Compile Code on Every Assignment +* PR #2878: Fixes a Win64 issue with the test in Pr/2865 + +Contributors: + +The following people contributed to this release. + +* Abutalib Aghayev +* Alex Olivas +* Anton Malakhov +* Dong-hee Na +* Ehsan Totoni (core dev) +* John Zwinck +* Josh Wilson +* Kelsey Jordahl +* Nick White +* Olexa Bilaniuk +* Rik-de-Kort +* Siu Kwan Lam (core dev) +* Stan Seibert (core dev) +* Stuart Archibald (core dev) +* Thomas Arildsen +* Todd A. Anderson (core dev) + + +Version 0.37.0 +-------------- + +This release focuses on bug fixing and stability but also adds a few new +features including support for Numpy 1.14. The key change for Numba core was the +long awaited addition of the final tranche of thread safety improvements that +allow Numba to be run concurrently on multiple threads without hitting known +thread safety issues inside LLVM itself. Further, a number of fixes and +enhancements went into the CUDA implementation and ParallelAccelerator gained +some new features and underwent some internal refactoring. + +Misc enhancements: + +* PR #2627: Remove hacks to make llvmlite threadsafe +* PR #2672: Add ascontiguousarray +* PR #2678: Add Gitter badge +* PR #2691: Fix #2690: add intrinsic to convert array to tuple +* PR #2703: Test runner feature: failed-first and last-failed +* PR #2708: Patch for issue #1907 +* PR #2732: Add support for array.fill + +Misc Fixes: + +* PR #2610: Fix #2606 lowering of optional.setattr +* PR #2650: Remove skip for win32 cosine test +* PR #2668: Fix empty_like from readonly arrays. +* PR #2682: Fixes 2210, remove _DisableJitWrapper +* PR #2684: Fix #2340, generator error yielding bool +* PR #2693: Add travis-ci testing of NumPy 1.14, and also check on Python 2.7 +* PR #2694: Avoid type inference failure due to a typing template rejection +* PR #2695: Update llvmlite version dependency. +* PR #2696: Fix tuple indexing codegeneration for empty tuple +* PR #2698: Fix #2697 by deferring deletion in the simplify_CFG loop. 
+* PR #2701: Small fix to avoid tempfiles being created in the current directory +* PR #2725: Fix 2481, LLVM IR parsing error due to mutated IR +* PR #2726: Fix #2673: incorrect fork error msg. +* PR #2728: Alternative to #2620. Remove dead code ByteCodeInst.get. +* PR #2730: Add guard for test needing SciPy/BLAS + +Documentation updates: + +* PR #2670: Update communication channels +* PR #2671: Add docs about diagnosing loop vectorizer +* PR #2683: Add docs on const arg requirements and on const mem alloc +* PR #2722: Add docs on numpy support in cuda +* PR #2724: Update doc: warning about unsupported arguments + +ParallelAccelerator enhancements/fixes: + +Parallel support for `np.arange` and `np.linspace`, as well as `np.mean`, +`np.std` and `np.var`, is added. This was performed as part of a general +refactor and cleanup of the core ParallelAccelerator code. + +* PR #2674: Core pa +* PR #2704: Generate Dels after parfor sequential lowering +* PR #2716: Handle matching directly supported functions + +CUDA enhancements: + +* PR #2665: CUDA DeviceNDArray: Support numpy transpose API +* PR #2681: Allow Assigning to DeviceNDArrays +* PR #2702: Make DummyArray do High Dimensional Reshapes +* PR #2714: Use CFFI to Reuse Code + +CUDA fixes: + +* PR #2667: Fix CUDA DeviceNDArray slicing +* PR #2686: Fix #2663: incorrect offset when indexing cuda array. +* PR #2687: Ensure Constructed Stream Bound +* PR #2706: Workaround for unexpected warp divergence due to exception raising + code +* PR #2707: Fix regression: cuda test submodules not loading properly in + runtests +* PR #2731: Use more challenging values in slice tests. +* PR #2720: A quick testsuite fix to not run the new cuda testcase in the + multiprocess pool + +Contributors: + +The following people contributed to this release. + +* Coutinho Menezes Nilo +* Daniel +* Ehsan Totoni +* Nick White +* Paul H. Liu +* Siu Kwan Lam +* Stan Seibert +* Stuart Archibald +* Todd A. Anderson + + +Version 0.36.2 +-------------- + +This is a bugfix release that provides minor changes to address: + +* PR #2645: Avoid CPython bug with ``exec`` in older 2.7.x. +* PR #2652: Add support for CUDA 9. + + +Version 0.36.1 +-------------- + +This release continues to add new features to the work undertaken in partnership +with Intel on ParallelAccelerator technology. Other changes of note include the +compilation chain being updated to use LLVM 5.0 and the production of conda +packages using conda-build 3 and the new compilers that ship with it. + +NOTE: A version 0.36.0 was tagged for internal use but not released. + +ParallelAccelerator: + +NOTE: The ParallelAccelerator technology is under active development and should +be considered experimental. + +New features relating to ParallelAccelerator, from work undertaken with Intel, +include the addition of the `@stencil` decorator for ease of implementation of +stencil-like computations, support for general reductions, and slice and +range fusion for parallel slice/bit-array assignments. Documentation on both the +use and implementation of the above has been added. Further, a new debug +environment variable `NUMBA_DEBUG_ARRAY_OPT_STATS` is made available to give +information about which operators/calls are converted to parallel for-loops.
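+
+For illustration, a minimal sketch of the `@stencil` decorator described
+above (an illustrative example, not taken from the release notes)::
+
+    import numpy as np
+    from numba import njit, stencil
+
+    @stencil
+    def mean3(a):
+        # relative indexing over a 3-point neighbourhood;
+        # out-of-bounds neighbours default to cval=0
+        return (a[-1] + a[0] + a[1]) / 3
+
+    @njit
+    def smooth(a):
+        return mean3(a)
+
+    print(smooth(np.arange(10.0)))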
+ +ParallelAccelerator features: + +* PR #2457: Stencil Computations in ParallelAccelerator +* PR #2548: Slice and range fusion, parallelizing bitarray and slice assignment +* PR #2516: Support general reductions in ParallelAccelerator + +ParallelAccelerator fixes: + +* PR #2540: Fix bug #2537 +* PR #2566: Fix issue #2564. +* PR #2599: Fix nested multi-dimensional parfor type inference issue +* PR #2604: Fixes for stencil tests and cmath sin(). +* PR #2605: Fixes issue #2603. + +Additional features of note: + +This release of Numba (and llvmlite) is updated to use LLVM version 5.0 as the +compiler back end; the main change to Numba to support this was the addition of +a custom symbol tracker to avoid the calls to LLVM's `ExecutionEngine` that were +crashing when asking for non-existent symbol addresses. Further, the conda +packages for this release of Numba are built using conda build version 3 and the +new compilers/recipe grammar that are present in that release. + +* PR #2568: Update for LLVM 5 +* PR #2607: Fixes abort when getting address to "nrt_unresolved_abort" +* PR #2615: Working towards conda build 3 + +Thanks to community feedback and bug reports, the following fixes were also +made. + +Misc fixes/enhancements: + +* PR #2534: Add tuple support to np.take. +* PR #2551: Rebranding fix +* PR #2552: relative doc links +* PR #2570: Fix issue #2561, handle missing successor on loop exit +* PR #2588: Fix #2555. Disable libpython.so linking on linux +* PR #2601: Update llvmlite version dependency. +* PR #2608: Fix potential cache file collision +* PR #2612: Fix NRT test failure due to increased overhead when running in coverage +* PR #2619: Fix dubious pthread_cond_signal not in lock +* PR #2622: Fix `np.nanmedian` for all NaN case. +* PR #2633: Fix markdown in CONTRIBUTING.md +* PR #2635: Make the dependency on compilers for AOT optional. + +CUDA support fixes: + +* PR #2523: Fix invalid cuda context in memory transfer calls in another thread +* PR #2575: Use CPU to initialize xoroshiro states for GPU RNG. Fixes #2573 +* PR #2581: Fix cuda gufunc mishandling of scalar arg as array and out argument + + +Version 0.35.0 +-------------- + +This release includes some exciting new features as part of the work +performed in partnership with Intel on ParallelAccelerator technology. +There are also some additions made to Numpy support and small but +significant fixes made as a result of considerable effort spent chasing bugs +and implementing stability improvements. + + +ParallelAccelerator: + +NOTE: The ParallelAccelerator technology is under active development and should +be considered experimental. + +New features relating to ParallelAccelerator, from work undertaken with Intel, +include support for a larger range of `np.random` functions in `parallel` +mode, printing Numpy arrays in no Python mode, the capacity to initialize Numpy +arrays directly from list comprehensions, and the axis argument to `.sum()`. +Documentation on the ParallelAccelerator technology implementation has also +been added. Further, a large amount of work on equivalence relations was +undertaken to enable runtime checks of broadcasting behaviours in parallel mode. + +ParallelAccelerator features: + +* PR #2400: Array comprehension +* PR #2405: Support printing Numpy arrays +* PR #2438: Support more np.random functions in ParallelAccelerator +* PR #2482: Support for sum with axis in nopython mode. +* PR #2487: Adding developer documentation for ParallelAccelerator technology.
+* PR #2492: Core PA refactor adds assertions for broadcast semantics + +ParallelAccelerator fixes: + +* PR #2478: Rename cfg before parfor translation (#2477) +* PR #2479: Fix broken array comprehension tests on unsupported platforms +* PR #2484: Fix array comprehension test on win64 +* PR #2506: Fix for 32-bit machines. + + +Additional features of note: + +Support for `np.take`, `np.finfo`, `np.iinfo` and `np.MachAr` in no Python +mode is added. Further, three new environment variables are added, two for +overriding CPU target/features and another to warn if `parallel=True` was set +but no such transform was possible. + +* PR #2490: Implement np.take and ndarray.take +* PR #2493: Display a warning if parallel=True is set but not possible. +* PR #2513: Add np.MachAr, np.finfo, np.iinfo +* PR #2515: Allow environ overriding of cpu target and cpu features. + + +Due to expansion of the test farm and a focus on fixing bugs, the following +fixes were also made. + +Misc fixes/enhancements: + +* PR #2455: add contextual information to runtime errors +* PR #2470: Fixes #2458, poor performance in np.median +* PR #2471: Ensure LLVM threadsafety in {g,}ufunc building. +* PR #2494: Update doc theme +* PR #2503: Remove hacky code added in 2482 and feature enhancement +* PR #2505: Serialise env mutation tests during multithreaded testing. +* PR #2520: Fix failing cpu-target override tests + +CUDA support fixes: + +* PR #2504: Enable CUDA toolkit version testing +* PR #2509: Disable tests generating code unavailable in lower CC versions. +* PR #2511: Fix Windows 64 bit CUDA tests. + + +Version 0.34.0 +-------------- + +This release adds a significant set of new features arising from combined work +with Intel on ParallelAccelerator technology. It also adds list comprehension +and closure support, support for Numpy 1.13 and a new, faster, CUDA reduction +algorithm. For Linux users this release is the first to be built on Centos 6, +which will be the new base platform for future releases. Finally, a number of +thread-safety and type inference bugs have been fixed, along with other smaller +enhancements. + + +ParallelAccelerator features: + +NOTE: The ParallelAccelerator technology is under active development and should +be considered experimental. + +The ParallelAccelerator technology is accessed via a new "nopython" mode option +"parallel". The ParallelAccelerator technology attempts to identify operations +which have parallel semantics (for instance adding a scalar to a vector), fuse +together adjacent such operations, and then parallelize their execution across +a number of CPU cores. This is essentially auto-parallelization. + +In addition to the auto-parallelization feature, explicit loop based +parallelism is made available through the use of `prange` in place of `range` +as a loop iterator. + +More information and examples on both auto-parallelization and `prange` are +available in the documentation and examples directory respectively.
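+
+For illustration, a minimal sketch of an explicit parallel loop using
+`prange` (an illustrative example, not taken from the release notes)::
+
+    import numpy as np
+    from numba import njit, prange
+
+    @njit(parallel=True)
+    def row_sums(a):
+        out = np.empty(a.shape[0])
+        # prange replaces range to mark this loop for parallel execution
+        for i in prange(a.shape[0]):
+            out[i] = a[i, :].sum()
+        return out
+
+    print(row_sums(np.ones((4, 3))))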
+ +As part of the necessary work for ParallelAccelerator, support for closures +and list comprehensions is added: + +* PR #2318: Transfer ParallelAccelerator technology to Numba +* PR #2379: ParallelAccelerator Core Improvements +* PR #2367: Add support for len(range(...)) +* PR #2369: List comprehension +* PR #2391: Explicit Parallel Loop Support (prange) + +The ParallelAccelerator features are available on all supported platforms and +Python versions with the exception of the following (with a view to supporting +them in a future release): + +* The combination of Windows operating systems with Python 2.7. +* Systems running 32 bit Python. + + +CUDA support enhancements: + +* PR #2377: New GPU reduction algorithm + + +CUDA support fixes: + +* PR #2397: Fix #2393, always set alignment of cuda static memory regions + + +Misc Fixes: + +* PR #2373, Issue #2372: 32-bit compatibility fix for parfor related code +* PR #2376: Fix #2375 missing stdint.h for py2.7 vc9 +* PR #2378: Fix deadlock in parallel gufunc when kernel acquires the GIL. +* PR #2382: Forbid unsafe casting in bitwise operation +* PR #2385: docs: fix Sphinx errors +* PR #2396: Use 64-bit RHS operand for shift +* PR #2404: Fix threadsafety logic issue in ufunc compilation cache. +* PR #2424: Ensure consistent iteration order of blocks for type inference. +* PR #2425: Guard code to prevent the use of 'parallel' on win32 + py27 +* PR #2426: Basic test for Enum member type recovery. +* PR #2433: Fix up the parfors tests with respect to windows py2.7 +* PR #2442: Skip tests that need BLAS/LAPACK if scipy is not available. +* PR #2444: Add test for invalid array setitem +* PR #2449: Make the runtime initialiser threadsafe +* PR #2452: Skip CFG test on 64bit windows + + +Misc Enhancements: + +* PR #2366: Improvements to IR utils +* PR #2388: Update README.rst to indicate the proper version of LLVM +* PR #2394: Upgrade to llvmlite 0.19.* +* PR #2395: Update llvmlite version to 0.19 +* PR #2406: Expose environment object to ufuncs +* PR #2407: Expose environment object to target-context inside lowerer +* PR #2413: Add flags to pass through to conda build for buildbot +* PR #2414: Add cross compile flags to local recipe +* PR #2415: A few cleanups for rewrites +* PR #2418: Add getitem support for Enum classes +* PR #2419: Add support for returning enums in vectorize +* PR #2421: Add copyright notice for Intel contributed files. +* PR #2422: Patch code base to work with np 1.13 release +* PR #2448: Adds in warning message when using 'parallel' if cache=True +* PR #2450: Add test for keyword arg on .sum-like and .cumsum-like array + methods + + +Version 0.33.0 +-------------- + +This release resolves several performance issues caused by atomic +reference counting operations inside loop bodies. New optimization +passes have been added to reduce the impact of these operations. We +observe speed improvements between 2x-10x in affected programs due to +the removal of unnecessary reference counting operations. + +There are also several enhancements to the CUDA GPU support: + +* A GPU random number generator based on the xoroshiro128+ algorithm is added. + See details and examples in the documentation. +* ``@cuda.jit`` CUDA kernels can now call ``@jit`` and ``@njit`` + CPU functions and they will automatically be compiled as CUDA device + functions. +* CUDA IPC memory API is exposed for sharing memory between processes. + See usage details in the documentation.
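+
+For illustration, a minimal sketch of a ``@cuda.jit`` kernel calling an
+``@njit`` CPU function (an illustrative example requiring a CUDA-capable
+GPU, not taken from the release notes)::
+
+    import numpy as np
+    from numba import cuda, njit
+
+    @njit
+    def clamp(x, lo, hi):
+        return min(max(x, lo), hi)
+
+    @cuda.jit
+    def clamp_kernel(arr, lo, hi):
+        i = cuda.grid(1)
+        if i < arr.size:
+            # the @njit function is compiled as a CUDA device function
+            arr[i] = clamp(arr[i], lo, hi)
+
+    d_arr = cuda.to_device(np.linspace(-2.0, 2.0, 16))
+    clamp_kernel[1, 32](d_arr, -1.0, 1.0)
+    print(d_arr.copy_to_host())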
+ +Reference counting enhancements: + +* PR #2346, Issue #2345, #2248: Add extra refcount pruning after inlining +* PR #2349: Fix refct pruning not removing refct op with tail call. +* PR #2352, Issue #2350: Add refcount pruning pass for function that does not need refcount + +CUDA support enhancements: + +* PR #2023: Supports CUDA IPC for device array +* PR #2343, Issue #2335: Allow CPU jit decorated function to be used as cuda device function +* PR #2347: Add random number generator support for CUDA device code +* PR #2361: Update autotune table for CC: 5.3, 6.0, 6.1, 6.2 + +Misc fixes: + +* PR #2362: Avoid test failure due to typing to int32 on 32-bit platforms +* PR #2359: Fixed nogil example that threw a TypeError when executed. +* PR #2357, Issue #2356: Fix fragile test that depends on how the script is executed. +* PR #2355: Fix cpu dispatcher referenced as attribute of another module +* PR #2354: Fixes an issue with caching when function needs NRT and refcount pruning +* PR #2342, Issue #2339: Add warnings to inspection when it is used on unserialized cached code +* PR #2329, Issue #2250: Better handling of missing op codes + +Misc enhancements: + +* PR #2360: Adds missing values in error message interp. +* PR #2353: Handle when get_host_cpu_features() raises RuntimeError +* PR #2351: Enable SVML for erf/erfc/gamma/lgamma/log2 +* PR #2344: Expose error_model setting in jit decorator +* PR #2337: Align blocking terminate support for fork() with new TBB version +* PR #2336: Bump llvmlite version to 0.18 +* PR #2330: Core changes in PR #2318 + + +Version 0.32.0 +-------------- + +In this release, we are upgrading to LLVM 4.0. A lot of work has been done +to fix many race-condition issues inside LLVM when the compiler is +used concurrently, which is likely when Numba is used with Dask. + +Improvements: + +* PR #2322: Suppress test error due to unknown but consistent error with tgamma +* PR #2320: Update llvmlite dependency to 0.17 +* PR #2308: Add details to error message on why cuda support is disabled. +* PR #2302: Add os x to travis +* PR #2294: Disable remove_module on MCJIT due to memory leak inside LLVM +* PR #2291: Split parallel tests and recycle workers to tame memory usage +* PR #2253: Remove the pointer-stuffing hack for storing meminfos in lists + +Fixes: + +* PR #2331: Fix a bug in the GPU array indexing +* PR #2326: Fix #2321 docs referring to non-existing function. +* PR #2316: Fixing more race-condition problems +* PR #2315: Fix #2314. Relax strict type check to allow optional type. +* PR #2310: Fix race condition due to concurrent compilation and cache loading +* PR #2304: Fix intrinsic 1st arg not a typing.Context as stated by the docs. +* PR #2287: Fix int64 atomic min-max +* PR #2286: Fix #2285 `@overload_method` not linking dependent libs +* PR #2303: Missing import statements to interval-example.rst + + +Version 0.31.0 +-------------- + +In this release, we added preliminary support for debugging with GDB +version >= 7.0. The feature is enabled by setting the ``debug=True`` compiler +option, which causes GDB compatible debug info to be generated. +The CUDA backend also gained limited debugging support so that source locations +are shown in memory-checking and profiling tools. +For details, see :ref:`numba-troubleshooting`. + +Also, we added the ``fastmath=True`` compiler option to enable unsafe +floating-point transformations, which allows LLVM to auto-vectorize more code.
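+
+For illustration, a minimal sketch of the ``fastmath=True`` option (an
+illustrative example, not taken from the release notes)::
+
+    import numpy as np
+    from numba import njit
+
+    # fastmath=True permits unsafe floating-point transformations such as
+    # reassociation, which lets LLVM vectorize this reduction
+    @njit(fastmath=True)
+    def total(a):
+        acc = 0.0
+        for x in a:
+            acc += x
+        return acc
+
+    print(total(np.arange(1e6)))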
+ +Other important changes include upgrading to LLVM 3.9.1 and adding support for +Numpy 1.12. + +Improvements: + +* PR #2281: Update for numpy1.12 +* PR #2278: Add CUDA atomic.{max, min, compare_and_swap} +* PR #2277: Add about section to conda recipes to identify license and other + metadata in Anaconda Cloud +* PR #2271: Adopt itanium C++-style mangling for CPU and CUDA targets +* PR #2267: Add fastmath flags +* PR #2261: Support dtype.type +* PR #2249: Changes for llvm3.9 +* PR #2234: Bump llvmlite requirement to 0.16 and add install_name_tool_fixer to + mviewbuf for OS X +* PR #2230: Add python3.6 to TravisCi +* PR #2227: Enable caching for gufunc wrapper +* PR #2170: Add debugging support +* PR #2037: inspect_cfg() for easier visualization of the function operation + +Fixes: + +* PR #2274: Fix nvvm ir patch in mishandling "load" +* PR #2272: Fix breakage to cuda7.5 +* PR #2269: Fix caching of copy_strides kernel in cuda.reduce +* PR #2265: Fix #2263: error when linking two modules with dynamic globals +* PR #2252: Fix path separator in test +* PR #2246: Fix overuse of memory in some system with fork +* PR #2241: Fix #2240: __module__ in dynamically created function not a str +* PR #2239: Fix fingerprint computation failure preventing fallback + + +Version 0.30.1 +-------------- + +This is a bug-fix release to enable Python 3.6 support. In addition, +there is now early Intel TBB support for parallel ufuncs when building from +source with TBBROOT defined. The TBB feature is not enabled in our official +builds. + +Fixes: + +* PR #2232: Fix name clashes with _Py_hashtable_xxx in Python 3.6. + +Improvements: + +* PR #2217: Add Intel TBB threadpool implementation for parallel ufunc. + + +Version 0.30.0 +-------------- + +This release adds preliminary support for Python 3.6, but no official build is +available yet. A new system reporting tool (``numba --sysinfo``) is added to +provide system information to help core developers in replication and debugging. +See below for other improvements and bug fixes. + +Improvements: + +* PR #2209: Support Python 3.6. +* PR #2175: Support ``np.trace()``, ``np.outer()`` and ``np.kron()``. +* PR #2197: Support ``np.nanprod()``. +* PR #2190: Support caching for ufunc. +* PR #2186: Add system reporting tool. + +Fixes: + +* PR #2214, Issue #2212: Fix memory error with ndenumerate and flat iterators. +* PR #2206, Issue #2163: Fix ``zip()`` consuming extra elements in early + exhaustion. +* PR #2185, Issue #2159, #2169: Fix rewrite pass affecting objmode fallback. +* PR #2204, Issue #2178: Fix annotation for liftedloop. +* PR #2203: Fix Appveyor segfault with Python 3.5. +* PR #2202, Issue #2198: Fix target context not initialized when loading from + ufunc cache. +* PR #2172, Issue #2171: Fix optional type unpacking. +* PR #2189, Issue #2188: Disable freezing of big (>1MB) global arrays. +* PR #2180, Issue #2179: Fix invalid variable version in looplifting. +* PR #2156, Issue #2155: Fix divmod, floordiv segfault on CUDA. + + +Version 0.29.0 +-------------- + +This release extends the support of recursive functions to include direct and +indirect recursion without explicit function type annotations. See new example +in `examples/mergesort.py`. Newly supported numpy features include array +stacking functions, np.linalg.eig* functions, np.linalg.matrix_power, np.roots +and array to array broadcasting in assignments. + +This release depends on llvmlite 0.14.0 and supports CUDA 8, though it is not +required.
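+
+For illustration, a minimal sketch of type-inferred recursion (an
+illustrative example, not taken from the release notes)::
+
+    from numba import njit
+
+    @njit
+    def fib(n):
+        # direct recursion; no explicit function type annotation is needed
+        if n < 2:
+            return n
+        return fib(n - 1) + fib(n - 2)
+
+    print(fib(10))  # 55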
+ +Improvements: + +* PR #2130, #2137: Add type-inferred recursion with docs and examples. +* PR #2134: Add ``np.linalg.matrix_power``. +* PR #2125: Add ``np.roots``. +* PR #2129: Add ``np.linalg.{eigvals,eigh,eigvalsh}``. +* PR #2126: Add array-to-array broadcasting. +* PR #2069: Add hstack and related functions. +* PR #2128: Allow for vectorizing a jitted function. (thanks to @dhirschfeld) +* PR #2117: Update examples and make them test-able. +* PR #2127: Refactor interpreter class and its results. + +Fixes: + +* PR #2149: Workaround MSVC9.0 SP1 fmod bug kb982107. +* PR #2145, Issue #2009: Fixes kwargs for jitclass ``__init__`` method. +* PR #2150: Fix slowdown in objmode fallback. +* PR #2050, Issue #1259: Fix liveness problem with some generator loops. +* PR #2072, Issue #1995: Right shift of unsigned LHS should be logical. +* PR #2115, Issue #1466: Fix inspect_types() error due to mangled variable name. +* PR #2119, Issue #2118: Fix array type created from record-dtype. +* PR #2122, Issue #1808: Fix returning a generator due to datamodel error. + + +Version 0.28.1 +-------------- + +This is a bug-fix release to resolve packaging issues with setuptools +dependency. + + +Version 0.28.0 +-------------- + +Amongst other improvements, this version again improves the level of +support for linear algebra -- functions from the :mod:`numpy.linalg` +module. Also, our random generator is now guaranteed to be thread-safe +and fork-safe. + +Improvements: + +* PR #2019: Add the ``@intrinsic`` decorator to define low-level + subroutines callable from JIT functions (this is considered + a private API for now). +* PR #2059: Implement ``np.concatenate`` and ``np.stack``. +* PR #2048: Make random generation fork-safe and thread-safe, producing + independent streams of random numbers for each thread or process. +* PR #2031: Add documentation of floating-point pitfalls. +* Issue #2053: Avoid polling in parallel CPU target (fixes severe performance + regression on Windows). +* Issue #2029: Make default arguments fast. +* PR #2052: Add logging to the CUDA driver. +* PR #2049: Implement the built-in ``divmod()`` function. +* PR #2036: Implement the ``argsort()`` method on arrays. +* PR #2046: Improving CUDA memory management by deferring deallocations + until certain thresholds are reached, so as to avoid breaking asynchronous + execution. +* PR #2040: Switch the CUDA driver implementation to use CUDA's + "primary context" API. +* PR #2017: Allow ``min(tuple)`` and ``max(tuple)``. +* PR #2039: Reduce fork() detection overhead in CUDA. +* PR #2021: Handle structured dtypes with titles. +* PR #1996: Rewrite looplifting as a transformation on Numba IR. +* PR #2014: Implement ``np.linalg.matrix_rank``. +* PR #2012: Implement ``np.linalg.cond``. +* PR #1985: Rewrite even trivial array expressions, which opens the door + for other optimizations (for example, ``array ** 2`` can be converted + into ``array * array``). +* PR #1950: Have ``typeof()`` always raise ValueError on failure. + Previously, it would either raise or return None, depending on the input. +* PR #1994: Implement ``np.linalg.norm``. +* PR #1987: Implement ``np.linalg.det`` and ``np.linalg.slogdet``. +* Issue #1979: Document integer width inference and how to workaround. +* PR #1938: Numba is now compatible with LLVM 3.8. +* PR #1967: Restrict ``np.linalg`` functions to homogeneous dtypes. Users + wanting to pass mixed-typed inputs have to convert explicitly, which + makes the performance implications more obvious.
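+
+For illustration, a minimal sketch of the homogeneous-dtype restriction on
+``np.linalg`` noted above (an illustrative example, not taken from the
+release notes)::
+
+    import numpy as np
+    from numba import njit
+
+    @njit
+    def det2(a):
+        return np.linalg.det(a)
+
+    a = np.array([[1, 2], [3, 4]])
+    # np.linalg functions accept float/complex dtypes only, so integer
+    # inputs must be converted explicitly by the caller
+    print(det2(a.astype(np.float64)))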
+ +Fixes: + +* PR #2006: ``array(float32) ** int`` should return ``array(float32)``. +* PR #2044: Allow reshaping empty arrays. +* Issue #2051: Fix refcounting issue when concatenating tuples. +* Issue #2000: Make Numpy optional for setup.py, to allow ``pip install`` + to work without Numpy pre-installed. +* PR #1989: Fix assertion in ``Dispatcher.disable_compile()``. +* Issue #2028: Ignore filesystem errors when caching from multiple processes. +* Issue #2003: Allow unicode variable and function names (on Python 3). +* Issue #1998: Fix deadlock in parallel ufuncs that reacquire the GIL. +* PR #1997: Fix random crashes when AOT compiling on certain Windows platforms. +* Issue #1988: Propagate jitclass docstring. +* Issue #1933: Ensure array constants are emitted with the right alignment. + + +Version 0.27.0 +-------------- + +Improvements: + +* Issue #1976: improve error message when non-integral dimensions are given + to a CUDA kernel. +* PR #1970: Optimize the power operator with a static exponent. +* PR #1710: Improve contextual information for compiler errors. +* PR #1961: Support printing constant strings. +* PR #1959: Support more types in the print() function. +* PR #1823: Support ``compute_50`` in CUDA backend. +* PR #1955: Support ``np.linalg.pinv``. +* PR #1896: Improve the ``SmartArray`` API. +* PR #1947: Support ``np.linalg.solve``. +* Issue #1943: Improve error message when an argument fails typing. +* PR #1927: Support ``np.linalg.lstsq``. +* PR #1934: Use system functions for hypot() where possible, instead of our + own implementation. +* PR #1929: Add cffi support to ``@cfunc`` objects. +* PR #1932: Add user-controllable thread pool limits for parallel CPU target. +* PR #1928: Support self-recursion when the signature is explicit. +* PR #1890: List all lowering implementations in the developer docs. +* Issue #1884: Support ``np.lib.stride_tricks.as_strided()``. + +Fixes: + +* Issue #1960: Fix sliced assignment when source and destination areas are + overlapping. +* PR #1963: Make CUDA print() atomic. +* PR #1956: Allow 0d array constants. +* Issue #1945: Allow using Numpy ufuncs in AOT compiled code. +* Issue #1916: Fix documentation example for ``@generated_jit``. +* Issue #1926: Fix regression when caching functions in an IPython session. +* Issue #1923: Allow non-intp integer arguments to carray() and farray(). +* Issue #1908: Accept non-ASCII unicode docstrings on Python 2. +* Issue #1874: Allow ``del container[key]`` in object mode. +* Issue #1913: Fix set insertion bug when the lookup chain contains deleted + entries. +* Issue #1911: Allow function annotations on jitclass methods. + + +Version 0.26.0 +-------------- + +This release adds support for the ``cfunc`` decorator for exporting Numba-jitted +functions to third-party APIs that take C callbacks. Most of the overhead of +using jitclasses inside the interpreter is eliminated. Support for +decompositions in ``numpy.linalg`` is added. Finally, Numpy 1.11 is +supported. + +Improvements: + +* PR #1889: Export BLAS and LAPACK wrappers for pycc. +* PR #1888: Faster array power. +* Issue #1867: Allow "out" keyword arg for dufuncs. +* PR #1871: ``carray()`` and ``farray()`` for creating arrays from pointers. +* PR #1855: ``@cfunc`` decorator for exporting as ctypes function. +* PR #1862: Add support for ``numpy.linalg.qr``. +* PR #1851: jitclass support for '_' and '__' prefixed attributes. +* PR #1842: Optimize jitclass in Python interpreter. +* Issue #1837: Fix CUDA simulator issues with device function.
+* PR #1839: Add support for decompositions from ``numpy.linalg``.
+* PR #1829: Support Python enums.
+* PR #1828: Add support for ``numpy.random.rand()`` and
+  ``numpy.random.randn()``.
+* Issue #1825: Use of 0-d array in place of scalar index.
+* Issue #1824: Scalar arguments to object mode gufuncs.
+* Issue #1813: Let bitwise bool operators return booleans, not integers.
+* Issue #1760: Optional arguments in generators.
+* PR #1780: Numpy 1.11 support.
+
+
+Version 0.25.0
+--------------
+
+This release adds support for ``set`` objects in nopython mode. It also
+adds support for many missing Numpy features and functions. It improves
+Numba's compatibility and performance when using a distributed execution
+framework such as dask, distributed or Spark. Finally, it removes
+compatibility with Python 2.6, Python 3.3 and Numpy 1.6.
+
+Improvements:
+
+* Issue #1800: Add erf(), erfc(), gamma() and lgamma() to CUDA targets.
+* PR #1793: Implement more Numpy functions: np.bincount(), np.diff(),
+  np.digitize(), np.histogram(), np.searchsorted() as well as NaN-aware
+  reduction functions (np.nansum(), np.nanmedian(), etc.)
+* PR #1789: Optimize some reduction functions such as np.sum(), np.prod(),
+  np.median(), etc.
+* PR #1752: Make CUDA features work in dask, distributed and Spark.
+* PR #1787: Support np.nditer() for fast multi-array indexing with
+  broadcasting.
+* PR #1799: Report JIT-compiled functions as regular Python functions
+  when profiling (making it possible to see the filename and line number
+  where a function is defined).
+* PR #1782: Support np.any() and np.all().
+* Issue #1788: Support the iter() and next() built-in functions.
+* PR #1778: Support array.astype().
+* Issue #1775: Allow the user to set the target CPU model for AOT compilation.
+* PR #1758: Support creating random arrays using the ``size`` parameter
+  to the np.random APIs.
+* PR #1757: Support len() on array.flat objects.
+* PR #1749: Remove Numpy 1.6 compatibility.
+* PR #1748: Remove Python 2.6 and 3.3 compatibility.
+* PR #1735: Support the ``not in`` operator as well as operator.contains().
+* PR #1724: Support homogeneous sets in nopython mode.
+* Issue #875: Make compilation of array constants faster.
+
+Fixes:
+
+* PR #1795: Fix a massive performance issue when calling Numba functions
+  with distributed, Spark or a similar mechanism using serialization.
+* Issue #1784: Make jitclasses usable with NUMBA_DISABLE_JIT=1.
+* Issue #1786: Allow using linear algebra functions when profiling.
+* Issue #1796: Fix np.dot() memory leak on non-contiguous inputs.
+* PR #1792: Fix static negative indexing of tuples.
+* Issue #1771: Use fallback cache directory when __pycache__ isn't writable,
+  such as when user code is installed in a system location.
+* Issue #1223: Use Numpy error model in array expressions (e.g. division
+  by zero returns ``inf`` or ``nan`` instead of raising an error).
+* Issue #1640: Fix np.random.binomial() for large n values.
+* Issue #1643: Improve error reporting when passing an invalid spec to
+  ``@jitclass``.
+* PR #1756: Fix slicing with a negative step and an omitted start.
+
+
+Version 0.24.0
+--------------
+
+This release introduces several major changes, including the
+``@generated_jit`` decorator for flexible specializations, as with Julia's
+"``@generated``" macro (illustrated in the sketch below), and the SmartArray
+array wrapper type that allows seamless transfer of array data between the
+CPU and the GPU.
+
+This will be the last version to support Python 2.6, Python 3.3 and Numpy 1.6.
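+
+A minimal sketch of ``@generated_jit``, adapted from the pattern in the
+Numba documentation (not part of the original notes)::
+
+    import numpy as np
+    from numba import generated_jit, types
+
+    @generated_jit(nopython=True)
+    def is_missing(x):
+        # This body runs at compile time with the *type* of ``x`` and
+        # returns the implementation to be compiled for that type.
+        if isinstance(x, types.Float):
+            return lambda x: np.isnan(x)
+        elif isinstance(x, (types.NPDatetime, types.NPTimedelta)):
+            missing = x('NaT')
+            return lambda x: x == missing
+        else:
+            return lambda x: False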
+ +Improvements: + +* PR #1723: Improve compatibility of JIT functions with the Python profiler. +* PR #1509: Support array.ravel() and array.flatten(). +* PR #1676: Add SmartArray type to support transparent data management in + multiple address spaces (host & GPU). +* PR #1689: Reduce startup overhead of importing Numba. +* PR #1705: Support registration of CFFI types as corresponding to known + Numba types. +* PR #1686: Document the extension API. +* PR #1698: Improve warnings raised during type inference. +* PR #1697: Support np.dot() and friends on non-contiguous arrays. +* PR #1692: cffi.from_buffer() improvements (allow more pointer types, + allow non-Numpy buffer objects). +* PR #1648: Add the ``@generated_jit`` decorator. +* PR #1651: Implementation of np.linalg.inv using LAPACK. Thanks to + Matthieu Dartiailh. +* PR #1674: Support np.diag(). +* PR #1673: Improve error message when looking up an attribute on an + unknown global. +* Issue #1569: Implement runtime check for the LLVM locale bug. +* PR #1612: Switch to LLVM 3.7 in sync with llvmlite. +* PR #1624: Allow slice assignment of sequence to array. +* PR #1622: Support slicing tuples with a constant slice. + +Fixes: + +* Issue #1722: Fix returning an optional boolean (bool or None). +* Issue #1734: NRT decref bug when variable is del'ed before being defined, + leading to a possible memory leak. +* PR #1732: Fix tuple getitem regression for CUDA target. +* PR #1718: Mishandling of optional to optional casting. +* PR #1714: Fix .compile() on a JIT function not respecting ._can_compile. +* Issue #1667: Fix np.angle() on arrays. +* Issue #1690: Fix slicing with an omitted stop and a negative step value. +* PR #1693: Fix gufunc bug in handling scalar formal arg with non-scalar + input value. +* PR #1683: Fix parallel testing under Windows. +* Issue #1616: Use system-provided versions of C99 math where possible. +* Issue #1652: Reductions of bool arrays (e.g. sum() or mean()) should + return integers or floats, not bools. +* Issue #1664: Fix regression when indexing a record array with a constant + index. +* PR #1661: Disable AVX on old Linux kernels. +* Issue #1636: Allow raising an exception looked up on a module. + + +Version 0.23.1 +-------------- + +This is a bug-fix release to address several regressions introduced +in the 0.23.0 release, and a couple other issues. + +Fixes: + +* Issue #1645: CUDA ufuncs were broken in 0.23.0. +* Issue #1638: Check tuple sizes when passing a list of tuples. +* Issue #1630: Parallel ufunc would keep eating CPU even after finishing + under Windows. +* Issue #1628: Fix ctypes and cffi tests under Windows with Python 3.5. +* Issue #1627: Fix xrange() support. +* PR #1611: Rewrite variable liveness analysis. +* Issue #1610: Allow nested calls between explicitly-typed ufuncs. +* Issue #1593: Fix `*args` in object mode. + + +Version 0.23.0 +-------------- + +This release introduces JIT classes using the new ``@jitclass`` decorator, +allowing user-defined structures for nopython mode. Other improvements +and bug fixes are listed below. + +Improvements: + +* PR #1609: Speed up some simple math functions by inlining them + in their caller +* PR #1571: Implement JIT classes +* PR #1584: Improve typing of array indexing +* PR #1583: Allow printing booleans +* PR #1542: Allow negative values in np.reshape() +* PR #1560: Support vector and matrix dot product, including ``np.dot()`` + and the ``@`` operator in Python 3.5 +* PR #1546: Support field lookup on record arrays and scalars (i.e. 
+  ``array['field']`` in addition to ``array.field``)
+* PR #1440: Support the HSA wavebarrier() and activelanepermute_wavewidth()
+  intrinsics
+* PR #1540: Support np.angle()
+* PR #1543: Implement CPU multithreaded gufuncs (target="parallel")
+* PR #1551: Allow scalar arguments in np.where(), np.empty_like().
+* PR #1516: Add some more examples from NumbaPro
+* PR #1517: Support np.sinc()
+
+Fixes:
+
+* Issue #1603: Fix calling a non-cached function from a cached function
+* Issue #1594: Ensure a list is homogeneous when unboxing
+* Issue #1595: Replace deprecated use of get_pointer_to_function()
+* Issue #1586: Allow tests to be run by different users on the same machine
+* Issue #1587: Make CudaAPIError picklable
+* Issue #1568: Fix using Numba from inside Visual Studio 2015
+* Issue #1559: Fix serializing a jit function referring to a renamed module
+* PR #1508: Let reshape() accept integer argument(s), not just a tuple
+* Issue #1545: Improve error checking when unboxing list objects
+* Issue #1538: Fix array broadcasting in CUDA gufuncs
+* Issue #1526: Fix a reference count handling bug
+
+
+Version 0.22.1
+--------------
+
+This is a bug-fix release to resolve some packaging issues and other
+problems found in the 0.22.0 release.
+
+Fixes:
+
+* PR #1515: Include MANIFEST.in in MANIFEST.in so that sdist still works from
+  source tar files.
+* PR #1518: Fix reference counting bug caused by hidden alias
+* PR #1519: Fix erroneous assert when passing nopython=True to guvectorize.
+* PR #1521: Fix cuda.test()
+
+
+Version 0.22.0
+--------------
+
+This release features several highlights: Python 3.5 support, Numpy 1.10
+support, Ahead-of-Time compilation of extension modules, additional
+vectorization features that were previously only available with the
+proprietary extension NumbaPro, and improvements in array indexing.
+
+Improvements:
+
+* PR #1497: Allow scalar input type instead of size-1 array to @guvectorize
+* PR #1480: Add distutils support for AOT compilation
+* PR #1460: Create a new API for Ahead-of-Time (AOT) compilation
+* PR #1451: Allow passing Python lists to JIT-compiled functions, and
+  reflect mutations on function return
+* PR #1387: Numpy 1.10 support
+* PR #1464: Support cffi.FFI.from_buffer()
+* PR #1437: Propagate errors raised from Numba-compiled ufuncs; also,
+  let "division by zero" and other math errors produce a warning instead
+  of exiting the function early
+* PR #1445: Support a subset of fancy indexing
+* PR #1454: Support "out-of-line" CFFI modules
+* PR #1442: Improve array indexing to support more kinds of basic slicing
+* PR #1409: Support explicit CUDA memory fences
+* PR #1435: Add support for vectorize() and guvectorize() with HSA
+* PR #1432: Implement numpy.nonzero() and numpy.where()
+* PR #1416: Add support for vectorize() and guvectorize() with CUDA,
+  as originally provided in NumbaPro
+* PR #1424: Support in-place array operators
+* PR #1414: Python 3.5 support
+* PR #1404: Add the parallel ufunc functionality originally provided in
+  NumbaPro
+* PR #1393: Implement sorting on arrays and lists
+* PR #1415: Add functions to estimate the occupancy of a CUDA kernel
+* PR #1360: The JIT cache now stores the compiled object code, yielding
+  even larger speedups.
+* PR #1402: Fixes for the ARMv7 (armv7l) architecture under Linux
+* PR #1400: Add the cuda.reduce() decorator originally provided in NumbaPro
+
+Fixes:
+
+* PR #1483: Allow np.empty_like() and friends on non-contiguous arrays
+* Issue #1471: Allow caching JIT functions defined in IPython
+* PR #1457: Fix flat indexing of boolean arrays
+* PR #1421: Allow calling Numpy ufuncs, without an explicit output, on
+  non-contiguous arrays
+* Issue #1411: Fix crash when unpacking a tuple containing a Numba-allocated array
+* Issue #1394: Allow unifying range_state32 and range_state64
+* Issue #1373: Fix code generation error on lists of bools
+
+
+Version 0.21.0
+--------------
+
+This release introduces support for AMD's Heterogeneous System Architecture,
+which allows memory to be shared directly between the CPU and the GPU.
+Other major enhancements are support for lists and the introduction of
+an opt-in compilation cache.
+
+Improvements:
+
+* PR #1391: Implement print() for CUDA code
+* PR #1366: Implement integer typing enhancement proposal (NBEP 1)
+* PR #1380: Support the one-argument type() builtin
+* PR #1375: Allow boolean evaluation of lists and tuples
+* PR #1371: Support array.view() in CUDA mode
+* PR #1369: Support named tuples in nopython mode
+* PR #1250: Implement numpy.median().
+* PR #1289: Make dispatching faster when calling a JIT-compiled function
+  from regular Python
+* Issue #1226: Improve performance of integer power
+* PR #1321: Document features supported with CUDA
+* PR #1345: HSA support
+* PR #1343: Support lists in nopython mode
+* PR #1356: Make Numba-allocated memory visible to tracemalloc
+* PR #1363: Add an environment variable NUMBA_DEBUG_TYPEINFER
+* PR #1051: Add an opt-in, per-function compilation cache
+
+Fixes:
+
+* Issue #1372: Some array expressions would fail rewriting when they
+  involved the same variable more than once, or a unary operator
+* Issue #1385: Allow CUDA local arrays to be declared anywhere in a function
+* Issue #1285: Support datetime64 and timedelta64 in Numpy reduction functions
+* Issue #1332: Handle the EXTENDED_ARG opcode.
+* PR #1329: Handle the ``in`` operator in object mode
+* Issue #1322: Fix augmented slice assignment on Python 2
+* PR #1357: Fix slicing with some negative bounds or step values.
+
+
+Version 0.20.0
+--------------
+
+This release updates Numba to use LLVM 3.6 and CUDA 7 for CUDA support.
+Following the platform deprecation in CUDA 7, Numba's CUDA feature is no
+longer supported on 32-bit platforms. The oldest supported version of
+Windows is Windows 7.
+
+Improvements:
+
+* Issue #1203: Support indexing ndarray.flat
+* PR #1200: Migrate cgutils to llvmlite
+* PR #1190: Support more array methods: .transpose(), .T, .copy(), .reshape(), .view()
+* PR #1214: Simplify setup.py and avoid manual maintenance
+* PR #1217: Support datetime64 and timedelta64 constants
+* PR #1236: Reload environment variables when compiling
+* PR #1225: Various speed improvements in generated code
+* PR #1252: Support cmath module in CUDA
+* PR #1238: Use 32-byte aligned allocator to optimize for AVX
+* PR #1258: Support numpy.frombuffer()
+* PR #1274: Use TravisCI container infrastructure for lower wait time
+* PR #1279: Micro-optimize overload resolution in call dispatch
+* Issue #1248: Improve error message when return type unification fails
+
+Fixes:
+
+* Issue #1131: Fix handling of negative zeros in np.conjugate() and np.arccos()
+* Issue #1188: Fix slow array return
+* Issue #1164: Avoid warnings from CUDA context at shutdown
+* Issue #1229: Respect the writeable flag in arrays
+* Issue #1244: Fix bug in refcount pruning pass
+* Issue #1251: Fix partial left-indexing of Fortran contiguous array
+* Issue #1264: Fix compilation error in array expression
+* Issue #1254: Fix error when yielding array objects
+* Issue #1276: Fix nested generator use
+
+
+Version 0.19.2
+--------------
+
+This release fixes the source distribution on pypi. The only change is in the
+setup.py file. We do not plan to provide a conda package as this release is
+essentially the same as 0.19.1 for conda users.
+
+
+Version 0.19.1
+--------------
+
+* Issue #1196:
+
+  * fix double-free segfault due to redundant variable deletion in the
+    Numba IR (#1195)
+  * fix use-after-delete in array expression rewrite pass
+
+
+Version 0.19.0
+--------------
+
+This version introduces memory management in the Numba runtime, allowing
+new arrays to be allocated inside Numba-compiled functions. There is also
+a rework of the ufunc infrastructure, and an optimization pass to collapse
+cascading array operations into a single efficient loop.
+
+.. warning::
+   Support for Windows XP and Vista with all compiler targets and support
+   for 32-bit platforms (Win/Mac/Linux) with the CUDA compiler target are
+   deprecated. In the next release of Numba, the oldest version of Windows
+   supported will be Windows 7. CPU compilation will remain supported
+   on 32-bit Linux and Windows platforms.
+
+Known issues:
+
+* There are some performance regressions in very short running ``nopython``
+  functions due to the additional overhead incurred by memory management.
+  We will work to reduce this overhead in future releases.
+
+Features:
+
+* Issue #1181: Add a Frequently Asked Questions section to the documentation.
+* Issue #1162: Support the ``cumsum()`` and ``cumprod()`` methods on Numpy
+  arrays.
+* Issue #1152: Support the ``*args`` argument-passing style.
+* Issue #1147: Allow passing character sequences as arguments to
+  JIT-compiled functions.
+* Issue #1110: Shortcut deforestation and loop fusion for array expressions.
+* Issue #1136: Support various Numpy array constructors, for example
+  numpy.zeros() and numpy.zeros_like().
+* Issue #1127: Add a CUDA simulator running on the CPU, enabled with the
+  NUMBA_ENABLE_CUDASIM environment variable.
+* Issue #1086: Allow calling standard Numpy ufuncs without an explicit
+  output array from ``nopython`` functions.
+* Issue #1113: Support keyword arguments when calling numpy.empty()
+  and related functions.
+* Issue #1108: Support the ``ctypes.data`` attribute of Numpy arrays.
+* Issue #1077: Memory management for array allocations in ``nopython`` mode. +* Issue #1105: Support calling a ctypes function that takes ctypes.py_object + parameters. +* Issue #1084: Environment variable NUMBA_DISABLE_JIT disables compilation + of ``@jit`` functions, instead calling into the Python interpreter + when called. This allows easier debugging of multiple jitted functions. +* Issue #927: Allow gufuncs with no output array. +* Issue #1097: Support comparisons between tuples. +* Issue #1075: Numba-generated ufuncs can now be called from ``nopython`` + functions. +* Issue #1062: ``@vectorize`` now allows omitting the signatures, and will + compile the required specializations on the fly (like ``@jit`` does). +* Issue #1027: Support numpy.round(). +* Issue #1085: Allow returning a character sequence (as fetched from a + structured array) from a JIT-compiled function. + +Fixes: + +* Issue #1170: Ensure ``ndindex()``, ``ndenumerate()`` and ``ndarray.flat`` + work properly inside generators. +* Issue #1151: Disallow unpacking of tuples with the wrong size. +* Issue #1141: Specify install dependencies in setup.py. +* Issue #1106: Loop-lifting would fail when the lifted loop does not + produce any output values for the function tail. +* Issue #1103: Fix mishandling of some inputs when a JIT-compiled function + is called with multiple array layouts. +* Issue #1089: Fix range() with large unsigned integers. +* Issue #1088: Install entry-point scripts (numba, pycc) from the conda + build recipe. +* Issue #1081: Constant structured scalars now work properly. +* Issue #1080: Fix automatic promotion of booleans to integers. + + +Version 0.18.2 +-------------- + +Bug fixes: + +* Issue #1073: Fixes missing template file for HTML annotation +* Issue #1074: Fixes CUDA support on Windows machine due to NVVM API mismatch + + +Version 0.18.1 +-------------- + +Version 0.18.0 is not officially released. + +This version removes the old deprecated and undocumented ``argtypes`` and +``restype`` arguments to the ``@jit`` decorator. Function signatures +should always be passed as the first argument to ``@jit``. + +Features: + +* Issue #960: Add inspect_llvm() and inspect_asm() methods to JIT-compiled + functions: they output the LLVM IR and the native assembler source of the + compiled function, respectively. +* Issue #990: Allow passing tuples as arguments to JIT-compiled functions + in ``nopython`` mode. +* Issue #774: Support two-argument round() in ``nopython`` mode. +* Issue #987: Support missing functions from the math module in nopython + mode: frexp(), ldexp(), gamma(), lgamma(), erf(), erfc(). +* Issue #995: Improve code generation for round() on Python 3. +* Issue #981: Support functions from the random and numpy.random modules + in ``nopython`` mode. +* Issue #979: Add cuda.atomic.max(). +* Issue #1006: Improve exception raising and reporting. It is now allowed + to raise an exception with an error message in ``nopython`` mode. +* Issue #821: Allow ctypes- and cffi-defined functions as arguments to + ``nopython`` functions. +* Issue #901: Allow multiple explicit signatures with ``@jit``. The + signatures must be passed in a list, as with ``@vectorize``. +* Issue #884: Better error message when a JIT-compiled function is called + with the wrong types. +* Issue #1010: Simpler and faster CUDA argument marshalling thanks to a + refactoring of the data model. +* Issue #1018: Support arrays of scalars inside Numpy structured types. +* Issue #808: Reduce Numba import time by half. 
+* Issue #1021: Support the buffer protocol in ``nopython`` mode. + Buffer-providing objects, such as ``bytearray``, ``array.array`` or + ``memoryview`` support array-like operations such as indexing and iterating. + Furthermore, some standard attributes on the ``memoryview`` object are + supported. +* Issue #1030: Support nested arrays in Numpy structured arrays. +* Issue #1033: Implement the inspect_types(), inspect_llvm() and inspect_asm() + methods for CUDA kernels. +* Issue #1029: Support Numpy structured arrays with CUDA as well. +* Issue #1034: Support for generators in nopython and object mode. +* Issue #1044: Support default argument values when calling Numba-compiled + functions. +* Issue #1048: Allow calling Numpy scalar constructors from CUDA functions. +* Issue #1047: Allow indexing a multi-dimensional array with a single integer, + to take a view. +* Issue #1050: Support len() on tuples. +* Issue #1011: Revive HTML annotation. + +Fixes: + +* Issue #977: Assignment optimization was too aggressive. +* Issue #561: One-argument round() now returns an int on Python 3. +* Issue #1001: Fix an unlikely bug where two closures with the same name + and id() would compile to the same LLVM function name, despite different + closure values. +* Issue #1006: Fix reference leak when a JIT-compiled function is disposed of. +* Issue #1017: Update instructions for CUDA in the README. +* Issue #1008: Generate shorter LLVM type names to avoid segfaults with CUDA. +* Issue #1005: Properly clean up references when raising an exception from + object mode. +* Issue #1041: Fix incompatibility between Numba and the third-party + library "future". +* Issue #1053: Fix the size attribute of CUDA shared arrays. + + +Version 0.17.0 +-------------- + +The major focus in this release has been a rewrite of the documentation. +The new documentation is better structured and has more detailed coverage +of Numba features and APIs. It can be found online at +https://numba.pydata.org/numba-doc/dev/index.html + +Features: + +* Issue #895: LLVM can now inline nested function calls in ``nopython`` mode. +* Issue #863: CUDA kernels can now infer the types of their arguments + ("autojit"-like). +* Issue #833: Support numpy.{min,max,argmin,argmax,sum,mean,var,std} + in ``nopython`` mode. +* Issue #905: Add a ``nogil`` argument to the ``@jit`` decorator, to + release the GIL in ``nopython`` mode. +* Issue #829: Add a ``identity`` argument to ``@vectorize`` and + ``@guvectorize``, to set the identity value of the ufunc. +* Issue #843: Allow indexing 0-d arrays with the empty tuple. +* Issue #933: Allow named arguments, not only positional arguments, when + calling a Numba-compiled function. +* Issue #902: Support numpy.ndenumerate() in ``nopython`` mode. +* Issue #950: AVX is now enabled by default except on Sandy Bridge and + Ivy Bridge CPUs, where it can produce slower code than SSE. +* Issue #956: Support constant arrays of structured type. +* Issue #959: Indexing arrays with floating-point numbers isn't allowed + anymore. +* Issue #955: Add support for 3D CUDA grids and thread blocks. +* Issue #902: Support numpy.ndindex() in ``nopython`` mode. +* Issue #951: Numpy number types (``numpy.int8``, etc.) can be used as + constructors for type conversion in ``nopython`` mode. + +Fixes: + +* Issue #889: Fix ``NUMBA_DUMP_ASSEMBLY`` for the CUDA backend. +* Issue #903: Fix calling of stdcall functions with ctypes under Windows. +* Issue #908: Allow lazy-compiling from several threads at once. 
+* Issue #868: Wrong error message when multiplying a scalar by a non-scalar.
+* Issue #917: Allow vectorizing with datetime64 and timedelta64 in the
+  signature (only with unit-less values, though, because of a Numpy limitation).
+* Issue #431: Allow overloading of cuda device function.
+* Issue #917: Print out errors that occurred in object mode ufuncs.
+* Issue #923: Numba-compiled ufuncs now inherit the name and doc of the
+  original Python function.
+* Issue #928: Fix boolean return value in nested calls.
+* Issue #915: ``@jit`` called with an explicit signature with mismatching
+  argument types now raises an error.
+* Issue #784: Fix the truth value of NaNs.
+* Issue #953: Fix using shared memory in more than one function (kernel or
+  device).
+* Issue #970: Fix an uncommon double to uint64 conversion bug on CentOS5
+  32-bit (C compiler issue).
+
+
+Version 0.16.0
+--------------
+
+This release contains a major refactor to switch from llvmpy to llvmlite
+as our code generation backend. The switch is necessary to reconcile
+different compiler requirements for LLVM 3.5 (needs C++11) and Python
+extensions (need specific compiler versions on Windows). As a bonus, we have
+found the use of llvmlite speeds up compilation by a factor of 2!
+
+Other Major Changes:
+
+* Faster dispatch for numpy structured arrays
+* Optimized array.flat()
+* Improved CPU feature selection
+* Fix constant tuple regression in macro expansion code
+
+Known Issues:
+
+* AVX code generation is still disabled by default due to performance
+  regressions when operating on misaligned NumPy arrays. We hope to have a
+  workaround in the future.
+* In *extremely* rare circumstances, a known issue with LLVM 3.5
+  code generation can cause an ELF relocation error on 64-bit Linux systems.
+
+
+Version 0.15.1
+--------------
+
+(This was a bug-fix release that superseded version 0.15 before it was
+announced.)
+
+Fixes:
+
+* Workaround for missing __ftol2 on Windows XP.
+* Do not lift loops for compilation that contain break statements.
+* Fix a bug in loop-lifting when multiple values need to be returned to
+  the enclosing scope.
+* Handle the loop-lifting case where an accumulator needs to be updated when
+  the loop count is zero.
+
+Version 0.15
+------------
+
+Features:
+
+* Support for the Python ``cmath`` module. (NumPy complex functions were
+  already supported.)
+* Support for ``.real``, ``.imag``, and ``.conjugate()`` on non-complex
+  numbers.
+* Add support for ``math.isfinite()`` and ``math.copysign()``.
+* Compatibility mode: If enabled (off by default), a failure to compile in
+  object mode will fall back to using the pure Python implementation of the
+  function.
+* *Experimental* support for serializing JIT functions with cloudpickle.
+* Loop-jitting in object mode now works with loops that modify scalars that
+  are accessed after the loop, such as accumulators.
+* ``@vectorize`` functions can be compiled in object mode.
+* Numba can now be built using the Visual C++ Compiler for Python 2.7
+  on Windows platforms.
+* CUDA JIT functions can be returned by factory functions with variables in
+  the closure frozen as constants.
+* Support for "optional" types in nopython mode, which allow ``None`` to be a
+  valid value.
+
+Fixes:
+
+* If nopython mode compilation fails for any reason, automatically fall back
+  to object mode (unless nopython=True is passed to @jit) rather than raise
+  an exception.
+* Allow function objects to be returned from a function compiled in object
+  mode.
+* Fix a linking problem that caused slower platform math functions (such as
+  ``exp()``) to be used on Windows, leading to performance regressions against
+  NumPy.
+* ``min()`` and ``max()`` no longer accept scalar arguments in nopython mode.
+* Fix handling of ambiguous type promotion among several compiled versions of a
+  JIT function. The dispatcher will now compile a new version to resolve the
+  problem. (issue #776)
+* Fix float32 to uint64 casting bug on 32-bit Linux.
+* Fix type inference to allow forced casting of return types.
+* Allow the shape of a 1D ``cuda.shared.array`` and ``cuda.local.array`` to be
+  a one-element tuple.
+* More correct handling of signed zeros.
+* Add custom implementation of ``atan2()`` on Windows to handle special cases
+  properly.
+* Eliminated race condition in the handling of the pagelocked staging area
+  used when transferring CUDA arrays.
+* Fix non-deterministic type unification leading to varying performance.
+  (issue #797)
+
+
+Version 0.14
+------------
+
+Features:
+
+* Support for nearly all the Numpy math functions (including comparison,
+  logical, bitwise and some previously missing float functions) in nopython mode.
+* The Numpy datetime64 and timedelta64 dtypes are supported in nopython mode
+  with Numpy 1.7 and later.
+* Support for Numpy math functions on complex numbers in nopython mode.
+* ndarray.sum() is supported in nopython mode.
+* Better error messages when unsupported types are used in Numpy math functions.
+* Set NUMBA_WARNINGS=1 in the environment to see which functions are compiled
+  in object mode vs. nopython mode.
+* Add support for the two-argument pow() builtin function in nopython mode.
+* New developer documentation describing how Numba works, and how to
+  add new types.
+* Support for Numpy record arrays on the GPU. (Note: Improper alignment of dtype
+  fields will cause an exception to be raised.)
+* Slices on GPU device arrays.
+* GPU objects can be used as Python context managers to select the active
+  device in a block.
+* GPU device arrays can be bound to a CUDA stream. All subsequent operations
+  (such as memory copies) will be queued on that stream instead of the default.
+  This can prevent unnecessary synchronization with other streams.
+
+Fixes:
+
+* Generation of AVX instructions has been disabled to avoid performance bugs
+  when calling external math functions that may use SSE instructions,
+  especially on OS X.
+* JIT functions can be removed by the garbage collector when they are no
+  longer accessible.
+* Various other reference counting fixes to prevent memory leaks.
+* Fixed handling of exceptions when an input argument is out of range.
+* Prevent autojit functions from making unsafe numeric conversions when
+  called with different numeric types.
+* Fix a compilation error when an unhashable global value is accessed.
+* Gracefully handle failure to enable faulthandler in the IPython Notebook.
+* Fix a bug that caused loop lifting to fail if the loop was inside an
+  ``else`` block.
+* Fixed a problem with selecting CUDA devices in multithreaded programs on
+  Linux.
+* The ``pow()`` function (and ``**`` operation) applied to two integers now
+  returns an integer rather than a float.
+* Numpy arrays using the object dtype no longer cause an exception in the
+  autojit.
+* Attempts to write to a global array will cause compilation to fall back
+  to object mode, rather than attempt and fail at nopython mode.
+* ``range()`` works with all negative arguments (ex: ``range(-10, -12, -1)``)
+
+Version 0.13.4
+--------------
+
+Features:
+
+* Setting and deleting attributes in object mode
+* Added documentation of supported and currently unsupported numpy ufuncs
+* Assignment to 1-D numpy array slices
+* Closure variables and functions can be used in object mode
+* All numeric global values in modules can be used as constants in JIT
+  compiled code
+* Support for the start argument in enumerate()
+* Inplace arithmetic operations (+=, -=, etc.)
+* Direct iteration over a 1D numpy array (e.g. "for x in array: ...")
+  in nopython mode
+
+Fixes:
+
+* Support for NVIDIA compute capability 5.0 devices (such as the GTX 750)
+* Vectorize no longer crashes/gives an error when bool\_ is used as return type
+* Return the correct dictionary when globals() is used in JIT functions
+* Fix crash bug when creating dictionary literals in object mode
+* Report more informative error message on import if llvmpy is too old
+* Temporarily disable pycc --header, which generates incorrect function
+  signatures.
+
+Version 0.13.3
+--------------
+
+Features:
+
+* Support for enumerate() and zip() in nopython mode
+* Increased LLVM optimization of JIT functions to -O1, enabling automatic
+  vectorization of compiled code in some cases
+* Iteration over tuples and unpacking of tuples in nopython mode
+* Support for dict and set (Python >= 2.7) literals in object mode
+
+Fixes:
+
+* JIT functions have the same __name__ and __doc__ as the original function.
+* Numerous improvements to better match the data types and behavior of Python
+  math functions in JIT compiled code on different platforms.
+* Importing Numba will no longer throw an exception if the CUDA driver is
+  present, but cannot be initialized.
+* guvectorize now properly supports functions with scalar arguments.
+* CUDA driver is lazily initialized
+
+Version 0.13.2
+--------------
+
+Features:
+
+* @vectorize ufunc now can generate SIMD fast path for unit strided array
+* Added cuda.gridsize
+* Added preliminary exception handling (raise exception class)
+
+Fixes:
+
+* UNARY_POSITIVE
+* Handling of closures and dynamically generated functions
+* Global None value
+
+Version 0.13.1
+--------------
+
+Features:
+
+* Initial support for CUDA array slicing
+
+Fixes:
+
+* Indirectly fixes numbapro when the system has an incompatible CUDA driver
+* Fix numba.cuda.detect
+* Export numba.intp and numba.intc
+
+Version 0.13
+------------
+
+Features:
+
+* Opensourcing NumbaPro CUDA python support in `numba.cuda`
+* Add support for ufunc array broadcasting
+* Add support for mixed input types for ufuncs
+* Add support for returning tuple from jitted function
+
+Fixes:
+
+* Fix store slice bytecode handling for Python2
+* Fix inplace subtract
+* Fix pycc so that the correct header is emitted
+* Allow vectorize to work on functions with the jit decorator
+
+
+Version 0.12.2
+--------------
+
+Fixes:
+
+* Improved NumPy ufunc support in nopython mode
+* Misc bug fixes
+
+
+Version 0.12.1
+--------------
+
+This version fixed many regressions reported by users for the 0.12 release.
+This release contains a new loop-lifting mechanism that specializes certain
+loop patterns for nopython mode compilation. This avoids the need for direct
+support of heap allocation and other very dynamic operations.
+
+Improvements:
+
+* Add loop-lifting: JIT-ing loops in nopython mode inside object mode code.
+  This allows functions to allocate NumPy arrays and use Python objects, while
+  the tight loops in the function can still be compiled in nopython mode. Any
+  arrays that the tight loop uses should be created before the loop is entered.
+
+Fixes:
+
+* Add support for the majority of "math" module functions
+* Fix for...else handling
+* Add support for builtin round()
+* Fix ternary if...else support
+* Revive "numba" script
+* Fix problems with some boolean expressions
+* Add support for more NumPy ufuncs
+
+
+Version 0.12
+------------
+
+Version 0.12 contains a big refactor of the compiler. The main objective for
+this refactor was to simplify the code base to create a better foundation for
+further work. A secondary objective was to improve the worst case performance
+to ensure that compiled functions in object mode never run slower than pure
+Python code (this was a problem in several cases with the old code base). This
+refactor is still a work in progress and further testing is needed.
+
+Main improvements:
+
+* Major refactor of compiler for performance and maintenance reasons
+* Better fallback to object mode when native mode fails
+* Improved worst case performance in object mode
+
+The public interface of numba has been slightly changed. The idea is to
+make it cleaner and more rational:
+
+* The jit decorator has been modified so that it can be called without a
+  signature. When called without a signature, it behaves as the old autojit.
+  Autojit has been deprecated in favour of this approach.
+* Jitted functions can now be overloaded.
+* Added an "njit" decorator that behaves like the "jit" decorator with
+  nopython=True (see the sketch below).
+* The numba.vectorize namespace is gone. The vectorize decorator will
+  be in the main numba namespace.
+* Added a guvectorize decorator in the main numba namespace. It is
+  similar to numba.vectorize, but takes a dimension signature. It
+  generates gufuncs. This is a replacement for the GUVectorize gufunc
+  factory which has been deprecated.
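+
+A minimal sketch (not part of the original notes) of the reworked interface:
+``@jit`` without a signature specializes lazily, and ``@njit`` is the
+nopython shorthand::
+
+    from numba import jit, njit
+
+    @jit                  # no signature: specializes lazily, like the old autojit
+    def add(x, y):
+        return x + y
+
+    @njit                 # equivalent to @jit(nopython=True)
+    def add_fast(x, y):
+        return x + y
+
+    add(1, 2)      # compiles an integer specialization on first call
+    add(1.5, 2.5)  # compiles a separate float specialization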
+
+Main regressions (will be fixed in a future release):
+
+* Creating new NumPy arrays is not supported in nopython mode
+* Returning NumPy arrays is not supported in nopython mode
+* NumPy array slicing is not supported in nopython mode
+* lists and tuples are not supported in nopython mode
+* string, datetime, cdecimal, and struct types are not implemented yet
+* Extension types (classes) are not supported in nopython mode
+* Closures are not supported
+* Raise keyword is not supported
+* Recursion is not supported in nopython mode
+
+Version 0.11
+------------
+* Experimental support for NumPy datetime type
+
+Version 0.10
+------------
+* Annotation tool (./bin/numba --annotate --fancy) (thanks to Jay Bourque)
+* Open sourced prange
+* Support for raise statement
+* Pluggable array representation
+* Support for enumerate and zip (thanks to Eugene Toder)
+* Better string formatting support (thanks to Eugene Toder)
+* Builtins min(), max() and bool() (thanks to Eugene Toder)
+* Fix some code reloading issues (thanks to Björn Linse)
+* Recognize NumPy scalar objects (thanks to Björn Linse)
+
+
+Version 0.9
+-----------
+* Improved math support
+* Open sourced generalized ufuncs
+* Improved array expressions
+
+Version 0.8
+-----------
+* Support for autojit classes
+  * Inheritance not yet supported
+* Python 3 support for pycc
+* Allow retrieval of ctypes function wrapper
+  * And hence support retrieval of a pointer to the function
+* Fixed a memory leak of array slicing views
+
+Version 0.7.2
+-------------
+* Official Python 3 support (python 3.2 and 3.3)
+* Support for intrinsics and instructions
+* Various bug fixes (see https://github.com/numba/numba/issues?milestone=7&state=closed)
+
+Version 0.7.1
+-------------
+* Various bug fixes
+
+Version 0.7
+-----------
+* Open sourced single-threaded ufunc vectorizer
+* Open sourced NumPy array expression compilation
+* Open sourced fast NumPy array slicing
+* Experimental Python 3 support
+* Support for typed containers
+  * typed lists and tuples
+* Support for iteration over objects
+* Support object comparisons
+* Preliminary CFFI support
+  * Jit calls to CFFI functions (passed into autojit functions)
+  * TODO: Recognize ffi_lib.my_func attributes
+* Improved support for ctypes
+* Allow declaring extension attribute types through class attributes
+* Support for type casting in Python
+  * Get the same semantics with or without numba compilation
+* Support for recursion
+  * For jit methods and extension classes
+* Allow jit functions as C callbacks
+* Friendlier error reporting
+* Internal improvements
+* A variety of bug fixes
+
+Version 0.6.1
+--------------
+* Support for bitwise operations
+
+Version 0.6
+--------------
+* Python 2.6 support
+* Programmable typing
+  * Allow users to add type inference for external code
+* Better NumPy type inference
+  * outer, inner, dot, vdot, tensordot, nonzero, where,
+    binary ufuncs + methods (reduce, accumulate, reduceat, outer)
+* Type based alias analysis
+  * Support for strict aliasing
+* Much faster autojit dispatch when calling from Python
+* Faster numerical loops through data and stride pre-loading
+* Integral overflow and underflow checking for conversions from objects
+* Make Meta dependency optional
+
+Version 0.5
+--------------
+* SSA-based type inference
+  * Allows variable reuse
+  * Allow referring to variables before lexical definition
+* Support multiple comparisons
+* Support for template types
+* List comprehensions
+* Support for pointers
+* Many bug fixes
+* Added user documentation
+
+Version 0.4
+--------------
+
+Version 0.3.2
+--------------
+
+* Add support for object arithmetic (issue 56).
+* Bug fixes (issue 55).
+
+Version 0.3
+--------------
+* Changed default compilation approach to ast
+* Added support for cross-module linking
+* Added support for closures (can jit inner functions and return them) (see examples/closure.py)
+* Added support for dtype structures (can access elements of structure with attribute access) (see examples/structures.py)
+* Added support for extension types (numba classes) (see examples/numbaclasses.py)
+* Added support for general Python code (use nopython to raise an error if Python C-API is used to avoid unexpected slowness because of lack of implementation defaulting to generic Python)
+* Fixed many bugs
+* Added support to detect math operations.
+* Added with python and with nopython contexts
+* Added more examples
+
+Many features need to be documented still. Look at examples and tests for more information.
+
+
+Version 0.2
+--------------
+* Added an ast approach to compilation
+* Removed d, f, i, b from numba namespace (use f8, f4, i4, b1)
+* Changed function to autojit2
+* Added autojit function to decorate calls to the function and use types of the variable to create compiled versions.
+* Changed keyword arguments to jit and autojit functions to restype and argtypes to be consistent with the ctypes module.
+* Added pycc -- a Python-to-shared-library compiler
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/CONTRIBUTING.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/CONTRIBUTING.md
new file mode 100644
index 000000000..59d6b807c
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/CONTRIBUTING.md
@@ -0,0 +1,53 @@
+
+We welcome people who want to make contributions to Numba, big or small!
+Even simple documentation improvements are encouraged.
+
+# Asking questions
+
+Numba has a [discourse forum](https://numba.discourse.group/) for longer/more
+involved questions and an IRC channel on
+[gitter.im](https://gitter.im/numba/numba) for quick questions and interactive
+help.
+
+# Ways to help:
+
+There's lots of ways to help improve Numba; some of these require creating
+code changes, see **contributing patches** below.
+
+## Quick things:
+
+* Answer a question asked on [discourse](https://numba.discourse.group/) or
+  [gitter.im](https://gitter.im/numba/numba).
+* Review a page of documentation, check it makes sense, that it's clear and
+  still relevant, that the examples are present, good and working. Fix anything
+  that needs updating in a pull request.
+* Make a file that is not `flake8` compliant meet the standard; a list of all
+  failing files is in the `exclude` section of the [`.flake8` config](https://github.com/numba/numba/blob/main/.flake8),
+  then create a pull request with the change.
+
+## More involved things:
+
+* Review a pull request; you don't need to be a compiler engineer to do an
+  initial review of a pull request. It's incredibly helpful to have pull
+  requests go through a review to just make sure the code change is well formed,
+  documented, efficient and clear. Further, if the code is fixing a bug, making
+  sure that tests are present demonstrating it is fixed! Look out for PRs with
+  the [`needs initial review`](https://github.com/numba/numba/labels/needs%20initial%20review)
+  label.
+* Work on fixing or implementing something in the code base; there are a lot of
+  [`good first issues`](https://github.com/numba/numba/labels/good%20first%20issue)
+  and [`good second issues`](https://github.com/numba/numba/labels/good%20second%20issue).
+  For implementing new features/functionality, the extension API is the best
+  thing to use and a guide to using `@overload` in particular is
+  [here](https://numba.pydata.org/numba-doc/dev/extending/overloading-guide.html)
+  and the API documentation is [here](https://numba.pydata.org/numba-doc/latest/extending/high-level.html#implementing-functions).
+
+## Contributing patches
+
+Please fork the Numba repository on GitHub, and create a new branch
+containing your work. When you are done, open a pull request.
+
+# Further reading
+
+Please read the [contributing guide](
+https://numba.pydata.org/numba-doc/dev/developer/contributing.html).
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSE b/cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSE
new file mode 100644
index 000000000..7d19426e7
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2012, Anaconda, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSES.third-party b/cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSES.third-party
new file mode 100644
index 000000000..056142b01
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/LICENSES.third-party
@@ -0,0 +1,493 @@
+The Numba source tree includes vendored libraries governed by the following
+licenses.
+
+
+appdirs
+-------
+
+# This is the MIT license
+
+Copyright (c) 2010 ActiveState Software Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +NetworkX +-------- +The dominance frontier algorithm is from a pull request +https://github.com/numba/numba/pull/4149/files which is based +on the implementation of NetworkX of dominance. NetworkX has the following +license: + +NetworkX is distributed with the 3-clause BSD license. + +:: + + Copyright (C) 2004-2019, NetworkX Developers + Aric Hagberg + Dan Schult + Pieter Swart + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NetworkX Developers nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +jquery.graphviz.svg (https://github.com/mountainstorm/jquery.graphviz.svg/) +--------------------------------------------------------------------------- +The DAG roadmap rendering code in docs/dagmap/ uses Javascript from this +package to draw graphs in HTML. + +Copyright (c) 2015 Mountainstorm +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + + +CPython (https://github.com/python/cpython) +------------------------------------------- +Numba source code that references URLs starting with: + +https://github.com/python/cpython/ + +relates to use/inclusion of CPython source code which has the following license: + +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations, which became +Zope Corporation. In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization +created specifically to own Python-related Intellectual Property. +Zope Corporation was a sponsoring member of the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. 
BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. 
CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+
+7. This License Agreement shall be governed by the federal
+intellectual property law of the United States, including without
+limitation the federal copyright law, and, to the extent such
+U.S. federal law does not apply, by the law of the Commonwealth of
+Virginia, excluding Virginia's conflict of law provisions.
+Notwithstanding the foregoing, with regard to derivative works based
+on Python 1.6.1 that incorporate non-separable material that was
+previously distributed under the GNU General Public License (GPL), the
+law of the Commonwealth of Virginia shall govern this License
+Agreement only as to issues arising under or with respect to
+Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
+License Agreement shall be deemed to create any relationship of
+agency, partnership, or joint venture between CNRI and Licensee. This
+License Agreement does not grant permission to use CNRI trademarks or
+trade name in a trademark sense to endorse or promote products or
+services of Licensee, or any third party.
+
+8. By clicking on the "ACCEPT" button where indicated, or by copying,
+installing or otherwise using Python 1.6.1, Licensee agrees to be
+bound by the terms and conditions of this License Agreement.
+
+        ACCEPT
+
+
+CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
+--------------------------------------------------
+
+Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
+The Netherlands. All rights reserved.
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of Stichting Mathematisch
+Centrum or CWI not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
+FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+CPython unicode (https://github.com/python/cpython)
+---------------------------------------------------
+Numba's unicode support includes source code/algorithms from CPython's unicode
+implementation. Numba source code that carries a reference starting with:
+
+https://github.com/python/cpython/
+
+and that includes the path "Objects/unicodeobject.c" relates to the use or
+inclusion of CPython source code, which carries the following license in
+addition to the standard CPython license:
+
+
+Unicode implementation based on original code by Fredrik Lundh,
+modified by Marc-Andre Lemburg.
+
+Major speed upgrades to the method implementations at the Reykjavik
+NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
+
+Copyright (c) Corporation for National Research Initiatives.
+
+--------------------------------------------------------------------
+The original string type implementation is:
+
+  Copyright (c) 1999 by Secret Labs AB
+  Copyright (c) 1999 by Fredrik Lundh
+
+By obtaining, using, and/or copying this software and/or its
+associated documentation, you agree that you have read, understood,
+and will comply with the following terms and conditions:
+
+Permission to use, copy, modify, and distribute this software and its
+associated documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appears in all
+copies, and that both that copyright notice and this permission notice
+appear in supporting documentation, and that the name of Secret Labs
+AB or the author not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+
+SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+--------------------------------------------------------------------
+
+
+cloudpickle
+-----------
+
+This module was extracted from the `cloud` package, developed by
+PiCloud, Inc.
+
+Copyright (c) 2015, Cloudpickle contributors.
+Copyright (c) 2012, Regents of the University of California.
+Copyright (c) 2009 PiCloud, Inc. http://www.picloud.com.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the University of California, Berkeley nor the
+      names of its contributors may be used to endorse or promote
+      products derived from this software without specific prior written
+      permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+NumPy (https://github.com/numpy/numpy)
+--------------------------------------
+Numba source code that references URLs starting with:
+
+https://github.com/numpy/numpy
+
+relates to use of/inclusion of/derivative work based on NumPy source code,
+which has the following license:
+
+
+Copyright (c) 2005-2021, NumPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of the NumPy Developers nor the names of any
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/MANIFEST.in b/cv/3d_detection/pointrcnn-iou/pytorch/numba/MANIFEST.in
new file mode 100644
index 000000000..449dab31c
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/MANIFEST.in
@@ -0,0 +1,10 @@
+include MANIFEST.in
+include README.rst setup.py runtests.py versioneer.py CHANGE_LOG LICENSE
+
+recursive-include numba *.c *.cpp *.h *.hpp *.inc
+recursive-include docs *.ipynb *.txt *.py Makefile *.rst
+recursive-include examples *.py
+
+prune docs/_build
+prune docs/gh-pages
+include numba/_version.py
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/README.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/README.rst
new file mode 100644
index 000000000..48b2855b6
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/README.rst
@@ -0,0 +1,61 @@
+*****
+Numba
+*****
+
+.. image:: https://badges.gitter.im/numba/numba.svg
+   :target: https://gitter.im/numba/numba?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge
+   :alt: Gitter
+
+.. image:: https://img.shields.io/badge/discuss-on%20discourse-blue
+   :target: https://numba.discourse.group/
+   :alt: Discourse
+
+.. image:: https://zenodo.org/badge/3659275.svg
+   :target: https://zenodo.org/badge/latestdoi/3659275
+   :alt: Zenodo DOI
+
+.. image:: https://img.shields.io/pypi/v/numba.svg
+   :target: https://pypi.python.org/pypi/numba/
+   :alt: PyPI
+
+.. image:: https://dev.azure.com/numba/numba/_apis/build/status/numba.numba?branchName=main
+   :target: https://dev.azure.com/numba/numba/_build/latest?definitionId=1?branchName=main
+   :alt: Azure Pipelines
+
+A Just-In-Time Compiler for Numerical Functions in Python
+#########################################################
+
+Numba is an open source, NumPy-aware optimizing compiler for Python sponsored
+by Anaconda, Inc. It uses the LLVM compiler project to generate machine code
+from Python syntax.
+
+Numba can compile a large subset of numerically-focused Python, including many
+NumPy functions. Additionally, Numba has support for automatic
+parallelization of loops, generation of GPU-accelerated code, and creation of
+ufuncs and C callbacks.
+
+For more information about Numba, see the Numba homepage:
+https://numba.pydata.org and the online documentation:
+https://numba.readthedocs.io/en/stable/index.html
+
+Installation
+============
+
+Please follow the instructions:
+
+https://numba.readthedocs.io/en/stable/user/installing.html
+
+Demo
+====
+
+Please have a look at the demo notebooks via the mybinder service:
+
+https://mybinder.org/v2/gh/numba/numba-examples/master?filepath=notebooks
+
+Contact
+=======
+
+Numba has a discourse forum for discussions:
+
+* https://numba.discourse.group
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/azure-pipelines.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/azure-pipelines.yml
new file mode 100644
index 000000000..79980d31b
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/azure-pipelines.yml
@@ -0,0 +1,129 @@
+trigger:
+  batch: true
+
+variables:
+  # Change the following along with adding new TEST_START_INDEX.
+  TEST_COUNT: 20
+
+jobs:
+# Mac and Linux use the same template with different matrixes
+- template: buildscripts/azure/azure-linux-macos.yml
+  parameters:
+    name: macOS
+    vmImage: macos-11
+    matrix:
+      py37_np118:
+        PYTHON: '3.7'
+        NUMPY: '1.18'
+        CONDA_ENV: 'azure_ci'
+        TEST_START_INDEX: 0
+      py310_np123:
+        PYTHON: '3.10'
+        NUMPY: '1.23'
+        CONDA_ENV: 'azure_ci'
+        TEST_THREADING: 'tbb'
+        TEST_START_INDEX: 1
+
+- template: buildscripts/azure/azure-linux-macos.yml
+  parameters:
+    name: Linux
+    vmImage: ubuntu-20.04
+    matrix:
+      py37_np118_32bit:
+        # 32 bit linux only has np 1.15
+        PYTHON: '3.7'
+        NUMPY: '1.18'
+        CONDA_ENV: azure_ci
+        BITS32: yes
+        TEST_START_INDEX: 2
+      py37_np118_vanilla:
+        PYTHON: '3.7'
+        NUMPY: '1.18'
+        CONDA_ENV: azure_ci
+        VANILLA_INSTALL: yes
+        TEST_START_INDEX: 3
+      py38_np118_cov:
+        PYTHON: '3.8'
+        NUMPY: '1.18'
+        CONDA_ENV: azure_ci
+        RUN_COVERAGE: yes
+        RUN_FLAKE8: yes
+        RUN_MYPY: yes
+        TEST_START_INDEX: 4
+      py38_np119_tbb:
+        PYTHON: '3.8'
+        NUMPY: '1.19.2=*_0'
+        CONDA_ENV: azure_ci
+        TEST_THREADING: 'tbb'
+        TEST_START_INDEX: 5
+      py38_np119_omp:
+        PYTHON: '3.8'
+        NUMPY: '1.19.2=*_0'
+        CONDA_ENV: azure_ci
+        TEST_THREADING: omp
+        TEST_START_INDEX: 6
+      py38_np119_workqueue:
+        PYTHON: '3.8'
+        NUMPY: '1.19.2=*_0'
+        CONDA_ENV: azure_ci
+        TEST_THREADING: workqueue
+        TEST_START_INDEX: 7
+      py38_np120_doc:
+        PYTHON: '3.8'
+        NUMPY: '1.20'
+        CONDA_ENV: azure_ci
+        BUILD_DOC: yes
+        TEST_START_INDEX: 8
+      py38_np120_pickle5:
+        PYTHON: '3.8'
+        NUMPY: '1.20'
+        CONDA_ENV: azure_ci
+        TEST_PICKLE5: yes
+        TEST_START_INDEX: 9
+      py38_np120_svml:
+        PYTHON: '3.8'
+        NUMPY: '1.20'
+        CONDA_ENV: azure_ci
+        TEST_SVML: yes
+        TEST_START_INDEX: 10
+      py38_np122:
+        PYTHON: '3.8'
+        NUMPY: '1.22'
+        CONDA_ENV: azure_ci
+        TEST_START_INDEX: 11
+      py39_np119:
+        PYTHON: '3.9'
+        NUMPY: '1.19.2=*_0'
+        CONDA_ENV: azure_ci
+        TEST_START_INDEX: 12
+      py39_np120_typeguard:
+        PYTHON: '3.9'
+        NUMPY: '1.20'
+        CONDA_ENV: azure_ci
+        RUN_TYPEGUARD: yes
+        TEST_START_INDEX: 13
+      py39_np121:
+        PYTHON: '3.9'
+        NUMPY: '1.21'
+        CONDA_ENV: azure_ci
+        TEST_START_INDEX: 14
+      py39_np123:
+        PYTHON: '3.9'
+        NUMPY: '1.23'
+        CONDA_ENV: azure_ci
+        TEST_START_INDEX: 15
+      py310_np121:
+        PYTHON: '3.10'
+        NUMPY: '1.21'
+        CONDA_ENV: azure_ci
+        TEST_START_INDEX: 16
+      py310_np123:
+        PYTHON: '3.10'
+        NUMPY: '1.23'
+        CONDA_ENV: azure_ci
+        TEST_START_INDEX: 17
+
+- template: buildscripts/azure/azure-windows.yml
+  parameters:
+    name: Windows
+    vmImage: windows-2019
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/bin/numba b/cv/3d_detection/pointrcnn-iou/pytorch/numba/bin/numba
new file mode 100644
index 000000000..32e7180df
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/bin/numba
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+from __future__ import print_function, division, absolute_import
+
+from numba.misc.numba_entry import main
+
+if __name__ == "__main__":
+    main()
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/build_numba.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/build_numba.sh
new file mode 100644
index 000000000..2e4dda01e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/build_numba.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# clang/LLVM 16 is not supported yet; skip the build (exit 0) in that case.
+clang_version=`clang --version | grep "clang version 16."`
+if [[ "${clang_version}" != "" ]]; then
+    echo "LLVM 16 is not supported yet!"
+    exit 0
+fi
+
+COREX_VERSION=${COREX_VERSION:-latest}
+
+PYTHON_PATH=$(which python3)
+
+if [[ "${COREX_VERSION}" == "latest" ]]; then
+    COREX_VERSION=`date --utc +%Y%m%d%H%M%S`
+fi
+export NUMBA_LOCAL_IDENTIFIER="corex.${COREX_VERSION}"
+
+${PYTHON_PATH} setup.py bdist_wheel -d build_pip 2>&1 | tee compile.log; [[ ${PIPESTATUS[0]} == 0 ]] || exit
+
+# Return 0 status if all finished
+exit 0
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/appveyor/run_with_env.cmd b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/appveyor/run_with_env.cmd
new file mode 100644
index 000000000..3a56e3e84
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/appveyor/run_with_env.cmd
@@ -0,0 +1,90 @@
+:: From https://github.com/ogrisel/python-appveyor-demo
+::
+:: To build extensions for 64 bit Python 3, we need to configure environment
+:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of:
+:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1)
+::
+:: To build extensions for 64 bit Python 2, we need to configure environment
+:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of:
+:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0)
+::
+:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific
+:: environment configurations.
+::
+:: Note: this script needs to be run with the /E:ON and /V:ON flags for the
+:: cmd interpreter, at least for (SDK v7.0)
+::
+:: More details at:
+:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows
+:: http://stackoverflow.com/a/13751649/163740
+::
+:: Author: Olivier Grisel
+:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
+::
+:: Notes about batch files for Python people:
+::
+:: Quotes in values are literally part of the values:
+::      SET FOO="bar"
+:: FOO is now five characters long: " b a r "
+:: If you don't want quotes, don't include them on the right-hand side.
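+::
+:: Illustrative invocation (an assumed example, not taken from this repo's
+:: CI configuration):
+::      cmd /E:ON /V:ON /C run_with_env.cmd python setup.py build_ext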
+:: +:: The CALL lines at the end of this file look redundant, but if you move them +:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y +:: case, I don't know why. +@ECHO OFF + +SET COMMAND_TO_RUN=%* +SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows +SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf + +:: Extract the major and minor versions, and allow for the minor version to be +:: more than 9. This requires the version number to have two dots in it. +SET MAJOR_PYTHON_VERSION=%PYTHON:~0,1% +IF "%PYTHON:~3,1%" == "." ( + SET MINOR_PYTHON_VERSION=%PYTHON:~2,1% +) ELSE ( + SET MINOR_PYTHON_VERSION=%PYTHON:~2,2% +) + +:: Based on the Python version, determine what SDK version to use, and whether +:: to set the SDK for 64-bit. +IF %MAJOR_PYTHON_VERSION% == 2 ( + SET WINDOWS_SDK_VERSION="v7.0" + SET SET_SDK_64=Y +) ELSE ( + IF %MAJOR_PYTHON_VERSION% == 3 ( + SET WINDOWS_SDK_VERSION="v7.1" + IF %MINOR_PYTHON_VERSION% LEQ 4 ( + SET SET_SDK_64=Y + ) ELSE ( + SET SET_SDK_64=N + IF EXIST "%WIN_WDK%" ( + :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ + REN "%WIN_WDK%" 0wdf + ) + ) + ) ELSE ( + ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" + EXIT 1 + ) +) + +IF %ARCH% == 64 ( + IF %SET_SDK_64% == Y ( + ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture + SET DISTUTILS_USE_SDK=1 + SET MSSdk=1 + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% + "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 + ) ELSE ( + ECHO Using default MSVC build environment for 64 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 + ) +) ELSE ( + ECHO Using default MSVC build environment for 32 bit architecture + ECHO Executing: %COMMAND_TO_RUN% + call %COMMAND_TO_RUN% || EXIT 1 +) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-linux-macos.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-linux-macos.yml new file mode 100644 index 000000000..74cd37f41 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-linux-macos.yml @@ -0,0 +1,47 @@ +parameters: + name: '' + vmImage: '' + matrix: [] + +jobs: +- job: ${{ parameters.name }} + pool: + vmImage: ${{ parameters.vmImage }} + strategy: + matrix: + ${{ insert }}: ${{ parameters.matrix }} + + steps: + - script: | + if [ "$(uname)" == "Linux" ] && [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]]; then sudo apt-get install -y libc6-dev-i386; fi + if [ "$(uname)" == "Linux" ] && [[ "$CONDA_SUBDIR" != "linux-32" && "$BITS32" != "yes" ]]; then sudo apt-get install -y gdb; fi + echo "Installing Miniconda" + buildscripts/incremental/install_miniconda.sh + export PATH=$HOME/miniconda3/bin:$PATH + echo "Setting up Conda environment" + buildscripts/incremental/setup_conda_environment.sh + displayName: 'Before Install' + + - script: | + export PATH=$HOME/miniconda3/bin:$PATH + buildscripts/incremental/build.sh + displayName: 'Build' + + - script: | + export PATH=$HOME/miniconda3/bin:$PATH + conda install -y flake8 + flake8 numba + displayName: 'Flake8' + condition: eq(variables['RUN_FLAKE8'], 'yes') + + - script: | + export PATH=$HOME/miniconda3/bin:$PATH + conda install -y mypy + mypy + displayName: 'Mypy' + condition: eq(variables['RUN_MYPY'], 'yes') + + - script: | + export 
PATH=$HOME/miniconda3/bin:$PATH
+      buildscripts/incremental/test.sh
+    displayName: 'Test'
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-windows.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-windows.yml
new file mode 100644
index 000000000..aa38d8b32
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/azure/azure-windows.yml
@@ -0,0 +1,62 @@
+parameters:
+  name: ''
+  vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+  pool:
+    vmImage: ${{ parameters.vmImage }}
+  strategy:
+    matrix:
+      py310_np123:
+        PYTHON: '3.10'
+        NUMPY: '1.23'
+        CONDA_ENV: 'testenv'
+        TEST_START_INDEX: 18
+      py37_np118:
+        PYTHON: '3.7'
+        NUMPY: '1.18'
+        CONDA_ENV: 'testenv'
+        TEST_START_INDEX: 19
+
+  steps:
+    - task: CondaEnvironment@1
+      inputs:
+        updateConda: no
+        packageSpecs: ''
+
+    - script: |
+        buildscripts\\incremental\\setup_conda_environment.cmd
+      displayName: 'Before Install'
+
+    - script: |
+        buildscripts\\incremental\\build.cmd
+      displayName: 'Build'
+
+    - script: |
+        call activate %CONDA_ENV%
+        python -m numba -s
+      displayName: 'Display numba system information'
+
+    - script: |
+        call activate %CONDA_ENV%
+        python -m numba.tests.test_runtests
+      displayName: 'Verify runtests'
+
+    - script: |
+        call activate %CONDA_ENV%
+        python -m numba.runtests -l
+      displayName: 'List discovered tests'
+
+    - script: |
+        call activate %CONDA_ENV%
+        set NUMBA_CAPTURED_ERRORS=new_style
+        echo "Running slice of discovered tests: %TEST_START_INDEX%,None,%TEST_COUNT%"
+        python -m numba.runtests -b -v -g -m 2 -- numba.tests
+      displayName: 'Test modified test files'
+
+    - script: |
+        call activate %CONDA_ENV%
+        set NUMBA_CAPTURED_ERRORS=new_style
+        python runtests.py -m 2 -b -j "%TEST_START_INDEX%,None,%TEST_COUNT%" --exclude-tags='long_running' -- numba.tests
+      displayName: 'Test slice of test files'
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/bld.bat b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/bld.bat
new file mode 100644
index 000000000..6372f3a4d
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/bld.bat
@@ -0,0 +1,3 @@
+%PYTHON% setup.py build install --single-version-externally-managed --record=record.txt
+
+exit /b %errorlevel%
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/build.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/build.sh
new file mode 100644
index 000000000..f08a73391
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/build.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+if [[ "$(uname -s)" == *"Linux"* ]] && [[ "$(uname -p)" == *"86"* ]]; then
+    EXTRA_BUILD_EXT_FLAGS="--werror --wall"
+else
+    EXTRA_BUILD_EXT_FLAGS=""
+fi
+
+if [[ "$(uname -s)" == *"Linux"* ]] && [[ "$(uname -p)" == *"ppc64le"* ]]; then
+    # To work around https://github.com/numba/numba/issues/7302,
+    # a Python build problem where the -pthread flag could be stripped.
+ export CC="$CC -pthread" + export CXX="$CXX -pthread" +fi + +MACOSX_DEPLOYMENT_TARGET=10.10 $PYTHON setup.py build_ext $EXTRA_BUILD_EXT_FLAGS build install --single-version-externally-managed --record=record.txt diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/conda_build_config.yaml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/conda_build_config.yaml new file mode 100644 index 000000000..9798e4b69 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/conda_build_config.yaml @@ -0,0 +1,12 @@ +# Numba/llvmlite stack needs an older compiler for backwards compatibility. +c_compiler_version: # [linux] + - 7 # [linux and (x86_64 or ppc64le)] + - 9 # [linux and aarch64] + +cxx_compiler_version: # [linux] + - 7 # [linux and (x86_64 or ppc64le)] + - 9 # [linux and aarch64] + +fortran_compiler_version: # [linux] + - 7 # [linux and (x86_64 or ppc64le)] + - 9 # [linux and aarch64] diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/license.txt b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/license.txt new file mode 100644 index 000000000..7d19426e7 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/license.txt @@ -0,0 +1,24 @@ +Copyright (c) 2012, Anaconda, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/meta.yaml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/meta.yaml new file mode 100644 index 000000000..ad94a6896 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/meta.yaml @@ -0,0 +1,94 @@ +package: + name: numba + version: {{ GIT_DESCRIBE_TAG }} + +source: + path: ../.. 
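+  # (i.e. the numba checkout root, two levels above this recipe directory)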
+ +build: + number: {{ GIT_DESCRIBE_NUMBER|int }} + string: np{{ NPY_VER }}py{{ PY_VER }}h{{ PKG_HASH }}_{{GIT_DESCRIBE_HASH}}_{{ GIT_DESCRIBE_NUMBER }} + entry_points: + - pycc = numba.pycc:main + - numba = numba.misc.numba_entry:main + script_env: + - PY_VCRUNTIME_REDIST + missing_dso_whitelist: # [osx] + # optional dependency: required only when omp is chosen as the backend for + # the threading layer + - lib/libiomp5.dylib # [osx] + ignore_run_exports: + # tbb-devel triggers hard dependency on tbb, this is not the case. + - tbb # [not (armv6l or armv7l or aarch64 or linux32)] + +requirements: + # build and run dependencies are duplicated to avoid setuptools issues + # when we also set install_requires in setup.py + build: + - {{ compiler('c') }} # [not (armv6l or armv7l or aarch64)] + - {{ compiler('cxx') }} # [not (armv6l or armv7l or aarch64)] + # OpenMP headers from llvm needed for OSX. + - llvm-openmp # [osx] + host: + - python + - numpy + - setuptools + - importlib_metadata # [py<39] + # On channel https://anaconda.org/numba/ + - llvmlite 0.39.* + # TBB devel version is to match TBB libs. + # 2020.3 is the last version with the "old" ABI + # NOTE: 2021.1..2021.5 are API compatible for Numba's purposes. + # NOTE: ppc64le exclusion is temporary until packages are more generally + # available. + - tbb-devel >=2021,<2021.6 # [not (armv6l or armv7l or aarch64 or linux32 or ppc64le)] + run: + - python >=3.7 + # NumPy 1.22.0, 1.22.1, 1.22.2 are all broken for ufuncs, see #7756 + - numpy >=1.18, !=1.22.0, !=1.22.1, !=1.22.2, <1.24 + - setuptools + - importlib_metadata # [py<39] + # On channel https://anaconda.org/numba/ + - llvmlite 0.39.* + run_constrained: + # If TBB is present it must be at least version 2021 + - tbb >=2021 # [not (armv6l or armv7l or aarch64 or linux32 or ppc64le)] + # avoid confusion from openblas bugs + - libopenblas !=0.3.6 # [x86_64] + # 0.3.17 buggy on M1 silicon + # https://github.com/xianyi/OpenBLAS/blob/v0.3.20/Changelog.txt#L118 + # https://github.com/numba/numba/issues/7822#issuecomment-1063229855 + # Exclude 0.3.20 too + # https://github.com/numba/numba/issues/8096 + - libopenblas >=0.3.18, !=0.3.20 # [arm64] + # CUDA 10.2 or later is required for CUDA support + - cudatoolkit >=10.2 + # scipy 1.0 or later + - scipy >=1.0 + # CUDA Python 11.6 or later + - cuda-python >=11.6 + +test: + requires: + - jinja2 + # Required to test optional Numba features + - cffi + # temporarily disable scipy testing on ARM, need to build out more packages + - scipy # [not (armv6l or armv7l)] + - ipython # [not (armv6l or armv7l or aarch64)] + - setuptools + - tbb >=2021 # [not (armv6l or armv7l or aarch64 or linux32 or ppc64le)] + - llvm-openmp # [osx] + # This is for driving gdb tests + - pexpect # [linux64] + # For testing ipython + - ipykernel + # Need these for AOT. 
Do not init msvc as it may not be present + - {{ compiler('c') }} # [not (win or armv6l or armv7l or aarch64)] + - {{ compiler('cxx') }} # [not (win or armv6l or armv7l or aarch64)] + +about: + home: https://numba.pydata.org/ + license: BSD + license_file: LICENSE + summary: a just-in-time Python function compiler based on LLVM diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.bat b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.bat new file mode 100644 index 000000000..077067e06 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.bat @@ -0,0 +1,19 @@ +set NUMBA_DEVELOPER_MODE=1 +set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 +set NUMBA_CAPTURED_ERRORS=new_style +set PYTHONFAULTHANDLER=1 + +@rem Check Numba executables are there +pycc -h +numba -h + +@rem Run system info tool +numba -s + +@rem Check test discovery works +python -m numba.tests.test_runtests + +@rem Run the whole test suite +python -m numba.runtests -b -m -- %TESTS_TO_RUN% + +if errorlevel 1 exit 1 diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.sh new file mode 100644 index 000000000..69cc46008 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe.local/run_test.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +set -e + +export NUMBA_DEVELOPER_MODE=1 +export NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 +export NUMBA_CAPTURED_ERRORS="new_style" +export PYTHONFAULTHANDLER=1 + +# Disable NumPy dispatching to AVX512_SKX feature extensions if the chip is +# reported to support the feature and NumPy >= 1.22 as this results in the use +# of low accuracy SVML libm replacements in ufunc loops. 
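+# (The check below asks Numba's own sysinfo whether both conditions hold and
+# prints "True"/"False"; the same snippet appears in
+# buildscripts/incremental/test.sh.)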
+_NPY_CMD='from numba.misc import numba_sysinfo;\ + sysinfo=numba_sysinfo.get_sysinfo();\ + print(sysinfo["NumPy AVX512_SKX detected"] and + sysinfo["NumPy Version"]>="1.22")' +NUMPY_DETECTS_AVX512_SKX_NP_GT_122=$(python -c "$_NPY_CMD") +echo "NumPy >= 1.22 with AVX512_SKX detected: $NUMPY_DETECTS_AVX512_SKX_NP_GT_122" + +if [[ "$NUMPY_DETECTS_AVX512_SKX_NP_GT_122" == "True" ]]; then + export NPY_DISABLE_CPU_FEATURES="AVX512_SKX" +fi + + +unamestr=`uname` +if [[ "$unamestr" == 'Linux' ]]; then + SEGVCATCH=catchsegv +elif [[ "$unamestr" == 'Darwin' ]]; then + SEGVCATCH="" +else + echo Error +fi + +# limit CPUs in use on PPC64LE, fork() issues +# occur on high core count systems +archstr=`uname -m` +if [[ "$archstr" == 'ppc64le' ]]; then + TEST_NPROCS=16 +fi + +# Check Numba executables are there +pycc -h +numba -h + +# run system info tool +numba -s + +# Check test discovery works +python -m numba.tests.test_runtests + +# Run the whole test suite +echo "Running: $SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- $TESTS_TO_RUN" +$SEGVCATCH python -m numba.runtests -b -m $TEST_NPROCS -- $TESTS_TO_RUN diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat new file mode 100644 index 000000000..e2fd587c5 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/bld.bat @@ -0,0 +1,3 @@ +%PYTHON% build.py + +exit /b %errorlevel% diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/build.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/build.sh new file mode 100644 index 000000000..9ed12875c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/build.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -x + +${PYTHON} build.py diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml new file mode 100644 index 000000000..76f64a6b1 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/meta.yaml @@ -0,0 +1,38 @@ +{% set version = "2019.3" %} # this is the intel version to get +{% set win_build_number = "203" %} # the build number from the intel windows version +{% set osx_build_number = "199" %} # the build number from the intel osx version +{% set lnx_build_number = "199" %} # the build number from the intel linux version + +package: + name: icc_rt + version: {{ version }} + +build: + number: {{ win_build_number }} # [win] + number: {{ osx_build_number }} # [osx] + number: {{ lnx_build_number }} # [linux] + +source: + - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/win-64/icc_rt-{{version}}-intel_{{win_build_number}}.tar.bz2 # [win] + - md5: d39bae3218457a4ea045763fdcfc1562 # [win] + - sha256: 2c55b8af1dea35ee4648b671050899a93b7eba1b26acad019bf569ca777a944e # [win] + + - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/osx-64/icc_rt-{{version}}-intel_{{osx_build_number}}.tar.bz2 # [osx] + - md5: 064566ac53e729d3f008e32b1f73d1fa # [osx] + - sha256: 54a372b0d8d5b4d750c28ea122851b52ec9aa3cccb8d4cf4a2999494dfda6656 # [osx] + + - url: https://anaconda.org/intel/icc_rt/{{ version }}/download/linux-64/icc_rt-{{version}}-intel_{{lnx_build_number}}.tar.bz2 # [linux] + - md5: 
306c3ee9491577715dbd76c838147078 # [linux] + - sha256: 4cedd10343d1ab4403af2ff080b47afe5399be550f1c215e5a7c7eceec672516 # [linux] + + - path: scripts + +requirements: + build: + - python>=3 + +about: + license: "Intel" + license_family: "Proprietary" + license_file: LICENSE.txt +summary: Intel ICC runtime. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py new file mode 100644 index 000000000..11eac6282 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/condarecipe_clone_icc_rt/scripts/build.py @@ -0,0 +1,33 @@ +import sys +import os +import shutil + +libdir = {'w': 'Library', + 'l': 'lib', + 'd': 'lib'} + + +def run(): + src_dir = os.environ.get('SRC_DIR') + prefix = os.environ.get('PREFIX') + + libd = libdir.get(sys.platform[0], None) + assert libd is not None + + # remove 'lib' from the prefix so a direct copy from the original + # package can be made + lib_dir = os.path.join(prefix, libd) + shutil.rmtree(lib_dir) + # copy in the original package lib dir + shutil.copytree(os.path.join(src_dir, libd), lib_dir) + + # and copy the license + info_dir = os.path.join(src_dir, 'info') + shutil.copy(os.path.join(info_dir, 'LICENSE.txt'), src_dir) + shutil.rmtree(info_dir) + + +if __name__ == "__main__": + args = sys.argv + assert len(args) == 1 + run() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/axis.yaml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/axis.yaml new file mode 100644 index 000000000..417971411 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/axis.yaml @@ -0,0 +1,19 @@ +PYTHON_VER: +- "3.8" + +CUDA_VER: +- "11.2" + +CUDA_TOOLKIT_VER: +- "10.2" +- "11.1" +- "11.2" +- "11.5" + +LINUX_VER: +- ubuntu18.04 + +RAPIDS_VER: +- "21.12" + +excludes: diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/build.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/build.sh new file mode 100644 index 000000000..1088b1eeb --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/gpuci/build.sh @@ -0,0 +1,74 @@ +############################################## +# Numba GPU build and test script for CI # +############################################## +set -e + +# Set path and build parallel level +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} + +# Set home to the job's workspace +export HOME="$WORKSPACE" + +# Switch to project root; also root of repo checkout +cd "$WORKSPACE" + +# Determine CUDA release version +export CUDA_REL=${CUDA_VERSION%.*} + +# Test with NVIDIA Bindings on CUDA 11.5 +if [ $CUDA_TOOLKIT_VER == "11.5" ] +then + export NUMBA_CUDA_USE_NVIDIA_BINDING=1; +else + export NUMBA_CUDA_USE_NVIDIA_BINDING=0; +fi; + +################################################################################ +# SETUP - Check environment +################################################################################ + +gpuci_logger "Check environment variables" +env + +gpuci_logger "Check GPU usage" +nvidia-smi + +gpuci_logger "Create testing env" +. 
/opt/conda/etc/profile.d/conda.sh +gpuci_mamba_retry create -n numba_ci -y \ + "python=${PYTHON_VER}" \ + "cudatoolkit=${CUDA_TOOLKIT_VER}" \ + "numba/label/dev::llvmlite" \ + "numpy=1.21" \ + "scipy" \ + "cffi" \ + "psutil" \ + "gcc_linux-64=7" \ + "gxx_linux-64=7" \ + "setuptools" + +conda activate numba_ci + +if [ $NUMBA_CUDA_USE_NVIDIA_BINDING == "1" ] +then + gpuci_logger "Install NVIDIA CUDA Python bindings"; + gpuci_mamba_retry install nvidia::cuda-python=11.7.0; +fi; + +gpuci_logger "Install numba" +python setup.py develop + +gpuci_logger "Check Compiler versions" +$CC --version +$CXX --version + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources + +gpuci_logger "Dump system information from Numba" +python -m numba -s + +gpuci_logger "Run tests in numba.cuda.tests" +python -m numba.runtests numba.cuda.tests -v -m diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/MacOSX10.10.sdk.checksum b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/MacOSX10.10.sdk.checksum new file mode 100644 index 000000000..0a82e4317 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/MacOSX10.10.sdk.checksum @@ -0,0 +1 @@ +ea40a3b9dc48cd3593628490f2738b89282f00ab ./MacOSX10.10.sdk.tar.xz diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/after_success.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/after_success.sh new file mode 100644 index 000000000..63c55b63d --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/after_success.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +source activate $CONDA_ENV + +# Make sure any error below is reported as such +set -v -e + +if [ "$RUN_COVERAGE" == "yes" ]; then + coverage combine + codecov +fi diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.cmd b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.cmd new file mode 100644 index 000000000..645a15625 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.cmd @@ -0,0 +1,10 @@ + +call activate %CONDA_ENV% + +@rem Build numba extensions without silencing compile errors +python setup.py build_ext -q --inplace + +@rem Install numba locally for use in `numba -s` sys info tool at test time +python -m pip install -e . 
+ +if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.sh new file mode 100644 index 000000000..4c696d69c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/build.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +source activate +conda activate $CONDA_ENV + +# Make sure any error below is reported as such +set -v -e + +# Build numba extensions without silencing compile errors +if [[ "$(uname -s)" == *"Linux"* ]] && [[ "$(uname -p)" == *"86"* ]]; then + EXTRA_BUILD_EXT_FLAGS="--werror --wall" +else + EXTRA_BUILD_EXT_FLAGS="" +fi + +if [[ $(uname) == "Darwin" ]]; then + # The following is suggested in https://docs.conda.io/projects/conda-build/en/latest/resources/compiler-tools.html?highlight=SDK#macos-sdk + wget -q https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX10.10.sdk.tar.xz + shasum -c ./buildscripts/incremental/MacOSX10.10.sdk.checksum + tar -xf ./MacOSX10.10.sdk.tar.xz + export SDKROOT=`pwd`/MacOSX10.10.sdk +fi +python setup.py build_ext -q --inplace --debug $EXTRA_BUILD_EXT_FLAGS --verbose +# (note we don't install to avoid problems with extra long Windows paths +# during distutils-dependent tests -- e.g. test_pycc) + +# Install numba locally for use in `numba -s` sys info tool at test time +python -m pip install --no-deps -e . diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/install_miniconda.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/install_miniconda.sh new file mode 100644 index 000000000..4aa73bac6 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/install_miniconda.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -v -e + +# Install Miniconda +unamestr=`uname` +if [[ "$unamestr" == 'Linux' ]]; then + if [[ "$BITS32" == "yes" ]]; then + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86.sh -O miniconda.sh + else + wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + fi +elif [[ "$unamestr" == 'Darwin' ]]; then + wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh +else + echo Error +fi +chmod +x miniconda.sh +./miniconda.sh -b diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.cmd b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.cmd new file mode 100644 index 000000000..1a4ecb7c7 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.cmd @@ -0,0 +1,45 @@ +@rem first configure conda to have more tolerance of network problems, these +@rem numbers are not scientifically chosen, just merely larger than defaults +set CONDA_CONFIG=cmd /C conda config +%CONDA_CONFIG% --write-default +%CONDA_CONFIG% --set remote_connect_timeout_secs 30.15 +%CONDA_CONFIG% --set remote_max_retries 10 +%CONDA_CONFIG% --set remote_read_timeout_secs 120.2 +%CONDA_CONFIG% --set restore_free_channel true +%CONDA_CONFIG% --set show_channel_urls true +cmd /C conda info +%CONDA_CONFIG% --show + +@rem The cmd /C hack circumvents a regression where conda installs a conda.bat +@rem script in non-root environments. 
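+@rem All conda invocations below are therefore wrapped as "cmd /C conda ...".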
+set CONDA_INSTALL=cmd /C conda install -q -y +set PIP_INSTALL=pip install -q + +@echo on + +@rem Deactivate any environment +call deactivate +@rem Display root environment (for debugging) +conda list +@rem Scipy, CFFI, jinja2 and IPython are optional dependencies, but exercised in the test suite +conda create -n %CONDA_ENV% -q -y python=%PYTHON% numpy=%NUMPY% cffi pip jinja2 ipython gitpython pyyaml + +call activate %CONDA_ENV% +@rem Scipy comes from conda-forge for NumPy 1.23 +if %NUMPY% == "1.23" (%CONDA_INSTALL% conda-forge::scipy) else (%CONDA_INSTALL% scipy) +@rem Install latest llvmlite build +%CONDA_INSTALL% -c numba/label/dev llvmlite=0.39 +@rem Install required backports for older Pythons +if %PYTHON% LSS 3.9 (%CONDA_INSTALL% importlib_metadata) +@rem Install dependencies for building the documentation +if "%BUILD_DOC%" == "yes" (%CONDA_INSTALL% sphinx sphinx_rtd_theme pygments) +@rem Install dependencies for code coverage (codecov.io) +if "%RUN_COVERAGE%" == "yes" (%PIP_INSTALL% codecov) +@rem Install TBB +%CONDA_INSTALL% -c numba tbb=2021 "tbb-devel>=2021,<2021.6" +if %errorlevel% neq 0 exit /b %errorlevel% + +echo "DEBUG ENV:" +echo "-------------------------------------------------------------------------" +conda env export +echo "-------------------------------------------------------------------------" diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.sh new file mode 100644 index 000000000..943c428ad --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/setup_conda_environment.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +set -v -e + +# first configure conda to have more tolerance of network problems, these +# numbers are not scientifically chosen, just merely larger than defaults +conda config --write-default +conda config --set remote_connect_timeout_secs 30.15 +conda config --set remote_max_retries 10 +conda config --set remote_read_timeout_secs 120.2 +conda config --set show_channel_urls true +if [[ $(uname) == Linux ]]; then + if [[ "$CONDA_SUBDIR" != "linux-32" && "$BITS32" != "yes" ]] ; then + conda config --set restore_free_channel true + fi +fi +conda info +conda config --show + +CONDA_INSTALL="conda install -q -y" +PIP_INSTALL="pip install -q" + + +EXTRA_CHANNELS="" +if [ "${USE_C3I_TEST_CHANNEL}" == "yes" ]; then + EXTRA_CHANNELS="${EXTRA_CHANNELS} -c c3i_test" +fi + + +# Deactivate any environment +source deactivate +# Display root environment (for debugging) +conda list + +# If VANILLA_INSTALL is yes, then only Python, NumPy and pip are installed, this +# is to catch tests/code paths that require an optional package and are not +# guarding against the possibility that it does not exist in the environment. +# Create a base env first and then add to it... +# NOTE: gitpython is needed for CI testing to do the test slicing +# NOTE: pyyaml is used to ensure that the Azure CI config is valid +# NOTE: 32 bit linux... 
do not install NumPy, there's no conda package for >1.15 +# so it has to come from pip later +if [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]]; then + conda create -n $CONDA_ENV -q -y ${EXTRA_CHANNELS} python=$PYTHON pip gitpython pyyaml +else + conda create -n $CONDA_ENV -q -y ${EXTRA_CHANNELS} python=$PYTHON numpy=$NUMPY pip gitpython pyyaml +fi + +# Activate first +set +v +source activate $CONDA_ENV +set -v + +# Install optional packages into activated env +echo "PYTHON=$PYTHON" +echo "VANILLA_INSTALL=$VANILLA_INSTALL" +if [ "${VANILLA_INSTALL}" != "yes" ]; then + # Scipy, CFFI, jinja2, IPython and pygments are optional + # dependencies, but exercised in the test suite + # pexpect is used to run the gdb tests. + # ipykernel is used for testing ipython behaviours. + $CONDA_INSTALL ${EXTRA_CHANNELS} cffi jinja2 ipython ipykernel pygments pexpect + # Only install scipy on 64bit, else it'll pull in NumPy, 32bit linux needs + # to get scipy from pip + if [[ "$CONDA_SUBDIR" != "linux-32" && "$BITS32" != "yes" ]] ; then + if [[ "$NUMPY" == "1.23" ]] ; then + $CONDA_INSTALL ${EXTRA_CHANNELS} conda-forge::scipy + else + $CONDA_INSTALL ${EXTRA_CHANNELS} scipy + fi + fi +fi + +# Install the compiler toolchain +if [[ $(uname) == Linux ]]; then + if [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then + $CONDA_INSTALL gcc_linux-32 gxx_linux-32 + else + $CONDA_INSTALL gcc_linux-64 gxx_linux-64 + fi +elif [[ $(uname) == Darwin ]]; then + $CONDA_INSTALL clang_osx-64 clangxx_osx-64 + # Install llvm-openmp on OSX for headers during build and runtime during + # testing + $CONDA_INSTALL llvm-openmp +fi + +# If on 32bit linux, now pip install NumPy (no conda package), SciPy is broken?! +if [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then + $PIP_INSTALL numpy==$NUMPY +fi + +# Install latest correct build +$CONDA_INSTALL -c numba/label/dev llvmlite=0.39 + +# Install importlib-metadata for Python < 3.9 +if [ $PYTHON \< "3.9" ]; then $CONDA_INSTALL importlib_metadata; fi + +# Install dependencies for building the documentation +if [ "$BUILD_DOC" == "yes" ]; then $CONDA_INSTALL sphinx=2.4.4 docutils=0.17 sphinx_rtd_theme pygments numpydoc; fi +if [ "$BUILD_DOC" == "yes" ]; then $PIP_INSTALL rstcheck; fi +# Install dependencies for code coverage (codecov.io) +if [ "$RUN_COVERAGE" == "yes" ]; then $PIP_INSTALL codecov; fi +# Install SVML +if [ "$TEST_SVML" == "yes" ]; then $CONDA_INSTALL -c numba icc_rt; fi +# Install Intel TBB parallel backend +if [ "$TEST_THREADING" == "tbb" ]; then $CONDA_INSTALL -c numba tbb=2021 "tbb-devel>=2021,<2021.6"; fi +# Install pickle5 +if [ "$TEST_PICKLE5" == "yes" ]; then $PIP_INSTALL pickle5; fi +# Install typeguard +if [ "$RUN_TYPEGUARD" == "yes" ]; then $CONDA_INSTALL conda-forge::typeguard; fi + +# environment dump for debug +# echo "DEBUG ENV:" +# echo "-------------------------------------------------------------------------" +# conda env export +# echo "-------------------------------------------------------------------------" diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.cmd b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.cmd new file mode 100644 index 000000000..c35278d2b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.cmd @@ -0,0 +1,34 @@ + +call activate %CONDA_ENV% + +@rem Ensure that the documentation builds without warnings +if "%BUILD_DOC%" == "yes" python setup.py build_doc +@rem Run system info tool +pushd bin 
+numba -s +popd + +@rem switch off color messages +set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 +@rem switch on developer mode +set NUMBA_DEVELOPER_MODE=1 +@rem enable the faulthandler +set PYTHONFAULTHANDLER=1 +@rem enable new style error handling +set NUMBA_CAPTURED_ERRORS=new_style + +@rem First check that the test discovery works +python -m numba.tests.test_runtests +@rem Now run the Numba test suite +@rem Note that coverage is run from the checkout dir to match the "source" +@rem directive in .coveragerc +if "%RUN_COVERAGE%" == "yes" ( + set PYTHONPATH=. + coverage erase + coverage run runtests.py -b --exclude-tags='long_running' -m -- numba.tests +) else ( + set NUMBA_ENABLE_CUDASIM=1 + python -m numba.runtests -b --exclude-tags='long_running' -m -- numba.tests +) + +if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.sh new file mode 100644 index 000000000..a3e32bd4a --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/buildscripts/incremental/test.sh @@ -0,0 +1,131 @@ +#!/bin/bash + +source activate $CONDA_ENV + +# Make sure any error below is reported as such +set -v -e + +# Ensure the README is correctly formatted +if [ "$BUILD_DOC" == "yes" ]; then rstcheck README.rst; fi +# Ensure that the documentation builds without warnings +pushd docs +if [ "$BUILD_DOC" == "yes" ]; then make SPHINXOPTS=-W clean html; fi +popd +# Run system info tool +pushd bin +numba -s +popd + +# switch off color messages +export NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 +# switch on developer mode +export NUMBA_DEVELOPER_MODE=1 +# enable the fault handler +export PYTHONFAULTHANDLER=1 + +# enable new style error handling +export NUMBA_CAPTURED_ERRORS="new_style" + +# Disable NumPy dispatching to AVX512_SKX feature extensions if the chip is +# reported to support the feature and NumPy >= 1.22 as this results in the use +# of low accuracy SVML libm replacements in ufunc loops. +_NPY_CMD='from numba.misc import numba_sysinfo;\ + sysinfo=numba_sysinfo.get_sysinfo();\ + print(sysinfo["NumPy AVX512_SKX detected"] and + sysinfo["NumPy Version"]>="1.22")' +NUMPY_DETECTS_AVX512_SKX_NP_GT_122=$(python -c "$_NPY_CMD") +echo "NumPy >= 1.22 with AVX512_SKX detected: $NUMPY_DETECTS_AVX512_SKX_NP_GT_122" + +if [[ "$NUMPY_DETECTS_AVX512_SKX_NP_GT_122" == "True" ]]; then + export NPY_DISABLE_CPU_FEATURES="AVX512_SKX" +fi + +# deal with threading layers +if [ -z ${TEST_THREADING+x} ]; then + echo "INFO: Threading layer not explicitly set." +else + case "${TEST_THREADING}" in "workqueue"|"omp"|"tbb") + export NUMBA_THREADING_LAYER="$TEST_THREADING" + echo "INFO: Threading layer set as: $TEST_THREADING" + ;; + *) + echo "INFO: Threading layer explicitly set to bad value: $TEST_THREADING." + exit 1 + ;; + esac +fi + +# If TEST_THREADING is set in the env, then check that Numba agrees that the +# environment can support the requested threading. 
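+# check_sysinfo() keys off the boolean get_sysinfo()['<layer> Threading']
+# entries; callers below pass "TBB", "OpenMP" or "Workqueue".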
+function check_sysinfo() { + cmd="import os;\ + from numba.misc.numba_sysinfo import get_sysinfo;\ + assert get_sysinfo()['$1 Threading'] is True, 'Threading layer $1 '\ + 'is not supported';\ + print('Threading layer $1 is supported')" + python -c "$cmd" +} + +if [[ "$TEST_THREADING" ]]; then + if [[ "$TEST_THREADING" == "tbb" ]]; then + check_sysinfo "TBB" + elif [[ "$TEST_THREADING" == "omp" ]]; then + check_sysinfo "OpenMP" + elif [[ "$TEST_THREADING" == "workqueue" ]]; then + check_sysinfo "Workqueue" + else + echo "Unknown threading layer requested: $TEST_THREADING" + exit 1 + fi +fi + +# Find catchsegv +unamestr=`uname` +if [[ "$unamestr" == 'Linux' ]]; then + if [[ "${BITS32}" == "yes" ]]; then + SEGVCATCH="" + else + SEGVCATCH=catchsegv + fi +elif [[ "$unamestr" == 'Darwin' ]]; then + SEGVCATCH="" +else + echo Error +fi + +# limit CPUs in use on PPC64LE, fork() issues +# occur on high core count systems +archstr=`uname -m` +if [[ "$archstr" == 'ppc64le' ]]; then + TEST_NPROCS=16 +fi + +# setup SDKROOT on Mac +if [[ $(uname) == "Darwin" ]]; then + export SDKROOT=`pwd`/MacOSX10.10.sdk +fi + +# First check that the test discovery works +python -m numba.tests.test_runtests + +# Now run tests based on the changes identified via git +NUMBA_ENABLE_CUDASIM=1 $SEGVCATCH python -m numba.runtests -b -v -g -m $TEST_NPROCS -- numba.tests + +# List the tests found +echo "INFO: All discovered tests:" +python -m numba.runtests -l + +# Now run the Numba test suite with slicing +# Note that coverage is run from the checkout dir to match the "source" +# directive in .coveragerc +echo "INFO: Running slice of discovered tests: ($TEST_START_INDEX,None,$TEST_COUNT)" +if [ "$RUN_COVERAGE" == "yes" ]; then + export PYTHONPATH=. + coverage erase + $SEGVCATCH coverage run runtests.py -b -j "$TEST_START_INDEX,None,$TEST_COUNT" --exclude-tags='long_running' -m $TEST_NPROCS -- numba.tests +elif [ "$RUN_TYPEGUARD" == "yes" ]; then + echo "INFO: Running with typeguard" + NUMBA_USE_TYPEGUARD=1 NUMBA_ENABLE_CUDASIM=1 PYTHONWARNINGS="ignore:::typeguard" $SEGVCATCH python runtests.py -b -j "$TEST_START_INDEX,None,$TEST_COUNT" --exclude-tags='long_running' -m $TEST_NPROCS -- numba.tests +else + NUMBA_ENABLE_CUDASIM=1 $SEGVCATCH python -m numba.runtests -b -j "$TEST_START_INDEX,None,$TEST_COUNT" --exclude-tags='long_running' -m $TEST_NPROCS -- numba.tests +fi diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/clean_numba.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/clean_numba.sh new file mode 100644 index 000000000..7d8d2e487 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/clean_numba.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +PYTHON_PATH=$(which python3) + +${PYTHON_PATH} setup.py clean || true +rm -rf build build_pip + +# Return 0 status if all finished +exit 0 diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/codecov.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/codecov.yml new file mode 100644 index 000000000..c293219ba --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/codecov.yml @@ -0,0 +1,22 @@ +# Configuration for codecov.io +# When editing this file, please validate its contents using: +# curl -X POST --data-binary @- https://codecov.io/validate < codecov.yml + +comment: + layout: "header, diff, changes, uncovered" + +coverage: + ignore: + - "numba/cuda/.*" + - "numba/hsa/.*" + + status: + project: + default: + # The build fails if total project coverage drops by more than 3% + target: auto + threshold: "3%" + # These checks can mark a build failed if too much new 
code + # is not covered (which happens often with JITted functions). + changes: false + patch: false diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/contrib/valgrind-numba.supp b/cv/3d_detection/pointrcnn-iou/pytorch/numba/contrib/valgrind-numba.supp new file mode 100644 index 000000000..26271eb4e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/contrib/valgrind-numba.supp @@ -0,0 +1,21 @@ +{ + + Memcheck:Cond + fun:_ZN4llvm3sys14getHostCPUNameEv + fun:LLVMPY_GetHostCPUName +} + +{ + + Memcheck:Value8 + fun:_ZN4llvm3sys14getHostCPUNameEv + fun:LLVMPY_GetHostCPUName +} + +{ + + Memcheck:Cond + fun:__intel_sse2_strrchr + fun:_ZN67_INTERNAL_45_______src_thirdparty_tbb_omp_dynamic_link_cpp_c306cade5__kmp12init_dl_dataEv + fun:__sti__$E +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/Makefile b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/Makefile new file mode 100644 index 000000000..b60d7c1d6 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = -j1 +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) 
$(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Numba.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Numba.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Numba" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Numba" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
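+# Note: every builder target in this Makefile follows the same pattern,
+# delegating to sphinx-build with the shared flags collected in ALLSPHINXOPTS.
+# As a rough sketch (with PAPER left empty, per the defaults at the top of
+# this file), the html target expands to:
+#
+#   sphinx-build -b html -d _build/doctrees -j1 source _build/html
+#
+# so any builder can also be invoked directly, without make.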
+ +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/js/modernizr.min.js b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/js/modernizr.min.js new file mode 100644 index 000000000..939eaaf03 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/js/modernizr.min.js @@ -0,0 +1,7 @@ +/* modernizr.min.js is unused but causes a reflow on load. In firefox, this + * manifests as the Numba logo flashing up across the whole browser window for a + * split second every time the page is loaded or a documentation link is + * clicked. This empty file overrides the version included by the theme. + * + * Reference: https://github.com/readthedocs/sphinx_rtd_theme/issues/724 + */ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-blue-icon-rgb.svg b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-blue-icon-rgb.svg new file mode 100644 index 000000000..0df9c042e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-blue-icon-rgb.svg @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-white-icon-rgb.svg b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-white-icon-rgb.svg new file mode 100644 index 000000000..904b61c13 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/numba-white-icon-rgb.svg @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/rtd-overrides.css b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/rtd-overrides.css new file mode 100644 index 000000000..50c71cdb9 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_static/rtd-overrides.css @@ -0,0 +1,3 @@ +.wy-nav-content { + max-width: 1200px +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_templates/EMPTY b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/_templates/EMPTY new file mode 100644 index 000000000..e69de29bb diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/README.md b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/README.md new file mode 100644 index 000000000..66fd85853 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/README.md @@ -0,0 +1,57 @@ +# DAG Roadmap + +This directory includes a representation of the Numba roadmap in the form of a +DAG. We have done this to enable a highly granular display of enhancements to +Numba that also shows the relationships between these tasks. 
Many tasks have +prerequisites, and we've found that issue trackers, Kanban boards, and +time-bucketed roadmap documentation each fail, in different ways, to +represent this information. + +## Requirements + +``` +conda install jinja2 python-graphviz pyyaml +``` + +## Usage + +``` +./render.py -o dagmap.html dagmap.yaml +``` + +The generated HTML file will look for `jquery.graphviz.svg.js` in the same +directory. + +## Updating the DAG + +Copy one of the existing tasks and edit: + * `label`: text that appears on the node. Embed `\n` for line breaks. + * `id`: Identifier referenced by other tasks to declare a dependency. + * `description`: Shown in the tooltip. Automatically word-wrapped. + * `depends_on`: Optional list of task IDs which this task depends on. + +The `style` section of the file is not used yet. + +## Notes + +The HTML rendering of the graph is based on a slightly modified version of +[jquery.graphviz.svg](https://github.com/mountainstorm/jquery.graphviz.svg/). +Its license is: +``` +Copyright (c) 2015 Mountainstorm +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +``` \ No newline at end of file diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/dagmap.yaml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/dagmap.yaml new file mode 100644 index 000000000..57df7b755 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/dagmap.yaml @@ -0,0 +1,195 @@ +meta: + version: 1 +style: + tags: + performance: + border: red +tasks: + - label: Track allocations in functions + id: track_alloc + description: | + Maintain a list of allocations inside each function which can be used + for freeing things on return, and also for debugging memory usage. + + - label: Catch exceptions + id: catch_exceptions + description: | + Allow exceptions raised in nopython mode to be caught in nopython mode.
+ depends_on: + - track_alloc + + + - label: New IR + id: new_ir + description: | + New intermediate representation for Numba that is backed by a dictionary + + - label: New Type Matching DSL + id: type_matching + description: | + Replace the current DSL for Numba types with something more expressive + that can match type patterns + + - label: Declarative type signatures\nfor @overload/@overload_method + id: declarative_overload + description: | + Replace the current DSL for Numba types with something more expressive\n + that can match type patterns + depends_on: + - type_matching + + - label: Rewrite "old-style" implementations + id: rewrite_old_impls + description: | + Rewrite implementations of functions that use the old extension API that + separates typing from implementation, and often uses the LLVM builder + unnecessarily. + depends_on: + - declarative_overload + - improve_test_suite_tooling + - faster_pr_testing + + - label: Unify and add more test suite tooling + id: improve_test_suite_tooling + description: | + Add tools to help with common patterns in testing and unify the ones we + have, there's no need for 12 spellings of "is this Python 3" Also decide + on "what to test", do all types need testing if inputs are being + "as_array"'d? + + - label: Pipeline pass formalisation + id: pass_formalisation + description: | + Decide on a formal description of a compiler pass and create supporting + code for it + + - label: Array expression fusion pass + id: new_array_expr_fusion_pass + description: + From parfors extract out the array expression fusion pass + depends_on: + - parfors_clean_up + - pass_formalisation + + - label: LICM Pass + id: new_licm_pass + description: | + Create a LICM Pass + depends_on: + - parfors_clean_up + - pass_formalisation + + - label: Clean up Parfors + id: parfors_clean_up + description: | + General clean up and refactoring of parfors ahead of any additional work + + - label: Mode based pipeline + id: mode_based_pipeline + description: | + Switch the jit decorator to use a mode based pipeline with + `nopython=True` equivalent as default. + + - label: Remove object mode fallback + id: remove_objmode_fallback + description: | + Remove the deprecated object mode fallback + depends_on: + - mode_based_pipeline + + - label: Switch to ORC JIT + id: orc_jit + description: | + MCJIT has been deprecated for some time. Need to switch to the newer + ORC JIT class. + + - label: Performance analysis suite + id: perform_analysis_suite + description: | + Meta task for all performance analysis related functionality + depends_on: + - line_profiling + - assembly_analysis_tooling + - vectorisation_analysis + + - label: Vectorisation analysis + id: vectorisation_analysis + description: | + Obtain LLVMs vectorisation reports and present these in a user friendly + manner + + - label: Line profiling + id: line_profiling + description: | + Support collection of profiling statistics from compiled machine code + and map back to lines of Python. 
+ depends_on: + - orc_jit + - assembly_analysis_tooling + + - label: Assembly analysis tooling + id: assembly_analysis_tooling + description: | + Tie generated assembly back to python lines and annotate instruction + quality + depends_on: + - capstone + + - label: Build capstone against llvmdev + id: capstone + description: | + Build capstone against llvmdev and create conda packages/wheels + + - label: Increase JIT class method performance + id: jit_class_method_performance + description: | + Increase the performance of jitclass methods + depends_on: + - llvm_ref_count_pruning + - new_licm_pass + + - label: LLVM level ref count pruning + id: llvm_ref_count_pruning + description: | + Add a LLVM compiler pass to prune refcounts across entire functions + + - label: JITted coverage information + id: jitted_coverage_info + description: | + Work out how to leverage gcov support in LLVM to enable coverage + information + depends_on: + - compiler_rt + + - label: LLVM compiler_rt support + id: compiler_rt + description: | + Work out how to build compiler_rt into LLVM and how to use it in Numba + + - label: Switch to pytest + id: pytest + description: | + Make it possible to use pytest as test runner for Numba + + - label: Option to run modified tests only + id: run_new_tests + description: | + Use / make pytest plugin to detect all test files which are new / + changed relative to a given branch, and run only those tests + depends_on: + - pytest + + - label: Option to run 1/N slice of tests + id: run_test_slice + description: | + Use / make pytest plugin to run 1/N of enumerated tests. + depends_on: + - pytest + + - label: Faster PR testing + id: faster_pr_testing + description: | + Make automated PR testing with public CI services give faster feedback. + depends_on: + - run_new_tests + - run_test_slice diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/jquery.graphviz.svg.js b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/jquery.graphviz.svg.js new file mode 100644 index 000000000..15bea27bd --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/jquery.graphviz.svg.js @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2015 Mountainstorm + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + + + +function ($) { + 'use strict' + + // Cross Browser starts/endsWith support + // ===================================== + String.prototype.startsWith = function(prefix) { + return this.indexOf(prefix) == 0; + }; + + String.prototype.endsWith = function(suffix) { + return this.indexOf(suffix, this.length - suffix.length) !== -1; + }; + + // GRAPHVIZSVG PUBLIC CLASS DEFINITION + // =================================== + + var GraphvizSvg = function (element, options) { + this.type = null + this.options = null + this.enabled = null + this.$element = null + + this.init('graphviz.svg', element, options) + } + + GraphvizSvg.VERSION = '1.0.1' + + GraphvizSvg.GVPT_2_PX = 32.5 // used to ease removal of extra space + + GraphvizSvg.DEFAULTS = { + url: null, + svg: null, + shrink: '0.125pt', + tooltips: { + init: function ($graph) { + var $a = $(this) + $a.tooltip({ + container: $graph, + placement: 'auto left', + animation: false, + viewport: null + }).on('hide.bs.tooltip', function() { + // keep them visible even if you accidentally mouse over + if ($a.attr('data-tooltip-keepvisible')) { + return false + } + }) + }, + show: function () { + var $a = $(this) + $a.attr('data-tooltip-keepvisible', true) + $a.tooltip('show') + }, + hide: function () { + var $a = $(this) + $a.removeAttr('data-tooltip-keepvisible') + $a.tooltip('hide') + }, + update: function () { + var $this = $(this) + if ($this.attr('data-tooltip-keepvisible')) { + $this.tooltip('show') + return + } + } + }, + zoom: true, + highlight: { + selected: function (col, bg) { + return col + }, + unselected: function (col, bg) { + return jQuery.Color(col).transition(bg, 0.9) + } + }, + ready: null + } + + GraphvizSvg.prototype.init = function (type, element, options) { + this.enabled = true + this.type = type + this.$element = $(element) + this.options = this.getOptions(options) + + if (options.url) { + var that = this + $.get(options.url, null, function(data) { + var svg = $("svg", data) + that.$element.html(document.adoptNode(svg[0])) + that.setup() + }, "xml") + } else { + if (options.svg) { + this.$element.html(options.svg) + } + this.setup() + } + } + + GraphvizSvg.prototype.getDefaults = function () { + return GraphvizSvg.DEFAULTS + } + + GraphvizSvg.prototype.getOptions = function (options) { + options = $.extend({}, this.getDefaults(), this.$element.data(), options) + + if (options.shrink) { + if (typeof options.shrink != 'object') { + options.shrink = { + x: options.shrink, + y: options.shrink + } + } + options.shrink.x = this.convertToPx(options.shrink.x) + options.shrink.y = this.convertToPx(options.shrink.y) + } + return options + } + + GraphvizSvg.prototype.setup = function () { + var options = this.options + + // save key elements in the graph for easy access + var $svg = $(this.$element.children('svg')) + var $graph = $svg.children('g:first') + this.$svg = $svg + this.$graph = $graph + this.$background = $graph.children('polygon:first') // might not exist + this.$nodes = $graph.children('.node') + this.$edges = $graph.children('.edge') + this._nodesByName = {} + this._edgesByName = {} + + // add top level class and copy background color to element + this.$element.addClass('graphviz-svg') + if (this.$background.length) { + this.$element.css('background', this.$background.attr('fill')) + } + + // setup all the nodes and edges + var that = this + this.$nodes.each(function () { that.setupNodesEdges($(this), true) }) + this.$edges.each(function () { that.setupNodesEdges($(this), false) }) + + // remove the graph title 
element + var $title = this.$graph.children('title') + this.$graph.attr('data-name', $title.text()) + $title.remove() + + if (options.zoom) { + this.setupZoom() + } + + // tell people we're done + if (options.ready) { + options.ready.call(this) + } + } + + GraphvizSvg.prototype.setupNodesEdges = function ($el, isNode) { + var that = this + var options = this.options + + // save the colors of the paths, ellipses and polygons + $el.find('polygon, ellipse, path').each(function () { + var $this = $(this) + // save original colors + $this.data('graphviz.svg.color', { + fill: $this.attr('fill'), + stroke: $this.attr('stroke') + + }) + + // shrink it if it's a node + if (isNode && options.shrink) { + that.scaleNode($this) + } + }) + + // save the node name and check if theres a comment above; save it + var $title = $el.children('title') + if ($title[0]) { + // remove any compass points: + var title = $title.text().replace(/:[snew][ew]?/g,'') + $el.attr('data-name', title) + $title.remove() + if (isNode) { + this._nodesByName[title] = $el[0] + } else { + this._edgesByName[title] = $el[0] + } + // without a title we can't tell if its a user comment or not + var previousSibling = $el[0].previousSibling + while (previousSibling && previousSibling.nodeType != 8) { + previousSibling = previousSibling.previousSibling + } + if (previousSibling != null && previousSibling.nodeType == 8) { + var htmlDecode = function (input) { + var e = document.createElement('div') + e.innerHTML = input + return e.childNodes[0].nodeValue + } + var value = htmlDecode(previousSibling.nodeValue.trim()) + if (value != title) { + // user added comment + $el.attr('data-comment', value) + } + } + } + + // remove namespace from a[xlink:title] + $el.find('a').filter(function () { + return $(this).attr('xlink:title') }).each(function () { + var $a = $(this) + $a.attr('title', $a.attr('xlink:title')) + $a.removeAttr('xlink:title') + if (options.tooltips) { + options.tooltips.init.call(this, that.$element) + } + }) + } + + GraphvizSvg.prototype.setupZoom = function() { + var that = this + var $element = this.$element + var $svg = this.$svg + this.zoom = {width: $svg.attr('width'), height: $svg.attr('height'), percentage: null } + this.scaleView(100.0) + $element.mousewheel(function (evt) { + if (evt.shiftKey) { + var percentage = that.zoom.percentage + percentage -= evt.deltaY * evt.deltaFactor + if (percentage < 100.0) { + percentage = 100.0 + } + // get pointer offset in view + // ratio offset within svg + var dx = evt.pageX - $svg.offset().left + var dy = evt.pageY - $svg.offset().top + var rx = dx / $svg.width() + var ry = dy / $svg.height() + + // offset within frame ($element) + var px = evt.pageX - $element.offset().left + var py = evt.pageY - $element.offset().top + + that.scaleView(percentage) + // scroll so pointer is still in same place + $element.scrollLeft((rx * $svg.width()) + 0.5 - px) + $element.scrollTop((ry * $svg.height()) + 0.5 - py) + return false // stop propagation + } + }) + } + + GraphvizSvg.prototype.scaleView = function(percentage) { + var that = this + var $svg = this.$svg + $svg.attr('width', percentage + '%') + $svg.attr('height', percentage + '%') + this.zoom.percentage = percentage + // now callback to update tooltip position + var $everything = this.$nodes.add(this.$edges) + $everything.children('a[title]').each(function () { + that.options.tooltips.update.call(this) + }) + } + + GraphvizSvg.prototype.scaleNode = function($node) { + var dx = this.options.shrink.x + var dy = this.options.shrink.y + var 
tagName = $node.prop('tagName') + if (tagName == 'ellipse') { + $node.attr('rx', parseFloat($node.attr('rx')) - dx) + $node.attr('ry', parseFloat($node.attr('ry')) - dy) + } else if (tagName == 'polygon') { + // this is more complex - we need to scale it manually + var bbox = $node[0].getBBox() + var cx = bbox.x + (bbox.width / 2) + var cy = bbox.y + (bbox.height / 2) + var pts = $node.attr('points').split(' ') + var points = '' // new value + for (var i in pts) { + var xy = pts[i].split(',') + var ox = parseFloat(xy[0]) + var oy = parseFloat(xy[1]) + points += (((cx - ox) / (bbox.width / 2) * dx) + ox) + + ',' + + (((cy - oy) / (bbox.height / 2) * dy) + oy) + + ' ' + } + $node.attr('points', points) + } + } + + GraphvizSvg.prototype.convertToPx = function (val) { + var retval = val + if (typeof val == 'string') { + var end = val.length + var factor = 1.0 + if (val.endsWith('px')) { + end -= 2 + } else if (val.endsWith('pt')) { + end -= 2 + factor = GraphvizSvg.GVPT_2_PX + } + retval = parseFloat(val.substring(0, end)) * factor + } + return retval + } + + GraphvizSvg.prototype.findEdge = function (nodeName, testEdge, $retval) { + var retval = [] + for (var name in this._edgesByName) { + var match = testEdge(nodeName, name) + if (match) { + if ($retval) { + $retval.push(this._edgesByName[name]) + } + retval.push(match) + } + } + return retval + } + + GraphvizSvg.prototype.findLinked = function (node, includeEdges, testEdge, $retval) { + var that = this + var $node = $(node) + var $edges = null + if (includeEdges) { + $edges = $retval + } + var names = this.findEdge($node.attr('data-name'), testEdge, $edges) + for (var i in names) { + var n = this._nodesByName[names[i]] + if (!$retval.is(n)) { + $retval.push(n) + that.findLinked(n, includeEdges, testEdge, $retval) + } + } + } + + GraphvizSvg.prototype.colorElement = function ($el, getColor) { + var bg = this.$element.css('background') + $el.find('polygon, ellipse, path').each(function() { + var $this = $(this) + var color = $this.data('graphviz.svg.color') + if (color.fill && $this.prop('tagName') != 'path') { + $this.attr('fill', getColor(color.fill, bg)) // don't set fill if it's a path + } + if (color.stroke) { + $this.attr('stroke', getColor(color.stroke, bg)) + } + }) + } + + GraphvizSvg.prototype.restoreElement = function ($el) { + $el.find('polygon, ellipse, path').each(function() { + var $this = $(this) + var color = $this.data('graphviz.svg.color') + if (color.fill) { + $this.attr('fill', color.fill) // don't set fill if it's a path + } + if (color.stroke) { + $this.attr('stroke', color.stroke) + } + }) + } + + + // methods users can actually call + GraphvizSvg.prototype.nodes = function () { + return this.$nodes + } + + GraphvizSvg.prototype.edges = function () { + return this.$edges + } + + GraphvizSvg.prototype.nodesByName = function () { + return this._nodesByName + } + + GraphvizSvg.prototype.edgesByName = function () { + return this._edgesByName + } + + GraphvizSvg.prototype.linkedTo = function (node, includeEdges) { + var $retval = $() + this.findLinked(node, includeEdges, function (nodeName, edgeName) { + var other = null; + var match = '->' + nodeName + if (edgeName.endsWith(match)) { + other = edgeName.substring(0, edgeName.length - match.length); + } + return other; + }, $retval) + return $retval + } + + GraphvizSvg.prototype.linkedFrom = function (node, includeEdges) { + var $retval = $() + this.findLinked(node, includeEdges, function (nodeName, edgeName) { + var other = null; + var match = nodeName + '->' + if 
(edgeName.startsWith(match)) { + other = edgeName.substring(match.length); + } + return other; + }, $retval) + return $retval + } + + GraphvizSvg.prototype.linked = function (node, includeEdges) { + var $retval = $() + this.findLinked(node, includeEdges, function (nodeName, edgeName) { + return '^' + name + '--(.*)$' + }, $retval) + this.findLinked(node, includeEdges, function (nodeName, edgeName) { + return '^(.*)--' + name + '$' + }, $retval) + return $retval + } + + GraphvizSvg.prototype.tooltip = function ($elements, show) { + var that = this + var options = this.options + $elements.each(function () { + $(this).find('a[title]').each(function () { + if (show) { + options.tooltips.show.call(this) + } else { + options.tooltips.hide.call(this) + } + }) + }) + } + + GraphvizSvg.prototype.bringToFront = function ($elements) { + $elements.detach().appendTo(this.$graph) + } + + GraphvizSvg.prototype.sendToBack = function ($elements) { + if (this.$background.length) { + $element.insertAfter(this.$background) + } else { + $elements.detach().prependTo(this.$graph) + } + } + + GraphvizSvg.prototype.highlight = function ($nodesEdges, tooltips) { + var that = this + var options = this.options + var $everything = this.$nodes.add(this.$edges) + if ($nodesEdges && $nodesEdges.length > 0) { + // create set of all other elements and dim them + $everything.not($nodesEdges).each(function () { + that.colorElement($(this), options.highlight.unselected) + $(this).css('font-weight', 'normal') + that.tooltip($(this)) + }) + $nodesEdges.each(function () { + that.colorElement($(this), options.highlight.selected) + $(this).css('font-weight', 'normal') + }) + + this.tooltip($nodesEdges, tooltips) + } else { + $everything.each(function () { + that.restoreElement($(this)) + $(this).css('font-weight', 'normal') + }) + this.tooltip($everything) + } + } + + GraphvizSvg.prototype.destroy = function () { + var that = this + this.hide(function () { + that.$element.off('.' 
+ that.type).removeData(that.type) + }) + } + + + // GRAPHVIZSVG PLUGIN DEFINITION + // ============================= + + function Plugin(option) { + return this.each(function () { + var $this = $(this) + var data = $this.data('graphviz.svg') + var options = typeof option == 'object' && option + + if (!data && /destroy/.test(option)) return + if (!data) $this.data('graphviz.svg', (data = new GraphvizSvg(this, options))) + if (typeof option == 'string') data[option]() + }) + } + + var old = $.fn.graphviz + + $.fn.graphviz = Plugin + $.fn.graphviz.Constructor = GraphvizSvg + + + // GRAPHVIZ NO CONFLICT + // ==================== + + $.fn.graphviz.noConflict = function () { + $.fn.graphviz = old + return this + } + +}(jQuery) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/render.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/render.py new file mode 100644 index 000000000..28791e0b7 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/render.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +import os.path +import json +import collections +import yaml +import graphviz +from jinja2 import Environment, FileSystemLoader + + +Dagmap = collections.namedtuple('Dagmap', + ['version', 'meta', 'style', 'tasks']) + + +def parse_yaml(filename): + with open(filename, 'r') as f: + contents = yaml.safe_load(f) + + meta = contents['meta'] + version = meta['version'] + if version > 1: + raise Exception('Unsupported version %d' % version) + del meta['version'] + + style = contents['style'] + tasks = contents['tasks'] + if not isinstance(tasks, list): + raise Exception('"tasks" must be a list') + + return Dagmap(version=version, meta=meta, style=style, tasks=tasks) + + +def to_graphviz(dagmap): + G = graphviz.Digraph(format='svg', engine='neato', + graph_attr=dict(bgcolor="#f4f4f4", pad="0.5", overlap="false"), + node_attr=dict(width="0.6", style="filled", + fillcolor="#83c6de", color="#83c6de", penwidth="3", label="", + fontname="helvetica Neue Ultra Light", fontsize="28"), + edge_attr=dict(color="#616a72", arrowsize="2.0", penwidth="4", fontname="helvetica Neue Ultra Light")) + + G.node(name='_nothing', label='', style='invis') + + for task in dagmap.tasks: + G.node(name=task['id'], label=task['label'], + tooltip=task['description'].strip()) + depends_on = task.get('depends_on', ['_nothing']) + for dep in depends_on: + if dep == '_nothing': + attrs = { + 'style': 'invis', + } + else: + attrs = {} + G.edge(dep, task['id'], **attrs) + + return G + + +def main(argv): + import argparse + parser = argparse.ArgumentParser(description='Render Dagmap to Graphviz') + parser.add_argument('-o', '--output', required=True, help='output svg filename') + parser.add_argument('-t', '--template', default='template.html', help='HTML rendering template') + parser.add_argument('input', metavar='INPUT', type=str, + help='YAML input filename') + + args = parser.parse_args(argv[1:]) + + dagmap = parse_yaml(args.input) + graph = to_graphviz(dagmap) + svg = graph.pipe().decode('utf-8') + + template_env = Environment(loader=FileSystemLoader(os.path.dirname(__file__))) + template = template_env.get_template(args.template) + html = template.render(svg=json.dumps(svg)) + + with open(args.output, 'w') as f: + f.write(html) + + return 0 + + +if __name__ == '__main__': + import sys + sys.exit(main(sys.argv)) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/template.html b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/template.html new file mode 100644 index 
000000000..0a634757c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/dagmap/template.html @@ -0,0 +1,110 @@
+ Click node to highlight; Shift-scroll to zoom; Esc to unhighlight
+ Details
+ (Click on a node for details)
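The strings kept above are the user-facing hints from `template.html`; the page itself is what `render.py` fills in, passing the rendered SVG to the jQuery plugin defined earlier. A minimal sketch of that wiring, where the `#graph` id, the handlers, and all markup are illustrative assumptions rather than the shipped template:

```html
<!-- Hypothetical wiring, for illustration only -->
<div id="graph"></div>
<script>
  // render.py injects the SVG text via template.render(svg=json.dumps(svg)),
  // so {{ svg }} expands to a JavaScript string literal.
  var svg = {{ svg }};
  $('#graph').graphviz({
    svg: svg,  // use the pre-rendered SVG rather than fetching a URL
    ready: function () {
      var gv = this;
      // Click: highlight the node plus everything reachable from it
      gv.nodes().click(function () {
        var $set = $(this).add(gv.linkedFrom(this, true));
        gv.highlight($set, true);
        gv.bringToFront($set);
      });
      // Esc: clear the highlight (matches the hint text above)
      $(document).keydown(function (evt) {
        if (evt.keyCode === 27) gv.highlight();
      });
    }
  });
</script>
```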
+ + + + + + + + + + + + + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/environment.yml b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/environment.yml new file mode 100644 index 000000000..8028a3366 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/environment.yml @@ -0,0 +1,16 @@ +# This environment is used by the RTD config for PR builds. RTD uses this as the +# base environment and then adds in the sphinx etc tools on top. +# See: https://docs.readthedocs.io/en/stable/guides/conda.html +name: rtd +channels: + - numba/label/dev +dependencies: + - python=3.7 + - llvmlite=0.39 + - numpy + - numpydoc + - setuptools + # https://stackoverflow.com/questions/67542699/readthedocs-sphinx-not-rendering-bullet-list-from-rst-fileA + - docutils==0.16 + # The following is needed to fix RTD. + - conda diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/gh-pages.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/gh-pages.py new file mode 100644 index 000000000..3c8093d06 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/gh-pages.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +"""Script to commit the doc build outputs into the github-pages repo. + +Use: + + gh-pages.py [tag] + +If no tag is given, the current output of 'git describe' is used. If given, +that is how the resulting directory will be named. + +In practice, you should use either actual clean tags from a current build or +something like 'current' as a stable URL for the most current version of the """ +from __future__ import print_function, division, absolute_import + +#----------------------------------------------------------------------------- +# Imports +#----------------------------------------------------------------------------- +import os +import re +import shutil +import sys +from os import chdir as cd +from os.path import join as pjoin + +from subprocess import Popen, PIPE, CalledProcessError, check_call + +#----------------------------------------------------------------------------- +# Globals +#----------------------------------------------------------------------------- + +pages_dir = 'gh-pages' +html_dir = '_build/html' +pdf_dir = '_build/latex' +pages_repo = 'git@github.com:numba/numba-doc.git' + +#----------------------------------------------------------------------------- +# Functions +#----------------------------------------------------------------------------- +def sub_environment(): + """Return an environment dict for executing subcommands in.""" + env = os.environ.copy() + # Force untranslated messages for regex matching + env['LANG'] = 'C' + return env + + +def sh(cmd): + """Execute command in a subshell, return status code.""" + return check_call(cmd, shell=True, env=sub_environment()) + + +def sh2(cmd): + """Execute command in a subshell, return stdout. 
+ + Stderr is unbuffered from the subshell.x""" + p = Popen(cmd, stdout=PIPE, shell=True, env=sub_environment()) + out = p.communicate()[0] + retcode = p.returncode + if retcode: + raise CalledProcessError(retcode, cmd) + else: + return out.rstrip() + + +def sh3(cmd): + """Execute command in a subshell, return stdout, stderr + + If anything appears in stderr, print it out to sys.stderr""" + p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True, + env=sub_environment()) + out, err = p.communicate() + retcode = p.returncode + if retcode: + raise CalledProcessError(retcode, cmd) + else: + return out.rstrip(), err.rstrip() + + +def init_repo(path): + """clone the gh-pages repo if we haven't already.""" + sh("git clone %s %s"%(pages_repo, path)) + here = os.getcwd() + cd(path) + sh('git checkout gh-pages') + cd(here) + +#----------------------------------------------------------------------------- +# Script starts +#----------------------------------------------------------------------------- +if __name__ == '__main__': + # The tag can be given as a positional argument + try: + tag = sys.argv[1] + except IndexError: + try: + tag = sh2('git describe --exact-match').decode() + except CalledProcessError: + tag = "dev" # Fallback + print("Using dev") + + startdir = os.getcwd() + if not os.path.exists(pages_dir): + # init the repo + init_repo(pages_dir) + else: + # ensure up-to-date before operating + cd(pages_dir) + sh('git checkout gh-pages') + sh('git pull') + cd(startdir) + + dest = pjoin(pages_dir, tag) + + # don't `make html` here, because gh-pages already depends on html in Makefile + # sh('make html') + if tag != 'dev': + # only build pdf for non-dev targets + #sh2('make pdf') + pass + + # This is pretty unforgiving: we unconditionally nuke the destination + # directory, and then copy the html tree in there + shutil.rmtree(dest, ignore_errors=True) + shutil.copytree(html_dir, dest) + if tag != 'dev': + #shutil.copy(pjoin(pdf_dir, 'ipython.pdf'), pjoin(dest, 'ipython.pdf')) + pass + + try: + cd(pages_dir) + status = sh2('git status | head -1').decode() + branch = re.match('\#?\s*On branch (.*)$', status).group(1) + if branch != 'gh-pages': + e = 'On %r, git branch is %r, MUST be "gh-pages"' % (pages_dir, + branch) + raise RuntimeError(e) + + sh('git add -A %s' % tag) + sh('git commit -m"Updated doc release: %s"' % tag) + print() + print('Most recent 3 commits:') + sys.stdout.flush() + sh('git --no-pager log --oneline HEAD~3..') + finally: + cd(startdir) + + print() + print('Now verify the build in: %r' % dest) + print("If everything looks good, 'git push'") diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/make.bat b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/make.bat new file mode 100644 index 000000000..29b481d88 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/make.bat @@ -0,0 +1,242 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +set I18NSPHINXOPTS=%SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. 
pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Numba.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Numba.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %BUILDDIR%/.. + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/requirements.txt b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/requirements.txt new file mode 100644 index 000000000..66f88aae4 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/requirements.txt @@ -0,0 +1 @@ +numpydoc \ No newline at end of file diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/_ext/ghfiles.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/_ext/ghfiles.py new file mode 100644 index 000000000..d0320cb3a --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/_ext/ghfiles.py @@ -0,0 +1,75 @@ +import os.path as path +import subprocess +import shlex +from sphinx.util import logging +from docutils import nodes +logger = logging.getLogger(__name__) + + +# use an old git trick, to get the top-level, could have used ../ etc.. but +# this will be fine.. 
+top = subprocess.check_output(shlex.split( + "git rev-parse --show-toplevel")).strip().decode("utf-8") + + +def make_ref(text): + """ Make hyperlink to Github """ + full_path = path.join(top, text) + if path.isfile(full_path): + ref = "https://www.github.com/numba/numba/blob/main/" + text + elif path.isdir(full_path): + ref = "https://www.github.com/numba/numba/tree/main/" + text + else: + logger.warn("Failed to find file in repomap: " + text) + ref = "https://www.github.com/numba/numba" + return ref + + +def intersperse(lst, item): + """ Insert item between each item in lst. + + Copied under CC-BY-SA from stackoverflow at: + + https://stackoverflow.com/questions/5920643/ + add-an-item-between-each-item-already-in-the-list + + """ + result = [item] * (len(lst) * 2 - 1) + result[0::2] = lst + return result + + +def ghfile_role(name, rawtext, text, lineno, inliner, options={}, content=[]): + """ Emit hyperlink nodes for a given file in repomap. """ + my_nodes = [] + if "{" in text: # myfile.{c,h} - make two nodes + # could have used regexes, but this will be fine.. + base = text[:text.find(".") + 1] + exts = text[text.find("{") + 1:text.find("}")].split(",") + for e in exts: + node = nodes.reference(rawtext, + base + e, + refuri=make_ref(base + e), + **options) + my_nodes.append(node) + elif "*" in text: # path/*_files.py - link to directory + # Could have used something from os.path, but this will be fine.. + ref = path.dirname(text) + path.sep + node = nodes.reference(rawtext, text, refuri=make_ref(ref), **options) + my_nodes.append(node) + else: # everything else is taken verbatim + node = nodes.reference(rawtext, text, refuri=make_ref(text), **options) + my_nodes.append(node) + + # insert separators if needed + if len(my_nodes) > 1: + my_nodes = intersperse(my_nodes, nodes.Text(" | ")) + return my_nodes, [] + + +def setup(app): + logger.info('Initializing ghfiles plugin') + app.add_role('ghfile', ghfile_role) + + metadata = {'parallel_read_safe': True, 'parallel_write_safe': True} + return metadata diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/conf.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/conf.py new file mode 100644 index 000000000..f81d55abd --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/conf.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Numba documentation build configuration file, created by +# sphinx-quickstart on Tue Dec 30 11:55:40 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+try: + # Numba is installed + import numba +except ImportError: + # Numba is run from its source checkout + sys.path.insert(0, os.path.abspath('../..')) + import numba + + +on_rtd = os.environ.get('READTHEDOCS') == 'True' + +if on_rtd: + # The following is needed to fix RTD issue with numpydoc + # https://github.com/readthedocs/sphinx_rtd_theme/issues/766 + from conda.cli.python_api import run_command as conda_cmd + + conda_cmd("install", "-c", "conda-forge", "sphinx_rtd_theme>=0.5.1", "-y") + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + #'sphinx.ext.mathjax', + 'sphinx.ext.autodoc', + #'sphinx.ext.graphviz', + 'numpydoc', +] + +# Adding the github files extension +sys.path.append(os.path.abspath(os.path.join(".", "_ext"))) +extensions.append('ghfiles') + +todo_include_todos = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['../_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Numba' +copyright = u'2012-2020, Anaconda, Inc. and others' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +version = '.'.join(numba.__version__.split('.')[:2]) +# The full version, including alpha/beta/rc tags. +release = numba.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# All sphinx_rtd_theme options. Default values commented out; uncomment to +# change. 
+html_theme_options = { + 'canonical_url': 'https://numba.readthedocs.io/en/stable/', + # 'logo_only': False, + # 'display_version': True, + # 'prev_next_buttons_location': 'bottom', + 'style_external_links': True, + # 'vcs_pageview_mode': '', + 'style_nav_header_background': '#00A3E0', + # Toc options + 'collapse_navigation': False, + # 'sticky_navigation': True, + # 'navigation_depth': 4, + # 'includehidden': True, + # 'titles_only': False +} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = None + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "../_static/numba-white-icon-rgb.svg" + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = '../_static/numba-blue-icon-rgb.svg' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['../_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'Numbadoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + #'preamble': '', +} + +# Grouping the document tree into LaTeX files. 
List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'numba.tex', u'Numba Documentation', + u'Anaconda', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'numba', 'Numba Documentation', + ['Anaconda'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'Numba', 'Numba Documentation', + 'Anaconda', 'Numba', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# Configuration for intersphinx: refer to the Python standard library +# and the Numpy documentation. 
+intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'numpy': ('https://numpy.org/doc/stable/', None), + 'llvmlite': ('https://llvmlite.readthedocs.io/en/latest/', None), +} + + +# numpydoc options + +# To silence "WARNING: toctree contains reference to nonexisting document" +numpydoc_show_class_members = False + +# -- Custom autogeneration ------------------------------------------------ + + +def _autogenerate(): + from numba.scripts.generate_lower_listing import gen_lower_listing + from numba.misc.help.inspector import write_listings + + basedir = os.path.dirname(__file__) + gen_lower_listing(os.path.join(basedir, + 'developer/autogen_lower_listing.rst')) + + # Run inspector on supported packages + for package in ['builtins', 'math', 'cmath', 'numpy']: + write_listings( + package_name=package, + filename=os.path.join( + basedir, 'developer', 'autogen_{}_listing'.format(package), + ), + output_format='rst', + ) + + +_autogenerate() + + +def setup(app): + app.add_css_file('rtd-overrides.css') diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/host.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/host.rst new file mode 100644 index 000000000..4c2dd0cfb --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/host.rst @@ -0,0 +1,232 @@ +CUDA Host API +============= + +Device Management +----------------- + +Device detection and enquiry +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The following functions are available for querying the available hardware: + +.. autofunction:: numba.cuda.is_available + +.. autofunction:: numba.cuda.detect + +Context management +~~~~~~~~~~~~~~~~~~ + +CUDA Python functions execute within a CUDA context. Each CUDA device in a +system has an associated CUDA context, and Numba presently allows only one context +per thread. For further details on CUDA Contexts, refer to the `CUDA Driver API +Documentation on Context Management +`_ and the +`CUDA C Programming Guide Context Documentation +`_. CUDA Contexts +are instances of the :class:`~numba.cuda.cudadrv.driver.Context` class: + +.. autoclass:: numba.cuda.cudadrv.driver.Context + :members: reset, get_memory_info, push, pop + +The following functions can be used to get or select the context: + +.. autofunction:: numba.cuda.current_context +.. autofunction:: numba.cuda.require_context + +The following functions affect the current context: + +.. autofunction:: numba.cuda.synchronize +.. autofunction:: numba.cuda.close + +Device management +~~~~~~~~~~~~~~~~~ + +Numba maintains a list of supported CUDA-capable devices: + +.. attribute:: numba.cuda.gpus + + An indexable list of supported CUDA devices. This list is indexed by integer + device ID. + +Alternatively, the current device can be obtained: + +.. function:: numba.cuda.gpus.current + + Return the currently-selected device. + +Getting a device through :attr:`numba.cuda.gpus` always provides an instance of +:class:`numba.cuda.cudadrv.devices._DeviceContextManager`, which acts as a +context manager for the selected device: + +.. autoclass:: numba.cuda.cudadrv.devices._DeviceContextManager + +One may also select a context and device or get the current device using the +following three functions: + +.. autofunction:: numba.cuda.select_device +.. autofunction:: numba.cuda.get_current_device +.. autofunction:: numba.cuda.list_devices + +The :class:`numba.cuda.cudadrv.driver.Device` class can be used to enquire about +the functionality of the selected device: + +.. 
class:: numba.cuda.cudadrv.driver.Device + + The device associated with a particular context. + + .. attribute:: compute_capability + + A tuple, *(major, minor)* indicating the supported compute capability. + + .. attribute:: id + + The integer ID of the device. + + .. attribute:: name + + The name of the device (e.g. "GeForce GTX 970"). + + .. attribute:: uuid + + The UUID of the device (e.g. "GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643"). + + .. method:: reset + + Delete the context for the device. This will destroy all memory + allocations, events, and streams created within the context. + + +Compilation +----------- + +Numba provides an entry point for compiling a Python function to PTX without +invoking any of the driver API. This can be useful for: + +- Generating PTX that is to be inlined into other PTX code (e.g. from outside + the Numba / Python ecosystem). +- Generating code when there is no device present. +- Generating code prior to a fork without initializing CUDA. + +.. note:: It is the user's responsibility to manage any ABI issues arising from + the use of compilation to PTX. + +.. autofunction:: numba.cuda.compile_ptx + + +The environment variable ``NUMBA_CUDA_DEFAULT_PTX_CC`` can be set to control +the default compute capability targeted by ``compile_ptx`` - see +:ref:`numba-envvars-gpu-support`. If PTX for the compute capability of the +current device is required, the ``compile_ptx_for_current_device`` function can +be used: + +.. autofunction:: numba.cuda.compile_ptx_for_current_device + + + +Measurement +----------- + +.. _cuda-profiling: + +Profiling +~~~~~~~~~ + +The NVidia Visual Profiler can be used directly on executing CUDA Python code - +it is not a requirement to insert calls to these functions into user code. +However, these functions can be used to allow profiling to be performed +selectively on specific portions of the code. For further information on +profiling, see the `NVidia Profiler User's Guide +`_. + +.. autofunction:: numba.cuda.profile_start +.. autofunction:: numba.cuda.profile_stop +.. autofunction:: numba.cuda.profiling + + +.. _events: + +Events +~~~~~~ + +Events can be used to monitor the progress of execution and to record the +timestamps of specific points being reached. Event creation returns immediately, +and the created event can be queried to determine if it has been reached. For +further information, see the `CUDA C Programming Guide Events section +`_. + +The following functions are used for creating and measuring the time between +events: + +.. autofunction:: numba.cuda.event +.. autofunction:: numba.cuda.event_elapsed_time + +Events are instances of the :class:`numba.cuda.cudadrv.driver.Event` class: + +.. autoclass:: numba.cuda.cudadrv.driver.Event + :members: query, record, synchronize, wait + + +.. _streams: + +Stream Management +----------------- + +Streams allow concurrency of execution on a single device within a given +context. Queued work items in the same stream execute sequentially, but work +items in different streams may execute concurrently. Most operations involving a +CUDA device can be performed asynchronously using streams, including data +transfers and kernel execution. For further details on streams, see the `CUDA C +Programming Guide Streams section +`_. + +Numba defaults to using the legacy default stream as the default stream. 
The +per-thread default stream can be made the default stream by setting the +environment variable ``NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` (see the +:ref:`CUDA Environment Variables section `). +Regardless of this setting, the objects representing the legacy and per-thread +default streams can be constructed using the functions below. + +Streams are instances of :class:`numba.cuda.cudadrv.driver.Stream`: + +.. autoclass:: numba.cuda.cudadrv.driver.Stream + :members: synchronize, auto_synchronize, add_callback, async_done + +To create a new stream: + +.. autofunction:: numba.cuda.stream + +To get the default stream: + +.. autofunction:: numba.cuda.default_stream + +To get the default stream with an explicit choice of whether it is the legacy +or per-thread default stream: + +.. autofunction:: numba.cuda.legacy_default_stream + +.. autofunction:: numba.cuda.per_thread_default_stream + +To construct a Numba ``Stream`` object using a stream allocated elsewhere, the +``external_stream`` function is provided. Note that the lifetime of external +streams must be managed by the user - Numba will not deallocate an external +stream, and the stream must remain valid whilst the Numba ``Stream`` object is +in use. + +.. autofunction:: numba.cuda.external_stream + + +Runtime +------- + +Numba generally uses the Driver API, but it provides a simple wrapper to the +Runtime API so that the version of the runtime in use can be queried. This is +accessed through ``cuda.runtime``, which is an instance of the +:class:`numba.cuda.cudadrv.runtime.Runtime` class: + +.. autoclass:: numba.cuda.cudadrv.runtime.Runtime + :members: get_version, is_supported_version, supported_versions + +Whether the current runtime is officially supported and tested with the current +version of Numba can also be queried: + +.. autofunction:: numba.cuda.is_supported_version diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/index.rst new file mode 100644 index 000000000..771afea19 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/index.rst @@ -0,0 +1,10 @@ +CUDA Python Reference +===================== + +.. toctree:: + + host.rst + kernel.rst + types.rst + memory.rst + libdevice.rst diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/kernel.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/kernel.rst new file mode 100644 index 000000000..d23b2eed4 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/kernel.rst @@ -0,0 +1,586 @@ +CUDA Kernel API +=============== + +Kernel declaration +------------------ + +The ``@cuda.jit`` decorator is used to create a CUDA dispatcher object that can +be configured and launched: + +.. autofunction:: numba.cuda.jit + + +Dispatcher objects +------------------ + +The usual syntax for configuring a Dispatcher with a launch configuration uses +subscripting, with the arguments being as in the following: + +.. code-block:: python + + # func is some function decorated with @cuda.jit + func[griddim, blockdim, stream, sharedmem] + + +The ``griddim`` and ``blockdim`` arguments specify the size of the grid and +thread blocks, and may be either integers or tuples of length up to 3. The +``stream`` parameter is an optional stream on which the kernel will be launched, +and the ``sharedmem`` parameter specifies the size of dynamic shared memory in +bytes. 
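+
+For illustration, a complete launch might look like the following - a minimal
+sketch with a hypothetical kernel that increments each element of an array:
+
+.. code-block:: python
+
+   import numpy as np
+   from numba import cuda
+
+   @cuda.jit
+   def increment_by_one(an_array):
+       pos = cuda.grid(1)
+       if pos < an_array.size:  # guard against out-of-range threads
+           an_array[pos] += 1
+
+   arr = np.zeros(1024)
+   # griddim and blockdim given as plain integers for a 1-D launch
+   increment_by_one[4, 256](arr)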
+
+Subscripting the Dispatcher returns a configuration object that can be called
+with the kernel arguments:
+
+.. code-block:: python
+
+   configured = func[griddim, blockdim, stream, sharedmem]
+   configured(x, y, z)
+
+
+However, it is more idiomatic to configure and call the kernel within a single
+statement:
+
+.. code-block:: python
+
+   func[griddim, blockdim, stream, sharedmem](x, y, z)
+
+This is similar to launch configuration in CUDA C/C++:
+
+.. code-block:: cuda
+
+   func<<<griddim, blockdim, stream, sharedmem>>>(x, y, z)
+
+.. note:: The order of ``stream`` and ``sharedmem`` is reversed in Numba
+   compared to CUDA C/C++.
+
+Dispatcher objects also provide several utility methods for inspection and
+creating a specialized instance:
+
+.. autoclass:: numba.cuda.dispatcher.CUDADispatcher
+   :members: inspect_asm, inspect_llvm, inspect_sass, inspect_types,
+             get_regs_per_thread, specialize, specialized, extensions, forall
+
+
+Intrinsic Attributes and Functions
+----------------------------------
+
+The remainder of the attributes and functions in this section may only be called
+from within a CUDA Kernel.
+
+Thread Indexing
+~~~~~~~~~~~~~~~
+
+.. attribute:: numba.cuda.threadIdx
+
+   The thread indices in the current thread block, accessed through the
+   attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the
+   range from 0 inclusive to the corresponding value of the attribute in
+   :attr:`numba.cuda.blockDim` exclusive.
+
+.. attribute:: numba.cuda.blockIdx
+
+   The block indices in the grid of thread blocks, accessed through the
+   attributes ``x``, ``y``, and ``z``. Each index is an integer spanning the
+   range from 0 inclusive to the corresponding value of the attribute in
+   :attr:`numba.cuda.gridDim` exclusive.
+
+.. attribute:: numba.cuda.blockDim
+
+   The shape of a block of threads, as declared when instantiating the
+   kernel. This value is the same for all threads in a given kernel, even
+   if they belong to different blocks (i.e. each block is "full").
+
+.. attribute:: numba.cuda.gridDim
+
+   The shape of the grid of blocks, accessed through the attributes ``x``,
+   ``y``, and ``z``.
+
+.. attribute:: numba.cuda.laneid
+
+   The thread index in the current warp, as an integer spanning the range
+   from 0 inclusive to :attr:`numba.cuda.warpsize` exclusive.
+
+.. attribute:: numba.cuda.warpsize
+
+   The size in threads of a warp on the GPU. Currently this is always 32.
+
+.. function:: numba.cuda.grid(ndim)
+
+   Return the absolute position of the current thread in the entire
+   grid of blocks. *ndim* should correspond to the number of dimensions
+   declared when instantiating the kernel. If *ndim* is 1, a single integer
+   is returned. If *ndim* is 2 or 3, a tuple of the given number of
+   integers is returned.
+
+   Computation of the first integer is as follows::
+
+      cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
+
+   and is similar for the other two indices, but using the ``y`` and ``z``
+   attributes.
+
+.. function:: numba.cuda.gridsize(ndim)
+
+   Return the absolute size (or shape) in threads of the entire grid of
+   blocks. *ndim* should correspond to the number of dimensions declared when
+   instantiating the kernel.
+
+   Computation of the first integer is as follows::
+
+      cuda.blockDim.x * cuda.gridDim.x
+
+   and is similar for the other two indices, but using the ``y`` and ``z``
+   attributes.
+
+Memory Management
+~~~~~~~~~~~~~~~~~
+
+.. function:: numba.cuda.shared.array(shape, dtype)
+
+   Creates an array in the shared memory space of the CUDA kernel with
+   the given ``shape`` and ``dtype``.
+
+   Returns an array with its content uninitialized.
+
+   .. note:: All threads in the same thread block see the same array.
+
+.. function:: numba.cuda.local.array(shape, dtype)
+
+   Creates an array in the local memory space of the CUDA kernel with the
+   given ``shape`` and ``dtype``.
+
+   Returns an array with its content uninitialized.
+
+   .. note:: Each thread sees a unique array.
+
+.. function:: numba.cuda.const.array_like(ary)
+
+   Copies the ``ary`` into constant memory space on the CUDA kernel at compile
+   time.
+
+   Returns an array like the ``ary`` argument.
+
+   .. note:: All threads and blocks see the same array.
+
+Synchronization and Atomic Operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. function:: numba.cuda.atomic.add(array, idx, value)
+
+   Perform ``array[idx] += value``. Supports int32, int64, float32 and
+   float64 only. The ``idx`` argument can be an integer or a tuple of integer
+   indices for indexing into multi-dimensional arrays. The number of elements
+   in ``idx`` must match the number of dimensions of ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.sub(array, idx, value)
+
+   Perform ``array[idx] -= value``. Supports int32, int64, float32 and
+   float64 only. The ``idx`` argument can be an integer or a tuple of integer
+   indices for indexing into multi-dimensional arrays. The number of elements
+   in ``idx`` must match the number of dimensions of ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.and_(array, idx, value)
+
+   Perform ``array[idx] &= value``. Supports int32, uint32, int64,
+   and uint64 only. The ``idx`` argument can be an integer or a tuple of
+   integer indices for indexing into multi-dimensional arrays. The number
+   of elements in ``idx`` must match the number of dimensions of ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.or_(array, idx, value)
+
+   Perform ``array[idx] |= value``. Supports int32, uint32, int64,
+   and uint64 only. The ``idx`` argument can be an integer or a tuple of
+   integer indices for indexing into multi-dimensional arrays. The number
+   of elements in ``idx`` must match the number of dimensions of ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.xor(array, idx, value)
+
+   Perform ``array[idx] ^= value``. Supports int32, uint32, int64,
+   and uint64 only. The ``idx`` argument can be an integer or a tuple of
+   integer indices for indexing into multi-dimensional arrays. The number
+   of elements in ``idx`` must match the number of dimensions of ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.exch(array, idx, value)
+
+   Perform ``array[idx] = value``. Supports int32, uint32, int64,
+   and uint64 only. The ``idx`` argument can be an integer or a tuple of
+   integer indices for indexing into multi-dimensional arrays. The number
+   of elements in ``idx`` must match the number of dimensions of ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.inc(array, idx, value)
+
+   Perform ``array[idx] = (0 if array[idx] >= value else array[idx] + 1)``.
+   Supports uint32 and uint64 only. The ``idx`` argument can be an integer
+   or a tuple of integer indices for indexing into multi-dimensional arrays.
+   The number of elements in ``idx`` must match the number of dimensions of
+   ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.dec(array, idx, value)
+
+   Perform ``array[idx] =
+   (value if (array[idx] == 0) or (array[idx] > value) else array[idx] - 1)``.
+   Supports uint32 and uint64 only. The ``idx`` argument can be an integer
+   or a tuple of integer indices for indexing into multi-dimensional arrays.
+   The number of elements in ``idx`` must match the number of dimensions of
+   ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+.. function:: numba.cuda.atomic.max(array, idx, value)
+
+   Perform ``array[idx] = max(array[idx], value)``. Supports int32, int64,
+   float32 and float64 only. The ``idx`` argument can be an integer or a
+   tuple of integer indices for indexing into multi-dimensional arrays.
+   The number of elements in ``idx`` must match the number of dimensions of
+   ``array``.
+
+   Returns the value of ``array[idx]`` before storing the new value.
+   Behaves like an atomic load.
+
+
+.. function:: numba.cuda.syncthreads
+
+   Synchronize all threads in the same thread block. This function implements
+   the same pattern as barriers in traditional multi-threaded programming: this
+   function waits until all threads in the block call it, at which point it
+   returns control to all its callers.
+
+.. function:: numba.cuda.syncthreads_count(predicate)
+
+   An extension to :attr:`numba.cuda.syncthreads` where the return value is a
+   count of the threads where ``predicate`` is true.
+
+.. function:: numba.cuda.syncthreads_and(predicate)
+
+   An extension to :attr:`numba.cuda.syncthreads` where 1 is returned if
+   ``predicate`` is true for all threads or 0 otherwise.
+
+.. function:: numba.cuda.syncthreads_or(predicate)
+
+   An extension to :attr:`numba.cuda.syncthreads` where 1 is returned if
+   ``predicate`` is true for any thread or 0 otherwise.
+
+   .. warning:: All syncthreads functions must be called by every thread in the
+      thread-block. Failing to do so may result in undefined behavior.
+
+
+Cooperative Groups
+~~~~~~~~~~~~~~~~~~
+
+.. function:: numba.cuda.cg.this_grid()
+
+   Get the current grid group.
+
+   :return: The current grid group
+   :rtype: numba.cuda.cg.GridGroup
+
+.. class:: numba.cuda.cg.GridGroup
+
+   A grid group. Users should not construct a GridGroup directly - instead, get
+   the current grid group using :func:`cg.this_grid() <numba.cuda.cg.this_grid>`.
+
+   .. method:: sync()
+
+      Synchronize the current grid group.
+
+
+Memory Fences
+~~~~~~~~~~~~~
+
+The memory fences are used to guarantee that the effects of memory operations
+are visible to other threads within the same thread-block, the same GPU device,
+and the same system (across GPUs on global memory). Memory loads and stores
+are guaranteed not to move across the memory fences by optimization passes.
+
+.. warning:: The memory fences are considered to be an advanced API and most
+   use cases should use the thread barrier (e.g. ``syncthreads()``).
+
+
+.. function:: numba.cuda.threadfence
+
+   A memory fence at device level (within the GPU).
+
+.. function:: numba.cuda.threadfence_block
+
+   A memory fence at thread block level.
+
+.. function:: numba.cuda.threadfence_system
+
+   A memory fence at system level (across GPUs).
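+
+As an illustrative sketch (not part of the API reference above), the classic
+"last block" pattern combines an atomic counter with a device-level fence so
+that one block can safely act on results written by all the others. The kernel
+and array names here are hypothetical:
+
+.. code-block:: python
+
+   from numba import cuda
+
+   @cuda.jit
+   def finish_flag(partial, counter, done):
+       # Hypothetical per-block work: thread 0 of each block publishes a result.
+       if cuda.threadIdx.x == 0:
+           partial[cuda.blockIdx.x] = cuda.blockIdx.x * 2.0
+           # Make the write to partial visible device-wide before the
+           # counter increment below can be observed by other blocks.
+           cuda.threadfence()
+           # counter is a one-element array zero-initialized on the host.
+           ticket = cuda.atomic.add(counter, 0, 1)
+           if ticket == cuda.gridDim.x - 1:
+               done[0] = 1  # the last block to arrive sets the flag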
+
+Warp Intrinsics
+~~~~~~~~~~~~~~~
+
+The argument ``membermask`` is a 32 bit integer mask with each bit
+corresponding to a thread in the warp, with 1 meaning the thread is in the
+subset of threads within the function call. The ``membermask`` must be all
+ones if the GPU compute capability is below 7.x.
+
+.. function:: numba.cuda.syncwarp(membermask)
+
+   Synchronize a masked subset of the threads in a warp.
+
+.. function:: numba.cuda.all_sync(membermask, predicate)
+
+   If the ``predicate`` is true for all threads in the masked warp, then
+   a non-zero value is returned, otherwise 0 is returned.
+
+.. function:: numba.cuda.any_sync(membermask, predicate)
+
+   If the ``predicate`` is true for any thread in the masked warp, then
+   a non-zero value is returned, otherwise 0 is returned.
+
+.. function:: numba.cuda.eq_sync(membermask, predicate)
+
+   If the boolean ``predicate`` is the same for all threads in the masked warp,
+   then a non-zero value is returned, otherwise 0 is returned.
+
+.. function:: numba.cuda.ballot_sync(membermask, predicate)
+
+   Returns a mask of all threads in the warp whose ``predicate`` is true,
+   and which are within the given mask.
+
+.. function:: numba.cuda.shfl_sync(membermask, value, src_lane)
+
+   Shuffles ``value`` across the masked warp and returns the ``value``
+   from ``src_lane``. If this is outside the warp, then the
+   given ``value`` is returned.
+
+.. function:: numba.cuda.shfl_up_sync(membermask, value, delta)
+
+   Shuffles ``value`` across the masked warp and returns the ``value``
+   from ``laneid - delta``. If this is outside the warp, then the
+   given ``value`` is returned.
+
+.. function:: numba.cuda.shfl_down_sync(membermask, value, delta)
+
+   Shuffles ``value`` across the masked warp and returns the ``value``
+   from ``laneid + delta``. If this is outside the warp, then the
+   given ``value`` is returned.
+
+.. function:: numba.cuda.shfl_xor_sync(membermask, value, lane_mask)
+
+   Shuffles ``value`` across the masked warp and returns the ``value``
+   from ``laneid ^ lane_mask``.
+
+.. function:: numba.cuda.match_any_sync(membermask, value, lane_mask)
+
+   Returns a mask of threads that have the same ``value`` as the given
+   ``value`` from within the masked warp.
+
+.. function:: numba.cuda.match_all_sync(membermask, value, lane_mask)
+
+   Returns a tuple of ``(mask, pred)``, where ``mask`` is a mask of the
+   threads in the masked warp that have the same ``value`` as the given
+   ``value`` if all of them do, and 0 otherwise; ``pred`` is a boolean
+   indicating whether all threads in the masked warp have the same ``value``.
+
+.. function:: numba.cuda.activemask()
+
+   Returns a 32-bit integer mask of all currently active threads in the
+   calling warp. The Nth bit is set if the Nth lane in the warp is active when
+   activemask() is called. Inactive threads are represented by 0 bits in the
+   returned mask. Threads which have exited the kernel are always marked as
+   inactive.
+
+.. function:: numba.cuda.lanemask_lt()
+
+   Returns a 32-bit integer mask of all lanes (including inactive ones) with
+   ID less than the current lane.
+
+
+Integer Intrinsics
+~~~~~~~~~~~~~~~~~~
+
+A subset of the CUDA Math API's integer intrinsics is available. For further
+documentation, including semantics, please refer to the `CUDA Toolkit
+documentation
+`_.
+
+
+.. function:: numba.cuda.popc(x)
+
+   Returns the number of bits set in ``x``.
+
+.. function:: numba.cuda.brev(x)
+
+   Returns the reverse of the bit pattern of ``x``. For example, ``0b10110110``
+   becomes ``0b01101101``.
+
+.. function:: numba.cuda.clz(x)
+
+   Returns the number of leading zeros in ``x``.
+
+.. function:: numba.cuda.ffs(x)
+
+   Returns the position of the first (least significant) bit set to 1 in
+   ``x``, where the least significant bit position is 1. ``ffs(0)`` returns 0.
+
+
+Floating Point Intrinsics
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A subset of the CUDA Math API's floating point intrinsics is available. For
+further documentation, including semantics, please refer to the `single
+`_ and
+`double `_
+precision parts of the CUDA Toolkit documentation.
+
+
+.. function:: numba.cuda.fma
+
+   Perform the fused multiply-add operation. Named after the ``fma`` and
+   ``fmaf`` functions in the C API, but maps to the ``fma.rn.f32`` and
+   ``fma.rn.f64`` (round-to-nearest-even) PTX instructions.
+
+.. function:: numba.cuda.cbrt(x)
+
+   Perform the cube root operation, ``x ** (1/3)``. Named after the functions
+   ``cbrt`` and ``cbrtf`` in the C API. Supports float32 and float64
+   arguments only.
+
+16-bit Floating Point Intrinsics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following functions are used to operate on 16-bit floating point operands.
+These functions return a 16-bit floating point result.
+
+
+.. function:: numba.cuda.fp16.hfma(a, b, c)
+
+   Perform the fused multiply-add operation ``(a * b) + c`` on 16-bit
+   floating point arguments in round to nearest mode. Maps to the
+   ``fma.rn.f16`` PTX instruction.
+
+   Returns the 16-bit floating point result of the fused multiply-add.
+
+.. function:: numba.cuda.fp16.hadd(a, b)
+
+   Perform the add operation ``a + b`` on 16-bit floating point arguments in
+   round to nearest mode. Maps to the ``add.f16`` PTX instruction.
+
+   Returns the 16-bit floating point result of the addition.
+
+.. function:: numba.cuda.fp16.hsub(a, b)
+
+   Perform the subtract operation ``a - b`` on 16-bit floating point
+   arguments in round to nearest mode. Maps to the ``sub.f16`` PTX
+   instruction.
+
+   Returns the 16-bit floating point result of the subtraction.
+
+.. function:: numba.cuda.fp16.hmul(a, b)
+
+   Perform the multiply operation ``a * b`` on 16-bit floating point
+   arguments in round to nearest mode. Maps to the ``mul.f16`` PTX
+   instruction.
+
+   Returns the 16-bit floating point result of the multiplication.
+
+.. function:: numba.cuda.fp16.hneg(a)
+
+   Perform the negation operation ``-a`` on the 16-bit floating point
+   argument. Maps to the ``neg.f16`` PTX instruction.
+
+   Returns the 16-bit floating point result of the negation.
+
+.. function:: numba.cuda.fp16.habs(a)
+
+   Perform the absolute value operation ``|a|`` on the 16-bit floating point
+   argument.
+
+   Returns the 16-bit floating point result of the absolute value operation.
+
+.. function:: numba.cuda.fp16.heq(a, b)
+
+   Perform the comparison operation ``a == b`` on 16-bit floating point
+   arguments.
+
+   Returns a boolean.
+
+.. function:: numba.cuda.fp16.hne(a, b)
+
+   Perform the comparison operation ``a != b`` on 16-bit floating point
+   arguments.
+
+   Returns a boolean.
+
+.. function:: numba.cuda.fp16.hgt(a, b)
+
+   Perform the comparison operation ``a > b`` on 16-bit floating point
+   arguments.
+
+   Returns a boolean.
+
+.. function:: numba.cuda.fp16.hge(a, b)
+
+   Perform the comparison operation ``a >= b`` on 16-bit floating point
+   arguments.
+
+   Returns a boolean.
+
+.. function:: numba.cuda.fp16.hlt(a, b)
+
+   Perform the comparison operation ``a < b`` on 16-bit floating point
+   arguments.
+
+   Returns a boolean.
+
+.. function:: numba.cuda.fp16.hle(a, b)
+
+   Perform the comparison operation ``a <= b`` on 16-bit floating point
+   arguments.
+
+   Returns a boolean.
+
+.. function:: numba.cuda.fp16.hmax(a, b)
+
+   Perform the operation ``a if a > b else b``.
+
+   Returns a 16-bit floating point value.
+
+.. function:: numba.cuda.fp16.hmin(a, b)
+
+   Perform the operation ``a if a < b else b``.
+
+   Returns a 16-bit floating point value.
+
+Control Flow Instructions
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A subset of CUDA's control flow instructions is directly available as
+intrinsics. Avoiding branches is a key way to improve CUDA performance, and
+using these intrinsics means you don't have to rely on the ``nvcc`` optimizer
+identifying and removing branches. For further documentation, including
+semantics, please refer to the `relevant CUDA Toolkit documentation
+`_.
+
+
+.. function:: numba.cuda.selp
+
+   Select between two expressions, depending on the value of the first
+   argument. Similar to LLVM's ``select`` instruction.
+
+
+Timer Intrinsics
+~~~~~~~~~~~~~~~~
+
+.. function:: numba.cuda.nanosleep(ns)
+
+   Suspends the thread for a sleep duration of approximately ``ns``
+   nanoseconds.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/libdevice.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/libdevice.rst
new file mode 100644
index 000000000..be3cf2080
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/libdevice.rst
@@ -0,0 +1,16 @@
+Libdevice functions
+===================
+
+All wrapped libdevice functions are listed in this section. All functions in
+libdevice are wrapped, with the exception of ``__nv_nan`` and ``__nv_nanf``.
+These functions return a representation of a quiet NaN, but the argument they
+take (a pointer to an object specifying the representation) is undocumented,
+and follows an unusual form compared to the rest of libdevice - it is not an
+output like every other pointer argument. If a NaN is required, one can be
+obtained in CUDA Python by other means, e.g. ``math.nan``.
+
+Wrapped functions
+-----------------
+
+.. automodule:: numba.cuda.libdevice
+   :members:
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/memory.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/memory.rst
new file mode 100644
index 000000000..b70995e96
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/memory.rst
@@ -0,0 +1,24 @@
+Memory Management
+=================
+
+.. autofunction:: numba.cuda.to_device
+.. autofunction:: numba.cuda.device_array
+.. autofunction:: numba.cuda.device_array_like
+.. autofunction:: numba.cuda.pinned_array
+.. autofunction:: numba.cuda.pinned_array_like
+.. autofunction:: numba.cuda.mapped_array
+.. autofunction:: numba.cuda.mapped_array_like
+.. autofunction:: numba.cuda.managed_array
+.. autofunction:: numba.cuda.pinned
+.. autofunction:: numba.cuda.mapped
+
+Device Objects
+--------------
+
+.. autoclass:: numba.cuda.cudadrv.devicearray.DeviceNDArray
+   :members: copy_to_device, copy_to_host, is_c_contiguous, is_f_contiguous,
+             ravel, reshape, split
+.. autoclass:: numba.cuda.cudadrv.devicearray.DeviceRecord
+   :members: copy_to_device, copy_to_host
+.. autoclass:: numba.cuda.cudadrv.devicearray.MappedNDArray
+   :members: copy_to_device, copy_to_host, split
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/types.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/types.rst
new file mode 100644
index 000000000..31197241e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda-reference/types.rst
@@ -0,0 +1,56 @@
+CUDA-Specific Types
+===================
+
+.. note::
+
+   This page is about types specific to CUDA targets. Many other types are
+   also available in the CUDA target - see :ref:`cuda-built-in-types`.
+
+Vector Types
+~~~~~~~~~~~~
+
+`CUDA Vector Types `_
+are usable in kernels. There are two important distinctions from vector types
+in CUDA C/C++:
+
+First, the recommended names for vector types in Numba CUDA are formatted as
+``<base_type>x<N>``, where ``base_type`` is the base type of the vector, and
+``N`` is the number of elements in the vector. Examples include ``int64x3``,
+``uint16x4``, ``float32x4``, etc. For new Numba CUDA kernels, this is the
+recommended way to instantiate vector types.
+
+For convenience, users adapting existing kernels from CUDA C/C++ to Python may
+use aliases consistent with the C/C++ namings. For example, ``float3`` aliases
+``float32x3``, ``long3`` aliases ``int32x3`` or ``int64x3`` (depending on the
+platform), etc.
+
+Second, unlike CUDA C/C++ where factory functions are used, vector types are
+constructed directly with their constructor. For example, to construct a
+``float32x3``:
+
+.. code-block:: python3
+
+   from numba.cuda import float32x3
+
+   # In kernel
+   f3 = float32x3(0.0, -1.0, 1.0)
+
+Additionally, vector types can be constructed from a combination of vector and
+primitive types, as long as the total number of components matches the result
+vector type. For example, all of the following constructions are valid:
+
+.. code-block:: python3
+
+   zero = uint32(0)
+   u2 = uint32x2(1, 2)
+   # Construct a 3-component vector with a primitive type and a
+   # 2-component vector
+   u3 = uint32x3(zero, u2)
+   # Construct a 4-component vector with two 2-component vectors
+   u4 = uint32x4(u2, u2)
+
+The 1st, 2nd, 3rd and 4th components of the vector type can be accessed
+through the fields ``x``, ``y``, ``z``, and ``w`` respectively. The components
+are immutable after construction in the present version of Numba; it is
+expected that support for mutating vector components will be added in a future
+release.
+
+.. code-block:: python3
+
+   v1 = float32x2(1.0, 1.0)
+   v2 = float32x2(1.0, -1.0)
+   dotprod = v1.x * v2.x + v1.y * v2.y
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/bindings.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/bindings.rst
new file mode 100644
index 000000000..d8425a91f
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/bindings.rst
@@ -0,0 +1,43 @@
+CUDA Bindings
+=============
+
+Numba supports two bindings to the CUDA Driver APIs: its own internal bindings
+based on ctypes, and the official `NVIDIA CUDA Python bindings
+`_. Functionality is equivalent between
+the two bindings.
+
+The internal bindings are used by default. If the NVIDIA bindings are
+installed, then they can be used by setting the environment variable
+``NUMBA_CUDA_USE_NVIDIA_BINDING`` to ``1`` prior to the import of Numba. Once
+Numba has been imported, the selected binding cannot be changed.
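+
+As a brief sketch, opting in to the NVIDIA bindings therefore has to happen
+before the first import of Numba - for example from Python (setting the
+variable in the shell environment works equally well):
+
+.. code-block:: python
+
+   import os
+
+   # Must be set before Numba is imported anywhere in the process.
+   os.environ["NUMBA_CUDA_USE_NVIDIA_BINDING"] = "1"
+
+   from numba import cuda  # the NVIDIA bindings are now in use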
+
+
+Per-Thread Default Streams
+--------------------------
+
+Responsibility for handling Per-Thread Default Streams (PTDS) is delegated to
+the NVIDIA bindings when they are in use. To use PTDS with the NVIDIA
+bindings, set the environment variable
+``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` to ``1`` instead of Numba's
+environment variable :envvar:`NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM`.
+
+.. seealso::
+
+   The `Default Stream section
+   `_
+   in the NVIDIA Bindings documentation.
+
+
+Roadmap
+-------
+
+In Numba 0.56, the NVIDIA Bindings will be used by default, if they are
+installed.
+
+In future versions of Numba:
+
+- The internal bindings will be deprecated.
+- The internal bindings will be removed.
+
+At present, no specific release is planned for the deprecation or removal of
+the internal bindings.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/caching.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/caching.rst
new file mode 100644
index 000000000..06c84ff77
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/caching.rst
@@ -0,0 +1,35 @@
+On-disk Kernel Caching
+======================
+
+When the ``cache`` keyword argument of the :func:`@cuda.jit <numba.cuda.jit>`
+decorator is ``True``, a file-based cache is enabled. This shortens
+compilation times when the function was already compiled in a previous
+invocation.
+
+The cache is maintained in the ``__pycache__`` subdirectory of the directory
+containing the source file; if the current user is not allowed to write to it,
+the cache implementation falls back to a platform-specific user-wide cache
+directory (such as ``$HOME/.cache/numba`` on Unix platforms).
+
+
+Compute capability considerations
+---------------------------------
+
+Separate cache files are maintained for each compute capability. When a cached
+kernel is loaded, the compute capability of the device on which the kernel is
+first launched in the current run is used to determine which version to load.
+Therefore, on systems that have multiple GPUs with differing compute
+capabilities, the cached versions of kernels are only used for one compute
+capability, and recompilation will occur for other compute capabilities.
+
+For example: if a system has two GPUs, one of compute capability 7.5 and one
+of 8.0, then:
+
+* If a cached kernel is first launched on the CC 7.5 device, then the cached
+  version for CC 7.5 is used. If it is subsequently launched on the CC 8.0
+  device, a recompilation will occur.
+* If in a subsequent run the cached kernel is first launched on the CC 8.0
+  device, then the cached version for CC 8.0 is used. A subsequent launch on
+  the CC 7.5 device will require a recompilation.
+
+This limitation is not expected to present issues in most practical scenarios,
+as multi-GPU production systems tend to have identical GPUs within each node.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cooperative_groups.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cooperative_groups.rst
new file mode 100644
index 000000000..a51e8ffcb
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cooperative_groups.rst
@@ -0,0 +1,111 @@
+==================
+Cooperative Groups
+==================
+
+Supported features
+------------------
+
+Numba's Cooperative Groups support presently provides grid groups and grid
+synchronization, along with cooperative kernel launches.
+
+Cooperative groups are supported on Linux, and on Windows for devices in `TCC
+mode
+`_.
+Cooperative Groups also require the CUDA Device Runtime library,
+``cudadevrt``, to be available - for conda default channel-installed CUDA
+toolkit packages, it is only available in versions 10.2 onwards.
+System-installed toolkits (e.g. from NVIDIA distribution packages or runfiles)
+all include ``cudadevrt``.
+
+Using Grid Groups
+-----------------
+
+To get the current grid group, use the :func:`cg.this_grid()
+<numba.cuda.cg.this_grid>` function:
+
+.. code-block:: python
+
+   g = cuda.cg.this_grid()
+
+Synchronizing the grid is done with the :meth:`sync()
+<numba.cuda.cg.GridGroup.sync>` method of the grid group:
+
+.. code-block:: python
+
+   g.sync()
+
+
+Cooperative Launches
+--------------------
+
+Unlike the CUDA C/C++ API, a cooperative launch is invoked using the same
+syntax as a normal kernel launch - Numba automatically determines whether a
+cooperative launch is required based on whether a grid group is synchronized
+in the kernel.
+
+The grid size limit for a cooperative launch is more restrictive than for a
+normal launch - the grid must be no larger than the maximum number of active
+blocks on the device on which it is launched. To get the maximum grid size for
+a cooperative launch of a kernel with a given block size and dynamic shared
+memory requirement, use the ``max_cooperative_grid_blocks()`` method of kernel
+overloads:
+
+.. automethod:: numba.cuda.dispatcher._Kernel.max_cooperative_grid_blocks
+
+This can be used to ensure that the kernel is launched with no more than the
+maximum number of blocks. Exceeding the maximum number of blocks for the
+cooperative launch will result in a
+``CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`` error.
+
+
+Applications and Example
+------------------------
+
+Grid group synchronization can be used to implement a global barrier across
+all threads in the grid - applications of this include a global reduction to a
+single value, or looping over rows of a large matrix sequentially using the
+entire grid to operate on column elements in parallel.
+
+In the following example, rows are written sequentially by the grid. Each
+thread in the grid reads a value from the previous row written by its
+*opposite* thread. A grid sync is needed to ensure that threads in the grid
+don't run ahead of threads in other blocks, or fail to see updates from their
+opposite thread.
+
+First we'll define our kernel:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cg.py
+   :language: python
+   :caption: from ``test_grid_sync`` of ``numba/cuda/tests/doc_examples/test_cg.py``
+   :start-after: magictoken.ex_grid_sync_kernel.begin
+   :end-before: magictoken.ex_grid_sync_kernel.end
+   :dedent: 8
+   :linenos:
+
+Then create some empty input data and determine the grid and block sizes:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cg.py
+   :language: python
+   :caption: from ``test_grid_sync`` of ``numba/cuda/tests/doc_examples/test_cg.py``
+   :start-after: magictoken.ex_grid_sync_data.begin
+   :end-before: magictoken.ex_grid_sync_data.end
+   :dedent: 8
+   :linenos:
+
+Finally we launch the kernel and print the result:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cg.py
+   :language: python
+   :caption: from ``test_grid_sync`` of ``numba/cuda/tests/doc_examples/test_cg.py``
+   :start-after: magictoken.ex_grid_sync_launch.begin
+   :end-before: magictoken.ex_grid_sync_launch.end
+   :dedent: 8
+   :linenos:
+
+
+The maximum grid size for ``sequential_rows`` can be enquired using:
+
+.. code-block:: python
+
+   overload = sequential_rows.overloads[(int32[:,::1],)]
+   max_blocks = overload.max_cooperative_grid_blocks(blockdim)
+   print(max_blocks)
+   # 1152 (e.g. on Quadro RTX 8000 with Numba 0.52.1 and CUDA 11.0)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_array_interface.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_array_interface.rst
new file mode 100644
index 000000000..304f4ecab
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_array_interface.rst
@@ -0,0 +1,531 @@
+.. _cuda-array-interface:
+
+================================
+CUDA Array Interface (Version 3)
+================================
+
+The *CUDA Array Interface* (or CAI) was created for interoperability between
+different implementations of CUDA array-like objects in various projects. The
+idea is borrowed from the `NumPy array interface`_.
+
+
+.. note::
+   Currently, we only define the Python-side interface. In the future, we may
+   add a C-side interface for efficient exchange of the information in
+   compiled code.
+
+
+Python Interface Specification
+==============================
+
+.. note:: Experimental feature. Specification may change.
+
+The ``__cuda_array_interface__`` attribute returns a dictionary (``dict``)
+that must contain the following entries:
+
+- **shape**: ``(integer, ...)``
+
+  A tuple of ``int`` (or ``long``) representing the size of each dimension.
+
+- **typestr**: ``str``
+
+  The type string. This has the same definition as ``typestr`` in the
+  `NumPy array interface`_.
+
+- **data**: ``(integer, boolean)``
+
+  The **data** is a 2-tuple. The first element is the data pointer
+  as a Python ``int`` (or ``long``). The data must be device-accessible.
+  For zero-size arrays, use ``0`` here.
+  The second element is the read-only flag as a Python ``bool``.
+
+  Because the user of the interface may or may not be in the same context,
+  the most common case is to use ``cuPointerGetAttribute`` with
+  ``CU_POINTER_ATTRIBUTE_DEVICE_POINTER`` in the CUDA driver API (or the
+  equivalent CUDA Runtime API) to retrieve a device pointer that
+  is usable in the currently active context.
+
+- **version**: ``integer``
+
+  An integer for the version of the interface being exported.
+  The current version is *3*.
+
+
+The following are optional entries:
+
+- **strides**: ``None`` or ``(integer, ...)``
+
+  If **strides** is not given, or it is ``None``, the array is in
+  C-contiguous layout. Otherwise, a tuple of ``int`` (or ``long``) is
+  explicitly given for representing the number of bytes to skip to access the
+  next element at each dimension.
+
+- **descr**
+
+  This is for describing more complicated types. This follows the same
+  specification as in the `NumPy array interface`_.
+
+- **mask**: ``None`` or object exposing the ``__cuda_array_interface__``
+
+  If ``None`` then all values in **data** are valid. All elements of the mask
+  array should be interpreted only as true or not true indicating which
+  elements of this array are valid. This has the same definition as ``mask``
+  in the `NumPy array interface`_.
+
+  .. note:: Numba does not currently support working with masked CUDA arrays
+            and will raise a ``NotImplementedError`` exception if one is
+            passed to a GPU function.
+
+- **stream**: ``None`` or ``integer``
+
+  An optional stream upon which synchronization must take place at the point
+  of consumption, either by synchronizing on the stream or enqueuing
+  operations on the data on the given stream.
Integer values in this entry are as follows: + + - ``0``: This is disallowed as it would be ambiguous between ``None`` and the + default stream, and also between the legacy and per-thread default streams. + Any use case where ``0`` might be given should either use ``None``, ``1``, + or ``2`` instead for clarity. + - ``1``: The legacy default stream. + - ``2``: The per-thread default stream. + - Any other integer: a ``cudaStream_t`` represented as a Python integer. + + When ``None``, no synchronization is required. See the + :ref:`cuda-array-interface-synchronization` section below for further details. + + In a future revision of the interface, this entry may be expanded (or another + entry added) so that an event to synchronize on can be specified instead of a + stream. + + +.. _cuda-array-interface-synchronization: + +Synchronization +--------------- + +Definitions +~~~~~~~~~~~ + +When discussing synchronization, the following definitions are used: + +- *Producer*: The library / object on which ``__cuda_array_interface__`` is + accessed. +- *Consumer*: The library / function that accesses the + ``__cuda_array_interface__`` of the Producer. +- *User Code*: Code that induces a Producer and Consumer to share data through + the CAI. +- *User*: The person writing or maintaining the User Code. The User may + implement User Code without knowledge of the CAI, since the CAI accesses can + be hidden from their view. + +In the following example: + +.. code-block:: python + + import cupy + from numba import cuda + + @cuda.jit + def add(x, y, out): + start = cuda.grid(1) + stride = cuda.gridsize(1) + for i in range(start, x.shape[0], stride): + out[i] = x[i] + y[i] + + a = cupy.arange(10) + b = a * 2 + out = cupy.zeros_like(a) + + add[1, 32](a, b, out) + +When the ``add`` kernel is launched: + +- ``a``, ``b``, ``out`` are Producers. +- The ``add`` kernel is the Consumer. +- The User Code is specifically ``add[1, 32](a, b, out)``. +- The author of the code is the User. + + +Design Motivations +~~~~~~~~~~~~~~~~~~ + +Elements of the CAI design related to synchronization seek to fulfill these +requirements: + +1. Producers and Consumers that exchange data through the CAI must be able to do + so without data races. +2. Requirement 1 should be met without requiring the user to be + aware of any particulars of the CAI - in other words, exchanging data between + Producers and Consumers that operate on data asynchronously should be correct + by default. + + - An exception to this requirement is made for Producers and Consumers that + explicitly document that the User is required to take additional steps to + ensure correctness with respect to synchronization. In this case, Users + are required to understand the details of the CUDA Array Interface, and + the Producer/Consumer library documentation must specify the steps that + Users are required to take. + + Use of this exception should be avoided where possible, as it is provided + for libraries that cannot implement the synchronization semantics without + the involvement of the User - for example, those interfacing with + third-party libraries oblivious to the CUDA Array Interface. + +3. Where the User is aware of the particulars of the CAI and implementation + details of the Producer and Consumer, they should be able to, at their + discretion, override some of the synchronization semantics of the interface + to reduce the synchronization overhead. 
Overriding synchronization semantics + implies that: + + - The CAI design, and the design and implementation of the Producer and + Consumer do not specify or guarantee correctness with respect to data + races. + - Instead, the User is responsible for ensuring correctness with respect to + data races. + + +Interface Requirements +~~~~~~~~~~~~~~~~~~~~~~ + +The ``stream`` entry enables Producers and Consumers to avoid hazards when +exchanging data. Expected behaviour of the Consumer is as follows: + +* When ``stream`` is not present or is ``None``: + + - No synchronization is required on the part of the Consumer. + - The Consumer may enqueue operations on the underlying data immediately on + any stream. + +* When ``stream`` is an integer, its value indicates the stream on which the + Producer may have in-progress operations on the data, and which the Consumer + is expected to either: + + - Synchronize on before accessing the data, or + - Enqueue operations in when accessing the data. + + The Consumer can choose which mechanism to use, with the following + considerations: + + - If the Consumer synchronizes on the provided stream prior to accessing the + data, then it must ensure that no computation can take place in the provided + stream until its operations in its own choice of stream have taken place. + This could be achieved by either: + + - Placing a wait on an event in the provided stream that occurs once all + of the Consumer's operations on the data are completed, or + - Avoiding returning control to the user code until after its operations + on its own stream have completed. + + - If the consumer chooses to only enqueue operations on the data in the + provided stream, then it may return control to the User code immediately + after enqueueing its work, as the work will all be serialized on the + exported array's stream. This is sufficient to ensure correctness even if + the User code were to induce the Producer to subsequently start enqueueing + more work on the same stream. + +* If the User has set the Consumer to ignore CAI synchronization semantics, the + Consumer may assume it can operate on the data immediately in any stream with + no further synchronization, even if the ``stream`` member has an integer + value. + + +When exporting an array through the CAI, Producers must ensure that: + +* If there is work on the data enqueued in one or more streams, then + synchronization on the provided ``stream`` is sufficient to ensure + synchronization with all pending work. + + - If the Producer has no enqueued work, or work only enqueued on the stream + identified by ``stream``, then this condition is met. + - If the Producer has enqueued work on the data on multiple streams, then it + must enqueue events on those streams that follow the enqueued work, and + then wait on those events in the provided ``stream``. For example: + + 1. Work is enqueued by the Producer on streams ``7``, ``9``, and ``15``. + 2. Events are then enqueued on each of streams ``7``, ``9``, and ``15``. + 3. Producer then tells stream ``3`` to wait on the events from Step 2, and + the ``stream`` entry is set to ``3``. + +* If there is no work enqueued on the data, then the ``stream`` entry may be + either ``None``, or not provided. + +Optionally, to facilitate the User relaxing conformance to synchronization +semantics: + +* Producers may provide a configuration option to always set ``stream`` to + ``None``. 
+* Consumers may provide a configuration option to ignore the value of ``stream`` + and act as if it were ``None`` or not provided. This elides synchronization + on the Producer-provided streams, and allows enqueuing work on streams other + than that provided by the Producer. + +These options should not be set by default in either a Producer or a Consumer. +The CAI specification does not prescribe the exact mechanism by which these +options are set, or related options that Producers or Consumers might provide +to allow the user further control over synchronization behavior. + + +Synchronization in Numba +~~~~~~~~~~~~~~~~~~~~~~~~ + +Numba is neither strictly a Producer nor a Consumer - it may be used to +implement either by a User. In order to facilitate the correct implementation of +synchronization semantics, Numba exhibits the following behaviors related to +synchronization of the interface: + +- When Numba acts as a Consumer (for example when an array-like object is passed + to a kernel launch): If ``stream`` is an integer, then Numba will immediately + synchronize on the provided ``stream``. A Numba :class:`Device Array + ` created from an array-like + object has its *default stream* set to the provided stream. + +- When Numba acts as a Producer (when the ``__cuda_array_interface__`` property + of a Numba CUDA Array is accessed): If the exported CUDA Array has a + *default stream*, then it is given as the ``stream`` entry. Otherwise, + ``stream`` is set to ``None``. + +.. note:: In Numba's terminology, an array's *default stream* is a property + specifying the stream that Numba will enqueue asynchronous + transfers in if no other stream is provided as an argument to the + function invoking the transfer. It is not the same as the `Default + Stream + `_ + in normal CUDA terminology. + +Numba's synchronization behavior results in the following intended +consequences: + +- Exchanging data either as a Producer or a Consumer will be correct without + the need for any further action from the User, provided that the other side + of the interaction also follows the CAI synchronization semantics. +- The User is expected to either: + + - Avoid launching kernels or other operations on streams that + are not the default stream for their parameters, or + - When launching operations on a stream that is not the default stream for + a given parameter, they should then insert an event into the stream that + they are operating in, and wait on that event in the default stream for + the parameter. For an example of this, :ref:`see below + `. + +The User may override Numba's synchronization behavior by setting the +environment variable ``NUMBA_CUDA_ARRAY_INTERFACE_SYNC`` or the config variable +``CUDA_ARRAY_INTERFACE_SYNC`` to ``0`` (see :ref:`GPU Support Environment +Variables `). When set, Numba will not synchronize +on the streams of imported arrays, and it is the responsibility of the user to +ensure correctness with respect to stream synchronization. Synchronization when +creating a Numba CUDA Array from an object exporting the CUDA Array Interface +may also be elided by passing ``sync=False`` when creating the Numba CUDA +Array with :func:`numba.cuda.as_cuda_array` or +:func:`numba.cuda.from_cuda_array_interface`. + +There is scope for Numba's synchronization implementation to be optimized in +the future, by eliding synchronizations when a kernel or driver API operation +(e.g. a memcopy or memset) is launched on the same stream as an imported +array. + + +.. 
_example-multi-streams:
+
+An example launching on an array's non-default stream
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This example shows how to ensure that a Consumer can safely consume an array
+with a default stream when it is passed to a kernel launched in a different
+stream.
+
+First we need to import Numba and a consumer library (a fictitious library named
+``other_cai_library`` for this example):
+
+.. code-block:: python
+
+   from numba import cuda, int32, void
+   import other_cai_library
+
+Now we'll define a kernel - this initializes the elements of the array, setting
+each entry to its index:
+
+.. code-block:: python
+
+   @cuda.jit(void, int32[::1])
+   def initialize_array(x):
+       i = cuda.grid(1)
+       if i < len(x):
+           x[i] = i
+
+Next we will create two streams:
+
+.. code-block:: python
+
+   array_stream = cuda.stream()
+   kernel_stream = cuda.stream()
+
+Then create an array with one of the streams as its default stream:
+
+.. code-block:: python
+
+   N = 16384
+   x = cuda.device_array(N, stream=array_stream)
+
+Now we launch the kernel in the other stream, with the launch configuration
+given as ``[blocks, threads]``:
+
+.. code-block:: python
+
+   nthreads = 256
+   nblocks = N // nthreads
+
+   initialize_array[nblocks, nthreads, kernel_stream](x)
+
+If we were to pass ``x`` to a Consumer now, there is a risk that it may operate on
+it in ``array_stream`` whilst the kernel is still running in ``kernel_stream``.
+To prevent operations in ``array_stream`` starting before the kernel has
+finished, we create an event and wait on it:
+
+.. code-block:: python
+
+   # Create event
+   evt = cuda.event()
+   # Record the event after the kernel launch in kernel_stream
+   evt.record(kernel_stream)
+   # Wait for the event in array_stream
+   evt.wait(array_stream)
+
+It is now safe for ``other_cai_library`` to consume ``x``:
+
+.. code-block:: python
+
+   other_cai_library.consume(x)
+
+
+Lifetime management
+-------------------
+
+Data
+~~~~
+
+Obtaining the value of the ``__cuda_array_interface__`` property of any object
+has no effect on the lifetime of the object from which it was created. In
+particular, note that the interface has no slot for the owner of the data.
+
+The User code must preserve the lifetime of the object owning the data for as
+long as the Consumer might use it.
+
+
+Streams
+~~~~~~~
+
+Like data, CUDA streams also have a finite lifetime. It is therefore required
+that a Producer exporting data on the interface with an associated stream
+ensures that the exported stream's lifetime is equal to or surpasses the
+lifetime of the object from which the interface was exported.
+
+
+Lifetime management in Numba
+----------------------------
+
+Producing Arrays
+~~~~~~~~~~~~~~~~
+
+Numba takes no steps to maintain the lifetime of an object from which the
+interface is exported - it is the user's responsibility to ensure that the
+underlying object is kept alive for the duration that the exported interface
+might be used.
+
+The lifetime of any Numba-managed stream exported on the interface is guaranteed
+to equal or surpass the lifetime of the underlying object, because the
+underlying object holds a reference to the stream.
+
+.. note:: Numba-managed streams are those created with
+          ``cuda.default_stream()``, ``cuda.legacy_default_stream()``, or
+          ``cuda.per_thread_default_stream()``. Streams not managed by Numba
+          are created from an external stream with ``cuda.external_stream()``.
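+
+To make the data-lifetime responsibility above concrete, here is a minimal
+sketch (assuming a working CUDA context) showing that the interface
+dictionary carries no reference to its owner - the User must keep the
+exporting object alive for as long as the exported pointer might be used:
+
+.. code-block:: python
+
+   from numba import cuda
+   import numpy as np
+
+   arr = cuda.to_device(np.arange(4))
+   desc = arr.__cuda_array_interface__
+
+   # The dict describes the data but does not keep ``arr`` alive; if all
+   # references to ``arr`` were dropped, the device allocation could be
+   # freed while a consumer still holds the raw pointer in desc['data'].
+   print(desc['data'], desc['shape'], desc['typestr'])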
+
+
+Consuming Arrays
+~~~~~~~~~~~~~~~~
+
+Numba provides two mechanisms for creating device arrays from objects exporting
+the CUDA Array Interface. Which to use depends on whether the created device
+array should maintain the lifetime of the object from which it is created:
+
+- ``as_cuda_array``: This creates a device array that holds a reference to the
+  owning object. As long as a reference to the device array is held, its
+  underlying data will also be kept alive, even if all other references to the
+  original owning object have been dropped.
+- ``from_cuda_array_interface``: This creates a device array with no reference
+  to the owning object by default. The owning object, or some other object to
+  be considered the owner, can be passed in the ``owner`` parameter.
+
+The interfaces of these functions are:
+
+.. automethod:: numba.cuda.as_cuda_array
+
+.. automethod:: numba.cuda.from_cuda_array_interface
+
+
+Pointer Attributes
+------------------
+
+Additional information about the data pointer can be retrieved using
+``cuPointerGetAttribute`` or ``cudaPointerGetAttributes``. Such information
+includes:
+
+- the CUDA context that owns the pointer;
+- is the pointer host-accessible?
+- is the pointer managed memory?
+
+
+.. _NumPy array interface: https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.interface.html#__array_interface__
+
+
+Differences with CUDA Array Interface (Version 0)
+-------------------------------------------------
+
+Version 0 of the CUDA Array Interface did not have the optional **mask**
+attribute to support masked arrays.
+
+
+Differences with CUDA Array Interface (Version 1)
+-------------------------------------------------
+
+Versions 0 and 1 of the CUDA Array Interface neither clarified the
+**strides** attribute for C-contiguous arrays nor specified the treatment for
+zero-size arrays.
+
+
+Differences with CUDA Array Interface (Version 2)
+-------------------------------------------------
+
+Prior versions of the CUDA Array Interface made no statement about
+synchronization.
+
+
+Interoperability
+----------------
+
+The following Python libraries have adopted the CUDA Array Interface:
+
+- Numba
+- `CuPy `_
+- `PyTorch `_
+- `PyArrow `_
+- `mpi4py `_
+- `ArrayViews `_
+- `JAX `_
+- `PyCUDA `_
+- `DALI: the NVIDIA Data Loading Library `_ :
+
+  - `TensorGPU objects
+    `_
+    expose the CUDA Array Interface.
+  - `The External Source operator
+    `_
+    consumes objects exporting the CUDA Array Interface.
+- The RAPIDS stack:
+
+  - `cuDF `_
+  - `cuML `_
+  - `cuSignal `_
+  - `RMM `_
+
+If your project is not on this list, please feel free to report it on the
+`Numba issue tracker `_.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_ffi.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_ffi.rst
new file mode 100644
index 000000000..1ee441254
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cuda_ffi.rst
@@ -0,0 +1,158 @@
+
+.. _cuda_ffi:
+
+Calling foreign functions from Python kernels
+=============================================
+
+Python kernels can call device functions written in other languages. CUDA C/C++,
+PTX, and binary objects (cubins, fat binaries, etc.) are directly supported;
+sources in other languages must be compiled to PTX first. The constituent parts
+of a Python kernel call to a foreign device function are:
+
+- The device function implementation in a foreign language (e.g. CUDA C).
+- A declaration of the device function in Python.
+- A kernel that links with and calls the foreign function. + + +Device function ABI +------------------- + +Numba's ABI for calling device functions defines the following prototype in +C/C++: + +.. code:: C + + extern "C" + __device__ int + function( + T* return_value, + ... + ); + + +Components of the prototype are as follows: + +- ``extern "C"`` is used to prevent name-mangling so that it is easy to declare + the function in Python. It can be removed, but then the mangled name must be + used in the declaration of the function in Python. +- ``__device__`` is required to define the function as a device function. +- The return value is always of type ``int``, and is used to signal whether a + Python exception occurred. Since Python exceptions don't occur in foreign + functions, this should always be set to 0 by the callee. +- The first argument is a pointer to the return value of type ``T``, which is + allocated in the local address space [#f1]_ and passed in by the caller. If + the function returns a value, the pointee should be set by the callee to + store the return value. +- Subsequent arguments should match the types and order of arguments passed to + the function from the Python kernel. + +Functions written in other languages must compile to PTX that conforms to this +prototype specification. + +A function that accepts two floats and returns a float would have the following +prototype: + +.. code:: C + + extern "C" + __device__ int + mul_f32_f32( + float* return_value, + float x, + float y + ) + +.. rubric:: Notes + +.. [#f1] Care must be taken to ensure that any operations on the return value + are applicable to data in the local address space. Some operations, + such as atomics, cannot be performed on data in the local address + space. + +Declaration in Python +--------------------- + +To declare a foreign device function in Python, use :func:`declare_device() +`: + +.. autofunction:: numba.cuda.declare_device + +The returned descriptor name need not match the name of the foreign function. +For example, when: + +.. code:: + + mul = cuda.declare_device('mul_f32_f32', 'float32(float32, float32)') + +is declared, calling ``mul(a, b)`` inside a kernel will translate into a call to +``mul_f32_f32(a, b)`` in the compiled code. + + +Linking and Calling functions +----------------------------- + +The ``link`` keyword argument of the :func:`@cuda.jit ` +decorator accepts a list of file names specified by absolute path or a path +relative to the current working directory. Files whose name ends in ``.cu`` +will be compiled with the `NVIDIA Runtime Compiler (NVRTC) +`_ and linked into the kernel as +PTX; other files will be passed directly to the CUDA Linker. + +For example, the following kernel calls the ``mul()`` function declared above +with the implementation ``mul_f32_f32()`` in a file called ``functions.cu``: + +.. code:: + + @cuda.jit(link=['functions.cu']) + def multiply_vectors(r, x, y): + i = cuda.grid(1) + + if i < len(r): + r[i] = mul(x[i], y[i]) + + +C/C++ Support +------------- + +Support for compiling and linking of CUDA C/C++ code is provided through the use +of NVRTC subject to the following considerations: + +- It is only available when using the NVIDIA Bindings. See + :envvar:`NUMBA_CUDA_USE_NVIDIA_BINDING`. +- A suitable version of the NVRTC library for the installed version of the + NVIDIA CUDA Bindings must be available. +- The CUDA include path is assumed by default to be ``/usr/local/cuda/include`` + on Linux and ``$env:CUDA_PATH\include`` on Windows. 
It can be modified using
+  the environment variable :envvar:`NUMBA_CUDA_INCLUDE_PATH`.
+- The CUDA include directory will be made available to NVRTC on the include
+  path; additional includes are not supported.
+
+
+Complete Example
+----------------
+
+This example demonstrates calling a foreign function written in CUDA C to
+multiply pairs of numbers from two arrays.
+
+The foreign function is written as follows:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/ffi/functions.cu
+   :language: C
+   :caption: ``numba/cuda/tests/doc_examples/ffi/functions.cu``
+   :linenos:
+
+The Python code and kernel are:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_ffi.py
+   :language: python
+   :caption: from ``test_ex_linking_cu`` in ``numba/cuda/tests/doc_examples/test_ffi.py``
+   :start-after: magictoken.ex_linking_cu.begin
+   :end-before: magictoken.ex_linking_cu.end
+   :dedent: 8
+   :linenos:
+
+.. note::
+
+   The example above is minimal in order to illustrate a foreign function call -
+   it would not be expected to be particularly performant due to the small grid
+   and light workload of the foreign function.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cudapysupported.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cudapysupported.rst
new file mode 100644
index 000000000..d5dc5a790
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/cudapysupported.rst
@@ -0,0 +1,296 @@
+========================================
+Supported Python features in CUDA Python
+========================================
+
+This page lists the Python features supported in CUDA Python. This includes
+all kernel and device functions compiled with ``@cuda.jit`` and other higher
+level Numba decorators that target the CUDA GPU.
+
+Language
+========
+
+Execution Model
+---------------
+
+CUDA Python maps directly to the *single-instruction multiple-thread*
+execution (SIMT) model of CUDA. Each instruction is implicitly
+executed by multiple threads in parallel. With this execution model, array
+expressions are less useful because we don't want multiple threads to perform
+the same task. Instead, we want threads to perform a task in a cooperative
+fashion.
+
+For details, please consult the
+`CUDA Programming Guide
+`_.
+
+Floating Point Error Model
+--------------------------
+
+By default, CUDA Python kernels execute with the NumPy error model. In this
+model, division by zero raises no exception and instead produces a result of
+``inf``, ``-inf`` or ``nan``. This differs from the normal Python error model,
+in which division by zero raises a ``ZeroDivisionError``.
+
+When debug is enabled (by passing ``debug=True`` to the
+:func:`@cuda.jit ` decorator), the Python error model is used.
+This allows division-by-zero errors during kernel execution to be identified.
+
+Constructs
+----------
+
+The following Python constructs are not supported:
+
+* Exception handling (``try .. except``, ``try .. finally``)
+* Context management (the ``with`` statement)
+* Comprehensions (either list, dict, set or generator comprehensions)
+* Generators (any ``yield`` statements)
+
+The ``raise`` and ``assert`` statements are supported, with the following
+constraints:
+
+- They can only be used in kernels, not in device functions.
+- They only have an effect when ``debug=True`` is passed to the
+  :func:`@cuda.jit ` decorator.
This is similar to the behavior
+  of the ``assert`` keyword in CUDA C/C++, which is ignored unless compiling
+  with device debug turned on.
+
+
+Printing of strings, integers, and floats is supported, but printing is an
+asynchronous operation - in order to ensure that all output is printed after a
+kernel launch, it is necessary to call :func:`numba.cuda.synchronize`. Eliding
+the call to ``synchronize`` is acceptable, but output from a kernel may appear
+during other later driver operations (e.g. subsequent kernel launches, memory
+transfers, etc.), or fail to appear before the program execution completes. Up
+to 32 arguments may be passed to the ``print`` function - if more are passed
+then a format string will be emitted instead and a warning will be produced.
+This is due to a general limitation in CUDA printing, as outlined in the
+`section on limitations in printing
+`_
+in the CUDA C++ Programming Guide.
+
+
+Recursion
+---------
+
+Self-recursive device functions are supported, with the constraint that
+recursive calls must have the same argument types as the initial call to
+the function. For example, the following form of recursion is supported:
+
+.. code:: python
+
+   @cuda.jit("int64(int64)", device=True)
+   def fib(n):
+       if n < 2:
+           return n
+       return fib(n - 1) + fib(n - 2)
+
+(the ``fib`` function always has an ``int64`` argument), whereas the following
+is unsupported:
+
+.. code:: python
+
+   # Called with x := int64, y := float64
+   @cuda.jit
+   def type_change_self(x, y):
+       if x > 1 and y > 0:
+           return x + type_change_self(x - y, y)
+       else:
+           return y
+
+The outer call to ``type_change_self`` provides ``(int64, float64)`` arguments,
+but the inner call uses ``(float64, float64)`` arguments (because ``x - y``,
+an ``int64 - float64`` operation, results in a ``float64`` type). Therefore,
+this function is unsupported.
+
+Mutual recursion between functions (e.g. where a function ``func1()`` calls
+``func2()`` which again calls ``func1()``) is unsupported.
+
+.. note::
+
+   The call stack in CUDA is typically quite limited in size, so it is easier
+   to overflow it with recursive calls on CUDA devices than it is on CPUs.
+
+   Stack overflow will result in an Unspecified Launch Failure (ULF) during
+   kernel execution. In order to identify whether a ULF is due to stack
+   overflow, programs can be run under `Compute Sanitizer
+   `_,
+   which explicitly states when stack overflow has occurred.
+
+.. _cuda-built-in-types:
+
+Built-in types
+===============
+
+Support for the following built-in types is inherited from CPU nopython mode:
+
+* int
+* float
+* complex
+* bool
+* None
+* tuple
+* Enum, IntEnum
+
+See :ref:`nopython built-in types `.
+
+There is also some very limited support for character sequences (bytes and
+unicode strings) used in NumPy arrays. Note that this support can only be used
+with CUDA 11.2 onwards.
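+
+As a minimal sketch of these built-in types in use, the following
+(hypothetical) kernel takes a ``(scale, shift)`` tuple of floats as an
+argument and unpacks it on the device:
+
+.. code-block:: python
+
+   from numba import cuda
+   import numpy as np
+
+   @cuda.jit
+   def scale_and_shift(out, coeffs):
+       # coeffs is an ordinary Python tuple passed in from the host
+       scale, shift = coeffs
+       i = cuda.grid(1)
+       if i < out.size:
+           out[i] = out[i] * scale + shift
+
+   out = cuda.to_device(np.arange(8, dtype=np.float64))
+   scale_and_shift[1, 8](out, (2.0, 1.0))
+   print(out.copy_to_host())  # [ 1.  3.  5.  7.  9. 11. 13. 15.]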
+
+Built-in functions
+==================
+
+The following built-in functions are supported:
+
+* :func:`abs`
+* :class:`bool`
+* :class:`complex`
+* :func:`enumerate`
+* :class:`float`
+* :class:`int`: only the one-argument form
+* :func:`len`
+* :func:`min`: only the multiple-argument form
+* :func:`max`: only the multiple-argument form
+* :func:`pow`
+* :class:`range`
+* :func:`round`
+* :func:`zip`
+
+
+Standard library modules
+========================
+
+
+``cmath``
+---------
+
+The following functions from the :mod:`cmath` module are supported:
+
+* :func:`cmath.acos`
+* :func:`cmath.acosh`
+* :func:`cmath.asin`
+* :func:`cmath.asinh`
+* :func:`cmath.atan`
+* :func:`cmath.atanh`
+* :func:`cmath.cos`
+* :func:`cmath.cosh`
+* :func:`cmath.exp`
+* :func:`cmath.isfinite`
+* :func:`cmath.isinf`
+* :func:`cmath.isnan`
+* :func:`cmath.log`
+* :func:`cmath.log10`
+* :func:`cmath.phase`
+* :func:`cmath.polar`
+* :func:`cmath.rect`
+* :func:`cmath.sin`
+* :func:`cmath.sinh`
+* :func:`cmath.sqrt`
+* :func:`cmath.tan`
+* :func:`cmath.tanh`
+
+``math``
+--------
+
+The following functions from the :mod:`math` module are supported:
+
+* :func:`math.acos`
+* :func:`math.asin`
+* :func:`math.atan`
+* :func:`math.acosh`
+* :func:`math.asinh`
+* :func:`math.atanh`
+* :func:`math.cos`
+* :func:`math.sin`
+* :func:`math.tan`
+* :func:`math.hypot`
+* :func:`math.cosh`
+* :func:`math.sinh`
+* :func:`math.tanh`
+* :func:`math.atan2`
+* :func:`math.erf`
+* :func:`math.erfc`
+* :func:`math.exp`
+* :func:`math.expm1`
+* :func:`math.fabs`
+* :func:`math.frexp`
+* :func:`math.ldexp`
+* :func:`math.gamma`
+* :func:`math.lgamma`
+* :func:`math.log`
+* :func:`math.log2`
+* :func:`math.log10`
+* :func:`math.log1p`
+* :func:`math.sqrt`
+* :func:`math.remainder`: Python 3.7+
+* :func:`math.pow`
+* :func:`math.ceil`
+* :func:`math.floor`
+* :func:`math.copysign`
+* :func:`math.fmod`
+* :func:`math.modf`
+* :func:`math.isnan`
+* :func:`math.isinf`
+* :func:`math.isfinite`
+
+
+``operator``
+------------
+
+The following functions from the :mod:`operator` module are supported:
+
+* :func:`operator.add`
+* :func:`operator.and_`
+* :func:`operator.eq`
+* :func:`operator.floordiv`
+* :func:`operator.ge`
+* :func:`operator.gt`
+* :func:`operator.iadd`
+* :func:`operator.iand`
+* :func:`operator.ifloordiv`
+* :func:`operator.ilshift`
+* :func:`operator.imod`
+* :func:`operator.imul`
+* :func:`operator.invert`
+* :func:`operator.ior`
+* :func:`operator.ipow`
+* :func:`operator.irshift`
+* :func:`operator.isub`
+* :func:`operator.itruediv`
+* :func:`operator.ixor`
+* :func:`operator.le`
+* :func:`operator.lshift`
+* :func:`operator.lt`
+* :func:`operator.mod`
+* :func:`operator.mul`
+* :func:`operator.ne`
+* :func:`operator.neg`
+* :func:`operator.not_`
+* :func:`operator.or_`
+* :func:`operator.pos`
+* :func:`operator.pow`
+* :func:`operator.rshift`
+* :func:`operator.sub`
+* :func:`operator.truediv`
+* :func:`operator.xor`
+
+
+NumPy support
+=============
+
+Due to the CUDA programming model, dynamic memory allocation inside a kernel is
+inefficient and is often not needed. Numba disallows any memory-allocating
+features. This disables a large number of NumPy APIs. For best performance,
+users should write code such that each thread is dealing with a single element
+at a time.
+
+Supported NumPy features:
+
+* accessing `ndarray` attributes `.shape`, `.strides`, `.ndim`, `.size`, etc.
+* scalar ufuncs that have equivalents in the `math` module; e.g.
+  ``np.sin(x[0])``, where ``x`` is a 1D array.
+* indexing and slicing work.
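+
+For instance, a kernel that stays within this supported subset might look
+like the following sketch, which reads a single element, applies a scalar
+ufunc equivalent, and writes the result back:
+
+.. code-block:: python
+
+   from numba import cuda
+   import numpy as np
+
+   @cuda.jit
+   def sin_kernel(x, out):
+       i = cuda.grid(1)
+       if i < x.shape[0]:          # ndarray attribute access
+           out[i] = np.sin(x[i])   # scalar ufunc on a single element
+
+   x = cuda.to_device(np.linspace(0, 1, 64))
+   out = cuda.device_array_like(x)
+   sin_kernel[1, 64](x, out)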
+
+Unsupported NumPy features:
+
+* array creation APIs.
+* array methods.
+* functions that return a new array.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-functions.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-functions.rst
new file mode 100644
index 000000000..4fba8c66f
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-functions.rst
@@ -0,0 +1,15 @@
+
+Writing Device Functions
+========================
+
+CUDA device functions can only be invoked from within the device (by a kernel
+or another device function). To define a device function::
+
+   from numba import cuda
+
+   @cuda.jit(device=True)
+   def a_device_function(a, b):
+       return a + b
+
+Unlike a kernel function, a device function can return a value, like normal
+functions.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-management.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-management.rst
new file mode 100644
index 000000000..8f9beb4db
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/device-management.rst
@@ -0,0 +1,92 @@
+
+Device management
+=================
+
+For multi-GPU machines, users may want to select which GPU to use.
+By default, the CUDA driver selects the fastest GPU as device 0,
+which is the default device used by Numba.
+
+The features introduced on this page are generally not of interest
+unless working with systems hosting/offering more than one CUDA-capable GPU.
+
+Device Selection
+----------------
+
+If at all required, device selection must be done before any CUDA feature is
+used.
+
+::
+
+   from numba import cuda
+   cuda.select_device(0)
+
+The device can be closed by:
+
+::
+
+   cuda.close()
+
+Users can then create a new context with another device.
+
+::
+
+   cuda.select_device(1)  # assuming we have 2 GPUs
+
+
+.. function:: numba.cuda.select_device(device_id)
+   :noindex:
+
+   Create a new CUDA context for the selected *device_id*. *device_id*
+   should be the number of the device (starting from 0; the device order
+   is determined by the CUDA libraries). The context is associated with
+   the current thread. Numba currently allows only one context per thread.
+
+   If successful, this function returns a device instance.
+
+   .. XXX document device instances?
+
+
+.. function:: numba.cuda.close
+   :noindex:
+
+   Explicitly close all contexts in the current thread.
+
+   .. note::
+      Compiled functions are associated with the CUDA context.
+      This makes it not very useful to close and create new devices, though it
+      is certainly useful for choosing which device to use when the machine
+      has multiple GPUs.
+
+The Device List
+===============
+
+The Device List is a list of all the GPUs in the system, and can be indexed to
+obtain a context manager that ensures execution on the selected GPU.
+
+.. attribute:: numba.cuda.gpus
+   :noindex:
+.. attribute:: numba.cuda.cudadrv.devices.gpus
+
+:py:data:`numba.cuda.gpus` is an instance of the ``_DeviceList`` class, from
+which the current GPU context can also be retrieved:
+
+.. autoclass:: numba.cuda.cudadrv.devices._DeviceList
+   :members: current
+   :noindex:
+
+
+Device UUIDs
+============
+
+The UUID of a device (equal to that returned by ``nvidia-smi -L``) is available
+in the :attr:`uuid ` attribute of a CUDA
+device object.
+
+For example, to obtain the UUID of the current device:
+
+.. code-block:: python
+
+   dev = cuda.current_context().device
+   # prints e.g. "GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643"
+   print(dev.uuid)
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/examples.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/examples.rst
new file mode 100644
index 000000000..793d13ba2
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/examples.rst
@@ -0,0 +1,527 @@
+
+========
+Examples
+========
+
+.. _cuda-vecadd:
+
+Vector Addition
+===============
+This example uses Numba to create on-device arrays and a vector addition kernel;
+it is a warmup for learning how to write GPU kernels using Numba. We'll begin
+with some required imports:
+
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_vecadd.py
+   :language: python
+   :caption: from ``test_ex_vecadd`` in ``numba/cuda/tests/doc_examples/test_vecadd.py``
+   :start-after: ex_vecadd.import.begin
+   :end-before: ex_vecadd.import.end
+   :dedent: 8
+   :linenos:
+
+The following function is the kernel. Note that it is defined in terms of Python
+variables with unspecified types. When the kernel is launched, Numba will
+examine the types of the arguments that are passed at runtime and generate a
+CUDA kernel specialized for them.
+
+Note that Numba kernels do not return values and must write any output into
+arrays passed in as parameters (this is similar to the requirement that CUDA
+C/C++ kernels have ``void`` return type). Here we pass in ``c`` for the results
+to be written into.
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_vecadd.py
+   :language: python
+   :caption: from ``test_ex_vecadd`` in ``numba/cuda/tests/doc_examples/test_vecadd.py``
+   :start-after: ex_vecadd.kernel.begin
+   :end-before: ex_vecadd.kernel.end
+   :dedent: 8
+   :linenos:
+
+:func:`cuda.to_device() ` can be used to create device-side
+copies of arrays. :func:`cuda.device_array_like()
+` creates an uninitialized array of the same shape
+and type as an existing array. Here we transfer two vectors and create an empty
+vector to hold our results:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_vecadd.py
+   :language: python
+   :caption: from ``test_ex_vecadd`` in ``numba/cuda/tests/doc_examples/test_vecadd.py``
+   :start-after: ex_vecadd.allocate.begin
+   :end-before: ex_vecadd.allocate.end
+   :dedent: 8
+   :linenos:
+
+A call to :meth:`forall() ` generates
+an appropriate launch configuration with a 1D grid (see
+:ref:`cuda-kernel-invocation`) for a given data size and is often the simplest
+way of launching a kernel:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_vecadd.py
+   :language: python
+   :caption: from ``test_ex_vecadd`` in ``numba/cuda/tests/doc_examples/test_vecadd.py``
+   :start-after: ex_vecadd.forall.begin
+   :end-before: ex_vecadd.forall.end
+   :dedent: 8
+   :linenos:
+
+This prints:
+
+.. code-block:: none
+
+   [0.73548323 1.32061059 0.12582968 ... 1.25925809 1.49335059 1.59315414]
+
+One can also configure the grid manually using the subscripting syntax. The
+following example launches a grid with sufficient threads to operate on every
+vector element:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_vecadd.py
+   :language: python
+   :caption: from ``test_ex_vecadd`` in ``numba/cuda/tests/doc_examples/test_vecadd.py``
+   :start-after: ex_vecadd.launch.begin
+   :end-before: ex_vecadd.launch.end
+   :dedent: 8
+   :linenos:
+
+This also prints:
+
+.. code-block:: none
+
+   [0.73548323 1.32061059 0.12582968 ... 1.25925809 1.49335059 1.59315414]
+
+..
_cuda-laplace: + +1D Heat Equation +===================== +This example solves Laplace's equation in one dimension for a certain set of initial +conditions and boundary conditions. A full discussion of Laplace's equation is out of +scope for this documentation, but it will suffice to say that it describes how heat +propagates through an object over time. It works by discretizing the problem in two ways: + +1. The domain is partitioned into a mesh of points that each have an individual temperature. +2. Time is partitioned into discrete intervals that are advanced forward sequentially. + +Then, the following assumption is applied: The temperature of a point after some interval +has passed is some weighted average of the temperature of the points that are directly +adjacent to it. Intuitively, if all the points in the domain are very hot +and a single point in the middle is very cold, as time passes, the hot points will cause +the cold one to heat up and the cold point will cause the surrounding hot pieces to cool +slightly. Simply put, the heat spreads throughout the object. + +We can implement this simulation using a Numba kernel. Let's start simple by assuming +we have a one dimensional object which we'll represent with an array of values. The position +of the element in the array is the position of a point within the object, and the value +of the element represents the temperature. + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_laplace.py + :language: python + :caption: from ``test_ex_laplace`` in ``numba/cuda/tests/doc_examples/test_laplace.py`` + :start-after: ex_laplace.import.begin + :end-before: ex_laplace.import.end + :dedent: 8 + :linenos: + + +Some initial setup here. Let's make one point in the center of the object very hot. + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_laplace.py + :language: python + :caption: from ``test_ex_laplace`` in ``numba/cuda/tests/doc_examples/test_laplace.py`` + :start-after: ex_laplace.allocate.begin + :end-before: ex_laplace.allocate.end + :dedent: 8 + :linenos: + +The initial state of the problem can be visualized as: + +.. image:: laplace_initial.svg + +In our kernel each thread will be responsible for managing the temperature update for a single element +in a loop over the desired number of timesteps. The kernel is below. Note the use of cooperative group +synchronization and the use of two buffers swapped at each iteration to avoid race conditions. See +:func:`numba.cuda.cg.this_grid() ` for details. + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_laplace.py + :language: python + :caption: from ``test_ex_laplace`` in ``numba/cuda/tests/doc_examples/test_laplace.py`` + :start-after: ex_laplace.kernel.begin + :end-before: ex_laplace.kernel.end + :dedent: 8 + :linenos: + + +Calling the kernel: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_laplace.py + :language: python + :caption: from ``test_ex_laplace`` in ``numba/cuda/tests/doc_examples/test_laplace.py`` + :start-after: ex_laplace.launch.begin + :end-before: ex_laplace.launch.end + :dedent: 8 + :linenos: + + +Plotting the final data shows an arc that is highest where +the object was hot initially and gradually sloping down to zero towards the +edges where the temperature is fixed at zero. In the limit of infinite time, +the arc will flatten out completely. + +.. image:: laplace_final.svg + +.. 
_cuda_reduction_shared:
+
+Shared Memory Reduction
+=======================
+Numba exposes many CUDA features, including :ref:`shared memory
+`. To demonstrate shared memory, let's reimplement a
+famous CUDA solution for summing a vector, which works by "folding" the data up
+using a successively smaller number of threads.
+
+
+Note that this is a fairly naive implementation, and there are more efficient
+ways of implementing reductions using Numba - see :ref:`cuda_montecarlo` for an
+example.
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_reduction.py
+   :language: python
+   :caption: from ``test_ex_reduction`` in ``numba/cuda/tests/doc_examples/test_reduction.py``
+   :start-after: ex_reduction.import.begin
+   :end-before: ex_reduction.import.end
+   :dedent: 8
+   :linenos:
+
+Let's create some one dimensional data that we'll use to demonstrate the
+kernel itself:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_reduction.py
+   :language: python
+   :caption: from ``test_ex_reduction`` in ``numba/cuda/tests/doc_examples/test_reduction.py``
+   :start-after: ex_reduction.allocate.begin
+   :end-before: ex_reduction.allocate.end
+   :dedent: 8
+   :linenos:
+
+
+Here is a version of the kernel implemented using Numba:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_reduction.py
+   :language: python
+   :caption: from ``test_ex_reduction`` in ``numba/cuda/tests/doc_examples/test_reduction.py``
+   :start-after: ex_reduction.kernel.begin
+   :end-before: ex_reduction.kernel.end
+   :dedent: 8
+   :linenos:
+
+We can run the kernel and verify that the same result is obtained through
+summing data on the host as follows:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_reduction.py
+   :language: python
+   :caption: from ``test_ex_reduction`` in ``numba/cuda/tests/doc_examples/test_reduction.py``
+   :start-after: ex_reduction.launch.begin
+   :end-before: ex_reduction.launch.end
+   :dedent: 8
+   :linenos:
+
+This algorithm can be greatly improved upon by redesigning the inner loop
+to use sequential memory accesses, and even further by using strategies that
+keep more threads active and working, since in this example most threads quickly
+become idle.
+
+.. _cuda_sessionization:
+
+Dividing Click Data into Sessions
+=================================
+
+
+A common problem in business analytics is that of grouping the activity of
+users of an online platform into sessions, called "sessionization". The idea is
+that users generally traverse through a website and perform various actions
+(clicking something, filling out a form, etc.) in discrete groups. Perhaps a
+customer spends some time shopping for an item in the morning and then again at
+night - often the business is interested in treating these periods as separate
+interactions with their service, and this creates the problem of
+programmatically splitting up activity in some agreed-upon way.
+
+Here we'll illustrate how to write a Numba kernel to solve this problem. We'll
+start with data containing two fields: let ``user_id`` represent a unique ID
+corresponding to an individual customer, and let ``action_time`` be a time that
+some unknown action was taken on the service. Right now, we'll assume there's
+only one type of action, so all there is to know is when it happened.
+
+Our goal will be to create a new column called ``session_id``, which contains a
+label corresponding to a unique session. We'll define the boundary between
+sessions as when there has been at least one hour between clicks.
+
+
+..
literalinclude:: ../../../numba/cuda/tests/doc_examples/test_sessionize.py + :language: python + :caption: from ``test_ex_sessionize`` in ``numba/cuda/tests/doc_examples/test_sessionize.py`` + :start-after: ex_sessionize.import.begin + :end-before: ex_sessionize.import.end + :dedent: 8 + :linenos: + +Here is a solution using Numba: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_sessionize.py + :language: python + :caption: from ``test_ex_sessionize`` in ``numba/cuda/tests/doc_examples/test_sessionize.py`` + :start-after: ex_sessionize.kernel.begin + :end-before: ex_sessionize.kernel.end + :dedent: 8 + :linenos: + +Let's generate some data and try out the kernel: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_sessionize.py + :language: python + :caption: from ``test_ex_sessionize`` in ``numba/cuda/tests/doc_examples/test_sessionize.py`` + :start-after: ex_sessionize.allocate.begin + :end-before: ex_sessionize.allocate.end + :dedent: 8 + :linenos: + +As can be seen above, the kernel successfully divided the first three datapoints from the second three for the first user ID, +and a similar pattern is seen throughout. + +.. _cuda_reuse_function: + +JIT Function CPU-GPU Compatibility +================================== + +This example demonstrates how ``numba.jit`` can be used to jit compile a function for the CPU, while at the same time making +it available for use inside CUDA kernels. This can be very useful for users that are migrating workflows from CPU to GPU as +they can directly reuse potential business logic with fewer code changes. + +Take the following example function: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py + :language: python + :caption: from ``test_ex_cpu_gpu_compat`` in ``numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py`` + :start-after: ex_cpu_gpu_compat.define.begin + :end-before: ex_cpu_gpu_compat.define.end + :dedent: 8 + :linenos: + +The function ``business_logic`` can be run standalone in compiled form on the CPU: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py + :language: python + :caption: from ``test_ex_cpu_gpu_compat`` in ``numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py`` + :start-after: ex_cpu_gpu_compat.cpurun.begin + :end-before: ex_cpu_gpu_compat.cpurun.end + :dedent: 8 + :linenos: + +It can also be directly reused threadwise inside a GPU kernel. For example one may +generate some vectors to represent ``x``, ``y``, and ``z``: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py + :language: python + :caption: from ``test_ex_cpu_gpu_compat`` in ``numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py`` + :start-after: ex_cpu_gpu_compat.allocate.begin + :end-before: ex_cpu_gpu_compat.allocate.end + :dedent: 8 + :linenos: + +And a numba kernel referencing the decorated function: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py + :language: python + :caption: from ``test_ex_cpu_gpu_compat`` in ``numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py`` + :start-after: ex_cpu_gpu_compat.usegpu.begin + :end-before: ex_cpu_gpu_compat.usegpu.end + :dedent: 8 + :linenos: + +This kernel can be invoked in the normal way: + +.. 
literalinclude:: ../../../numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py
+   :language: python
+   :caption: from ``test_ex_cpu_gpu_compat`` in ``numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py``
+   :start-after: ex_cpu_gpu_compat.launch.begin
+   :end-before: ex_cpu_gpu_compat.launch.end
+   :dedent: 8
+   :linenos:
+
+.. _cuda_montecarlo:
+
+Monte Carlo Integration
+=======================
+
+This example shows how to use Numba to approximate the value of a definite
+integral by rapidly generating random numbers on the GPU. A detailed
+description of the mathematical mechanics of Monte Carlo integration is out of
+the scope of the example, but it can briefly be described as an averaging
+process where the area under the curve is approximated by taking the average of
+many rectangles formed by its function values.
+
+In addition, this example shows how to perform reductions in Numba using the
+:func:`cuda.reduce() ` API.
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_montecarlo.py
+   :language: python
+   :caption: from ``test_ex_montecarlo`` in ``numba/cuda/tests/doc_examples/test_montecarlo.py``
+   :start-after: ex_montecarlo.import.begin
+   :end-before: ex_montecarlo.import.end
+   :dedent: 8
+   :linenos:
+
+Let's create a variable to control the number of samples drawn:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_montecarlo.py
+   :language: python
+   :caption: from ``test_ex_montecarlo`` in ``numba/cuda/tests/doc_examples/test_montecarlo.py``
+   :start-after: ex_montecarlo.define.begin
+   :end-before: ex_montecarlo.define.end
+   :dedent: 8
+   :linenos:
+
+
+The following kernel implements the main integration routine:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_montecarlo.py
+   :language: python
+   :caption: from ``test_ex_montecarlo`` in ``numba/cuda/tests/doc_examples/test_montecarlo.py``
+   :start-after: ex_montecarlo.kernel.begin
+   :end-before: ex_montecarlo.kernel.end
+   :dedent: 8
+   :linenos:
+
+This convenience function calls the kernel and performs some preprocessing and
+post-processing steps. Note the use of Numba's reduction API to take the sum of
+the array and compute the final result:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_montecarlo.py
+   :language: python
+   :caption: from ``test_ex_montecarlo`` in ``numba/cuda/tests/doc_examples/test_montecarlo.py``
+   :start-after: ex_montecarlo.callfunc.begin
+   :end-before: ex_montecarlo.callfunc.end
+   :dedent: 8
+   :linenos:
+
+
+We can now use ``mc_integrate`` to compute the definite integral of this
+function between two limits:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_montecarlo.py
+   :language: python
+   :caption: from ``test_ex_montecarlo`` in ``numba/cuda/tests/doc_examples/test_montecarlo.py``
+   :start-after: ex_montecarlo.launch.begin
+   :end-before: ex_montecarlo.launch.end
+   :dedent: 8
+   :linenos:
+
+
+.. _cuda-matmul:
+
+Matrix multiplication
+=====================
+First, import the modules needed for this example:
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py
+   :language: python
+   :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py``
+   :start-after: magictoken.ex_import.begin
+   :end-before: magictoken.ex_import.end
+   :dedent: 8
+   :linenos:
+
+Here is a naïve implementation of matrix multiplication using a CUDA kernel:
+
+..
literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py + :language: python + :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py`` + :start-after: magictoken.ex_matmul.begin + :end-before: magictoken.ex_matmul.end + :dedent: 8 + :linenos: + +An example usage of this function is as follows: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py + :language: python + :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py`` + :start-after: magictoken.ex_run_matmul.begin + :end-before: magictoken.ex_run_matmul.end + :dedent: 8 + :linenos: + +This implementation is straightforward and intuitive but performs poorly, +because the same matrix elements will be loaded multiple times from device +memory, which is slow (some devices may have transparent data caches, but +they may not be large enough to hold the entire inputs at once). + +It will be faster if we use a blocked algorithm to reduce accesses to the +device memory. CUDA provides a fast :ref:`shared memory ` +for threads in a block to cooperatively compute on a task. The following +implements a faster version of the square matrix multiplication using shared +memory: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py + :language: python + :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py`` + :start-after: magictoken.ex_fast_matmul.begin + :end-before: magictoken.ex_fast_matmul.end + :dedent: 8 + :linenos: + + +Because the shared memory is a limited resource, the code preloads a small +block at a time from the input arrays. Then, it calls +:func:`~numba.cuda.syncthreads` to wait until all threads have finished +preloading and before doing the computation on the shared memory. +It synchronizes again after the computation to ensure all threads +have finished with the data in shared memory before overwriting it +in the next loop iteration. + +An example usage of the ``fast_matmul`` function is as follows: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py + :language: python + :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py`` + :start-after: magictoken.ex_run_fast_matmul.begin + :end-before: magictoken.ex_run_fast_matmul.end + :dedent: 8 + :linenos: + + +This passes a :ref:`CUDA memory check test `, which +can help with debugging. Running the code above produces the following output: + +.. code-block:: none + + $ python fast_matmul.py + [[ 6. 6. 6. 6.] + [22. 22. 22. 22.] + [38. 38. 38. 38.] + [54. 54. 54. 54.]] + [[ 6. 6. 6. 6.] + [22. 22. 22. 22.] + [38. 38. 38. 38.] + [54. 54. 54. 54.]] + +.. note:: For high performance matrix multiplication in CUDA, see also the `CuPy implementation `_. + +The approach outlined here generalizes to non-square matrix multiplication as +follows by adjusting the ``blockspergrid`` variable: + +Again, here is an example usage: + +.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_matmul.py + :language: python + :caption: from ``test_ex_matmul`` in ``numba/cuda/tests/doc_examples/test_matmul.py`` + :start-after: magictoken.ex_run_nonsquare.begin + :end-before: magictoken.ex_run_nonsquare.end + :dedent: 8 + :linenos: + +and the corresponding output: + +.. code-block:: none + + $ python nonsquare_matmul.py + [[ 253. 253. 253. 253. 253. 253. 253.] + [ 782. 782. 782. 782. 782. 782. 782.] + [1311. 1311. 1311. 1311. 1311. 1311. 1311.] + [1840. 1840. 1840. 1840. 1840. 1840. 1840.] + [2369. 
2369. 2369. 2369. 2369. 2369. 2369.]] + [[ 253. 253. 253. 253. 253. 253. 253.] + [ 782. 782. 782. 782. 782. 782. 782.] + [1311. 1311. 1311. 1311. 1311. 1311. 1311.] + [1840. 1840. 1840. 1840. 1840. 1840. 1840.] + [2369. 2369. 2369. 2369. 2369. 2369. 2369.]] diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/external-memory.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/external-memory.rst new file mode 100644 index 000000000..28a8f59f0 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/external-memory.rst @@ -0,0 +1,320 @@ +.. _cuda-emm-plugin: + +================================================= +External Memory Management (EMM) Plugin interface +================================================= + +The :ref:`CUDA Array Interface ` enables sharing of data +between different Python libraries that access CUDA devices. However, each +library manages its own memory distinctly from the others. For example: + +- By default, Numba allocates memory on CUDA devices by interacting with the + CUDA driver API to call functions such as ``cuMemAlloc`` and ``cuMemFree``, + which is suitable for many use cases. +- The RAPIDS libraries (cuDF, cuML, etc.) use the `RAPIDS Memory Manager (RMM) + `_ for allocating device memory. +- `CuPy `_ includes a `memory pool implementation + `_ for both + device and pinned memory. + +When multiple CUDA-aware libraries are used together, it may be preferable for +Numba to defer to another library for memory management. The EMM Plugin +interface facilitates this, by enabling Numba to use another CUDA-aware library +for all allocations and deallocations. + +An EMM Plugin is used to facilitate the use of an external library for memory +management. An EMM Plugin can be a part of an external library, or could be +implemented as a separate library. + + +Overview of External Memory Management +====================================== + +When an EMM Plugin is in use (see :ref:`setting-emm-plugin`), Numba will make +memory allocations and deallocations through the Plugin. It will never directly call +functions such as ``cuMemAlloc``, ``cuMemFree``, etc. + +EMM Plugins always take responsibility for the management of device memory. +However, not all CUDA-aware libraries also support managing host memory, so a +facility for Numba to continue the management of host memory whilst ceding +control of device memory to the EMM is provided (see +:ref:`host-only-cuda-memory-manager`). + + +Effects on Deallocation Strategies +---------------------------------- + +Numba's internal :ref:`deallocation-behavior` is designed to increase efficiency +by deferring deallocations until a significant quantity are pending. It also +provides a mechanism for preventing deallocations entirely during critical +sections, using the :func:`~numba.cuda.defer_cleanup` context manager. + +When an EMM Plugin is in use, the deallocation strategy is implemented by the +EMM, and Numba's internal deallocation mechanism is not used. The EMM +Plugin could implement: + +- A similar strategy to the Numba deallocation behaviour, or +- Something more appropriate to the plugin - for example, deallocated memory + might immediately be returned to a memory pool. + +The ``defer_cleanup`` context manager may behave differently with an EMM Plugin +- an EMM Plugin should be accompanied by documentation of the behaviour of the +``defer_cleanup`` context manager when it is in use. 
For example, a pool +allocator could always immediately return memory to a pool even when the +context manager is in use, but could choose not to free empty pools until +``defer_cleanup`` is not in use. + + +Management of other objects +--------------------------- + +In addition to memory, Numba manages the allocation and deallocation of +:ref:`events `, :ref:`streams `, and modules (a module is a +compiled object, which is generated from ``@cuda.jit``\ -ted functions). The +management of events, streams, and modules is unchanged by the use of an EMM +Plugin. + + +Asynchronous allocation and deallocation +---------------------------------------- + +The present EMM Plugin interface does not provide support for asynchronous +allocation and deallocation. This may be added to a future version of the +interface. + + +Implementing an EMM Plugin +========================== + +An EMM Plugin is implemented by deriving from +:class:`~numba.cuda.BaseCUDAMemoryManager`. A summary of considerations for the +implementation follows: + +- Numba instantiates one instance of the EMM Plugin class per context. The + context that owns an EMM Plugin object is accessible through ``self.context``, + if required. +- The EMM Plugin is transparent to any code that uses Numba - all its methods + are invoked by Numba, and never need to be called by code that uses Numba. +- The allocation methods ``memalloc``, ``memhostalloc``, and ``mempin``, should + use the underlying library to allocate and/or pin device or host memory, and + construct an instance of a :ref:`memory pointer ` + representing the memory to return back to Numba. These methods are always + called when the current CUDA context is the context that owns the EMM Plugin + instance. +- The ``initialize`` method is called by Numba prior to the first use of the EMM + Plugin object for a context. This method should do anything required to + prepare the underlying library for allocations in the current context. This + method may be called multiple times, and must not invalidate previous state + when it is called. +- The ``reset`` method is called when all allocations in the context are to be + cleaned up. It may be called even prior to ``initialize``, and an EMM Plugin + implementation needs to guard against this. +- To support inter-GPU communication, the ``get_ipc_handle`` method should + provide an :class:`~numba.cuda.IpcHandle` for a given + :class:`~numba.cuda.MemoryPointer` instance. This method is part of the EMM + interface (rather than being handled within Numba) because the base address of + the allocation is only known by the underlying library. Closing an IPC handle + is handled internally within Numba. +- It is optional to provide memory info from the ``get_memory_info`` method, which + provides a count of the total and free memory on the device for the context. + It is preferable to implement the method, but this may not be practical for + all allocators. If memory info is not provided, this method should raise a + :class:`RuntimeError`. +- The ``defer_cleanup`` method should return a context manager that ensures that + expensive cleanup operations are avoided whilst it is active. The nuances of + this will vary between plugins, so the plugin documentation should include an + explanation of how deferring cleanup affects deallocations, and performance in + general. +- The ``interface_version`` property is used to ensure that the plugin version + matches the interface provided by the version of Numba. At present, this + should always be 1. 
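+
+As an illustration of these points, a minimal EMM Plugin skeleton might look
+like the following sketch. It assumes a hypothetical external allocator
+``mylib`` providing ``alloc(size)``, ``free(ptr)``, and
+``ensure_initialized()`` functions; it is not a complete implementation:
+
+.. code-block:: python
+
+   from ctypes import c_uint64
+   from numba import cuda
+
+   class MyEMMPlugin(cuda.HostOnlyCUDAMemoryManager):
+       def memalloc(self, size):
+           # Allocate device memory with the external library
+           # (``mylib`` is hypothetical)
+           ptr = mylib.alloc(size)
+           # The finalizer returns the memory to the external library
+           # once the buffer is no longer needed by Numba
+           finalizer = lambda: mylib.free(ptr)
+           return cuda.MemoryPointer(self.context, c_uint64(ptr), size,
+                                     finalizer=finalizer)
+
+       def initialize(self):
+           # Prepare the external library for allocations in the current
+           # context; must be safe to call more than once
+           mylib.ensure_initialized()
+
+       def get_memory_info(self):
+           # Providing memory info is optional - raise if impractical
+           raise RuntimeError('get_memory_info is not implemented')
+
+       @property
+       def interface_version(self):
+           return 1
+
+Such a plugin would then be installed with
+``cuda.set_memory_manager(MyEMMPlugin)`` (see :ref:`setting-emm-plugin`).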
+
+Full documentation for the base class follows:
+
+.. autoclass:: numba.cuda.BaseCUDAMemoryManager
+   :members: memalloc, memhostalloc, mempin, initialize, get_ipc_handle,
+             get_memory_info, reset, defer_cleanup, interface_version
+   :member-order: bysource
+
+
+.. _host-only-cuda-memory-manager:
+
+The Host-Only CUDA Memory Manager
+---------------------------------
+
+Some external memory managers will support management of on-device memory but
+not host memory. For implementing EMM Plugins using one of these memory
+managers, a partial implementation of a plugin that implements host-side
+allocation and pinning is provided. To use it, derive from
+:class:`~numba.cuda.HostOnlyCUDAMemoryManager` instead of
+:class:`~numba.cuda.BaseCUDAMemoryManager`. Guidelines for using this class
+are:
+
+- The host-only memory manager implements ``memhostalloc`` and ``mempin`` - the
+  EMM Plugin should still implement ``memalloc``.
+- If ``reset`` is overridden, it must also call ``super().reset()`` to allow the
+  host allocations to be cleaned up.
+- If ``defer_cleanup`` is overridden, it must hold an active context manager
+  from ``super().defer_cleanup()`` to ensure that host-side cleanup is also
+  deferred.
+
+Documentation for the methods of :class:`~numba.cuda.HostOnlyCUDAMemoryManager`
+follows:
+
+.. autoclass:: numba.cuda.HostOnlyCUDAMemoryManager
+   :members: memhostalloc, mempin, reset, defer_cleanup
+   :member-order: bysource
+
+
+The IPC Handle Mixin
+--------------------
+
+An implementation of the ``get_ipc_handle()`` function is provided in the
+``GetIpcHandleMixin`` class. This uses the driver API to determine the base
+address of an allocation for opening an IPC handle. If this implementation is
+appropriate for an EMM plugin, it can be added by mixing in the
+``GetIpcHandleMixin`` class:
+
+.. autoclass:: numba.cuda.GetIpcHandleMixin
+   :members: get_ipc_handle
+
+
+Classes and structures of returned objects
+==========================================
+
+This section provides an overview of the classes and structures that need to be
+constructed by an EMM Plugin.
+
+.. _memory-pointers:
+
+Memory Pointers
+---------------
+
+EMM Plugins should construct memory pointer instances that represent their
+allocations, for return to Numba. The appropriate memory pointer class to use in
+each method is:
+
+- :class:`~numba.cuda.MemoryPointer`: returned from ``memalloc``
+- :class:`~numba.cuda.MappedMemory`: returned from ``memhostalloc`` or
+  ``mempin`` when the host memory is mapped into the device memory space.
+- :class:`~numba.cuda.PinnedMemory`: returned from ``memhostalloc`` or
+  ``mempin`` when the host memory is not mapped into the device memory space.
+
+Memory pointers can take a finalizer, which is a function that is called when
+the buffer is no longer needed. Usually the finalizer will make a call to the
+memory management library (either internal to Numba, or external if allocated
+by an EMM Plugin) to inform it that the memory is no longer required, and that
+it could potentially be freed and/or unpinned. The memory manager may choose to
+defer actually cleaning up the memory to any later time after the finalizer
+runs - it is not required to free the buffer immediately.
+
+Documentation for the memory pointer classes follows.
+
+.. autoclass:: numba.cuda.MemoryPointer
+
+The ``AutoFreePointer`` class need not be used directly, but is documented here
+as it is subclassed by :class:`numba.cuda.MappedMemory`:
+
+.. autoclass:: numba.cuda.cudadrv.driver.AutoFreePointer
+
+..
autoclass:: numba.cuda.MappedMemory
+
+.. autoclass:: numba.cuda.PinnedMemory
+
+
+Memory Info
+-----------
+
+If an implementation of
+:meth:`~numba.cuda.BaseCUDAMemoryManager.get_memory_info` is to provide a
+result, then it should return an instance of the ``MemoryInfo`` named tuple:
+
+.. autoclass:: numba.cuda.MemoryInfo
+
+
+IPC
+---
+
+An instance of ``IpcHandle`` is required to be returned from an implementation
+of :meth:`~numba.cuda.BaseCUDAMemoryManager.get_ipc_handle`:
+
+.. autoclass:: numba.cuda.IpcHandle
+
+Guidance for constructing an IPC handle in the context of implementing an EMM
+Plugin:
+
+- The ``memory`` parameter passed to the ``get_ipc_handle`` method of an EMM
+  Plugin can be passed as the ``base`` parameter.
+- A suitable type for the ``handle`` can be constructed as ``ctypes.c_byte *
+  64``. The data for ``handle`` must be populated using a method for obtaining a
+  CUDA IPC handle appropriate to the underlying library.
+- ``size`` should match the size of the original allocation, which can be
+  obtained with ``memory.size`` in ``get_ipc_handle``.
+- An appropriate value for ``source_info`` can be created by calling
+  ``self.context.device.get_device_identity()``.
+- If the underlying memory does not point to the base of an allocation returned
+  by the CUDA driver or runtime API (e.g. if a pool allocator is in use) then
+  the ``offset`` from the base must be provided.
+
+
+.. _setting-emm-plugin:
+
+Setting the EMM Plugin
+======================
+
+By default, Numba uses its internal memory management - if an EMM Plugin is to
+be used, it must be configured. There are two mechanisms for configuring the use
+of an EMM Plugin: an environment variable, and a function.
+
+
+Environment variable
+--------------------
+
+A module name can be provided in the environment variable,
+``NUMBA_CUDA_MEMORY_MANAGER``. If this environment variable is set, Numba will
+attempt to import the module, and use its ``_numba_memory_manager`` global
+variable as the memory manager class. This is primarily useful for running the
+Numba test suite with an EMM Plugin, e.g.:
+
+.. code::
+
+   $ NUMBA_CUDA_MEMORY_MANAGER=rmm python -m numba.runtests numba.cuda.tests
+
+
+Function
+--------
+
+The :func:`~numba.cuda.set_memory_manager` function can be used to set the
+memory manager at runtime. This should be called prior to the initialization of
+any contexts, as EMM Plugin instances are instantiated along with contexts.
+
+.. autofunction:: numba.cuda.set_memory_manager
+
+
+Resetting the memory manager
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is recommended that the memory manager is set once prior to using any CUDA
+functionality, and left unchanged for the remainder of execution. It is possible
+to set the memory manager multiple times, noting the following:
+
+* At the time of their creation, contexts are bound to an instance of a memory
+  manager for their lifetime.
+* Changing the memory manager will have no effect on existing contexts - only
+  contexts created after the memory manager was updated will use instances of
+  the new memory manager.
+* :func:`numba.cuda.close` can be used to destroy contexts after setting the
+  memory manager so that they get re-created with the new memory manager.
+
+  - This will invalidate any arrays, streams, events, and modules owned by the
+    context.
+  - Attempting to use invalid arrays, streams, or events will likely fail with
+    an exception being raised due to a ``CUDA_ERROR_INVALID_CONTEXT`` or
+    ``CUDA_ERROR_CONTEXT_IS_DESTROYED`` return code from a Driver API function.
+  - Attempting to use an invalid module will result in similar errors, or in
+    some cases a segmentation fault / access violation.
+
+.. note:: The invalidation of modules means that all functions compiled with
+          ``@cuda.jit`` prior to context destruction will need to be
+          redefined, as the code underlying them will also have been unloaded
+          from the GPU.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/faq.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/faq.rst
new file mode 100644
index 000000000..b4392e6ab
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/faq.rst
@@ -0,0 +1,20 @@
+
+.. _cudafaq:
+
+=================================================
+CUDA Frequently Asked Questions
+=================================================
+
+nvprof reports "No kernels were profiled"
+-----------------------------------------
+
+When using the ``nvprof`` tool to profile Numba jitted code for the CUDA
+target, the output contains ``No kernels were profiled`` even though there
+are clearly running kernels present - what is going on?
+
+This is quite likely due to the profiling data not being flushed on program
+exit, see the `NVIDIA CUDA documentation
+`_ for
+details. To fix this, simply add a call to ``numba.cuda.profile_stop()`` prior
+to the exit point in your program (or wherever you want to stop profiling).
+For more on CUDA profiling support in Numba, see :ref:`cuda-profiling`.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/fastmath.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/fastmath.rst
new file mode 100644
index 000000000..fb9de10ea
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/fastmath.rst
@@ -0,0 +1,36 @@
+
+.. _cuda-fast-math:
+
+CUDA Fast Math
+==============
+
+As noted in :ref:`fast-math`, for certain classes of applications that utilize
+floating point, strict IEEE-754 conformance is not required. For this subset of
+applications, performance speedups may be possible.
+
+The CUDA target implements :ref:`fast-math` behavior with two differences.
+
+* First, the ``fastmath`` argument to the :func:`@jit decorator
+  ` is limited to the values ``True`` and ``False``.
+  When ``True``, the following optimizations are enabled:
+
+  - Flushing of denormals to zero.
+  - Use of a fast approximation to the square root function.
+  - Use of a fast approximation to the division operation.
+  - Contraction of multiply and add operations into single fused multiply-add
+    operations.
+
+  See the `documentation for nvvmCompileProgram `_ for more details of these
+  optimizations.
+
+* Secondly, calls to a subset of math module functions on ``float32`` operands
+  will be implemented using fast approximate implementations from the libdevice
+  library.
+
+  - :func:`math.cos`: Implemented using `__nv_fast_cosf `_.
+  - :func:`math.sin`: Implemented using `__nv_fast_sinf `_.
+  - :func:`math.tan`: Implemented using `__nv_fast_tanf `_.
+  - :func:`math.exp`: Implemented using `__nv_fast_expf `_.
+  - :func:`math.log2`: Implemented using `__nv_fast_log2f `_.
+  - :func:`math.log10`: Implemented using `__nv_fast_log10f `_.
+  - :func:`math.log`: Implemented using `__nv_fast_logf `_.
+  - :func:`math.pow`: Implemented using `__nv_fast_powf `_.
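+
+For example, the following sketch (assuming a CUDA-capable device; the data
+and launch configuration are arbitrary) opts a kernel in to these
+optimizations, so the division and the ``math.sin`` call on ``float32``
+operands become candidates for the fast approximate implementations listed
+above::
+
+    import math
+
+    import numpy as np
+    from numba import cuda
+
+    @cuda.jit(fastmath=True)
+    def fast_kernel(x, out):
+        i = cuda.grid(1)
+        if i < x.size:
+            out[i] = math.sin(x[i]) / (x[i] + 1.0)
+
+    x = cuda.to_device(np.linspace(0, 1, 1024, dtype=np.float32))
+    out = cuda.device_array_like(x)
+    fast_kernel[4, 256](x, out)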
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/index.rst
new file mode 100644
index 000000000..686b4ad0b
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/index.rst
@@ -0,0 +1,29 @@
+
+.. _cuda-index:
+
+Numba for CUDA GPUs
+===================
+
+.. toctree::
+
+   overview.rst
+   kernels.rst
+   memory.rst
+   device-functions.rst
+   cudapysupported.rst
+   fastmath.rst
+   intrinsics.rst
+   cooperative_groups.rst
+   random.rst
+   device-management.rst
+   examples.rst
+   simulator.rst
+   reduction.rst
+   ufunc.rst
+   ipc.rst
+   cuda_array_interface.rst
+   external-memory.rst
+   bindings.rst
+   cuda_ffi.rst
+   caching.rst
+   faq.rst
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/intrinsics.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/intrinsics.rst
new file mode 100644
index 000000000..521c1d918
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/intrinsics.rst
@@ -0,0 +1,58 @@
+
+Supported Atomic Operations
+===========================
+
+Numba provides access to some of the atomic operations supported in CUDA. Those
+that are presently implemented are as follows:
+
+.. automodule:: numba.cuda
+   :members: atomic
+   :noindex:
+
+Example
+'''''''
+
+The following code demonstrates the use of :class:`numba.cuda.atomic.max` to
+find the maximum value in an array. Note that this is not the most efficient
+way of finding a maximum in this case, but it serves as an example::
+
+    from numba import cuda
+    import numpy as np
+
+    @cuda.jit
+    def max_example(result, values):
+        """Find the maximum value in values and store in result[0]"""
+        tid = cuda.threadIdx.x
+        bid = cuda.blockIdx.x
+        bdim = cuda.blockDim.x
+        i = (bid * bdim) + tid
+        cuda.atomic.max(result, 0, values[i])
+
+
+    arr = np.random.rand(16384)
+    result = np.zeros(1, dtype=np.float64)
+
+    max_example[256,64](result, arr)
+    print(result[0])  # Found using cuda.atomic.max
+    print(max(arr))   # Print max(arr) for comparison (should be equal!)
+
+
+Multi-dimensional arrays are supported by using a tuple of ints for the index::
+
+
+    @cuda.jit
+    def max_example_3d(result, values):
+        """
+        Find the maximum value in values and store in result[0].
+        Both result and values are 3d arrays.
+        """
+        i, j, k = cuda.grid(3)
+        # Atomically store to result[0,1,2] from values[i, j, k]
+        cuda.atomic.max(result, (0, 1, 2), values[i, j, k])
+
+    arr = np.random.rand(1000).reshape(10,10,10)
+    result = np.zeros((3, 3, 3), dtype=np.float64)
+    max_example_3d[(2, 2, 2), (5, 5, 5)](result, arr)
+    print(result[0, 1, 2], '==', np.max(arr))
+
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ipc.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ipc.rst
new file mode 100644
index 000000000..ce0f508e5
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ipc.rst
@@ -0,0 +1,36 @@
+===================
+Sharing CUDA Memory
+===================
+
+.. _cuda-ipc-memory:
+
+Sharing between processes
+=========================
+
+Sharing between processes is implemented using the Legacy CUDA IPC API
+(functions whose names begin with ``cuIpc``), and is supported only on Linux.
+
+
+Export device array to another process
+--------------------------------------
+
+A device array can be shared with another process in the same machine using
+the CUDA IPC API.
+To do so, use the ``.get_ipc_handle()`` method on the device
+array to get an ``IpcArrayHandle`` object, which can be transferred to another
+process.
+
+
+.. automethod:: numba.cuda.cudadrv.devicearray.DeviceNDArray.get_ipc_handle
+   :noindex:
+
+.. autoclass:: numba.cuda.cudadrv.devicearray.IpcArrayHandle
+   :members: open, close
+
+
+Import IPC memory from another process
+--------------------------------------
+
+The following function is used to open an IPC handle from another process
+as a device array.
+
+.. automethod:: numba.cuda.open_ipc_array
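+
+A minimal sketch of the whole flow follows (assuming Linux and a working CUDA
+installation; the array contents are arbitrary)::
+
+    import multiprocessing as mp
+
+    import numpy as np
+    from numba import cuda
+
+    def child(handle):
+        # Open the transferred handle as a device array in this process.
+        with handle as d_arr:
+            print(d_arr.copy_to_host())
+
+    if __name__ == '__main__':
+        ctx = mp.get_context('spawn')
+        d_arr = cuda.to_device(np.arange(16))
+        handle = d_arr.get_ipc_handle()  # picklable IpcArrayHandle
+        p = ctx.Process(target=child, args=(handle,))
+        p.start()
+        p.join()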
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/kernels.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/kernels.rst
new file mode 100644
index 000000000..b4af2ccf8
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/kernels.rst
@@ -0,0 +1,233 @@
+
+====================
+Writing CUDA Kernels
+====================
+
+Introduction
+============
+
+CUDA has an execution model unlike the traditional sequential model used
+for programming CPUs. In CUDA, the code you write will be executed by
+multiple threads at once (often hundreds or thousands). Your solution will
+be modeled by defining a thread hierarchy of *grid*, *blocks* and *threads*.
+
+Numba's CUDA support exposes facilities to declare and manage this
+hierarchy of threads. The facilities are largely similar to those
+exposed by NVidia's CUDA C language.
+
+Numba also exposes three kinds of GPU memory: global :ref:`device memory
+` (the large, relatively slow
+off-chip memory that's connected to the GPU itself), on-chip
+:ref:`shared memory ` and :ref:`local memory `.
+For all but the simplest algorithms, it is important that you carefully
+consider how to use and access memory in order to minimize bandwidth
+requirements and contention.
+
+
+Kernel declaration
+==================
+
+A *kernel function* is a GPU function that is meant to be called from CPU
+code (*). This gives it two fundamental characteristics:
+
+* kernels cannot explicitly return a value; all result data must be written
+  to an array passed to the function (if computing a scalar, you will
+  probably pass a one-element array);
+
+* kernels explicitly declare their thread hierarchy when called: i.e.
+  the number of thread blocks and the number of threads per block
+  (note that while a kernel is compiled once, it can be called multiple
+  times with different block sizes or grid sizes).
+
+At first sight, writing a CUDA kernel with Numba looks very much like
+writing a :term:`JIT function` for the CPU::
+
+    @cuda.jit
+    def increment_by_one(an_array):
+        """
+        Increment all array elements by one.
+        """
+        # code elided here; read further for different implementations
+
+(*) Note: newer CUDA devices support device-side kernel launching; this
+feature is called *dynamic parallelism*, but Numba does not currently
+support it.
+
+
+.. _cuda-kernel-invocation:
+
+Kernel invocation
+=================
+
+A kernel is typically launched in the following way::
+
+    threadsperblock = 32
+    blockspergrid = (an_array.size + (threadsperblock - 1)) // threadsperblock
+    increment_by_one[blockspergrid, threadsperblock](an_array)
+
+We notice two steps here:
+
+* Instantiate the kernel proper, by specifying a number of blocks
+  (or "blocks per grid"), and a number of threads per block. The product
+  of the two will give the total number of threads launched. Kernel
+  instantiation is done by taking the compiled kernel function
+  (here ``increment_by_one``) and indexing it with a tuple of integers.
+
+* Running the kernel, by passing it the input array (and any separate
+  output arrays if necessary). Kernels run asynchronously: launches queue their
+  execution on the device and then return immediately. You can use
+  :func:`cuda.synchronize() ` to wait for all previous
+  kernel launches to finish executing.
+
+.. note:: Passing an array that resides in host memory will implicitly cause a
+   copy back to the host, which will be synchronous. In this case, the kernel
+   launch will not return until the data is copied back, and therefore appears
+   to execute synchronously.
+
+Choosing the block size
+-----------------------
+
+It might seem curious to have a two-level hierarchy when declaring the
+number of threads needed by a kernel. The block size (i.e. number of
+threads per block) is often crucial:
+
+* On the software side, the block size determines how many threads
+  share a given area of :ref:`shared memory `.
+
+* On the hardware side, the block size must be large enough for full
+  occupation of execution units; recommendations can be found in the
+  `CUDA C Programming Guide`_.
+
+Multi-dimensional blocks and grids
+----------------------------------
+
+To help deal with multi-dimensional arrays, CUDA allows you to specify
+multi-dimensional blocks and grids. In the example above, you could
+make ``blockspergrid`` and ``threadsperblock`` tuples of one, two
+or three integers. Compared to 1D declarations of equivalent sizes,
+this doesn't change anything about the efficiency or behaviour of the
+generated code, but can help you write your algorithms in a more natural way.
+
+
+Thread positioning
+==================
+
+When running a kernel, the kernel function's code is executed by every
+thread once. It therefore has to know which thread it is in, in order
+to know which array element(s) it is responsible for (complex algorithms
+may define more complex responsibilities, but the underlying principle
+is the same).
+
+One way is for the thread to determine its position in the grid and block
+and manually compute the corresponding array position::
+
+    @cuda.jit
+    def increment_by_one(an_array):
+        # Thread id in a 1D block
+        tx = cuda.threadIdx.x
+        # Block id in a 1D grid
+        ty = cuda.blockIdx.x
+        # Block width, i.e. number of threads per block
+        bw = cuda.blockDim.x
+        # Compute flattened index inside the array
+        pos = tx + ty * bw
+        if pos < an_array.size:  # Check array boundaries
+            an_array[pos] += 1
+
+.. note:: Unless you are sure the total number of threads (the product of the
+   block and grid sizes) exactly matches your array size, you **must** check
+   boundaries as shown above.
+
+:attr:`.threadIdx`, :attr:`.blockIdx`, :attr:`.blockDim` and :attr:`.gridDim`
+are special objects provided by the CUDA backend for the sole purpose of
+knowing the geometry of the thread hierarchy and the position of the
+current thread within that geometry.
+
+These objects can be 1D, 2D or 3D, depending on how the kernel was
+:ref:`invoked `. To access the value at each
+dimension, use the ``x``, ``y`` and ``z`` attributes of these objects,
+respectively.
+
+.. attribute:: numba.cuda.threadIdx
+   :noindex:
+
+   The thread indices in the current thread block. For 1D blocks, the index
+   (given by the ``x`` attribute) is an integer spanning the range from 0
+   inclusive to :attr:`numba.cuda.blockDim` exclusive. A similar rule
+   exists for each dimension when more than one dimension is used.
+.. attribute:: numba.cuda.blockDim
+   :noindex:
+
+   The shape of the block of threads, as declared when instantiating the
+   kernel. This value is the same for all threads in a given kernel, even
+   if they belong to different blocks (i.e. each block is "full").
+
+.. attribute:: numba.cuda.blockIdx
+   :noindex:
+
+   The block indices in the grid of threads launched for the kernel. For a 1D
+   grid, the index (given by the ``x`` attribute) is an integer spanning the
+   range from 0 inclusive to :attr:`numba.cuda.gridDim` exclusive. A similar
+   rule exists for each dimension when more than one dimension is used.
+
+.. attribute:: numba.cuda.gridDim
+   :noindex:
+
+   The shape of the grid of blocks, i.e. the total number of blocks launched
+   by this kernel invocation, as declared when instantiating the kernel.
+
+Absolute positions
+------------------
+
+Simple algorithms will tend to always use thread indices in the
+same way as shown in the example above. Numba provides additional facilities
+to automate such calculations:
+
+.. function:: numba.cuda.grid(ndim)
+   :noindex:
+
+   Return the absolute position of the current thread in the entire
+   grid of blocks. *ndim* should correspond to the number of dimensions
+   declared when instantiating the kernel. If *ndim* is 1, a single integer
+   is returned. If *ndim* is 2 or 3, a tuple of the given number of
+   integers is returned.
+
+.. function:: numba.cuda.gridsize(ndim)
+   :noindex:
+
+   Return the absolute size (or shape) in threads of the entire grid of
+   blocks. *ndim* has the same meaning as in :func:`.grid` above.
+
+With these functions, the incrementation example can become::
+
+    @cuda.jit
+    def increment_by_one(an_array):
+        pos = cuda.grid(1)
+        if pos < an_array.size:
+            an_array[pos] += 1
+
+The same example for a 2D array and grid of threads would be::
+
+    @cuda.jit
+    def increment_a_2D_array(an_array):
+        x, y = cuda.grid(2)
+        if x < an_array.shape[0] and y < an_array.shape[1]:
+            an_array[x, y] += 1
+
+Note that the grid computation when instantiating the kernel must still be
+done manually, for example::
+
+    threadsperblock = (16, 16)
+    blockspergrid_x = math.ceil(an_array.shape[0] / threadsperblock[0])
+    blockspergrid_y = math.ceil(an_array.shape[1] / threadsperblock[1])
+    blockspergrid = (blockspergrid_x, blockspergrid_y)
+    increment_a_2D_array[blockspergrid, threadsperblock](an_array)
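+
+These two functions also combine naturally into a grid-stride loop - a sketch
+(not tied to any particular launch configuration) in which a fixed-size grid
+processes an array of any length::
+
+    @cuda.jit
+    def increment_by_one_strided(an_array):
+        start = cuda.grid(1)
+        stride = cuda.gridsize(1)
+        # Each thread handles elements start, start + stride, ...
+        for pos in range(start, an_array.size, stride):
+            an_array[pos] += 1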
+
+Further Reading
+---------------
+
+Please refer to the `CUDA C Programming Guide`_ for a detailed discussion
+of CUDA programming.
+
+
+.. _CUDA C Programming Guide: http://docs.nvidia.com/cuda/cuda-c-programming-guide
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_final.svg b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_final.svg
new file mode 100644
index 000000000..4f3b197fb
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_final.svg
@@ -0,0 +1,1953 @@
[SVG data elided: a Matplotlib v3.5.1 figure (1,953 lines of XML path data,
generated 2022-04-18) showing the final state of the Laplace equation example.]
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_initial.svg b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_initial.svg
new file mode 100644
index 000000000..dbede3687
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/laplace_initial.svg
@@ -0,0 +1,1838 @@
[SVG data elided: a Matplotlib v3.5.1 figure (1,838 lines of XML path data,
generated 2022-04-18) showing the initial state of the Laplace equation example.]
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/memory.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/memory.rst
new file mode 100644
index 000000000..fa6fe6a97
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/memory.rst
@@ -0,0 +1,257 @@
+=================
+Memory management
+=================
+
+.. _cuda-device-memory:
+
+Data transfer
+=============
+
+Even though Numba can automatically transfer NumPy arrays to the device,
+it can only do so conservatively by always transferring device memory back to
+the host when a kernel finishes. To avoid the unnecessary transfer for
+read-only arrays, you can use the following APIs to manually control the
+transfer:
+
+.. autofunction:: numba.cuda.device_array
+   :noindex:
+.. autofunction:: numba.cuda.device_array_like
+   :noindex:
+.. autofunction:: numba.cuda.to_device
+   :noindex:
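+
+For example, a typical pattern (a sketch; the data and kernels are arbitrary)
+is to copy input data to the device once, launch one or more kernels against
+the device array, and copy the result back only when it is needed::
+
+    import numpy as np
+    from numba import cuda
+
+    arr = np.arange(10000, dtype=np.float32)
+    d_arr = cuda.to_device(arr)       # host -> device copy happens once
+    # ... launch one or more kernels on d_arr here ...
+    result = d_arr.copy_to_host()     # device -> host copy, on demand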
+
+In addition to the device arrays, Numba can consume any object that implements
+:ref:`cuda array interface `. These objects can also be
+manually converted into a Numba device array by creating a view of the GPU
+buffer using the following APIs:
+
+.. autofunction:: numba.cuda.as_cuda_array
+   :noindex:
+.. autofunction:: numba.cuda.is_cuda_array
+   :noindex:
+
+
+Device arrays
+-------------
+
+Device array references have the following methods. These methods are to be
+called in host code, not within CUDA-jitted functions.
+
+.. autoclass:: numba.cuda.cudadrv.devicearray.DeviceNDArray
+   :members: copy_to_host, is_c_contiguous, is_f_contiguous, ravel, reshape
+   :noindex:
+
+
+.. note:: DeviceNDArray defines the :ref:`cuda array interface `.
+
+
+Pinned memory
+=============
+
+.. autofunction:: numba.cuda.pinned
+   :noindex:
+.. autofunction:: numba.cuda.pinned_array
+   :noindex:
+.. autofunction:: numba.cuda.pinned_array_like
+   :noindex:
+
+
+Mapped memory
+=============
+
+.. autofunction:: numba.cuda.mapped
+   :noindex:
+.. autofunction:: numba.cuda.mapped_array
+   :noindex:
+.. autofunction:: numba.cuda.mapped_array_like
+   :noindex:
+
+
+
+Managed memory
+==============
+
+.. autofunction:: numba.cuda.managed_array
+   :noindex:
+
+
+Streams
+=======
+
+Streams can be passed to functions that accept them (e.g. copies between the
+host and device) and into kernel launch configurations so that the operations
+are executed asynchronously.
+
+.. autofunction:: numba.cuda.stream
+   :noindex:
+
+.. autofunction:: numba.cuda.default_stream
+   :noindex:
+
+.. autofunction:: numba.cuda.legacy_default_stream
+   :noindex:
+
+.. autofunction:: numba.cuda.per_thread_default_stream
+   :noindex:
+
+.. autofunction:: numba.cuda.external_stream
+   :noindex:
+
+CUDA streams have the following methods:
+
+.. autoclass:: numba.cuda.cudadrv.driver.Stream
+   :members: synchronize, auto_synchronize
+   :noindex:
+
+.. _cuda-shared-memory:
+
+Shared memory and thread synchronization
+========================================
+
+A limited amount of shared memory can be allocated on the device to speed
+up access to data, when necessary. That memory will be shared (i.e. both
+readable and writable) amongst all threads belonging to a given block
+and has faster access times than regular device memory. It also allows
+threads to cooperate on a given solution. You can think of it as a
+manually-managed data cache.
+
+The memory is allocated once for the duration of the kernel, unlike
+traditional dynamic memory management.
+
+.. function:: numba.cuda.shared.array(shape, type)
+   :noindex:
+
+   Allocate a shared array of the given *shape* and *type* on the device.
+   This function must be called on the device (i.e. from a kernel or
+   device function). *shape* is either an integer or a tuple of integers
+   representing the array's dimensions and must be a simple constant
+   expression. A "simple constant expression" includes, but is not limited to:
+
+   #. A literal (e.g. ``10``)
+   #. A local variable whose right-hand side is a literal or a simple constant
+      expression (e.g. ``shape``, where ``shape`` is defined earlier in the function
+      as ``shape = 10``)
+   #. A global variable that is defined in the jitted function's globals by the time
+      of compilation (e.g. ``shape``, where ``shape`` is defined using any expression
+      at global scope).
+
+   The definition must result in a Python ``int`` (i.e. not a NumPy scalar or other
+   scalar / integer-like type). *type* is a :ref:`Numba type ` of the
+   elements needing to be stored in the array. The returned array-like object can be
+   read and written to like any normal device array (e.g. through indexing).
+
+   A common pattern is to have each thread populate one element in the
+   shared array and then wait for all threads to finish using :func:`.syncthreads`.
+
+
+.. function:: numba.cuda.syncthreads()
+   :noindex:
+
+   Synchronize all threads in the same thread block. This function
+   implements the same pattern as `barriers `_
+   in traditional multi-threaded programming: this function waits
+   until all threads in the block call it, at which point it returns
+   control to all its callers.
+
+.. seealso::
+   :ref:`Matrix multiplication example `.
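+
+As an illustration, the following sketch (which assumes the array length is an
+exact multiple of the block size) reverses each block-sized tile of an array
+through shared memory::
+
+    import numpy as np
+    from numba import cuda, float32
+
+    TPB = 32  # threads per block; also the shared array size
+
+    @cuda.jit
+    def reverse_tiles(arr):
+        tile = cuda.shared.array(TPB, dtype=float32)
+        tx = cuda.threadIdx.x
+        i = cuda.grid(1)
+        tile[tx] = arr[i]
+        cuda.syncthreads()           # wait until every thread has written
+        arr[i] = tile[TPB - 1 - tx]
+
+    arr = np.arange(4 * TPB, dtype=np.float32)
+    d_arr = cuda.to_device(arr)
+    reverse_tiles[4, TPB](d_arr)
+    print(d_arr.copy_to_host())      # each 32-element tile is reversed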
+
+.. _cuda-local-memory:
+
+Local memory
+============
+
+Local memory is an area of memory private to each thread. Using local
+memory helps allocate some scratchpad area when scalar local variables
+are not enough. The memory is allocated once for the duration of the kernel,
+unlike traditional dynamic memory management.
+
+.. function:: numba.cuda.local.array(shape, type)
+   :noindex:
+
+   Allocate a local array of the given *shape* and *type* on the device.
+   *shape* is either an integer or a tuple of integers representing the array's
+   dimensions and must be a simple constant expression. A "simple constant expression"
+   includes, but is not limited to:
+
+   #. A literal (e.g. ``10``)
+   #. A local variable whose right-hand side is a literal or a simple constant
+      expression (e.g. ``shape``, where ``shape`` is defined earlier in the function
+      as ``shape = 10``)
+   #. A global variable that is defined in the jitted function's globals by the time
+      of compilation (e.g. ``shape``, where ``shape`` is defined using any expression
+      at global scope).
+
+   The definition must result in a Python ``int`` (i.e. not a NumPy scalar or other
+   scalar / integer-like type). *type* is a :ref:`Numba type `
+   of the elements needing to be stored in the array. The array is private to
+   the current thread. An array-like object is returned which can be read and
+   written to like any standard array (e.g. through indexing).
+
+   .. seealso:: The Local Memory section of `Device Memory Accesses
+      `_
+      in the CUDA programming guide.
+
+Constant memory
+===============
+
+Constant memory is an area of memory that is read only, cached and off-chip;
+it is accessible by all threads and is host allocated. A method of
+creating an array in constant memory is through the use of:
+
+.. function:: numba.cuda.const.array_like(arr)
+   :noindex:
+
+   Allocate and make accessible an array in constant memory based on array-like
+   *arr*.
+
+
+.. _deallocation-behavior:
+
+Deallocation Behavior
+=====================
+
+This section describes the deallocation behaviour of Numba's internal memory
+management. If an External Memory Management Plugin is in use (see
+:ref:`cuda-emm-plugin`), then deallocation behaviour may differ; you may refer to the
+documentation for the EMM Plugin to understand its deallocation behaviour.
+
+Deallocation of all CUDA resources is tracked on a per-context basis.
+When the last reference to a device memory allocation is dropped, the
+underlying memory is scheduled to be deallocated. The deallocation does not
+occur immediately. It is added to a queue of pending deallocations. This
+design has two benefits:
+
+1. The resource deallocation API may cause the device to synchronize, breaking
+   any asynchronous execution. Deferring the deallocation avoids latency
+   in performance-critical code sections.
+2. Some deallocation errors may cause all the remaining deallocations to fail.
+   Continued deallocation errors can cause critical errors at the CUDA driver
+   level. In some cases, this could mean a segmentation fault in the CUDA
+   driver. In the worst case, this could cause the system GUI to freeze, and
+   recovery may require a system reset. When an error occurs during a
+   deallocation, the remaining pending deallocations are cancelled. Any
+   deallocation error will be reported. When the process is terminated, the
+   CUDA driver is able to release all resources allocated by the terminated
+   process.
+
+The deallocation queue is flushed automatically as soon as the following events
+occur:
+
+- An allocation fails due to an out-of-memory error. The allocation is retried
+  after flushing all pending deallocations.
+- The deallocation queue has reached its maximum size, which defaults to 10.
+  Users can override this by setting the environment variable
+  ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT``. For example,
+  ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT=20`` increases the limit to 20.
+- The maximum accumulated byte size of resources that are pending deallocation
+  is reached. This defaults to 20% of the device memory capacity.
+  Users can override this by setting the environment variable
+  ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO``. For example,
+  ``NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO=0.5`` sets the limit to 50% of the
+  capacity.
+
+Sometimes it is desirable to defer resource deallocation until a code section
+ends. Most often, users want to avoid any implicit synchronization due to
+deallocation. This can be done by using the following context manager:
+
+.. autofunction:: numba.cuda.defer_cleanup
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/overview.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/overview.rst
new file mode 100644
index 000000000..4d5f7d1c7
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/overview.rst
@@ -0,0 +1,133 @@
+========
+Overview
+========
+
+Numba supports CUDA GPU programming by directly compiling a restricted subset
+of Python code into CUDA kernels and device functions following the CUDA
+execution model. Kernels written in Numba appear to have direct access
+to NumPy arrays. NumPy arrays are transferred between the CPU and the
+GPU automatically.
+
+
+Terminology
+===========
+
+Several important terms in the topic of CUDA programming are listed here:
+
+- *host*: the CPU
+- *device*: the GPU
+- *host memory*: the system main memory
+- *device memory*: onboard memory on a GPU card
+- *kernel*: a GPU function launched by the host and executed on the device
+- *device function*: a GPU function executed on the device which can only be
+  called from the device (i.e. from a kernel or another device function)
+
+
+Programming model
+=================
+
+Most CUDA programming facilities exposed by Numba map directly to the CUDA
+C language offered by NVidia. Therefore, it is recommended you read the
+official `CUDA C programming guide `_.
+
+
+Requirements
+============
+
+Supported GPUs
+--------------
+
+Numba supports CUDA-enabled GPUs with Compute Capability 3.5 or greater.
+Support for devices with Compute Capability less than 5.3 is deprecated, and
+will be removed in a future Numba release.
+
+Devices with Compute Capability 5.3 or greater include (but are not limited to):
+
+- Embedded platforms: NVIDIA Jetson Nano, TX1, TX2, Xavier NX, AGX Xavier, AGX
+  Orin.
+- Desktop / Server GPUs: All GPUs with Pascal microarchitecture or later. E.g.
+  GTX 10 / 16 series, RTX 20 / 30 series, Quadro P / V / RTX series, RTX A
+  series, H100.
+- Laptop GPUs: All GPUs with Pascal microarchitecture or later. E.g. MX series,
+  Quadro P / T series (mobile), RTX 20 / 30 series (mobile), RTX A series (mobile).
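+
+If you are unsure of a device's compute capability, it can be queried at
+runtime - a quick sketch, assuming a working CUDA installation::
+
+    from numba import cuda
+
+    dev = cuda.get_current_device()
+    # compute_capability is a (major, minor) tuple, e.g. (8, 6)
+    print(dev.name, dev.compute_capability)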
+
+Software
+--------
+
+Numba aims to support CUDA Toolkit versions released within the last 3 years.
+An NVIDIA driver sufficient for the toolkit version is also required.
+Presently:
+
+* 10.2 is the minimum required toolkit version.
+* 11.2 or later is recommended, as it uses an NVVM version based on LLVM 7 (as
+  opposed to 3.4 in earlier releases).
+
+CUDA is supported on 64-bit Linux and Windows.
+
+If you are using Conda, you can install the CUDA toolkit with::
+
+   $ conda install cudatoolkit
+
+If you are not using Conda or if you want to use a different version of CUDA
+toolkit, the following describes how Numba searches for a CUDA toolkit
+installation.
+
+.. _cuda-bindings:
+
+CUDA Bindings
+~~~~~~~~~~~~~
+
+Numba supports interacting with the CUDA Driver API via the `NVIDIA CUDA Python
+bindings `_ and its own ctypes-based
+bindings. Functionality is equivalent between the two bindings. The
+ctypes-based bindings are presently the default, but the NVIDIA bindings will
+be used by default (if they are available in the environment) in a future Numba
+release.
+
+You can install the NVIDIA bindings with::
+
+   $ conda install nvidia::cuda-python
+
+if you are using Conda, or::
+
+   $ pip install cuda-python
+
+if you are using pip.
+
+The use of the NVIDIA bindings is enabled by setting the environment variable
+:envvar:`NUMBA_CUDA_USE_NVIDIA_BINDING` to ``"1"``.
+
+.. _cudatoolkit-lookup:
+
+Setting CUDA Installation Path
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Numba searches for a CUDA toolkit installation in the following order:
+
+1. Conda installed ``cudatoolkit`` package.
+2. Environment variable ``CUDA_HOME``, which points to the directory of the
+   installed CUDA toolkit (e.g. ``/home/user/cuda-10``)
+3. System-wide installation at exactly ``/usr/local/cuda`` on Linux platforms.
+   Versioned installation paths (e.g. ``/usr/local/cuda-10.0``) are intentionally
+   ignored. Users can use ``CUDA_HOME`` to select specific versions.
+
+In addition to the CUDA toolkit libraries, which can be installed by conda into
+an environment or installed system-wide by the `CUDA SDK installer
+`_, the CUDA target in Numba
+also requires an up-to-date NVIDIA graphics driver. Updated graphics drivers
+are also installed by the CUDA SDK installer, so there is no need to do both.
+Note that on macOS, the CUDA SDK must be installed to get the required driver,
+and the driver is only supported on macOS prior to 10.14 (Mojave). If the
+``libcuda`` library is in a non-standard location, users can set environment
+variable ``NUMBA_CUDA_DRIVER`` to the file path (not the directory path) of the
+shared library file.
+
+
+Missing CUDA Features
+=====================
+
+Numba does not yet implement all features of CUDA.
+Some missing features
+are listed below:
+
+* dynamic parallelism
+* texture memory
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/random.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/random.rst
new file mode 100644
index 000000000..6f3ebd85a
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/random.rst
@@ -0,0 +1,90 @@
+
+.. _cuda-random:
+
+Random Number Generation
+========================
+
+Numba provides a random number generation algorithm that can be executed on
+the GPU. Due to technical issues with how NVIDIA implemented cuRAND, however,
+Numba's GPU random number generator is not based on cuRAND. Instead, Numba's
+GPU RNG is an implementation of the `xoroshiro128+ algorithm
+`_. The xoroshiro128+ algorithm has a period of
+``2**128 - 1``, which is shorter than the period of the XORWOW algorithm
+used by default in cuRAND, but xoroshiro128+ still passes the BigCrush tests
+of random number generator quality.
+
+When using any RNG on the GPU, it is important to make sure that each thread
+has its own RNG state, and that the states have been initialized to produce
+non-overlapping sequences. The numba.cuda.random module provides a host
+function to do this, as well as CUDA device functions to obtain uniformly or
+normally distributed random numbers.
+
+.. note:: Numba (like cuRAND) uses the
+          `Box-Muller transform `
+          to generate normally distributed random numbers from a uniform generator.
+          However, Box-Muller generates pairs of random numbers, and the current
+          implementation only returns one of them. As a result, generating normally
+          distributed values is half the speed of uniformly distributed values.
+
+.. automodule:: numba.cuda.random
+   :members: create_xoroshiro128p_states, init_xoroshiro128p_states, xoroshiro128p_uniform_float32, xoroshiro128p_uniform_float64, xoroshiro128p_normal_float32, xoroshiro128p_normal_float64
+   :noindex:
+
+A simple example
+''''''''''''''''
+
+Here is a sample program that uses the random number generator::
+
+    from __future__ import print_function, absolute_import
+
+    from numba import cuda
+    from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
+    import numpy as np
+
+    @cuda.jit
+    def compute_pi(rng_states, iterations, out):
+        """Estimate pi for each thread using Monte Carlo integration."""
+        thread_id = cuda.grid(1)
+
+        # Compute pi by drawing random (x, y) points and finding what
+        # fraction lie inside a unit circle
+        inside = 0
+        for i in range(iterations):
+            x = xoroshiro128p_uniform_float32(rng_states, thread_id)
+            y = xoroshiro128p_uniform_float32(rng_states, thread_id)
+            if x**2 + y**2 <= 1.0:
+                inside += 1
+
+        out[thread_id] = 4.0 * inside / iterations
+
+    threads_per_block = 64
+    blocks = 24
+    rng_states = create_xoroshiro128p_states(threads_per_block * blocks, seed=1)
+    out = np.zeros(threads_per_block * blocks, dtype=np.float32)
+
+    compute_pi[blocks, threads_per_block](rng_states, 10000, out)
+    print('pi:', out.mean())
+
+An example of managing RNG state size and using a 3D grid
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The number of RNG states scales with the number of threads using the RNG, so it
+is often better to use strided loops in conjunction with the RNG in order to
+keep the state size manageable.
+
+In the following example, which initializes a large 3D array with random
+numbers, using one thread per output element would result in 453,617,100 RNG
+states.
+This would take a long time to initialize and poorly utilize the GPU.
+Instead, it uses a fixed size 3D grid with a total of 2,097,152 (``(16 ** 3) *
+(8 ** 3)``) threads striding over the output array. The 3D thread indices
+``startx``, ``starty``, and ``startz`` are linearized into a 1D index,
+``tid``, to index into the 2,097,152 RNG states.
+
+
+.. literalinclude:: ../../../numba/cuda/tests/doc_examples/test_random.py
+   :language: python
+   :caption: from ``test_ex_3d_grid`` of ``numba/cuda/tests/doc_examples/test_random.py``
+   :start-after: magictoken.ex_3d_grid.begin
+   :end-before: magictoken.ex_3d_grid.end
+   :dedent: 8
+   :linenos:
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/reduction.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/reduction.rst
new file mode 100644
index 000000000..674728408
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/reduction.rst
@@ -0,0 +1,33 @@
+GPU Reduction
+=============
+
+Writing a reduction algorithm for a CUDA GPU can be tricky. Numba provides a
+``@reduce`` decorator for converting a simple binary operation into a reduction
+kernel. An example follows::
+
+    import numpy
+    from numba import cuda
+
+    @cuda.reduce
+    def sum_reduce(a, b):
+        return a + b
+
+    A = (numpy.arange(1234, dtype=numpy.float64)) + 1
+    expect = A.sum()      # NumPy sum reduction
+    got = sum_reduce(A)   # cuda sum reduction
+    assert expect == got
+
+Lambda functions can also be used here::
+
+    sum_reduce = cuda.reduce(lambda a, b: a + b)
+
+The Reduce class
+----------------
+
+The ``reduce`` decorator creates an instance of the ``Reduce`` class.
+Currently, ``reduce`` is an alias to ``Reduce``, but this behavior is not
+guaranteed.
+
+.. autoclass:: numba.cuda.Reduce
+   :members: __init__, __call__
+   :member-order: bysource
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/simulator.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/simulator.rst
new file mode 100644
index 000000000..099ffc347
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/simulator.rst
@@ -0,0 +1,104 @@
+
+.. _simulator:
+
+=============================================
+Debugging CUDA Python with the CUDA Simulator
+=============================================
+
+Numba includes a CUDA Simulator that implements most of the semantics in CUDA
+Python using the Python interpreter and some additional Python code. This can
+be used to debug CUDA Python code, either by adding print statements to your
+code, or by using the debugger to step through the execution of an individual
+thread.
+
+The simulator deliberately allows running non-CUDA code like starting a debugger
+and printing arbitrary expressions for debugging purposes. Therefore, it is
+best to start from code that compiles for the CUDA target, and then move over to
+the simulator to investigate issues.
+
+Execution of kernels is performed by the simulator one block at a time. One
+thread is spawned for each thread in the block, and scheduling of the execution
+of these threads is left up to the operating system.
+
+Using the simulator
+===================
+
+The simulator is enabled by setting the environment variable
+:envvar:`NUMBA_ENABLE_CUDASIM` to 1 prior to importing Numba. CUDA Python code
+may then be executed as normal. The easiest way to use the debugger inside a
+kernel is to only stop a single thread, otherwise the interaction with the
+debugger is difficult to handle.
+For example, the kernel below will stop in
+the thread ``<<<(3,0,0), (1, 0, 0)>>>``::
+
+    @cuda.jit
+    def vec_add(A, B, out):
+        x = cuda.threadIdx.x
+        bx = cuda.blockIdx.x
+        bdx = cuda.blockDim.x
+        if x == 1 and bx == 3:
+            from pdb import set_trace; set_trace()
+        i = bx * bdx + x
+        out[i] = A[i] + B[i]
+
+when invoked with a one-dimensional grid and one-dimensional blocks.
+
+Supported features
+==================
+
+The simulator aims to provide as complete a simulation of execution on a real
+GPU as possible - in particular, the following are supported:
+
+* Atomic operations
+* Constant memory
+* Local memory
+* Shared memory: declarations of shared memory arrays must be on separate source
+  lines, since the simulator uses source line information to keep track of
+  allocations of shared memory across threads.
+* Mapped arrays.
+* Host and device memory operations: copying and setting memory.
+* :func:`.syncthreads` is supported - however, in the case where divergent
+  threads enter different :func:`.syncthreads` calls, the launch will not fail,
+  but unexpected behaviour will occur. A future version of the simulator may
+  detect this condition.
+* The stream API is supported, but all operations occur sequentially and
+  synchronously, unlike on a real device. Synchronising on a stream is therefore
+  a no-op.
+* The event API is also supported, but provides no meaningful timing
+  information.
+* Data transfer to and from the GPU - in particular, creating array objects with
+  :func:`.device_array` and :func:`.device_array_like`. The APIs for pinned memory
+  :func:`.pinned` and :func:`.pinned_array` are also supported, but no pinning
+  takes place.
+* The driver API implementation of the list of GPU contexts (``cuda.gpus`` and
+  ``cuda.cudadrv.devices.gpus``) is supported, and reports a single GPU context.
+  This context can be closed and reset as the real one would.
+* The :func:`.detect` function is supported, and reports one device called
+  `SIMULATOR`.
+* Cooperative grids: A cooperative kernel can be launched, but with only one
+  block - the simulator always returns ``1`` from a kernel overload's
+  :meth:`~numba.cuda.dispatcher._Kernel.max_cooperative_grid_blocks` method.
+
+Some limitations of the simulator include:
+
+* It does not perform type checking/type inference. If any argument types to a
+  jitted function are incorrect, or if the specification of the type of any
+  local variable is incorrect, this will not be detected by the simulator.
+* Only one GPU is simulated.
+* Multithreaded accesses to a single GPU are not supported, and will result in
+  unexpected behaviour.
+* Most of the driver API is unimplemented.
+* It is not possible to link PTX code with CUDA Python functions.
+* Warps and warp-level operations are not yet implemented.
+* Because the simulator executes kernels using the Python interpreter,
+  structured array access by attribute that works with the hardware target may
+  fail in the simulator - see :ref:`structured-array-access`.
+* Operations directly against device arrays are only partially supported, that
+  is, testing equality, less than, greater than, and basic mathematical
+  operations are supported, but many other operations, such as the in-place
+  operators and bit operators, are not.
+* The :func:`ffs() ` function only works correctly for values
+  that can be represented using 32-bit integers.
+
+Obviously, the speed of the simulator is also much lower than that of a real
It may be necessary to reduce the size of input data and the size of the +CUDA grid in order to make debugging with the simulator tractable. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ufunc.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ufunc.rst new file mode 100644 index 000000000..c690557fc --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/cuda/ufunc.rst @@ -0,0 +1,154 @@ +CUDA Ufuncs and Generalized Ufuncs +================================== + +This page describes the CUDA ufunc-like object. + +To support the programming pattern of CUDA programs, CUDA Vectorize and +GUVectorize cannot produce a conventional ufunc. Instead, a ufunc-like +object is returned. This object is a close analog but not fully +compatible with a regular NumPy ufunc. The CUDA ufunc adds support for +passing intra-device arrays (already on the GPU device) to reduce +traffic over the PCI-express bus. It also accepts a `stream` keyword +for launching in asynchronous mode. + +Example: Basic Example +------------------------ + +:: + + import math + from numba import vectorize, cuda + import numpy as np + + @vectorize(['float32(float32, float32, float32)', + 'float64(float64, float64, float64)'], + target='cuda') + def cu_discriminant(a, b, c): + return math.sqrt(b ** 2 - 4 * a * c) + + N = 10000 + dtype = np.float32 + + # prepare the input + A = np.array(np.random.sample(N), dtype=dtype) + B = np.array(np.random.sample(N) + 10, dtype=dtype) + C = np.array(np.random.sample(N), dtype=dtype) + + D = cu_discriminant(A, B, C) + + print(D) # print result + +Example: Calling Device Functions +---------------------------------- + +All CUDA ufunc kernels have the ability to call other CUDA device functions:: + + from numba import vectorize, cuda + + # define a device function + @cuda.jit('float32(float32, float32, float32)', device=True, inline=True) + def cu_device_fn(x, y, z): + return x ** y / z + + # define a ufunc that calls our device function + @vectorize(['float32(float32, float32, float32)'], target='cuda') + def cu_ufunc(x, y, z): + return cu_device_fn(x, y, z) + + +Generalized CUDA ufuncs +----------------------- + +Generalized ufuncs may be executed on the GPU using CUDA, analogous to +the CUDA ufunc functionality. This may be accomplished as follows:: + + from numba import guvectorize + + @guvectorize(['void(float32[:,:], float32[:,:], float32[:,:])'], + '(m,n),(n,p)->(m,p)', target='cuda') + def matmulcore(A, B, C): + ... + +There are times when the gufunc kernel uses too many of a GPU's +resources, which can cause the kernel launch to fail. The user can +explicitly control the maximum size of the thread block by setting +the `max_blocksize` attribute on the compiled gufunc object. + +:: + + from numba import guvectorize + + @guvectorize(..., target='cuda') + def very_complex_kernel(A, B, C): + ... + + very_complex_kernel.max_blocksize = 32 # limits to 32 threads per block + +.. comment + + Example: A Chunk at a Time + --------------------------- + + Partitioning your data into chunks allows computation and memory transfer + to be overlapped. This can increase the throughput of your ufunc and + enables your ufunc to operate on data that is larger than the memory + capacity of your GPU. 
+   For example:
+
+   ::
+
+       import math
+       from numba import vectorize, cuda
+       import numpy as np
+
+       # the ufunc kernel
+       def discriminant(a, b, c):
+           return math.sqrt(b ** 2 - 4 * a * c)
+
+       cu_discriminant = vectorize(['float32(float32, float32, float32)',
+                                    'float64(float64, float64, float64)'],
+                                   target='cuda')(discriminant)
+
+       N = int(1e+8)
+       dtype = np.float32
+
+       # prepare the input
+       A = np.array(np.random.sample(N), dtype=dtype)
+       B = np.array(np.random.sample(N) + 10, dtype=dtype)
+       C = np.array(np.random.sample(N), dtype=dtype)
+       D = np.empty(A.shape, dtype=A.dtype)
+
+       # create a CUDA stream
+       stream = cuda.stream()
+
+       chunksize = int(1e+6)  # must be an int for np.split below
+       chunkcount = N // chunksize
+
+       # partition NumPy arrays into chunks
+       # no copying is performed
+       sA = np.split(A, chunkcount)
+       sB = np.split(B, chunkcount)
+       sC = np.split(C, chunkcount)
+       sD = np.split(D, chunkcount)
+
+       device_ptrs = []
+
+       with stream.auto_synchronize():
+           # every operation in this context will be launched asynchronously
+           # by using the CUDA stream
+
+           # for each chunk
+           for a, b, c, d in zip(sA, sB, sC, sD):
+               # transfer to device
+               dA = cuda.to_device(a, stream)
+               dB = cuda.to_device(b, stream)
+               dC = cuda.to_device(c, stream)
+               dD = cuda.to_device(d, stream, copy=False)  # no copying
+               # launch kernel
+               cu_discriminant(dA, dB, dC, out=dD, stream=stream)
+               # retrieve result
+               dD.copy_to_host(d, stream)
+               # store device pointers to prevent them from freeing before
+               # the kernel is scheduled
+               device_ptrs.extend([dA, dB, dC, dD])
+
+       # data is ready at this point inside D
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/architecture.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/architecture.rst
new file mode 100644
index 000000000..3e9dffe1b
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/architecture.rst
@@ -0,0 +1,909 @@
+
+.. _architecture:
+
+==================
+Numba architecture
+==================
+
+Introduction
+============
+
+Numba is a compiler for Python bytecode with optional type-specialization.
+
+Suppose you enter a function like this into the standard Python interpreter
+(henceforward referred to as "CPython")::
+
+    def add(a, b):
+        return a + b
+
+The interpreter will immediately parse the function and convert it into a
+bytecode representation that describes how the CPython interpreter should
+execute the function at a low level. For the example above, it looks
+something like this::
+
+    >>> import dis
+    >>> dis.dis(add)
+    2           0 LOAD_FAST                0 (a)
+                3 LOAD_FAST                1 (b)
+                6 BINARY_ADD
+                7 RETURN_VALUE
+
+
+CPython uses a stack-based interpreter (much like an HP calculator), so the
+code first pushes two local variables onto the stack. The ``BINARY_ADD``
+opcode pops the top two arguments off the stack and makes a Python C API
+function call that is equivalent to calling ``a.__add__(b)``. The result is
+then pushed onto the top of the interpreter stack. Finally, the
+``RETURN_VALUE`` opcode returns the value on the top of the stack as the
+result of the function call.
+
+Numba can take this bytecode and compile it to machine code that performs the
+same operations as the CPython interpreter, treating ``a`` and ``b`` as
+generic Python objects. The full semantics of Python are preserved, and the
+compiled function can be used with any kind of objects that have the add
+operator defined.
When a Numba function is compiled this way, we say that it +has been compiled in :term:`object mode`, because the code still manipulates +Python objects. + +Numba code compiled in object mode is not much faster than executing the +original Python function in the CPython interpreter. However, if we +specialize the function to only run with certain data types, Numba can +generate much shorter and more efficient code that manipulates the data +natively without any calls into the Python C API. When code has been compiled +for specific data types so that the function body no longer relies on the +Python runtime, we say the function has been compiled in :term:`nopython mode`. +Numeric code compiled in nopython mode can be hundreds of times faster +than the original Python. + + +Compiler architecture +===================== + +Like many compilers, Numba can be conceptually divided into a +*frontend* and a *backend*. + +The Numba *frontend* comprises the stages which analyze the Python bytecode, +translate it to :term:`Numba IR` and perform various transformations and +analysis steps on the IR. One of the key steps is :term:`type inference`. +The frontend must succeed in typing all variables unambiguously in order +for the backend to generate code in :term:`nopython mode`, because the +backend uses type information to match appropriate code generators with +the values they operate on. + +The Numba *backend* walks the Numba IR resulting from the frontend analyses +and exploits the type information deduced by the type inference phase to +produce the right LLVM code for each encountered operation. After LLVM +code is produced, the LLVM library is asked to optimize it and generate +native processor code for the final, native function. + +There are other pieces besides the compiler frontend and backend, such +as the caching machinery for JIT functions. Those pieces are not considered +in this document. + + +Contexts +======== + +Numba is quite flexible, allowing it to generate code for different hardware +architectures like CPUs and GPUs. In order to support these different +applications, Numba uses a *typing context* and a *target context*. + +A *typing context* is used in the compiler frontend to perform type inference +on operations and values in the function. Similar typing contexts could be +used for many architectures because for nearly all cases, typing inference +is hardware-independent. However, Numba currently has a different typing +context for each target. + +A *target context* is used to generate the specific instruction sequence +required to operate on the Numba types identified during type inference. +Target contexts are architecture-specific and are flexible in defining +the execution model and available Python APIs. For example, Numba has a "cpu" +and a "cuda" context for those two kinds of architecture, and a "parallel" +context which produces multithreaded CPU code. + + +Compiler stages +=============== + +The :func:`~numba.jit` decorator in Numba ultimately calls +``numba.compiler.compile_extra()`` which compiles the Python function in a +multi-stage process, described below. + +Stage 1: Analyze bytecode +------------------------- + +At the start of compilation, the function bytecode is passed to an instance of +the Numba interpreter (``numba.interpreter``). The interpreter object +analyzes the bytecode to find the control flow graph (``numba.controlflow``). 
+The control flow graph (CFG) describes the ways that execution can move from one +block to the next inside the function as a result of loops and branches. + +The data flow analysis (``numba.dataflow``) takes the control flow graph and +traces how values get pushed and popped off the Python interpreter stack for +different code paths. This is important to understand the lifetimes of +variables on the stack, which are needed in Stage 2. + +If you set the environment variable ``NUMBA_DUMP_CFG`` to 1, Numba will dump +the results of the control flow graph analysis to the screen. Our ``add()`` +example is pretty boring, since there is only one statement block:: + + CFG adjacency lists: + {0: []} + CFG dominators: + {0: set([0])} + CFG post-dominators: + {0: set([0])} + CFG back edges: [] + CFG loops: + {} + CFG node-to-loops: + {0: []} + +A function with more complex flow control will have a more interesting +control flow graph. This function:: + + def doloops(n): + acc = 0 + for i in range(n): + acc += 1 + if n == 10: + break + return acc + +compiles to this bytecode:: + + 9 0 LOAD_CONST 1 (0) + 3 STORE_FAST 1 (acc) + + 10 6 SETUP_LOOP 46 (to 55) + 9 LOAD_GLOBAL 0 (range) + 12 LOAD_FAST 0 (n) + 15 CALL_FUNCTION 1 + 18 GET_ITER + >> 19 FOR_ITER 32 (to 54) + 22 STORE_FAST 2 (i) + + 11 25 LOAD_FAST 1 (acc) + 28 LOAD_CONST 2 (1) + 31 INPLACE_ADD + 32 STORE_FAST 1 (acc) + + 12 35 LOAD_FAST 0 (n) + 38 LOAD_CONST 3 (10) + 41 COMPARE_OP 2 (==) + 44 POP_JUMP_IF_FALSE 19 + + 13 47 BREAK_LOOP + 48 JUMP_ABSOLUTE 19 + 51 JUMP_ABSOLUTE 19 + >> 54 POP_BLOCK + + 14 >> 55 LOAD_FAST 1 (acc) + 58 RETURN_VALUE + +The corresponding CFG for this bytecode is:: + + CFG adjacency lists: + {0: [6], 6: [19], 19: [54, 22], 22: [19, 47], 47: [55], 54: [55], 55: []} + CFG dominators: + {0: set([0]), + 6: set([0, 6]), + 19: set([0, 6, 19]), + 22: set([0, 6, 19, 22]), + 47: set([0, 6, 19, 22, 47]), + 54: set([0, 6, 19, 54]), + 55: set([0, 6, 19, 55])} + CFG post-dominators: + {0: set([0, 6, 19, 55]), + 6: set([6, 19, 55]), + 19: set([19, 55]), + 22: set([22, 55]), + 47: set([47, 55]), + 54: set([54, 55]), + 55: set([55])} + CFG back edges: [(22, 19)] + CFG loops: + {19: Loop(entries=set([6]), exits=set([54, 47]), header=19, body=set([19, 22]))} + CFG node-to-loops: + {0: [], 6: [], 19: [19], 22: [19], 47: [], 54: [], 55: []} + +The numbers in the CFG refer to the bytecode offsets shown just to the left +of the opcode names above. + +.. _arch_generate_numba_ir: + +Stage 2: Generate the Numba IR +------------------------------ + +Once the control flow and data analyses are complete, the Numba interpreter +can step through the bytecode and translate it into an Numba-internal +intermediate representation. This translation process changes the function +from a stack machine representation (used by the Python interpreter) to a +register machine representation (used by LLVM). + +Although the IR is stored in memory as a tree of objects, it can be serialized +to a string for debugging. If you set the environment variable +``NUMBA_DUMP_IR`` equal to 1, the Numba IR will be dumped to the screen. For +the ``add()`` function described above, the Numba IR looks like:: + + label 0: + a = arg(0, name=a) ['a'] + b = arg(1, name=b) ['b'] + $0.3 = a + b ['$0.3', 'a', 'b'] + del b [] + del a [] + $0.4 = cast(value=$0.3) ['$0.3', '$0.4'] + del $0.3 [] + return $0.4 ['$0.4'] + +The ``del`` instructions are produced by :ref:`live variable analysis`. +Those instructions ensure references are not leaked. 
+In :term:`nopython mode`, some objects are tracked by the Numba runtime and
+some are not.  For tracked objects, a dereference operation is emitted;
+otherwise, the instruction is a no-op.
+In :term:`object mode`, each variable contains an owned reference to a
+PyObject.
+
+
+.. _`rewrite-untyped-ir`:
+
+Stage 3: Rewrite untyped IR
+---------------------------
+
+Before running type inference, it is sometimes useful to run certain
+transformations on the Numba IR.  One example is detecting ``raise``
+statements which have an implicitly constant argument, so as to
+support them in :term:`nopython mode`.  Let's say you compile the
+following function with Numba::
+
+    def f(x):
+        if x == 0:
+            raise ValueError("x cannot be zero")
+
+If you set the :envvar:`NUMBA_DUMP_IR` environment variable to ``1``,
+you'll see the IR being rewritten before the type inference phase::
+
+    REWRITING:
+        del $0.3                                 []
+        $12.1 = global(ValueError: <class 'ValueError'>) ['$12.1']
+        $const12.2 = const(str, x cannot be zero) ['$const12.2']
+        $12.3 = call $12.1($const12.2)           ['$12.1', '$12.3', '$const12.2']
+        del $const12.2                           []
+        del $12.1                                []
+        raise $12.3                              ['$12.3']
+    ____________________________________________________________
+        del $0.3                                 []
+        $12.1 = global(ValueError: <class 'ValueError'>) ['$12.1']
+        $const12.2 = const(str, x cannot be zero) ['$const12.2']
+        $12.3 = call $12.1($const12.2)           ['$12.1', '$12.3', '$const12.2']
+        del $const12.2                           []
+        del $12.1                                []
+        raise <class 'ValueError'>('x cannot be zero') []
+
+
+.. _arch_type_inference:
+
+Stage 4: Infer types
+--------------------
+
+Now that the Numba IR has been generated, type analysis can be performed.
+The types of the function arguments can be taken either from the explicit
+function signature given in the ``@jit`` decorator (such as
+``@jit('float64(float64, float64)')``), or they can be taken from the types
+of the actual function arguments if compilation is happening when the
+function is first called.
+
+The type inference engine is found in ``numba.typeinfer``.  Its job is to
+assign a type to every intermediate variable in the Numba IR.  The result of
+this pass can be seen by setting the :envvar:`NUMBA_DUMP_ANNOTATION`
+environment variable to 1:
+
+.. code-block:: python
+
+    -----------------------------------ANNOTATION-----------------------------------
+    # File: archex.py
+    # --- LINE 4 ---
+
+    @jit(nopython=True)
+
+    # --- LINE 5 ---
+
+    def add(a, b):
+
+        # --- LINE 6 ---
+        # label 0
+        #   a = arg(0, name=a)  :: int64
+        #   b = arg(1, name=b)  :: int64
+        #   $0.3 = a + b  :: int64
+        #   del b
+        #   del a
+        #   $0.4 = cast(value=$0.3)  :: int64
+        #   del $0.3
+        #   return $0.4
+
+        return a + b
+
+
+If type inference fails to find a consistent type assignment for all the
+intermediate variables, it will label every variable as type ``pyobject`` and
+fall back to object mode.  Type inference can fail when unsupported Python
+types, language features, or functions are used in the function body.
+
+
+.. _`rewrite-typed-ir`:
+
+Stage 5a: Rewrite typed IR
+--------------------------
+
+This pass's purpose is to perform any high-level optimizations that still
+require, or could at least benefit from, Numba IR type information.
+
+One example of a problem domain that isn't as easily optimized once
+lowered is the domain of multidimensional array operations.  When
+Numba lowers an array operation, it treats the operation like a
+full ufunc kernel.  While lowering a single array operation, Numba
+generates an inline broadcasting loop that creates a new result array.
+Then Numba generates an application loop that applies the operator +over the array inputs. Recognizing and rewriting these loops once +they are lowered into LLVM is hard, if not impossible. + +An example pair of optimizations in the domain of array operators is +loop fusion and shortcut deforestation. When the optimizer +recognizes that the output of one array operator is being fed into +another array operator, and only to that array operator, it can fuse +the two loops into a single loop. The optimizer can further eliminate +the temporary array allocated for the initial operation by directly +feeding the result of the first operation into the second, skipping +the store and load to the intermediate array. This elimination is +known as shortcut deforestation. Numba currently uses the rewrite +pass to implement these array optimizations. For more information, +please consult the ":ref:`case-study-array-expressions`" subsection, +later in this document. + +One can see the result of rewriting by setting the +:envvar:`NUMBA_DUMP_IR` environment variable to a non-zero value (such +as 1). The following example shows the output of the rewrite pass as +it recognizes an array expression consisting of a multiply and add, +and outputs a fused kernel as a special operator, :func:`arrayexpr`:: + + ______________________________________________________________________ + REWRITING: + a0 = arg(0, name=a0) ['a0'] + a1 = arg(1, name=a1) ['a1'] + a2 = arg(2, name=a2) ['a2'] + $0.3 = a0 * a1 ['$0.3', 'a0', 'a1'] + del a1 [] + del a0 [] + $0.5 = $0.3 + a2 ['$0.3', '$0.5', 'a2'] + del a2 [] + del $0.3 [] + $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] + del $0.5 [] + return $0.6 ['$0.6'] + ____________________________________________________________ + a0 = arg(0, name=a0) ['a0'] + a1 = arg(1, name=a1) ['a1'] + a2 = arg(2, name=a2) ['a2'] + $0.5 = arrayexpr(ty=array(float64, 1d, C), expr=('+', [('*', [Var(a0, test.py (14)), Var(a1, test.py (14))]), Var(a2, test.py (14))])) ['$0.5', 'a0', 'a1', 'a2'] + del a0 [] + del a1 [] + del a2 [] + $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] + del $0.5 [] + return $0.6 ['$0.6'] + ______________________________________________________________________ + +Following this rewrite, Numba lowers the array expression into a new +ufunc-like function that is inlined into a single loop that only +allocates a single result array. + + +.. _`parallel-accelerator`: + +Stage 5b: Perform Automatic Parallelization +------------------------------------------- + +This pass is only performed if the ``parallel`` option in the :func:`~numba.jit` +decorator is set to ``True``. This pass finds parallelism implicit in the +semantics of operations in the Numba IR and replaces those operations +with explicitly parallel representations of those operations using a +special `parfor` operator. Then, optimizations are performed to maximize +the number of parfors that are adjacent to each other such that they can +then be fused together into one parfor that takes only one pass over the +data and will thus typically have better cache performance. Finally, +during lowering, these parfor operators are converted to a form similar +to guvectorize to implement the actual parallelism. 
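+
+As a brief orientation (not part of the pass machinery itself), the kind of
+code this pass targets looks like the following sketch; ``normalize`` is a
+hypothetical example, not a function from the Numba code base:
+
+.. code-block:: python
+
+    import numpy as np
+    from numba import njit, prange
+
+    @njit(parallel=True)
+    def normalize(x):
+        # implicit parallelism: this array expression is recognized and
+        # converted to a parfor
+        y = x - x.mean()
+        out = np.empty_like(y)
+        # explicit parallelism: prange marks the loop as parallelizable
+        for i in prange(y.shape[0]):
+            out[i] = y[i] / (1.0 + abs(y[i]))
+        return out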
+ +The automatic parallelization pass has a number of sub-passes, many of +which are controllable using a dictionary of options passed via the +``parallel`` keyword argument to :func:`~numba.jit`:: + + { 'comprehension': True/False, # parallel comprehension + 'prange': True/False, # parallel for-loop + 'numpy': True/False, # parallel numpy calls + 'reduction': True/False, # parallel reduce calls + 'setitem': True/False, # parallel setitem + 'stencil': True/False, # parallel stencils + 'fusion': True/False, # enable fusion or not + } + +The default is set to `True` for all of them. The sub-passes are +described in more detail in the following paragraphs. + +#. CFG Simplification + Sometimes Numba IR will contain chains of blocks containing no loops which + are merged in this sub-pass into single blocks. This sub-pass simplifies + subsequent analysis of the IR. + +#. Numpy canonicalization + Some Numpy operations can be written as operations on Numpy objects (e.g. + ``arr.sum()``), or as calls to Numpy taking those objects (e.g. + ``numpy.sum(arr)``). This sub-pass converts all such operations to the + latter form for cleaner subsequent analysis. + +#. Array analysis + A critical requirement for later parfor fusion is that parfors have + identical iteration spaces and these iteration spaces typically correspond + to the sizes of the dimensions of Numpy arrays. In this sub-pass, the IR is + analyzed to determine equivalence classes for the dimensions of Numpy + arrays. Consider the example, ``a = b + 1``, where ``a`` and ``b`` are both + Numpy arrays. Here, we know that each dimension of ``a`` must have the same + equivalence class as the corresponding dimension of ``b``. Typically, + routines rich in Numpy operations will enable equivalence classes to be + fully known for all arrays created within a function. + + Array analysis will also reason about size equivalence for slice selection, + and boolean array masking (one dimensional only). For example, it is able to + infer that ``a[1 : n-1]`` is of the same size as ``b[0 : n-2]``. + + Array analysis may also insert safety assumptions to ensure pre-conditions + related to array sizes are met before an operation can be parallelized. + For example, ``np.dot(X, w)`` between a 2-D matrix ``X`` and a 1-D vector ``w`` + requires that the second dimension of ``X`` is of the same size as ``w``. + Usually this kind of runtime check is automatically inserted, but if array + analysis can infer such equivalence, it will skip them. + + Users can even help array analysis by turning implicit knowledge about + array sizes into explicit assertions. For example, in the code below: + + .. code-block:: python + + @numba.njit(parallel=True) + def logistic_regression(Y, X, w, iterations): + assert(X.shape == (Y.shape[0], w.shape[0])) + for i in range(iterations): + w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) + return w + + Making the explicit assertion helps eliminate all bounds checks in the + rest of the function. + +#. ``prange()`` to parfor + The use of prange (:ref:`numba-prange`) in a for loop is an explicit + indication from the programmer that all iterations of the for loop can + execute in parallel. In this sub-pass, we analyze the CFG to locate loops + and to convert those loops controlled by a prange object to the explicit + `parfor` operator. Each explicit parfor operator consists of: + + a. A list of loop nest information that describes the iteration space of the + parfor. 
Each entry in the loop nest list contains an indexing variable,
+      the start of the range, the end of the range, and the step value for
+      each iteration.
+   #. An initialization (init) block which contains instructions to be executed
+      one time before the parfor begins executing.
+   #. A loop body comprising a set of basic blocks that correspond to the body
+      of the loop and compute one point in the iteration space.
+   #. The index variables used for each dimension of the iteration space.
+
+   For parfor `pranges`, the loop nest is a single entry where the start,
+   stop, and step fields come from the specified `prange`.  The init block is
+   empty for `prange` parfors and the loop body is the set of blocks in the
+   loop minus the loop header.
+
+   With parallelization on, array comprehensions (:ref:`pysupported-comprehension`)
+   will also be translated to prange so as to run in parallel.  This behavior
+   can be disabled by setting ``parallel={'comprehension': False}``.
+
+   Likewise, the overall `prange` to `parfor` translation can be disabled by
+   setting ``parallel={'prange': False}``, in which case `prange` is treated
+   the same as `range`.
+
+#. Numpy to parfor
+   In this sub-pass, Numpy functions such as ``ones``, ``zeros``, ``dot``, most
+   of the random number generating functions, arrayexprs (from Section
+   :ref:`rewrite-typed-ir`), and Numpy reductions are converted to parfors.
+   Generally, this conversion creates the loop nest list, whose length is equal
+   to the number of dimensions of the left-hand side of the assignment
+   instruction in the IR.  The number and size of the dimensions of the
+   left-hand-side array is taken from the array analysis information generated
+   in sub-pass 3 above.  An instruction to create the result Numpy array is
+   generated and stored in the new parfor's init block.  A basic block is
+   created for the loop body and an instruction is generated and added to the
+   end of that block to store the result of the computation into the array at
+   the current point in the iteration space.  The result stored into the array
+   depends on the operation that is being converted.  For example, for ``ones``,
+   the value stored is a constant 1.  For calls to generate a random array, the
+   value comes from a call to the same random number function but with the size
+   parameter dropped and therefore returning a scalar.  For arrayexpr operators,
+   the arrayexpr tree is converted to Numba IR and the value at the root of that
+   expression tree is used to write into the output array.  The translation from
+   Numpy functions and arrayexpr operators to `parfor` can be disabled by
+   setting ``parallel={'numpy': False}``.
+
+   For reductions, the loop nest list is similarly created using the array
+   analysis information for the array being reduced.  In the init block, the
+   initial value is assigned to the reduction variable.  The loop body consists
+   of a single block in which the next value in the iteration space is fetched,
+   the reduction operation is applied to that value and the current reduction
+   value, and the result is stored back into the reduction variable.
+   The translation of reduction functions to `parfor` can be disabled by
+   setting ``parallel={'reduction': False}``.
+
+   Setting the :envvar:`NUMBA_DEBUG_ARRAY_OPT_STATS` environment variable to
+   1 will show some statistics about parfor conversions in general.
+
+#. Setitem to parfor
+   Setting a range of array elements using a slice or boolean array selection
+   can also run in parallel.
A statement such as ``A[P] = B[Q]``
+   (or a simpler case ``A[P] = c``, where ``c`` is a scalar) is translated to
+   `parfor` if one of the following conditions is met:
+
+     a. ``P`` and ``Q`` are slices or multi-dimensional selectors involving
+        scalars and slices, and ``A[P]`` and ``B[Q]`` are considered size
+        equivalent by array analysis.  Only 2-value slices/ranges are
+        supported; a 3-value slice with a step will not be translated to
+        `parfor`.
+     #. ``P`` and ``Q`` are the same boolean array.
+
+   This translation can be disabled by setting ``parallel={'setitem': False}``.
+
+#. Simplification
+   Performs a copy propagation and dead code elimination pass.
+
+#. Fusion
+   This sub-pass first processes each basic block and does a reordering of the
+   instructions within the block with the goal of pushing parfors lower in the
+   block and lifting non-parfors towards the start of the block.  In practice,
+   this approach does a good job of getting parfors adjacent to each other in
+   the IR, which enables more parfors to then be fused.  During parfor fusion,
+   each basic block is repeatedly scanned until no further fusion is possible.
+   During this scan, each set of adjacent instructions is considered.
+   Adjacent instructions are fused together if:
+
+     a. they are both parfors,
+     #. the parfors' loop nests are the same size and the array equivalence
+        classes for each dimension of the loop nests are the same, and
+     #. the first parfor does not create a reduction variable used by the
+        second parfor.
+
+   The two parfors are fused together by adding the second parfor's init block
+   to the first's, merging the two parfors' loop bodies together and replacing
+   the instances of the second parfor's loop index variables in the second
+   parfor's body with the loop index variables for the first parfor.
+   Fusion can be disabled by setting ``parallel={'fusion': False}``.
+
+   Setting the :envvar:`NUMBA_DEBUG_ARRAY_OPT_STATS` environment variable to
+   1 will show some statistics about parfor fusions.
+
+#. Push call objects and compute parfor parameters
+   In the lowering phase described in Section :ref:`lowering`, each parfor
+   becomes a separate function executed in parallel in ``guvectorize``
+   (:ref:`guvectorize`) style.  Since parfors may use variables defined
+   previously in a function, when those parfors become separate functions,
+   those variables must be passed to the parfor function as parameters.  In
+   this sub-pass, a use-def scan is made over each parfor body and liveness
+   information is used to determine which variables are used but not defined
+   by the parfor.  That list of variables is stored in the parfor for use
+   during lowering.  Function variables are a special case in this process
+   since function variables cannot be passed to functions compiled in nopython
+   mode.  Instead, for function variables, this sub-pass pushes the assignment
+   instruction to the function variable into the parfor body so that those do
+   not need to be passed as parameters.
+
+   To see the intermediate IR between the above sub-passes and other debugging
+   information, set the :envvar:`NUMBA_DEBUG_ARRAY_OPT` environment variable
+   to 1.
For the example in Section :ref:`rewrite-typed-ir`, the following IR with + a parfor is generated during this stage:: + + ______________________________________________________________________ + label 0: + a0 = arg(0, name=a0) ['a0'] + a0_sh_attr0.0 = getattr(attr=shape, value=a0) ['a0', 'a0_sh_attr0.0'] + $consta00.1 = const(int, 0) ['$consta00.1'] + a0size0.2 = static_getitem(value=a0_sh_attr0.0, index_var=$consta00.1, index=0) ['$consta00.1', 'a0_sh_attr0.0', 'a0size0.2'] + a1 = arg(1, name=a1) ['a1'] + a1_sh_attr0.3 = getattr(attr=shape, value=a1) ['a1', 'a1_sh_attr0.3'] + $consta10.4 = const(int, 0) ['$consta10.4'] + a1size0.5 = static_getitem(value=a1_sh_attr0.3, index_var=$consta10.4, index=0) ['$consta10.4', 'a1_sh_attr0.3', 'a1size0.5'] + a2 = arg(2, name=a2) ['a2'] + a2_sh_attr0.6 = getattr(attr=shape, value=a2) ['a2', 'a2_sh_attr0.6'] + $consta20.7 = const(int, 0) ['$consta20.7'] + a2size0.8 = static_getitem(value=a2_sh_attr0.6, index_var=$consta20.7, index=0) ['$consta20.7', 'a2_sh_attr0.6', 'a2size0.8'] + ---begin parfor 0--- + index_var = parfor_index.9 + LoopNest(index_variable=parfor_index.9, range=0,a0size0.2,1 correlation=5) + init block: + $np_g_var.10 = global(np: ) ['$np_g_var.10'] + $empty_attr_attr.11 = getattr(attr=empty, value=$np_g_var.10) ['$empty_attr_attr.11', '$np_g_var.10'] + $np_typ_var.12 = getattr(attr=float64, value=$np_g_var.10) ['$np_g_var.10', '$np_typ_var.12'] + $0.5 = call $empty_attr_attr.11(a0size0.2, $np_typ_var.12, kws=(), func=$empty_attr_attr.11, vararg=None, args=[Var(a0size0.2, test2.py (7)), Var($np_typ_var.12, test2.py (7))]) ['$0.5', '$empty_attr_attr.11', '$np_typ_var.12', 'a0size0.2'] + label 1: + $arg_out_var.15 = getitem(value=a0, index=parfor_index.9) ['$arg_out_var.15', 'a0', 'parfor_index.9'] + $arg_out_var.16 = getitem(value=a1, index=parfor_index.9) ['$arg_out_var.16', 'a1', 'parfor_index.9'] + $arg_out_var.14 = $arg_out_var.15 * $arg_out_var.16 ['$arg_out_var.14', '$arg_out_var.15', '$arg_out_var.16'] + $arg_out_var.17 = getitem(value=a2, index=parfor_index.9) ['$arg_out_var.17', 'a2', 'parfor_index.9'] + $expr_out_var.13 = $arg_out_var.14 + $arg_out_var.17 ['$arg_out_var.14', '$arg_out_var.17', '$expr_out_var.13'] + $0.5[parfor_index.9] = $expr_out_var.13 ['$0.5', '$expr_out_var.13', 'parfor_index.9'] + ----end parfor 0---- + $0.6 = cast(value=$0.5) ['$0.5', '$0.6'] + return $0.6 ['$0.6'] + ______________________________________________________________________ + + .. _`lowering`: + +Stage 6a: Generate nopython LLVM IR +----------------------------------- + +If type inference succeeds in finding a Numba type for every intermediate +variable, then Numba can (potentially) generate specialized native code. This +process is called :term:`lowering`. The Numba IR tree is translated into +LLVM IR by using helper classes from `llvmlite `_. +The machine-generated LLVM IR can seem unnecessarily verbose, but the LLVM +toolchain is able to optimize it quite easily into compact, efficient code. + +The basic lowering algorithm is generic, but the specifics of how particular +Numba IR nodes are translated to LLVM instructions is handled by the +target context selected for compilation. The default target context is +the "cpu" context, defined in ``numba.targets.cpu``. + +The LLVM IR can be displayed by setting the :envvar:`NUMBA_DUMP_LLVM` environment +variable to 1. For the "cpu" context, our ``add()`` example would look like: + +.. 
code-block:: llvm + + define i32 @"__main__.add$1.int64.int64"(i64* %"retptr", + {i8*, i32}** %"excinfo", + i8* %"env", + i64 %"arg.a", i64 %"arg.b") + { + entry: + %"a" = alloca i64 + %"b" = alloca i64 + %"$0.3" = alloca i64 + %"$0.4" = alloca i64 + br label %"B0" + B0: + store i64 %"arg.a", i64* %"a" + store i64 %"arg.b", i64* %"b" + %".8" = load i64* %"a" + %".9" = load i64* %"b" + %".10" = add i64 %".8", %".9" + store i64 %".10", i64* %"$0.3" + %".12" = load i64* %"$0.3" + store i64 %".12", i64* %"$0.4" + %".14" = load i64* %"$0.4" + store i64 %".14", i64* %"retptr" + ret i32 0 + } + +The post-optimization LLVM IR can be output by setting +:envvar:`NUMBA_DUMP_OPTIMIZED` to 1. The optimizer shortens the code +generated above quite significantly: + +.. code-block:: llvm + + define i32 @"__main__.add$1.int64.int64"(i64* nocapture %retptr, + { i8*, i32 }** nocapture readnone %excinfo, + i8* nocapture readnone %env, + i64 %arg.a, i64 %arg.b) + { + entry: + %.10 = add i64 %arg.b, %arg.a + store i64 %.10, i64* %retptr, align 8 + ret i32 0 + } + +If created during :ref:`parallel-accelerator`, parfor operations are +lowered in the following manner. First, instructions in the parfor's init +block are lowered into the existing function using the normal lowering code. +Second, the loop body of the parfor is turned into a separate GUFunc. +Third, code is emitted for the current function to call the parallel GUFunc. + +To create a GUFunc from the parfor body, the signature of the GUFunc is +created by taking the parfor parameters as identified in step 9 of +Stage :ref:`parallel-accelerator` and adding to that a special `schedule` +parameter, across which the GUFunc will be parallelized. The schedule +parameter is in effect a static schedule mapping portions of the parfor +iteration space to Numba threads and so the length of the schedule +array is the same as the number of configured Numba threads. To make +this process easier and somewhat less dependent on changes to Numba IR, +this stage creates a Python function as text that contains the parameters +to the GUFunc and iteration code that takes the current schedule entry +and loops through the specified portion of the iteration space. In the +body of that loop, a special sentinel is inserted for subsequent easy +location. This code that handles the processing of the iteration space +is then ``eval``'ed into existence and the Numba compiler's run_frontend +function is called to generate IR. That IR is scanned to locate the +sentinel and the sentinel is replaced with the loop body of the parfor. +Then, the process of creating the parallel GUFunc is completed by +compiling this merged IR with the Numba compiler's ``compile_ir`` function. + +To call the parallel GUFunc, the static schedule must be created. +Code is inserted to call a function named ``do_scheduling.`` This function +is called with the size of each of the parfor's dimensions and the number +`N` of configured Numba threads (:envvar:`NUMBA_NUM_THREADS`). +The ``do_scheduling`` function will divide +the iteration space into N approximately equal sized regions (linear for +1D, rectangular for 2D, or hyperrectangles for 3+D) and the resulting +schedule is passed to the parallel GUFunc. The number of threads +dedicated to a given dimension of the full iteration space is roughly +proportional to the ratio of the size of the given dimension to the sum +of the sizes of all the dimensions of the iteration space. 
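+
+To illustrate the shape of such a static schedule (this is a simplified,
+hypothetical sketch, not Numba's actual ``do_scheduling`` implementation),
+a 1D iteration space could be divided among threads as follows:
+
+.. code-block:: python
+
+    def static_schedule_1d(size, num_threads):
+        # split [0, size) into num_threads contiguous chunks whose
+        # lengths differ by at most one
+        base, extra = divmod(size, num_threads)
+        schedule = []
+        start = 0
+        for t in range(num_threads):
+            stop = start + base + (1 if t < extra else 0)
+            schedule.append((start, stop))
+            start = stop
+        return schedule
+
+    # e.g. static_schedule_1d(10, 3) -> [(0, 4), (4, 7), (7, 10)]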
+ +Parallel reductions are not natively provided by GUFuncs but the parfor +lowering strategy allows us to use GUFuncs in a way that reductions can +be performed in parallel. To accomplish this, for each reduction variable +computed by a parfor, the parallel GUFunc and the code that calls it are +modified to make the scalar reduction variable into an array of reduction +variables whose length is equal to the number of Numba threads. In addition, +the GUFunc still contains a scalar version of the reduction variable that +is updated by the parfor body during each iteration. One time at the +end of the GUFunc this local reduction variable is copied into the +reduction array. In this way, false sharing of the reduction array is +prevented. Code is also inserted into the main +function after the parallel GUFunc has returned that does a reduction +across this smaller reduction array and this final reduction value is +then stored into the original scalar reduction variable. + +The GUFunc corresponding to the example from Section :ref:`parallel-accelerator` +can be seen below:: + + ______________________________________________________________________ + label 0: + sched.29 = arg(0, name=sched) ['sched.29'] + a0 = arg(1, name=a0) ['a0'] + a1 = arg(2, name=a1) ['a1'] + a2 = arg(3, name=a2) ['a2'] + _0_5 = arg(4, name=_0_5) ['_0_5'] + $3.1.24 = global(range: ) ['$3.1.24'] + $const3.3.21 = const(int, 0) ['$const3.3.21'] + $3.4.23 = getitem(value=sched.29, index=$const3.3.21) ['$3.4.23', '$const3.3.21', 'sched.29'] + $const3.6.28 = const(int, 1) ['$const3.6.28'] + $3.7.27 = getitem(value=sched.29, index=$const3.6.28) ['$3.7.27', '$const3.6.28', 'sched.29'] + $const3.8.32 = const(int, 1) ['$const3.8.32'] + $3.9.31 = $3.7.27 + $const3.8.32 ['$3.7.27', '$3.9.31', '$const3.8.32'] + $3.10.36 = call $3.1.24($3.4.23, $3.9.31, kws=[], func=$3.1.24, vararg=None, args=[Var($3.4.23, (2)), Var($3.9.31, (2))]) ['$3.1.24', '$3.10.36', '$3.4.23', '$3.9.31'] + $3.11.30 = getiter(value=$3.10.36) ['$3.10.36', '$3.11.30'] + jump 1 [] + label 1: + $28.2.35 = iternext(value=$3.11.30) ['$28.2.35', '$3.11.30'] + $28.3.25 = pair_first(value=$28.2.35) ['$28.2.35', '$28.3.25'] + $28.4.40 = pair_second(value=$28.2.35) ['$28.2.35', '$28.4.40'] + branch $28.4.40, 2, 3 ['$28.4.40'] + label 2: + $arg_out_var.15 = getitem(value=a0, index=$28.3.25) ['$28.3.25', '$arg_out_var.15', 'a0'] + $arg_out_var.16 = getitem(value=a1, index=$28.3.25) ['$28.3.25', '$arg_out_var.16', 'a1'] + $arg_out_var.14 = $arg_out_var.15 * $arg_out_var.16 ['$arg_out_var.14', '$arg_out_var.15', '$arg_out_var.16'] + $arg_out_var.17 = getitem(value=a2, index=$28.3.25) ['$28.3.25', '$arg_out_var.17', 'a2'] + $expr_out_var.13 = $arg_out_var.14 + $arg_out_var.17 ['$arg_out_var.14', '$arg_out_var.17', '$expr_out_var.13'] + _0_5[$28.3.25] = $expr_out_var.13 ['$28.3.25', '$expr_out_var.13', '_0_5'] + jump 1 [] + label 3: + $const44.1.33 = const(NoneType, None) ['$const44.1.33'] + $44.2.39 = cast(value=$const44.1.33) ['$44.2.39', '$const44.1.33'] + return $44.2.39 ['$44.2.39'] + ______________________________________________________________________ + + +Stage 6b: Generate object mode LLVM IR +-------------------------------------- + +If type inference fails to find Numba types for all values inside a function, +the function will be compiled in object mode. The generated LLVM will be +significantly longer, as the compiled code will need to make calls to the +`Python C API `_ to perform basically all +operations. 
The optimized LLVM for our example ``add()`` function is: + +.. code-block:: llvm + + @PyExc_SystemError = external global i8 + @".const.Numba_internal_error:_object_mode_function_called_without_an_environment" = internal constant [73 x i8] c"Numba internal error: object mode function called without an environment\00" + @".const.name_'a'_is_not_defined" = internal constant [24 x i8] c"name 'a' is not defined\00" + @PyExc_NameError = external global i8 + @".const.name_'b'_is_not_defined" = internal constant [24 x i8] c"name 'b' is not defined\00" + + define i32 @"__main__.add$1.pyobject.pyobject"(i8** nocapture %retptr, { i8*, i32 }** nocapture readnone %excinfo, i8* readnone %env, i8* %arg.a, i8* %arg.b) { + entry: + %.6 = icmp eq i8* %env, null + br i1 %.6, label %entry.if, label %entry.endif, !prof !0 + + entry.if: ; preds = %entry + tail call void @PyErr_SetString(i8* @PyExc_SystemError, i8* getelementptr inbounds ([73 x i8]* @".const.Numba_internal_error:_object_mode_function_called_without_an_environment", i64 0, i64 0)) + ret i32 -1 + + entry.endif: ; preds = %entry + tail call void @Py_IncRef(i8* %arg.a) + tail call void @Py_IncRef(i8* %arg.b) + %.21 = icmp eq i8* %arg.a, null + br i1 %.21, label %B0.if, label %B0.endif, !prof !0 + + B0.if: ; preds = %entry.endif + tail call void @PyErr_SetString(i8* @PyExc_NameError, i8* getelementptr inbounds ([24 x i8]* @".const.name_'a'_is_not_defined", i64 0, i64 0)) + tail call void @Py_DecRef(i8* null) + tail call void @Py_DecRef(i8* %arg.b) + ret i32 -1 + + B0.endif: ; preds = %entry.endif + %.30 = icmp eq i8* %arg.b, null + br i1 %.30, label %B0.endif1, label %B0.endif1.1, !prof !0 + + B0.endif1: ; preds = %B0.endif + tail call void @PyErr_SetString(i8* @PyExc_NameError, i8* getelementptr inbounds ([24 x i8]* @".const.name_'b'_is_not_defined", i64 0, i64 0)) + tail call void @Py_DecRef(i8* %arg.a) + tail call void @Py_DecRef(i8* null) + ret i32 -1 + + B0.endif1.1: ; preds = %B0.endif + %.38 = tail call i8* @PyNumber_Add(i8* %arg.a, i8* %arg.b) + %.39 = icmp eq i8* %.38, null + br i1 %.39, label %B0.endif1.1.if, label %B0.endif1.1.endif, !prof !0 + + B0.endif1.1.if: ; preds = %B0.endif1.1 + tail call void @Py_DecRef(i8* %arg.a) + tail call void @Py_DecRef(i8* %arg.b) + ret i32 -1 + + B0.endif1.1.endif: ; preds = %B0.endif1.1 + tail call void @Py_DecRef(i8* %arg.b) + tail call void @Py_DecRef(i8* %arg.a) + tail call void @Py_IncRef(i8* %.38) + tail call void @Py_DecRef(i8* %.38) + store i8* %.38, i8** %retptr, align 8 + ret i32 0 + } + + declare void @PyErr_SetString(i8*, i8*) + + declare void @Py_IncRef(i8*) + + declare void @Py_DecRef(i8*) + + declare i8* @PyNumber_Add(i8*, i8*) + + +The careful reader might notice several unnecessary calls to ``Py_IncRef`` +and ``Py_DecRef`` in the generated code. Currently Numba isn't able to +optimize those away. + +Object mode compilation will also attempt to identify loops which can be +extracted and statically-typed for "nopython" compilation. This process is +called *loop-lifting*, and results in the creation of a hidden nopython mode +function just containing the loop which is then called from the original +function. Loop-lifting helps improve the performance of functions that +need to access uncompilable code (such as I/O or plotting code) but still +contain a time-intensive section of compilable code. 
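+
+As a sketch of the kind of function that benefits from loop-lifting (the
+function below is a made-up example; note that newer Numba releases require
+``forceobj=True`` to request object mode rather than falling back silently):
+
+.. code-block:: python
+
+    from numba import jit
+
+    @jit(forceobj=True)   # object mode; the hot loop below can be lifted
+    def summarize(values):
+        # uncompilable, I/O-heavy part stays in object mode
+        print("summarizing", len(values), "values")
+        total = 0.0
+        # this loop only uses compilable operations, so it can be
+        # extracted into a hidden nopython-mode function
+        for v in values:
+            total += v * v
+        return total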
+ +Stage 7: Compile LLVM IR to machine code +---------------------------------------- + +In both :term:`object mode` and :term:`nopython mode`, the generated LLVM IR +is compiled by the LLVM JIT compiler and the machine code is loaded into +memory. A Python wrapper is also created (defined in +``numba.dispatcher.Dispatcher``) which can do the dynamic dispatch to the +correct version of the compiled function if multiple type specializations +were generated (for example, for both ``float32`` and ``float64`` versions +of the same function). + +The machine assembly code generated by LLVM can be dumped to the screen by +setting the :envvar:`NUMBA_DUMP_ASSEMBLY` environment variable to 1: + +.. code-block:: gas + + .globl __main__.add$1.int64.int64 + .align 16, 0x90 + .type __main__.add$1.int64.int64,@function + __main__.add$1.int64.int64: + addq %r8, %rcx + movq %rcx, (%rdi) + xorl %eax, %eax + retq + +The assembly output will also include the generated wrapper function that +translates the Python arguments to native data types. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/caching.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/caching.rst new file mode 100644 index 000000000..29dad8f52 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/caching.rst @@ -0,0 +1,111 @@ +.. _developer-caching: + +================ +Notes on Caching +================ + +Numba supports caching of compiled functions into the filesystem for future +use of the same functions. + + +The Implementation +================== + +Caching is done by saving the compiled *object code*, the ELF object of the +executable code. By using the *object code*, cached functions have minimal +overhead because no compilation is needed. The cached data is saved under the +cache directory (see :envvar:`NUMBA_CACHE_DIR`). The index of the cache is +stored in a ``.nbi`` file, with one index per function, and it lists all the +overloaded signatures compiled for the function. The *object code* is stored in +files with an ``.nbc`` extension, one file per overload. The data in both files +is serialized with :mod:`pickle`. + +.. note:: On Python <=3.7, Numba extends ``pickle`` using the pure-Python + pickler. To use the faster C Pickler, install ``pickle5`` + from ``pip``. ``pickle5`` backports Python 3.8 pickler features. + + +Requirements for Cacheability +----------------------------- + +Developers should note the requirements of a function to permit it to be cached +to ensure that the features they are working on are compatible with caching. + +Requirements for cacheable function: + +- The LLVM module must be *self-contained*, meaning that it cannot rely on + other compiled units without linking to them. +- The only allowed external symbols are from the + :ref:`NRT ` or other common symbols from system libraries + (i.e. libc and libm). + +Debugging note: + +- Look for the usage of ``inttoptr`` in the LLVM IR or + ``target_context.add_dynamic_add()`` in the lowering code in Python. + They indicate potential usage of runtime address. Not all uses are + problematic and some are necessary. Only the conversion of constant integers + into pointers will affect caching. +- Misuse of dynamic address or dynamic symbols will likely result in a + segfault. +- Linking order matters because unused symbols are dropped after linking. + Linking should start from the leaf nodes of the dependency graph. 
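+
+For reference, caching is requested per function via the ``cache`` flag; a
+minimal sketch (the function itself is a made-up example)::
+
+    from numba import njit
+
+    @njit(cache=True)
+    def dot(a, b):
+        # self-contained nopython code: only NRT and libc/libm symbols
+        # are needed, so the compiled object code is cacheable
+        s = 0.0
+        for i in range(a.shape[0]):
+            s += a[i] * b[i]
+        return s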
+
+
+Features Compatible with Caching
+--------------------------------
+
+The following features are explicitly verified to work with caching:
+
+- ufuncs and gufuncs for the ``cpu`` and ``parallel`` targets
+- parallel accelerator features (i.e. ``parallel=True``)
+
+
+Caching Limitations
+-------------------
+
+This is a list of known limitations of the cache:
+
+- Cache invalidation fails to recognize changes in symbols defined in a
+  different file.
+- Global variables are treated as constants.  The cache will remember the
+  value of the global variable used at compilation.  On cache load, the
+  cached function will not rebind to the new value of the global variable.
+
+
+.. _cache-sharing:
+
+Cache Sharing
+-------------
+
+It is safe to share and reuse the contents of the cache directory on a
+different machine.  The cache remembers the CPU model and the available
+CPU features during compilation.  If the CPU model and the CPU features do
+not match exactly, the cache contents will not be considered.
+(Also see :envvar:`NUMBA_CPU_NAME`)
+
+If the cache directory is shared on a network filesystem, concurrent
+read/write of the cache is safe only if the file replacement operation is
+atomic for the filesystem.  Numba always writes to a unique temporary file
+first and then moves it into place at the target cache file path.  Numba is
+tolerant of lost cache files and lost cache entries.
+
+.. _cache-clearing:
+
+Cache Clearing
+--------------
+
+The cache is invalidated when the corresponding source file is modified.
+However, it is sometimes necessary to clear the cache directory manually.
+For instance, changes in the compiler will not be recognized because the
+source files are not modified.
+
+To clear the cache, the cache directory can simply be removed.
+
+Removing the cache directory while a Numba application is running may cause
+an ``OSError`` exception to be raised at the compilation site.
+
+Related Environment Variables
+-----------------------------
+
+See :ref:`env-vars for caching <numba-envvars-caching>`.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/compiler_pass_example.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/compiler_pass_example.py
new file mode 100644
index 000000000..15a91bd0e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/compiler_pass_example.py
@@ -0,0 +1,78 @@
+def ex_compiler_pass():
+
+    # magictoken.ex_compiler_pass.begin
+    from numba import njit
+    from numba.core import ir
+    from numba.core.compiler import CompilerBase, DefaultPassBuilder
+    from numba.core.compiler_machinery import FunctionPass, register_pass
+    from numba.core.untyped_passes import IRProcessing
+    from numbers import Number
+
+    # Register this pass with the compiler framework, declare that it will not
+    # mutate the control flow graph and that it is not an analysis_only pass
+    # (it potentially mutates the IR).
+    @register_pass(mutates_CFG=False, analysis_only=False)
+    class ConstsAddOne(FunctionPass):
+        _name = "consts_add_one" # the common name for the pass
+
+        def __init__(self):
+            FunctionPass.__init__(self)
+
+        # implement the method to do the work, "state" is the internal
+        # compiler state from the CompilerBase instance.
+ def run_pass(self, state): + func_ir = state.func_ir # get the FunctionIR object + mutated = False # used to record whether this pass mutates the IR + # walk the blocks + for blk in func_ir.blocks.values(): + # find the assignment nodes in the block and walk them + for assgn in blk.find_insts(ir.Assign): + # if an assignment value is a ir.Consts + if isinstance(assgn.value, ir.Const): + const_val = assgn.value + # if the value of the ir.Const is a Number + if isinstance(const_val.value, Number): + # then add one! + const_val.value += 1 + mutated |= True + return mutated # return True if the IR was mutated, False if not. + # magictoken.ex_compiler_pass.end + + # magictoken.ex_compiler_defn.begin + class MyCompiler(CompilerBase): # custom compiler extends from CompilerBase + + def define_pipelines(self): + # define a new set of pipelines (just one in this case) and for ease + # base it on an existing pipeline from the DefaultPassBuilder, + # namely the "nopython" pipeline + pm = DefaultPassBuilder.define_nopython_pipeline(self.state) + # Add the new pass to run after IRProcessing + pm.add_pass_after(ConstsAddOne, IRProcessing) + # finalize + pm.finalize() + # return as an iterable, any number of pipelines may be defined! + return [pm] + # magictoken.ex_compiler_defn.end + + # magictoken.ex_compiler_call.begin + @njit(pipeline_class=MyCompiler) # JIT compile using the custom compiler + def foo(x): + a = 10 + b = 20.2 + c = x + a + b + return c + + print(foo(100)) # 100 + 10 + 20.2 (+ 1 + 1), extra + 1 + 1 from the rewrite! + # magictoken.ex_compiler_call.end + + # magictoken.ex_compiler_timings.begin + compile_result = foo.overloads[foo.signatures[0]] + nopython_times = compile_result.metadata['pipeline_times']['nopython'] + for k in nopython_times.keys(): + if ConstsAddOne._name in k: + print(nopython_times[k]) + # magictoken.ex_compiler_timings.end + + assert foo(100) == 132.2 + +ex_compiler_pass() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/contributing.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/contributing.rst new file mode 100644 index 000000000..9ec5f9d9f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/contributing.rst @@ -0,0 +1,491 @@ + +Contributing to Numba +===================== + +We welcome people who want to make contributions to Numba, big or small! +Even simple documentation improvements are encouraged. If you have +questions, don't hesitate to ask them (see below). + + +Communication +------------- + +Real-time Chat +'''''''''''''' + +Numba uses Gitter for public real-time chat. To help improve the +signal-to-noise ratio, we have two channels: + +* `numba/numba `_: General Numba discussion, + questions, and debugging help. +* `numba/numba-dev `_: Discussion of PRs, + planning, release coordination, etc. + +Both channels are public, but we may ask that discussions on numba-dev move to +the numba channel. This is simply to ensure that numba-dev is easy for core +developers to keep up with. + +Note that the Github issue tracker is the best place to report bugs. Bug +reports in chat are difficult to track and likely to be lost. + +Forum +..... + +Numba uses Discourse as a forum for longer running threads such as design +discussions and roadmap planning. There are various categories available and it +can be reached at: `numba.discourse.group `_. 
+ +Weekly Meetings +''''''''''''''' + +The core Numba developers have a weekly video conference to discuss roadmap, +feature planning, and outstanding issues. These meetings are entirely public, +details are posted on +`numba.discourse.group Announcements `_ +and everyone is welcome to join the discussion. Minutes will be taken and will +be posted to the +`Numba wiki `_. + +.. _report-numba-bugs: + +Bug tracker +'''''''''''' + +We use the `Github issue tracker `_ +to track both bug reports and feature requests. If you report an issue, +please include specifics: + +* what you are trying to do; +* which operating system you have and which version of Numba you are running; +* how Numba is misbehaving, e.g. the full error traceback, or the unexpected + results you are getting; +* as far as possible, a code snippet that allows full reproduction of your + problem. + +Getting set up +-------------- + +If you want to contribute, we recommend you fork our `Github repository +`_, then create a branch representing +your work. When your work is ready, you should submit it as a pull +request from the Github interface. + +If you want, you can submit a pull request even when you haven't finished +working. This can be useful to gather feedback, or to stress your changes +against the :ref:`continuous integration ` +platform. In this case, please prepend ``[WIP]`` to your pull request's title. + +.. _buildenv: + +Build environment +''''''''''''''''' + +Numba has a number of dependencies (mostly `NumPy `_ and +`llvmlite `_) with non-trivial build +instructions. Unless you want to build those dependencies yourself, we +recommend you use `conda `_ to create a +dedicated development environment and install precompiled versions of those +dependencies there. Read more about the Numba dependencies here: +`numba-source-install-check`. + +When working with a source checkout of Numba you will also need a development +build of llvmlite. These are available from the ``numba/label/dev`` channel on +`anaconda.org `_. + + +Then, to create an environment with a few of the most common dependencies:: + + $ conda create -n numbaenv python=3.10 numba/label/dev::llvmlite numpy scipy jinja2 cffi + +.. note:: + This installs an environment based on Python 3.10, but you can of course + choose another version supported by Numba. To test additional features, + you may also need to install ``tbb`` and/or ``llvm-openmp``. Check the + dependency list above for details. + +To activate the environment for the current shell session:: + + $ conda activate numbaenv + +.. note:: + These instructions are for a standard Linux shell. You may need to + adapt them for other platforms. + +Once the environment is activated, you have a dedicated Python with the +required dependencies:: + + $ python + Python 3.10.3 (main, Mar 28 2022, 04:26:28) [Clang 12.0.0 ] on darwin + Type "help", "copyright", "credits" or "license" for more information. + + >>> import llvmlite + >>> llvmlite.__version__ + 0.39.0dev0+61.gf27ac6f + + +Building Numba +'''''''''''''' + +For a convenient development workflow, we recommend you build Numba inside +its source checkout:: + + $ git clone git@github.com:numba/numba.git + $ cd numba + $ python setup.py build_ext --inplace + +This assumes you have a working C compiler and runtime on your development +system. You will have to run this command again whenever you modify +C files inside the Numba source tree. 
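+
+A quick way to check that the in-place build is being picked up is to import
+the package and inspect it (the version string and output will vary)::
+
+    $ python -c "import numba; print(numba.__version__)"
+    $ numba -s    # print detailed system and build information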
+ +The ``build_ext`` command in Numba's setup also accepts the following +arguments: + +- ``--noopt``: This disables optimization when compiling Numba's CPython + extensions, which makes debugging them much easier. Recommended in + conjunction with the standard ``build_ext`` option ``--debug``. +- ``--werror``: Compiles Numba's CPython extensions with the ``-Werror`` flag. +- ``--wall``: Compiles Numba's CPython extensions with the ``-Wall`` flag. + +Note that Numba's CI and the conda recipe for Linux build with the ``--werror`` +and ``--wall`` flags, so any contributions that change the CPython extensions +should be tested with these flags too. + +Running tests +''''''''''''' + +Numba is validated using a test suite comprised of various kind of tests +(unit tests, functional tests). The test suite is written using the +standard :py:mod:`unittest` framework. + +The tests can be executed via ``python -m numba.runtests``. If you are +running Numba from a source checkout, you can type ``./runtests.py`` +as a shortcut. Various options are supported to influence test running +and reporting. Pass ``-h`` or ``--help`` to get a glimpse at those options. +Examples: + +* to list all available tests:: + + $ python -m numba.runtests -l + +* to list tests from a specific (sub-)suite:: + + $ python -m numba.runtests -l numba.tests.test_usecases + +* to run those tests:: + + $ python -m numba.runtests numba.tests.test_usecases + +* to run all tests in parallel, using multiple sub-processes:: + + $ python -m numba.runtests -m + +* For a detailed list of all options:: + + $ python -m numba.runtests -h + +The numba test suite can take a long time to complete. When you want to avoid +the long wait, it is useful to focus on the failing tests first with the +following test runner options: + +* The ``--failed-first`` option is added to capture the list of failed tests + and to re-execute them first:: + + $ python -m numba.runtests --failed-first -m -v -b + +* The ``--last-failed`` option is used with ``--failed-first`` to execute + the previously failed tests only:: + + $ python -m numba.runtests --last-failed -m -v -b + +When debugging, it is useful to turn on logging. Numba logs using the +standard ``logging`` module. One can use the standard ways (i.e. +``logging.basicConfig``) to configure the logging behavior. To enable logging +in the test runner, there is a ``--log`` flag for convenience:: + + $ python -m numba.runtests --log + +To enable :ref:`runtime type-checking `, set the environment +variable ``NUMBA_USE_TYPEGUARD=1`` and use `runtests.py` from the source root +instead. For example:: + + $ NUMBA_USE_TYPEGUARD=1 python runtests.py + + +Running coverage +'''''''''''''''' + +Coverage reports can be produced using `coverage.py +`_. To record coverage +info for the test suite, run:: + + coverage run -m numba.runtests + +Next, combine coverage files (potentially for multiple runs) with:: + + coverage combine + +The combined output can be transformed into various report formats - see the +`coverage CLI usage reference +`_. +For example, to produce an HTML report, run:: + + coverage html + +Following this command, the report can be viewed by opening ``htmlcov/index.html``. + + +Development rules +----------------- + +Code reviews +'''''''''''' + +Any non-trivial change should go through a code review by one or several of +the core developers. The recommended process is to submit a pull request +on github. 
+ +A code review should try to assess the following criteria: + +* general design and correctness +* code structure and maintainability +* coding conventions +* docstrings, comments +* test coverage + +Coding conventions +'''''''''''''''''' + +All Python code should follow :pep:`8`. Our C code doesn't have a +well-defined coding style (would it be nice to follow :pep:`7`?). +Code and documentation should generally fit within 80 columns, for +maximum readability with all existing tools (such as code review UIs). + +Numba uses `Flake8 `_ to ensure a consistent +Python code format throughout the project. ``flake8`` can be installed +with ``pip`` or ``conda`` and then run from the root of the Numba repository:: + + flake8 numba + +Optionally, you may wish to setup `pre-commit hooks `_ +to automatically run ``flake8`` when you make a git commit. This can be +done by installing ``pre-commit``:: + + pip install pre-commit + +and then running:: + + pre-commit install + +from the root of the Numba repository. Now ``flake8`` will be run each time +you commit changes. You can skip this check with ``git commit --no-verify``. + +Numba has started the process of using `type hints `_ in its code base. This +will be a gradual process of extending the number of files that use type hints, as well as going from voluntary to +mandatory type hints for new features. `Mypy `_ is used for automated static checking. + +At the moment, only certain files are checked by mypy. The list can be found in ``mypy.ini``. When making changes to +those files, it is necessary to add the required type hints such that mypy tests will pass. Only in exceptional +circumstances should ``type: ignore`` comments be used. + +If you are contributing a new feature, we encourage you to use type hints, even if the file is not currently in the +checklist. If you want to contribute type hints to enable a new file to be in the checklist, please add the file to the +``files`` variable in ``mypy.ini``, and decide what level of compliance you are targeting. Level 3 is basic static +checks, while levels 2 and 1 represent stricter checking. The levels are described in details in ``mypy.ini``. + +There is potential for confusion between the Numba module ``typing`` and Python built-in module ``typing`` used for type +hints, as well as between Numba types---such as ``Dict`` or ``Literal``---and ``typing`` types of the same name. +To mitigate the risk of confusion we use a naming convention by which objects of the built-in ``typing`` module are +imported with an ``pt`` prefix. For example, ``typing.Dict`` is imported as ``from typing import Dict as ptDict``. + +Stability +''''''''' + +The repository's ``main`` branch is expected to be stable at all times. +This translates into the fact that the test suite passes without errors +on all supported platforms (see below). This also means that a pull request +also needs to pass the test suite before it is merged in. + +.. _platform_support: + +Platform support +'''''''''''''''' + +Every commit to the main branch is automatically tested on all of the +platforms Numba supports. This includes ARMv8, POWER8, and NVIDIA GPUs. +The build system however is internal to Anaconda, so we also use +`Azure `_ to provide public continuous +integration information for as many combinations as can be supported by the +service. Azure CI automatically tests all pull requests on Windows, OS X and +Linux, as well as a sampling of different Python and NumPy versions. 
If you see
+problems on platforms you are unfamiliar with, feel free to ask for help in
+your pull request.  The Numba core developers can help diagnose
+cross-platform compatibility issues.  Also see the :ref:`continuous
+integration <continuous_integration_testing>` section on how public CI is
+implemented.
+
+.. _continuous_integration_testing:
+
+Continuous integration testing
+''''''''''''''''''''''''''''''
+
+The Numba test suite causes CI systems a lot of grief:
+
+#. It's huge, 9000+ tests.
+#. In part because of 1., and because compilers are pretty involved, the test
+   suite takes a long time to run.
+#. There are sections of the test suite that are deliberately designed to
+   stress systems almost to the point of failure (tests which concurrently
+   compile and execute with threads and fork processes etc.).
+#. The combination of things that Numba has to test well exceeds the capacity
+   of any public CI system (Python versions x NumPy versions x Operating
+   systems x Architectures x feature libraries (e.g. SVML) x threading
+   backends (e.g. OpenMP, TBB)), and then there's CUDA too and all its
+   version variants.
+
+As a result of the above, public CI is implemented as follows:
+
+#. The combination of OS x Python x NumPy x Various Features in the testing
+   matrix is designed to give a good indicative result for whether "this pull
+   request is probably ok".
+#. When public CI runs it:
+
+   #. Looks for files that contain tests that have been altered by the
+      proposed change and runs these on the whole testing matrix.
+   #. Runs a subset of the test suite on each part of the testing matrix.
+      i.e. slice the test suite up by the number of combinations in the
+      testing matrix and have each combination run one chunk.  This is done
+      for speed, because otherwise public CI cannot cope with the load.
+
+If a Pull Request (PR) changes CUDA code or will affect the CUDA target, it
+needs to be run on gpuCI.
+This can be triggered by one of the Numba maintainers commenting ``run gpuCI
+tests`` on the PR discussion.  This runs the CUDA test suite with various
+CUDA toolkit versions on Linux, to provide some initial confidence in the
+correctness of the changes with respect to CUDA.  Following approval, the PR
+will also be run on Numba's build farm to test other configurations with
+CUDA (including Windows, which is not tested by gpuCI).
+
+If the PR is not CUDA-related but makes changes to something that the core
+developers consider risky, then it will also be run on the Numba farm just to
+make sure.  The Numba project's private build and test farm will actually
+exercise all the applicable tests on all the combinations noted above on real
+hardware!
+
+
+.. _type_anno_check:
+
+Type annotation and runtime type checking
+'''''''''''''''''''''''''''''''''''''''''
+
+Numba is slowly gaining type annotations.  To facilitate the review of pull
+requests that are incrementally adding type annotations, the test suite uses
+`typeguard`_ to perform runtime type checking.  This helps verify the
+validity of type annotations.
+
+To enable runtime type checking in the test suite, users can use
+`runtests.py`_ in the source root as the test runner and set the environment
+variable ``NUMBA_USE_TYPEGUARD=1``.  For example::
+
+    $ NUMBA_USE_TYPEGUARD=1 python runtests.py numba.tests
+
+Things that help with pull requests
+'''''''''''''''''''''''''''''''''''
+
+Even with the mitigating design above, public CI can get overloaded, which
+causes a backlog of builds.
+
+Things that help with pull requests
+'''''''''''''''''''''''''''''''''''
+
+Even with the mitigating design above, public CI can get overloaded, which
+causes a backlog of builds.  It's therefore really helpful when opening pull
+requests if you can limit the frequency of pushing changes.  Ideally, please
+squash commits to reduce the number of patches and/or push as infrequently as
+possible.  Also, once a pull request review has started, please don't
+rebase/force push/squash or do anything that rewrites the history of the
+reviewed code, as GitHub cannot track this and it makes it very hard for
+reviewers to see what has changed.
+
+The core developers thank everyone for their cooperation with the above!
+
+Why is my pull request/issue seemingly being ignored?
+'''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Numba is an open source project and like many similar projects it has limited
+resources.  As a result, it is unfortunately necessary for the core
+developers to associate a priority with issues/pull requests (PRs).  A great
+way to move your issue/PR up the priority queue is to help out somewhere else
+in the project so as to free up core developer time.  Examples of ways to
+help:
+
+* Perform an initial review on a PR.  This often doesn't require compiler
+  engineering knowledge and just involves checking that the proposed patch is
+  of good quality, fixes the problem/implements the feature, is well tested
+  and documented.
+* Debug an issue: there are numerous issues which `"need triage" `_,
+  which essentially involves debugging the reported problem.  Even if you
+  cannot get right to the bottom of a problem, leaving notes about what was
+  discovered for someone else is also helpful.
+* Answer questions/provide help for users on `discourse `_
+  and/or `gitter.im `_.
+
+The core developers thank everyone for their understanding with the above!
+
+Documentation
+-------------
+
+The Numba documentation is split over two repositories:
+
+* This documentation is in the ``docs`` directory inside the
+  `Numba repository `_.
+
+* The `Numba homepage `_ has its sources in a
+  separate repository at https://github.com/numba/numba-webpage
+
+
+Main documentation
+''''''''''''''''''
+
+This documentation is under the ``docs`` directory of the `Numba repository`_.
+It is built with `Sphinx `_ and
+`numpydoc `_, which are available using
+conda or pip; e.g. ``conda install sphinx numpydoc``.
+
+To build the documentation, you need the bootstrap theme::
+
+    $ pip install sphinx_bootstrap_theme
+
+You can edit the source files under ``docs/source/``, after which you can
+build and check the documentation::
+
+    $ make html
+    $ open _build/html/index.html
+
+Core developers can upload this documentation to the Numba website
+at https://numba.pydata.org by using the ``gh-pages.py`` script under
+``docs``::
+
+    $ python gh-pages.py version  # version can be 'dev' or '0.16' etc
+
+then verify the repository under the ``gh-pages`` directory and use
+``git push``.
+
+Web site homepage
+'''''''''''''''''
+
+The Numba homepage on https://numba.pydata.org can be fetched from here:
+https://github.com/numba/numba-webpage
+
+After pushing documentation to a new version, core developers will want to
+update the website.  Some notable files:
+
+* ``index.rst``                          # Update main page
+* ``_templates/sidebar_versions.html``   # Update sidebar links
+* ``doc.rst``                            # Update after adding a new version for the Numba docs
+* ``download.rst``                       # Update after uploading a new Numba version to PyPI
+
+After updating run::
+
+    $ make html
+
+and check out ``_build/html/index.html``.  To push updates to the Web site::
+
+    $ python _scripts/gh-pages.py
+
+then verify the repository under the ``gh-pages`` directory.
Make sure the
+``CNAME`` file is present and contains a single line for ``numba.pydata.org``.
+Finally, use ``git push`` to update the website.
+
+
+.. _typeguard: https://typeguard.readthedocs.io/en/latest/
+.. _runtests.py: https://github.com/numba/numba/blob/main/runtests.py
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/custom_pipeline.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/custom_pipeline.rst
new file mode 100644
index 000000000..e670acf29
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/custom_pipeline.rst
@@ -0,0 +1,173 @@
+.. _arch-pipeline:
+
+========================
+Customizing the Compiler
+========================
+
+.. warning:: The custom pipeline feature is for expert use only.  Modifying
+             the compiler behavior can invalidate internal assumptions in the
+             Numba source code.
+
+
+Library developers looking to extend or modify the compiler behavior can do
+so by defining a custom compiler that inherits from
+``numba.compiler.CompilerBase``.  The default Numba compiler is defined
+as ``numba.compiler.Compiler``, implementing the ``.define_pipelines()``
+method, which adds the *nopython-mode*, *object-mode* and *interpreted-mode*
+pipelines.  For convenience these three pipelines are defined in
+``numba.compiler.DefaultPassBuilder`` by the methods:
+
+* ``.define_nopython_pipeline()``
+* ``.define_objectmode_pipeline()``
+* ``.define_interpreted_pipeline()``
+
+respectively.
+
+To use a custom subclass of ``CompilerBase``, supply it as the
+``pipeline_class`` keyword argument to the ``@jit`` and ``@generated_jit``
+decorators.  By doing so, the effect of the custom pipeline is limited to the
+function being decorated.
+
+Implementing a compiler pass
+----------------------------
+
+Numba makes it possible to implement a new compiler pass and does so through
+the use of an API similar to that of LLVM.  The following demonstrates the
+basic process involved.
+
+
+Compiler pass classes
+#####################
+
+All passes must inherit from ``numba.compiler_machinery.CompilerPass``;
+commonly used subclasses are:
+
+* ``numba.compiler_machinery.FunctionPass`` for describing a pass that
+  operates on a function-at-once level and may mutate the IR state.
+* ``numba.compiler_machinery.AnalysisPass`` for describing a pass that
+  performs analysis only.
+* ``numba.compiler_machinery.LoweringPass`` for describing a pass that
+  performs lowering only.
+
+In this example a new compiler pass will be implemented that will rewrite all
+``ir.Const(x)`` nodes, where the value ``x`` is an instance of
+``numbers.Number``, such that the value of ``x`` is incremented by one.  There
+is no use for this pass other than to serve as a pedagogical vehicle!
+
+The ``numba.compiler_machinery.FunctionPass`` is appropriate for the suggested
+pass behavior and so is the base class of the new pass.  Further, a
+``run_pass`` method is defined to do the work (this method is abstract; all
+compiler passes must implement it).
+
+First the new class:
+
+.. literalinclude:: compiler_pass_example.py
+   :language: python
+   :dedent: 4
+   :start-after: magictoken.ex_compiler_pass.begin
+   :end-before: magictoken.ex_compiler_pass.end
+
+
+Note also that the class must be registered with Numba's compiler machinery
+using ``@register_pass``.  In part, this allows declaring whether the pass
+mutates the control flow graph and whether it is an analysis-only pass.
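+
+For readers without ``compiler_pass_example.py`` to hand, a minimal sketch of
+such a pass might look like the following (an illustration of the machinery
+described above, not a verbatim copy of the example file)::
+
+    import numbers
+
+    from numba.core import ir
+    from numba.core.compiler_machinery import FunctionPass, register_pass
+
+    @register_pass(mutates_CFG=False, analysis_only=False)
+    class ConstsAddOne(FunctionPass):
+        _name = "consts_add_one"
+
+        def __init__(self):
+            FunctionPass.__init__(self)
+
+        def run_pass(self, state):
+            # Walk the blocks of the function IR and increment every
+            # constant whose value is a numbers.Number instance.
+            mutated = False
+            for blk in state.func_ir.blocks.values():
+                for assgn in blk.find_insts(ir.Assign):
+                    if isinstance(assgn.value, ir.Const):
+                        if isinstance(assgn.value.value, numbers.Number):
+                            assgn.value.value += 1
+                            mutated = True
+            return mutated  # report whether the pass changed the IR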
+ +Next, define a new compiler based on the existing +``numba.compiler.CompilerBase``. The compiler pipeline is defined through the +use of an existing pipeline and the new pass declared above is added to be run +after the ``IRProcessing`` pass. + + +.. literalinclude:: compiler_pass_example.py + :language: python + :dedent: 4 + :start-after: magictoken.ex_compiler_defn.begin + :end-before: magictoken.ex_compiler_defn.end + +Finally update the ``@njit`` decorator at the call site to make use of the newly +defined compilation pipeline. + +.. literalinclude:: compiler_pass_example.py + :language: python + :dedent: 4 + :start-after: magictoken.ex_compiler_call.begin + :end-before: magictoken.ex_compiler_call.end + +Debugging compiler passes +------------------------- + +Observing IR Changes +#################### + +It is often useful to be able to see the changes a pass makes to the IR. Numba +conveniently permits this through the use of the environment variable +:envvar:`NUMBA_DEBUG_PRINT_AFTER`. In the case of the above pass, running the +example code with ``NUMBA_DEBUG_PRINT_AFTER="ir_processing,consts_add_one"`` +gives: + + +.. code-block:: none + :emphasize-lines: 4, 7, 24, 27 + + ----------------------------nopython: ir_processing----------------------------- + label 0: + x = arg(0, name=x) ['x'] + $const0.1 = const(int, 10) ['$const0.1'] + a = $const0.1 ['$const0.1', 'a'] + del $const0.1 [] + $const0.2 = const(float, 20.2) ['$const0.2'] + b = $const0.2 ['$const0.2', 'b'] + del $const0.2 [] + $0.5 = x + a ['$0.5', 'a', 'x'] + del x [] + del a [] + $0.7 = $0.5 + b ['$0.5', '$0.7', 'b'] + del b [] + del $0.5 [] + c = $0.7 ['$0.7', 'c'] + del $0.7 [] + $0.9 = cast(value=c) ['$0.9', 'c'] + del c [] + return $0.9 ['$0.9'] + ----------------------------nopython: consts_add_one---------------------------- + label 0: + x = arg(0, name=x) ['x'] + $const0.1 = const(int, 11) ['$const0.1'] + a = $const0.1 ['$const0.1', 'a'] + del $const0.1 [] + $const0.2 = const(float, 21.2) ['$const0.2'] + b = $const0.2 ['$const0.2', 'b'] + del $const0.2 [] + $0.5 = x + a ['$0.5', 'a', 'x'] + del x [] + del a [] + $0.7 = $0.5 + b ['$0.5', '$0.7', 'b'] + del b [] + del $0.5 [] + c = $0.7 ['$0.7', 'c'] + del $0.7 [] + $0.9 = cast(value=c) ['$0.9', 'c'] + del c [] + return $0.9 ['$0.9'] + +Note the change in the values in the ``const`` nodes. + +Pass execution times +#################### + +Numba has built-in support for timing all compiler passes, the execution times +are stored in the metadata associated with a compilation result. This +demonstrates one way of accessing this information based on the previously +defined function, ``foo``: + +.. literalinclude:: compiler_pass_example.py + :language: python + :dedent: 4 + :start-after: magictoken.ex_compiler_timings.begin + :end-before: magictoken.ex_compiler_timings.end + +the output of which is, for example:: + + pass_timings(init=1.914000677061267e-06, run=4.308700044930447e-05, finalize=1.7400006981915794e-06) + +this displaying the pass initialization, run and finalization times in seconds. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/debugging.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/debugging.rst new file mode 100644 index 000000000..544e9a6eb --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/debugging.rst @@ -0,0 +1,138 @@ +.. 
_developer-debugging: + +================== +Notes on Debugging +================== + +This section describes techniques that can be useful in debugging the +compilation and execution of generated code. + +.. seealso:: + :ref:`debugging-jit-compiled-code` + + +Memcheck +-------- + +Memcheck_ is a memory error detector implemented using Valgrind_. It is useful +for detecting memory errors in compiled code, particularly out-of-bounds +accesses and use-after-free errors. Buggy or miscompiled native code can +generate these kinds of errors. The `Memcheck documentation +`_ explains its usage; here, we +discuss only the specifics of using it with Numba. + +.. _Memcheck: https://valgrind.org/docs/manual/mc-manual.html +.. _Valgrind: https://valgrind.org/ + +The Python interpreter and some of the libraries used by Numba can generate +false positives with Memcheck - see `this section of the manual +`_ for more +information on why false positives occur. The false positives can make it +difficult to determine when an actual error has occurred, so it is helpful to +suppress known false positives. This can be done by supplying a suppressions +file, which instructs Memcheck to ignore errors that match the suppressions +defined in it. + +The CPython source distribution includes a suppressions file, in the file +``Misc/valgrind-python.supp``. Using this file prevents a lot of spurious errors +generated by Python's memory allocation implementation. Additionally, the Numba +repository includes a suppressions file in ``contrib/valgrind-numba.supp``. + +.. note:: It is important to use the suppressions files from the versions of the + Python interpreter and Numba that you are using - these files evolve over + time, so non-current versions can fail to suppress some errors, or + erroneously suppress actual errors. + +To run the Python interpreter under Memcheck with both suppressions +files, it is invoked with the following command:: + + valgrind --tool=memcheck \ + --suppressions=${CPYTHON_SRC_DIR}/Misc/valgrind-python.supp \ + --suppressions=${NUMBA_SRC_DIR}/contrib/valgrind-numba.supp \ + python ${PYTHON_ARGS} + +where ``${CPYTHON_SRC_DIR}`` is set to the location of the CPython source +distribution, ``${NUMBA_SRC_DIR}`` is the location of the Numba source dir, and +``${PYTHON_ARGS}`` are the arguments to the Python interpreter. + +If there are errors, then messages describing them will be printed to standard +error. 
An example of an error is::
+
+    ==77113==    at 0x24169A: PyLong_FromLong (longobject.c:251)
+    ==77113==    by 0x241881: striter_next (bytesobject.c:3084)
+    ==77113==    by 0x2D3C95: _PyEval_EvalFrameDefault (ceval.c:2809)
+    ==77113==    by 0x21B499: _PyEval_EvalCodeWithName (ceval.c:3930)
+    ==77113==    by 0x26B436: _PyFunction_FastCallKeywords (call.c:433)
+    ==77113==    by 0x2D3605: call_function (ceval.c:4616)
+    ==77113==    by 0x2D3605: _PyEval_EvalFrameDefault (ceval.c:3124)
+    ==77113==    by 0x21B977: _PyEval_EvalCodeWithName (ceval.c:3930)
+    ==77113==    by 0x21C2A4: _PyFunction_FastCallDict (call.c:376)
+    ==77113==    by 0x2D5129: do_call_core (ceval.c:4645)
+    ==77113==    by 0x2D5129: _PyEval_EvalFrameDefault (ceval.c:3191)
+    ==77113==    by 0x21B499: _PyEval_EvalCodeWithName (ceval.c:3930)
+    ==77113==    by 0x26B436: _PyFunction_FastCallKeywords (call.c:433)
+    ==77113==    by 0x2D46DA: call_function (ceval.c:4616)
+    ==77113==    by 0x2D46DA: _PyEval_EvalFrameDefault (ceval.c:3139)
+    ==77113==
+    ==77113== Use of uninitialised value of size 8
+
+The traceback provided only outlines the C call stack, which can make it
+difficult to determine what the Python interpreter was doing at the time of
+the error.  One can learn more about the state of the stack by looking at the
+backtrace in the `GNU Debugger (GDB) `_.
+Launch ``valgrind`` with an additional argument, ``--vgdb-error=0``, and
+attach to the process using GDB as instructed by the output.  Once an error
+is encountered, GDB will stop at the error and the stack can be inspected.
+
+GDB does provide support for backtracing through the Python stack, but this
+requires symbols which may not be easily available in your Python
+distribution.  In this case, it is still possible to determine some
+information about what was happening in Python, but this depends on examining
+the backtrace closely.  For example, in a backtrace corresponding to the
+above error, we see items such as (nested object representations are elided
+here as ``<...>``):
+
+.. code-block:: none
+
+    #18 0x00000000002722da in slot_tp_call (
+        self=<_wrap_impl(_callable=<_wrap_missing_loc(func=<...>) at remote
+        0x1d200bd0>, _imp=<...>, _context=<...>, <...>], attributes=[<...>,
+        identified_types={}) at remote 0xbb5add0>, name='cuconstRecAlign$7',
+        data_layout='e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64',
+        scope=<...>, triple='nvptx64-nvidia-cuda',
+        globals={'_ZN08NumbaEnv5numba4cuda5tests6cudapy13test_constmem19cuconstRecAlign$247E5ArrayIdLi1E1C7mutable7ali...(truncated),
+        kwds=0x0)
+
+We can see some of the arguments, in particular the names of the compiled
+functions, e.g.::
+
+    _ZN5numba4cuda5tests6cudapy13test_constmem19cuconstRecAlign$247E5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE5ArrayIdLi1E1C7mutable7alignedE
+
+We can run this through ``c++filt`` to see a more human-readable
+representation::
+
+    numba::cuda::tests::cudapy::test_constmem::cuconstRecAlign$247(
+        Array<double, 1, C, mutable, aligned>,
+        Array<double, 1, C, mutable, aligned>,
+        Array<double, 1, C, mutable, aligned>,
+        Array<double, 1, C, mutable, aligned>,
+        Array<double, 1, C, mutable, aligned>)
+
+which is the fully qualified name of a jitted function and the types with
+which it was called.
+ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/dispatching.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/dispatching.rst new file mode 100644 index 000000000..ca50e6d66 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/dispatching.rst @@ -0,0 +1,267 @@ + +======================= +Polymorphic dispatching +======================= + +Functions compiled using :func:`~numba.jit` or :func:`~numba.vectorize` +are open-ended: they can be called with many different input types and +have to select (possibly compile on-the-fly) the right low-level +specialization. We hereby explain how this mechanism is implemented. + + +Requirements +============ + +JIT-compiled functions can take several arguments and each of them is +taken into account when selecting a specialization. Thus it is a +form of multiple dispatch, more complex than single dispatch. + +Each argument weighs in the selection based on its :ref:`Numba type +`. Numba types are often more granular than Python types: +for example, Numba types Numpy arrays differently depending on their +dimensionality and their layout (C-contiguous, etc.). + +Once a Numba type is inferred for each argument, a specialization must +be chosen amongst the available ones; or, if not suitable specialization +is found, a new one must be compiled. This is not a trivial decision: +there can be multiple specializations compatible with a given concrete +signature (for example, say a two-argument function has compiled +specializations for ``(float64, float64)`` and ``(complex64, complex64)``, +and it is called with ``(float32, float32)``). + +Therefore, there are two crucial steps in the dispatch mechanism: + +1. infer the Numba types of the concrete arguments +2. select the best available specialization (or choose to compile a new one) + for the inferred Numba types + +Compile-time vs. run-time +------------------------- + +This document discusses dispatching when it is done at runtime, i.e. +when a JIT-compiled function is called from pure Python. In that context, +performance is important. To stay in the realm of normal function call +overhead in Python, the overhead of dispatching should stay under a +microsecond. Of course, *the faster the better*... + +When a JIT-compiled function is called from another JIT-compiled +function (in :term:`nopython mode`), the polymorphism is resolved at +compile-time, using a non-performance critical mechanism, bearing zero +runtime performance overhead. + +.. note:: + In practice, the performance-critical parts described here are coded in C. + + +Type resolution +=============== + +The first step is therefore to infer, at call-time, a Numba type for each +of the function's concrete arguments. Given the finer granularity of +Numba types compared to Python types, one cannot simply lookup an object's +class and key a dictionary with it to obtain the corresponding Numba type. + +Instead, there is a machinery to inspect the object and, based on its +Python type, query various properties to infer the appropriate Numba +type. This can be more or less complex: for example, a Python ``int`` +argument will always infer to a Numba ``intp`` (a pointer-sized integer), +but a Python ``tuple`` argument can infer to multiple Numba types (depending +on the tuple's size and the concrete type of each of its elements). 
+
+The Numba type system is high-level and written in pure Python; there is
+pure Python machinery, based on a generic function, to do this inference
+(in :mod:`numba.typing.typeof`).  That machinery is used for compile-time
+inference, e.g. on constants.  Unfortunately, it is too slow for run-time
+value-based dispatching.  It is only used as a fallback for rarely used
+(or difficult to infer) types, and exhibits multiple-microsecond overhead.
+
+Typecodes
+---------
+
+The Numba type system is really too high-level to be manipulated efficiently
+from C code.  Therefore, the C dispatching layer uses another representation
+based on integer typecodes.  Each Numba type gets a unique integer typecode
+when constructed; also, an interning system ensures that no two instances of
+the same type are created.  The dispatching layer is therefore able to
+*eschew* the overhead of the Numba type system by working with simple integer
+typecodes, amenable to well-known optimizations (fast hash tables, etc.).
+
+The goal of the type resolution step becomes: infer a Numba *typecode*
+for each of the function's concrete arguments.  Ideally, it doesn't deal
+with Numba types anymore...
+
+Hard-coded fast paths
+---------------------
+
+While the integer typecodes eschew the abstraction and object-orientation
+overhead of the type system, they still have the same conceptual complexity.
+Therefore, an important technique to speed up inference is to first go
+through checks for the most important types, and hard-code a fast resolution
+for each of them.
+
+Several types benefit from such an optimization, notably:
+
+* basic Python scalars (``bool``, ``int``, ``float``, ``complex``);
+* basic Numpy scalars (the various kinds of integer, floating-point,
+  complex numbers);
+* Numpy arrays of certain dimensionalities and basic element types.
+
+Each of those fast paths ideally uses a hard-coded result value or a direct
+table lookup after a few simple checks.
+
+However, we can't apply that technique to all argument types; there would
+be an explosion of ad-hoc internal caches, and it would become difficult to
+maintain.  Besides, the recursive application of hard-coded fast paths
+would not necessarily combine into a low overhead (in the nested tuple
+case, for example).
+
+Fingerprint-based typecode cache
+--------------------------------
+
+For not-so-trivial types (imagine a tuple, or a Numpy ``datetime64`` array,
+for example), the hard-coded fast paths don't match.  A more generic
+mechanism then kicks in.
+
+The principle here is to examine each argument value, as the pure Python
+machinery would do, and to describe its Numba type unambiguously.  The
+difference is that *we don't actually compute a Numba type*.  Instead, we
+compute a simple bytestring, a possible low-level denotation of that
+Numba type: a *fingerprint*.  The fingerprint format is designed to be
+short and extremely simple to compute from C code (in practice, it has
+a bytecode-like format).
+
+Once the fingerprint is computed, it is looked up in a cache mapping
+fingerprints to typecodes.  The cache is a hash table, and the lookup
+is fast thanks to the fingerprints being generally very short (rarely
+more than 20 bytes).
+
+If the cache lookup fails, the typecode must first be computed using the
+slow pure Python machinery.  Luckily, this would only happen once: on
+subsequent calls, the cached typecode would be returned for the given
+fingerprint.
+
+In rare cases, a fingerprint cannot be computed efficiently.
This is
+the case for some types which cannot be easily inspected from C: for
+example ``cffi`` function pointers.  Then, the slow pure Python machinery
+is invoked at each function call with such an argument.
+
+.. note::
+   Two fingerprints may denote a single Numba type.  This does not make
+   the mechanism incorrect; it only creates more cache entries.
+
+
+Summary
+-------
+
+Type resolution of a function argument involves the following mechanisms
+in order:
+
+* Try a few hard-coded fast paths, for common simple types.
+* If the above failed, compute a fingerprint for the argument and look up
+  its typecode in a cache.
+* If all the above failed, invoke the pure Python machinery which will
+  determine a Numba type for the argument (and look up its typecode).
+
+
+Specialization selection
+========================
+
+At the previous step, an integer typecode has been determined for each
+concrete argument to the JIT-compiled function.  Now it remains to match
+that concrete signature against each of the available specializations for
+the function.  There can be three outcomes:
+
+* There is a satisfying best match: the corresponding specialization
+  is then invoked (it will handle argument unboxing and other details).
+* There is a tie between two or more "best matches": an exception is raised,
+  refusing to solve the ambiguity.
+* There is no satisfying match: a new specialization is compiled, tailored
+  to the concrete argument types that were inferred.
+
+The selection works by looping over all available specializations, and
+computing the compatibility of each concrete argument type with the
+corresponding type in the specialization's intended signature.  Specifically,
+we are interested in:
+
+1. Whether the concrete argument type is allowed to convert implicitly to
+   the specialization's argument type;
+2. If so, at what semantic (user-visible) cost the conversion comes.
+
+Implicit conversion rules
+-------------------------
+
+There are five possible kinds of implicit conversion from a source type
+to a destination type (note this is an asymmetric relationship):
+
+1. *exact match*: the two types are identical; this is the ideal case,
+   since the specialization would behave exactly as intended;
+2. *same-kind promotion*: the two types belong to the same "kind" (for
+   example ``int32`` and ``int64`` are two integer types), and the source
+   type can be converted losslessly to the destination type (e.g. from
+   ``int32`` to ``int64``, but not the reverse);
+3. *safe conversion*: the two types belong to different kinds, but the
+   source type can be reasonably converted to the destination type
+   (e.g. from ``int32`` to ``float64``, but not the reverse);
+4. *unsafe conversion*: a conversion is available from the source type
+   to the destination type, but it may lose precision, magnitude, or
+   another desirable quality;
+5. *no conversion*: there is no correct or reasonably efficient way to
+   convert between the two types (for example between an ``int64`` and a
+   ``datetime64``, or a C-contiguous array and a Fortran-contiguous array).
+
+When a specialization is examined, the latter two cases eliminate it from
+the final choice: i.e. when at least one argument has *no conversion* or
+only an *unsafe conversion* to the signature's argument type.
+
+.. note::
+   However, if the function is compiled with explicit signatures
+   in the :func:`~numba.jit` call (and therefore it is not allowed to compile
+   new specializations), *unsafe conversion* is allowed.
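+
+The effect of these rules is easiest to observe with an explicitly-typed
+function, where no new specializations may be compiled (a small illustrative
+snippet of our own, not taken from the Numba test suite)::
+
+    from numba import njit, float64
+
+    @njit(float64(float64, float64))
+    def add(x, y):
+        return x + y
+
+    # The integer arguments are implicitly converted to float64 rather than
+    # triggering a new compilation; only the declared specialization exists.
+    print(add(1, 2))        # 3.0
+    print(add.signatures)   # [(float64, float64)]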
+
+Candidates and best match
+-------------------------
+
+If a specialization is not eliminated by the rule above, it enters the
+list of *candidates* for the final choice.  Those candidates are ranked
+by an ordered 4-tuple of integers: ``(number of unsafe conversions,
+number of safe conversions, number of same-kind promotions, number of
+exact matches)`` (note the sum of the tuple's elements is equal to the
+number of arguments).  The best match is then the first result in ascending
+sort order, thereby preferring exact matches over promotions, promotions
+over safe conversions, and safe conversions over unsafe conversions.
+
+Implementation
+--------------
+
+The above-described mechanism works on integer typecodes, not on Numba
+types.  It uses an internal hash table storing the possible conversion
+kind for each pair of compatible types.  The internal hash table is in part
+built at startup (for built-in trivial types such as ``int32``, ``int64``
+etc.), in part filled dynamically (for arbitrarily complex types such
+as array types: for example to allow using a C-contiguous 2D array where
+a function expects a non-contiguous 2D array).
+
+Summary
+-------
+
+Selecting the right specialization involves the following steps:
+
+* Examine each available specialization and match it against the concrete
+  argument types.
+* Eliminate any specialization where at least one argument doesn't offer
+  sufficient compatibility.
+* If there are remaining candidates, choose the best one in terms of
+  preserving the types' semantics.
+
+
+Miscellaneous
+=============
+
+Some `benchmarks of dispatch performance `_
+exist in the `Numba benchmarks `_
+repository.
+
+Some unit tests of specific aspects of the machinery are available
+in :mod:`numba.tests.test_typeinfer` and :mod:`numba.tests.test_typeof`.
+Higher-level dispatching tests are in :mod:`numba.tests.test_dispatcher`.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/environment.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/environment.rst
new file mode 100644
index 000000000..39f4bd234
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/environment.rst
@@ -0,0 +1,59 @@
+
+==================
+Environment Object
+==================
+
+The Environment object (Env) is used to maintain references to Python objects
+that are needed to support compiled functions for both object-mode and
+nopython-mode.
+
+In nopython-mode, the Env is used for:
+
+* Storing pyobjects for reconstruction from native values,
+  such as:
+
+  * for printing native values of NumPy arrays;
+  * for returning or yielding native values back to the interpreter.
+
+In object-mode, the Env is used for:
+
+* storing constant values referenced in the code.
+* storing a reference to the function's global dictionary to load global
+  values.
+
+
+The Implementation
+==================
+
+The Env is implemented in two parts.  In ``_dynfunc.c``, the Env is defined
+as ``EnvironmentObject``, a Python C-extension type.  In ``lowering.py``,
+the ``EnvironmentObject`` (exported as ``_dynfunc.Environment``) is extended
+to support the operations needed during lowering.
+
+
+Serialization
+-------------
+
+The Env supports being pickled.  Compilation cache files and ahead-of-time
+compiled modules serialize all the used Envs for recreation at runtime.
+
+Usage
+-----
+
+At the start of lowering for a function or a generator, an Env is created.
+
+Throughout the compilation, the Env is mutated to attach additional
+information.  The compiled code references an Env via a global variable in
+the emitted LLVM IR.  The global variable is zero-initialized with "common"
+linkage, which is the default linkage for C global values.  The use of this
+linkage allows multiple definitions of the global variable to be merged into
+a single definition when the modules are linked together.  The name of the
+global variable is computed from the name of the function
+(see ``FunctionDescriptor.env_name`` and ``.get_env_name()`` of the target
+context).
+
+The Env is initialized when the compiled function is loaded.  The JIT engine
+finds the address of the associated global variable for the Env and stores
+the address of the Env into it.  For cached functions, the same process
+applies.  For ahead-of-time compiled functions, the module initializer in the
+generated library is responsible for initializing the global variables of all
+the Envs in the module.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/event_api.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/event_api.rst
new file mode 100644
index 000000000..31aac3313
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/event_api.rst
@@ -0,0 +1,5 @@
+Event API
+=========
+
+.. automodule:: numba.core.event
+   :members:
\ No newline at end of file
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/generators.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/generators.rst
new file mode 100644
index 000000000..b2936817b
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/generators.rst
@@ -0,0 +1,307 @@
+
+.. _arch-generators:
+
+===================
+Notes on generators
+===================
+
+Numba recently gained support for compiling generator functions.  This
+document explains some of the implementation choices.
+
+
+Terminology
+===========
+
+For clarity, we distinguish between *generator functions* and
+*generators*.  A generator function is a function containing one or
+several ``yield`` statements.  A generator (sometimes also called "generator
+iterator") is the return value of a generator function; it resumes
+execution inside its frame each time :py:func:`next` is called.
+
+A *yield point* is the place where a ``yield`` statement is executed.
+A *resumption point* is the place just after a *yield point* where execution
+is resumed when :py:func:`next` is called again.
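+
+To make the terminology concrete, here is a small example of our own (any
+generator function compiles the same way as the one studied below)::
+
+    from numba import njit
+
+    @njit
+    def countdown(n):    # a generator *function*
+        while n > 0:
+            yield n      # a yield point
+            n -= 1       # the resumption point for that yield point
+
+    gen = countdown(3)   # a *generator*
+    print(next(gen), next(gen), next(gen))   # 3 2 1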
+ + +Function analysis +================= + +Suppose we have the following simple generator function:: + + def gen(x, y): + yield x + y + yield x - y + +Here is its CPython bytecode, as printed out using :py:func:`dis.dis`:: + + 7 0 LOAD_FAST 0 (x) + 3 LOAD_FAST 1 (y) + 6 BINARY_ADD + 7 YIELD_VALUE + 8 POP_TOP + + 8 9 LOAD_FAST 0 (x) + 12 LOAD_FAST 1 (y) + 15 BINARY_SUBTRACT + 16 YIELD_VALUE + 17 POP_TOP + 18 LOAD_CONST 0 (None) + 21 RETURN_VALUE + +When compiling this function with :envvar:`NUMBA_DUMP_IR` set to 1, the +following information is printed out:: + + ----------------------------------IR DUMP: gen---------------------------------- + label 0: + x = arg(0, name=x) ['x'] + y = arg(1, name=y) ['y'] + $0.3 = x + y ['$0.3', 'x', 'y'] + $0.4 = yield $0.3 ['$0.3', '$0.4'] + del $0.4 [] + del $0.3 [] + $0.7 = x - y ['$0.7', 'x', 'y'] + del y [] + del x [] + $0.8 = yield $0.7 ['$0.7', '$0.8'] + del $0.8 [] + del $0.7 [] + $const0.9 = const(NoneType, None) ['$const0.9'] + $0.10 = cast(value=$const0.9) ['$0.10', '$const0.9'] + del $const0.9 [] + return $0.10 ['$0.10'] + ------------------------------GENERATOR INFO: gen------------------------------- + generator state variables: ['$0.3', '$0.7', 'x', 'y'] + yield point #1: live variables = ['x', 'y'], weak live variables = ['$0.3'] + yield point #2: live variables = [], weak live variables = ['$0.7'] + + +What does it mean? The first part is the Numba IR, as already seen in +:ref:`arch_generate_numba_ir`. We can see the two yield points (``yield $0.3`` +and ``yield $0.7``). + +The second part shows generator-specific information. To understand it +we have to understand what suspending and resuming a generator means. + +When suspending a generator, we are not merely returning a value to the +caller (the operand of the ``yield`` statement). We also have to save the +generator's *current state* in order to resume execution. In trivial use +cases, perhaps the CPU's register values or stack slots would be preserved +until the next call to next(). However, any non-trivial case will hopelessly +clobber those values, so we have to save them in a well-defined place. + +What are the values we need to save? Well, in the context of the Numba +Intermediate Representation, we must save all *live variables* at each +yield point. These live variables are computed thanks to the control +flow graph. + +Once live variables are saved and the generator is suspended, resuming +the generator simply involves the inverse operation: the live variables +are restored from the saved generator state. + +.. note:: + It is the same analysis which helps insert Numba ``del`` instructions + where appropriate. + +Let's go over the generator info again:: + + generator state variables: ['$0.3', '$0.7', 'x', 'y'] + yield point #1: live variables = ['x', 'y'], weak live variables = ['$0.3'] + yield point #2: live variables = [], weak live variables = ['$0.7'] + +Numba has computed the union of all live variables (denoted as "state +variables"). This will help define the layout of the :ref:`generator +structure `. Also, for each yield point, we have +computed two sets of variables: + +* the *live variables* are the variables which are used by code following + the resumption point (i.e. after the ``yield`` statement) + +* the *weak live variables* are variables which are del'ed immediately + after the resumption point; they have to be saved in :term:`object mode`, + to ensure proper reference cleanup + + +.. 
_generator-structure:
+
+The generator structure
+=======================
+
+Layout
+------
+
+Function analysis helps us gather enough information to define the
+layout of the generator structure, which will store the entire execution
+state of a generator.  Here is a sketch of the generator structure's layout,
+in pseudo-code::
+
+   struct gen_struct_t {
+      int32_t resume_index;
+      struct gen_args_t {
+         arg_0_t arg0;
+         arg_1_t arg1;
+         ...
+         arg_N_t argN;
+      }
+      struct gen_state_t {
+         state_0_t state_var0;
+         state_1_t state_var1;
+         ...
+         state_N_t state_varN;
+      }
+   }
+
+Let's describe those fields in order.
+
+* The first member, the *resume index*, is an integer telling the generator
+  at which resumption point execution must resume.  By convention, it can
+  have two special values: 0 means execution must start at the beginning of
+  the generator (i.e. the first time :py:func:`next` is called); -1 means
+  the generator is exhausted and resumption must immediately raise
+  StopIteration.  Other values indicate the yield point's index starting
+  from 1 (corresponding to the indices shown in the generator info above).
+
+* The second member, the *arguments structure*, is read-only after it is
+  first initialized.  It stores the values of the arguments the generator
+  function was called with.  In our example, these are the values of ``x``
+  and ``y``.
+
+* The third member, the *state structure*, stores the live variables as
+  computed above.
+
+Concretely, our example's generator structure (assuming the generator
+function is called with floating-point numbers) is then::
+
+   struct gen_struct_t {
+      int32_t resume_index;
+      struct gen_args_t {
+         double arg0;
+         double arg1;
+      }
+      struct gen_state_t {
+         double $0.3;
+         double $0.7;
+         double x;
+         double y;
+      }
+   }
+
+Note that here, saving ``x`` and ``y`` is redundant: Numba isn't able to
+recognize that the state variables ``x`` and ``y`` have the same value
+as ``arg0`` and ``arg1``.
+
+Allocation
+----------
+
+How does Numba ensure the generator structure is preserved long enough?
+There are two cases:
+
+* When a Numba-compiled generator function is called from a Numba-compiled
+  function, the structure is allocated on the stack by the callee.  In this
+  case, generator instantiation is practically costless.
+
+* When a Numba-compiled generator function is called from regular Python
+  code, a CPython-compatible wrapper is instantiated that has the right
+  amount of allocated space to store the structure, and whose
+  :c:member:`~PyTypeObject.tp_iternext` slot is a wrapper around the
+  generator's native code.
+
+
+Compiling to native code
+========================
+
+When compiling a generator function, three native functions are actually
+generated by Numba:
+
+* An initialization function.  This is the function corresponding
+  to the generator function itself: it receives the function arguments and
+  stores them inside the generator structure (which is passed by pointer).
+  It also initializes the *resume index* to 0, indicating that the generator
+  hasn't started yet.
+
+* A next() function.  This is the function called to resume execution
+  inside the generator.  Its single argument is a pointer to the generator
+  structure and it returns the next yielded value (or a special exit code
+  is used if the generator is exhausted, for quick checking when called
+  from Numba-compiled functions).
+
+* An optional finalizer.
In object mode, this function ensures that all
+  live variables stored in the generator state are decref'ed, even if the
+  generator is destroyed without having been exhausted.
+
+The next() function
+-------------------
+
+The next() function is the least straightforward of the three native
+functions.  It starts with a trampoline which dispatches execution to the
+right resume point depending on the *resume index* stored in the generator
+structure.  Here is how the start of the function may look in our example:
+
+.. code-block:: llvm
+
+   define i32 @"__main__.gen.next"(
+      double* nocapture %retptr,
+      { i8*, i32 }** nocapture readnone %excinfo,
+      i8* nocapture readnone %env,
+      { i32, { double, double }, { double, double, double, double } }* nocapture %arg.gen)
+   {
+     entry:
+       %gen.resume_index = getelementptr { i32, { double, double }, { double, double, double, double } }* %arg.gen, i64 0, i32 0
+       %.47 = load i32* %gen.resume_index, align 4
+       switch i32 %.47, label %stop_iteration [
+         i32 0, label %B0
+         i32 1, label %generator_resume1
+         i32 2, label %generator_resume2
+       ]
+
+       ; rest of the function snipped
+
+(uninteresting stuff trimmed from the LLVM IR to make it more readable)
+
+We recognize the pointer to the generator structure in ``%arg.gen``.
+The trampoline switch has three targets (one for each *resume index* 0, 1
+and 2), and a fallback target label named ``stop_iteration``.  Label ``B0``
+represents the function's start, ``generator_resume1`` (resp.
+``generator_resume2``) is the resumption point after the first
+(resp. second) yield point.
+
+After generation by LLVM, the whole native assembly code for this function
+may look like this (on x86-64):
+
+.. code-block:: asm
+
+           .globl  __main__.gen.next
+           .align  16, 0x90
+   __main__.gen.next:
+           movl    (%rcx), %eax
+           cmpl    $2, %eax
+           je      .LBB1_5
+           cmpl    $1, %eax
+           jne     .LBB1_2
+           movsd   40(%rcx), %xmm0
+           subsd   48(%rcx), %xmm0
+           movl    $2, (%rcx)
+           movsd   %xmm0, (%rdi)
+           xorl    %eax, %eax
+           retq
+   .LBB1_5:
+           movl    $-1, (%rcx)
+           jmp     .LBB1_6
+   .LBB1_2:
+           testl   %eax, %eax
+           jne     .LBB1_6
+           movsd   8(%rcx), %xmm0
+           movsd   16(%rcx), %xmm1
+           movaps  %xmm0, %xmm2
+           addsd   %xmm1, %xmm2
+           movsd   %xmm1, 48(%rcx)
+           movsd   %xmm0, 40(%rcx)
+           movl    $1, (%rcx)
+           movsd   %xmm2, (%rdi)
+           xorl    %eax, %eax
+           retq
+   .LBB1_6:
+           movl    $-3, %eax
+           retq
+
+Note the function returns 0 to indicate a value is yielded, -3 to indicate
+StopIteration.  ``%rcx`` points to the start of the generator structure,
+where the resume index is stored.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/hashing.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/hashing.rst
new file mode 100644
index 000000000..b95532441
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/hashing.rst
@@ -0,0 +1,54 @@
+
+================
+Notes on Hashing
+================
+
+Numba supports the built-in :func:`hash` function by simply calling the
+:func:`__hash__` member function on the supplied argument.  This makes it
+trivial to add hash support for new types: all that is required is to
+register a function that computes the hash value for the new type, by
+applying the extension API's :func:`overload_method` decorator to the type's
+:func:`__hash__` method.
For example::
+
+    from numba.extending import overload_method
+
+    @overload_method(myType, '__hash__')
+    def myType_hash_overload(obj):
+        # implementation details
+
+
+The Implementation
+==================
+
+The implementation of the Numba hashing functions strictly follows that of
+Python 3.  The only exception to this is that for hashing Unicode and bytes
+(for content longer than ``sys.hash_info.cutoff``) the only supported
+algorithm is ``siphash24`` (the default in CPython 3).  As a result Numba
+will match Python 3 hash values for all supported types under the default
+conditions described.
+
+Unicode hash cache differences
+------------------------------
+
+Both Numba and CPython Unicode string internal representations have a
+``hash`` member for the purposes of caching the string's hash value.  This
+member is always checked before computing a hash value, with the view of
+simply providing a value from the cache, as doing so is considerably cheaper.
+The Numba Unicode string hash caching implementation behaves in a similar way
+to CPython's.  The only notable behavioral change (and its only impact is a
+minor potential change in performance) is that Numba always computes and
+caches the hash for Unicode strings created in ``nopython mode`` at the time
+they are boxed for reuse in Python.  This is too eager in some cases compared
+to CPython, which may delay hashing a new Unicode string depending on the
+creation method.  It should also be noted that Numba copies in the ``hash``
+member of the CPython internal representation for Unicode strings when
+unboxing them to its own representation, so as not to recompute the hash of a
+string that already has a hash value associated with it.
+
+The accommodation of ``PYTHONHASHSEED``
+---------------------------------------
+
+The ``PYTHONHASHSEED`` environment variable can be used to seed the CPython
+hashing algorithms, e.g. for the purposes of reproducibility.  The Numba
+hashing implementation directly reads the CPython hashing algorithms'
+internal state and as a result the influence of ``PYTHONHASHSEED`` is
+replicated in Numba's hashing implementations.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/index.rst
new file mode 100644
index 000000000..2a8cbe53e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/index.rst
@@ -0,0 +1,32 @@
+
+.. _developer-manual:
+
+Developer Manual
+================
+
+..
toctree:: + :maxdepth: 2 + + contributing.rst + release.rst + repomap.rst + architecture.rst + dispatching.rst + generators.rst + numba-runtime.rst + rewrites.rst + live_variable_analysis.rst + listings.rst + stencil.rst + custom_pipeline.rst + inlining.rst + environment.rst + hashing.rst + caching.rst + threading_implementation.rst + literal.rst + llvm_timings.rst + debugging.rst + event_api.rst + target_extension.rst + mission.rst diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_example.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_example.py new file mode 100644 index 000000000..e57ba5c58 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_example.py @@ -0,0 +1,82 @@ +from numba import njit +import numba +from numba.core import ir + + +@njit(inline='never') +def never_inline(): + return 100 + + +@njit(inline='always') +def always_inline(): + return 200 + + +def sentinel_cost_model(expr, caller_info, callee_info): + # this cost model will return True (i.e. do inlining) if either: + # a) the callee IR contains an `ir.Const(37)` + # b) the caller IR contains an `ir.Const(13)` logically prior to the call + # site + + # check the callee + for blk in callee_info.blocks.values(): + for stmt in blk.body: + if isinstance(stmt, ir.Assign): + if isinstance(stmt.value, ir.Const): + if stmt.value.value == 37: + return True + + # check the caller + before_expr = True + for blk in caller_info.blocks.values(): + for stmt in blk.body: + if isinstance(stmt, ir.Assign): + if isinstance(stmt.value, ir.Expr): + if stmt.value == expr: + before_expr = False + if isinstance(stmt.value, ir.Const): + if stmt.value.value == 13: + return True & before_expr + return False + + +@njit(inline=sentinel_cost_model) +def maybe_inline1(): + # Will not inline based on the callee IR with the declared cost model + # The following is ir.Const(300). + return 300 + + +@njit(inline=sentinel_cost_model) +def maybe_inline2(): + # Will inline based on the callee IR with the declared cost model + # The following is ir.Const(37). + return 37 + + +@njit +def foo(): + a = never_inline() # will never inline + b = always_inline() # will always inline + + # will not inline as the function does not contain a magic constant known to + # the cost model, and the IR up to the call site does not contain a magic + # constant either + d = maybe_inline1() + + # declare this magic constant to trigger inlining of maybe_inline1 in a + # subsequent call + magic_const = 13 + + # will inline due to above constant declaration + e = maybe_inline1() + + # will inline as the maybe_inline2 function contains a magic constant known + # to the cost model + c = maybe_inline2() + + return a + b + c + d + e + magic_const + + +foo() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_overload_example.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_overload_example.py new file mode 100644 index 000000000..f28f44a55 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inline_overload_example.py @@ -0,0 +1,61 @@ +import numba +from numba.extending import overload +from numba import njit, types + + +def bar(x): + """A function stub to overload""" + pass + + +@overload(bar, inline='always') +def ol_bar_tuple(x): + # An overload that will always inline, there is a type guard so that this + # only applies to UniTuples. 
+    if isinstance(x, types.UniTuple):
+        def impl(x):
+            return x[0]
+        return impl
+
+
+def cost_model(expr, caller, callee):
+    # Only inline if the type of the argument is an Integer
+    return isinstance(caller.typemap[expr.args[0].name], types.Integer)
+
+
+@overload(bar, inline=cost_model)
+def ol_bar_scalar(x):
+    # An overload that will inline based on a cost model; it only applies to
+    # scalar values in the numerical domain as per the type guard on Number
+    if isinstance(x, types.Number):
+        def impl(x):
+            return x + 1
+        return impl
+
+
+@njit
+def foo():
+
+    # This will resolve via `ol_bar_tuple` as the argument is a types.UniTuple
+    # instance. It will always be inlined as specified in the decorator for
+    # this overload.
+    a = bar((1, 2, 3))
+
+    # This will resolve via `ol_bar_scalar` as the argument is a types.Number
+    # instance, hence the cost_model will be used to determine whether to
+    # inline.
+    # The function will be inlined as the value 100 is an IntegerLiteral which
+    # is an instance of a types.Integer as required by the cost_model function.
+    b = bar(100)
+
+    # This will also resolve via `ol_bar_scalar` as the argument is a
+    # types.Number instance, again the cost_model will be used to determine
+    # whether to inline.
+    # The function will not be inlined as the complex value is not an instance
+    # of a types.Integer as required by the cost_model function.
+    c = bar(300j)
+
+    return a + b + c
+
+
+foo()
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inlining.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inlining.rst
new file mode 100644
index 000000000..d48240234
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/inlining.rst
@@ -0,0 +1,281 @@
+
+=================
+Notes on Inlining
+=================
+
+There are occasions where it is useful to be able to inline a function at its
+call site, at the Numba IR level of representation.  Decorators such as
+:func:`numba.jit`, :func:`numba.extending.overload` and
+:func:`register_jitable` support the keyword argument ``inline``, to
+facilitate this behaviour.
+
+When attempting to inline at this level, it is important to understand what
+purpose this serves and what effect this will have.  In contrast to the
+inlining performed by LLVM, which is aimed at improving performance, the main
+reason to inline at the Numba IR level is to allow type inference to cross
+function boundaries.
+
+As an example, consider the following snippet:
+
+.. code:: python
+
+    from numba import njit
+
+
+    @njit
+    def bar(a):
+        a.append(10)
+
+
+    @njit
+    def foo():
+        z = []
+        bar(z)
+
+
+    foo()
+
+This will fail to compile and run, because the type of ``z`` cannot be
+inferred, as it will only be refined within ``bar``.  If we now add
+``inline='always'`` to the decorator for ``bar``, the snippet will compile
+and run.  This is because inlining the call to ``bar`` exposes
+``a.append(10)`` in ``foo``, so ``z`` is refined to hold integers and type
+inference succeeds.
+
+So, to recap: inlining at the Numba IR level is unlikely to have a
+performance benefit, whereas inlining at the LLVM level stands a better
+chance.
+
+The ``inline`` keyword argument can be one of three values:
+
+* The string ``'never'``; this is the default and results in the function not
+  being inlined under any circumstances.
+* The string ``'always'``; this results in the function being inlined at all
+  call sites.
+* A Python function that takes three arguments.
The first argument is always the
+  ``ir.Expr`` node that is the ``call`` requesting the inline; it is present
+  to allow the function to make contextually aware decisions about the call.
+  The second and third arguments are:
+
+  * In the case of an untyped inline, i.e. that which occurs when using the
+    :func:`numba.jit` family of decorators, both arguments are
+    ``numba.ir.FunctionIR`` instances.  The second argument corresponds to
+    the IR of the caller, the third to the IR of the callee.
+
+  * In the case of a typed inline, i.e. that which occurs when using
+    :func:`numba.extending.overload`, both arguments are instances of a
+    ``namedtuple`` with fields (corresponding to their standard use in the
+    compiler internals):
+
+    * ``func_ir`` - the function's Numba IR.
+    * ``typemap`` - the function's type map.
+    * ``calltypes`` - the call types of any calls in the function.
+    * ``signature`` - the function's signature.
+
+    The second argument holds the information from the caller, the third
+    holds the information from the callee.
+
+  In all cases the function should return ``True`` to inline and ``False``
+  not to inline, essentially permitting custom inlining rules (a typical use
+  might be a cost model).
+
+Note that recursive functions with ``inline='always'`` will result in a
+non-terminating compilation.  If you wish to avoid this, supply a function to
+limit the recursion depth (see below).
+
+.. note:: No guarantee is made about the order in which functions are
+          assessed for inlining or about the order in which they are inlined.
+
+
+Example using :func:`numba.jit`
+===============================
+
+An example of using all three options to ``inline`` in the :func:`numba.njit`
+decorator:
+
+.. literalinclude:: inline_example.py
+
+which produces the following when executed (with a print of the IR after the
+legalization pass, enabled via the environment variable
+``NUMBA_DEBUG_PRINT_AFTER="ir_legalization"``):
+
+..
code-block:: none + :emphasize-lines: 2, 3, 9, 16, 17, 21, 22, 26, 35 + + label 0: + $0.1 = global(never_inline: CPUDispatcher()) ['$0.1'] + $0.2 = call $0.1(func=$0.1, args=[], kws=(), vararg=None) ['$0.1', '$0.2'] + del $0.1 [] + a = $0.2 ['$0.2', 'a'] + del $0.2 [] + $0.3 = global(always_inline: CPUDispatcher()) ['$0.3'] + del $0.3 [] + $const0.1.0 = const(int, 200) ['$const0.1.0'] + $0.2.1 = $const0.1.0 ['$0.2.1', '$const0.1.0'] + del $const0.1.0 [] + $0.4 = $0.2.1 ['$0.2.1', '$0.4'] + del $0.2.1 [] + b = $0.4 ['$0.4', 'b'] + del $0.4 [] + $0.5 = global(maybe_inline1: CPUDispatcher()) ['$0.5'] + $0.6 = call $0.5(func=$0.5, args=[], kws=(), vararg=None) ['$0.5', '$0.6'] + del $0.5 [] + d = $0.6 ['$0.6', 'd'] + del $0.6 [] + $const0.7 = const(int, 13) ['$const0.7'] + magic_const = $const0.7 ['$const0.7', 'magic_const'] + del $const0.7 [] + $0.8 = global(maybe_inline1: CPUDispatcher()) ['$0.8'] + del $0.8 [] + $const0.1.2 = const(int, 300) ['$const0.1.2'] + $0.2.3 = $const0.1.2 ['$0.2.3', '$const0.1.2'] + del $const0.1.2 [] + $0.9 = $0.2.3 ['$0.2.3', '$0.9'] + del $0.2.3 [] + e = $0.9 ['$0.9', 'e'] + del $0.9 [] + $0.10 = global(maybe_inline2: CPUDispatcher()) ['$0.10'] + del $0.10 [] + $const0.1.4 = const(int, 37) ['$const0.1.4'] + $0.2.5 = $const0.1.4 ['$0.2.5', '$const0.1.4'] + del $const0.1.4 [] + $0.11 = $0.2.5 ['$0.11', '$0.2.5'] + del $0.2.5 [] + c = $0.11 ['$0.11', 'c'] + del $0.11 [] + $0.14 = a + b ['$0.14', 'a', 'b'] + del b [] + del a [] + $0.16 = $0.14 + c ['$0.14', '$0.16', 'c'] + del c [] + del $0.14 [] + $0.18 = $0.16 + d ['$0.16', '$0.18', 'd'] + del d [] + del $0.16 [] + $0.20 = $0.18 + e ['$0.18', '$0.20', 'e'] + del e [] + del $0.18 [] + $0.22 = $0.20 + magic_const ['$0.20', '$0.22', 'magic_const'] + del magic_const [] + del $0.20 [] + $0.23 = cast(value=$0.22) ['$0.22', '$0.23'] + del $0.22 [] + return $0.23 ['$0.23'] + + +Things to note in the above: + +1. The call to the function ``never_inline`` remains as a call. +2. The ``always_inline`` function has been inlined, note its + ``const(int, 200)`` in the caller body. +3. There is a call to ``maybe_inline1`` before the ``const(int, 13)`` + declaration, the cost model prevented this from being inlined. +4. After the ``const(int, 13)`` the subsequent call to ``maybe_inline1`` has + been inlined as shown by the ``const(int, 300)`` in the caller body. +5. The function ``maybe_inline2`` has been inlined as demonstrated by + ``const(int, 37)`` in the caller body. +6. That dead code elimination has not been performed and as a result there are + superfluous statements present in the IR. + + +Example using :func:`numba.extending.overload` +============================================== + +An example of using inlining with the :func:`numba.extending.overload` +decorator. It is most interesting to note that if a function is supplied as the +argument to ``inline`` a lot more information is available via the supplied +function arguments for use in decision making. Also that different +``@overload`` s can have different inlining behaviours, with multiple ways to +achieve this: + +.. literalinclude:: inline_overload_example.py + +which produces the following when executed (with a print of the IR after the +legalization pass, enabled via the environment variable +``NUMBA_DEBUG_PRINT_AFTER="ir_legalization"``): + +.. 
code-block:: none
+   :emphasize-lines: 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20, 21, 22, 28, 29, 30
+
+   label 0:
+       $const0.2 = const(tuple, (1, 2, 3))  ['$const0.2']
+       x.0 = $const0.2                      ['$const0.2', 'x.0']
+       del $const0.2                        []
+       $const0.2.2 = const(int, 0)          ['$const0.2.2']
+       $0.3.3 = getitem(value=x.0, index=$const0.2.2)  ['$0.3.3', '$const0.2.2', 'x.0']
+       del x.0                              []
+       del $const0.2.2                      []
+       $0.4.4 = $0.3.3                      ['$0.3.3', '$0.4.4']
+       del $0.3.3                           []
+       $0.3 = $0.4.4                        ['$0.3', '$0.4.4']
+       del $0.4.4                           []
+       a = $0.3                             ['$0.3', 'a']
+       del $0.3                             []
+       $const0.5 = const(int, 100)          ['$const0.5']
+       x.5 = $const0.5                      ['$const0.5', 'x.5']
+       del $const0.5                        []
+       $const0.2.7 = const(int, 1)          ['$const0.2.7']
+       $0.3.8 = x.5 + $const0.2.7           ['$0.3.8', '$const0.2.7', 'x.5']
+       del x.5                              []
+       del $const0.2.7                      []
+       $0.4.9 = $0.3.8                      ['$0.3.8', '$0.4.9']
+       del $0.3.8                           []
+       $0.6 = $0.4.9                        ['$0.4.9', '$0.6']
+       del $0.4.9                           []
+       b = $0.6                             ['$0.6', 'b']
+       del $0.6                             []
+       $0.7 = global(bar: )                 ['$0.7']
+       $const0.8 = const(complex, 300j)     ['$const0.8']
+       $0.9 = call $0.7($const0.8, func=$0.7, args=[Var($const0.8, inline_overload_example.py (56))], kws=(), vararg=None)  ['$0.7', '$0.9', '$const0.8']
+       del $const0.8                        []
+       del $0.7                             []
+       c = $0.9                             ['$0.9', 'c']
+       del $0.9                             []
+       $0.12 = a + b                        ['$0.12', 'a', 'b']
+       del b                                []
+       del a                                []
+       $0.14 = $0.12 + c                    ['$0.12', '$0.14', 'c']
+       del c                                []
+       del $0.12                            []
+       $0.15 = cast(value=$0.14)            ['$0.14', '$0.15']
+       del $0.14                            []
+       return $0.15                         ['$0.15']
+
+Things to note in the above:
+
+1. The first highlighted section is the always inlined overload for the
+   ``UniTuple`` argument type.
+2. The second highlighted section is the overload for the ``Number`` argument
+   type that has been inlined, because the cost model function decided to do
+   so as the argument was an ``Integer`` type instance.
+3. The third highlighted section is the overload for the ``Number`` argument
+   type that has not been inlined, because the cost model function rejected
+   it as the argument was a ``Complex`` type instance.
+4. Dead code elimination has not been performed, and as a result there are
+   superfluous statements present in the IR.
+
+Using a function to limit the inlining depth of a recursive function
+====================================================================
+
+When using recursive inlines, you can ensure that compilation terminates by
+using a cost model.
+
+.. code:: python
+
+    from numba import njit
+
+    class CostModel(object):
+        def __init__(self, max_inlines):
+            self._count = 0
+            self._max_inlines = max_inlines
+
+        def __call__(self, expr, caller, callee):
+            ret = self._count < self._max_inlines
+            self._count += 1
+            return ret
+
+    @njit(inline=CostModel(3))
+    def factorial(n):
+        if n <= 0:
+            return 1
+        return n * factorial(n - 1)
+
+    factorial(5)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/listings.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/listings.rst
new file mode 100644
index 000000000..1c4b1873d
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/listings.rst
@@ -0,0 +1,29 @@
+Listings
+========
+
+This shows listings from compiler internal registries (e.g. lowering
+definitions). The information is provided as a developer reference.
+When possible, links to source code are provided via GitHub links.
+
+New style listings
+------------------
+
+The following listings are generated from
+``numba.help.inspector.write_listings()``. Users can run
+``python -m numba.help.inspector --format=rst <package>`` to recreate the
+documentation.
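+
+For example, a minimal sketch of regenerating one of these listings
+programmatically (the exact signature of ``write_listings()`` is an
+assumption here and may differ between Numba versions):
+
+.. code-block:: python
+
+    # Roughly equivalent to:
+    #   python -m numba.help.inspector --format=rst builtins
+    from numba.help.inspector import write_listings
+
+    write_listings(package_name='builtins',
+                   filename='autogen_builtins_listing',
+                   output_format='rst')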
+ +.. toctree:: + :maxdepth: 2 + + autogen_builtins_listing.rst + autogen_math_listing.rst + autogen_cmath_listing.rst + autogen_numpy_listing.rst + + +Old style listings +------------------ + +.. toctree:: + :maxdepth: 2 + + autogen_lower_listing.rst + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/literal.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/literal.rst new file mode 100644 index 000000000..dd6d8d187 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/literal.rst @@ -0,0 +1,82 @@ +.. _developer-literally: + +====================== +Notes on Literal Types +====================== + +.. note:: This document describes an advanced feature designed to overcome + some limitations of the compilation mechanism relating to types. + +Some features need to specialize based on the literal value during +compilation to produce type stable code necessary for successful compilation in +Numba. This can be achieved by propagating the literal value through the type +system. Numba recognizes inline literal values as :class:`numba.types.Literal`. +For example:: + + def foo(x): + a = 123 + return bar(x, a) + +Numba will infer the type of ``a`` as ``Literal[int](123)``. The definition of +``bar()`` can subsequently specialize its implementation knowing that the +second argument is an ``int`` with the value ``123``. + +``Literal`` Type +---------------- + +Classes and methods related to the ``Literal`` type. + +.. autoclass:: numba.types.Literal + +.. autofunction:: numba.types.literal + +.. autofunction:: numba.types.unliteral + +.. autofunction:: numba.types.maybe_literal + +Specifying for Literal Typing +----------------------------- + +To specify a value as a ``Literal`` type in code scheduled for JIT compilation, +use the following function: + +.. autofunction:: numba.literally + +Code Example +~~~~~~~~~~~~ + +.. literalinclude:: ../../../numba/tests/doc_examples/test_literally_usage.py + :language: python + :caption: from ``test_literally_usage`` of ``numba/tests/doc_examples/test_literally_usage.py`` + :start-after: magictoken.ex_literally_usage.begin + :end-before: magictoken.ex_literally_usage.end + :dedent: 4 + :linenos: + + +Internal Details +~~~~~~~~~~~~~~~~ + +Internally, the compiler raises a ``ForceLiteralArgs`` exception to signal +the dispatcher to wrap specified arguments using the ``Literal`` type. + +.. autoclass:: numba.errors.ForceLiteralArg + :members: __init__, combine, __or__ + + +Inside Extensions +----------------- + +``@overload`` extensions can use ``literally`` inside the implementation body +like in normal jit-code. + +Explicit handling of literal requirements is possible through use of the +following: + +.. autoclass:: numba.extending.SentryLiteralArgs + :members: + +.. autoclass:: numba.extending.BoundLiteralArgs + :members: + +.. autofunction:: numba.extending.sentry_literal_args diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/live_variable_analysis.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/live_variable_analysis.rst new file mode 100644 index 000000000..fdeedfa63 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/live_variable_analysis.rst @@ -0,0 +1,86 @@ +.. 
_live variable analysis:
+
+======================
+Live Variable Analysis
+======================
+
+(Related issue: https://github.com/numba/numba/pull/1611)
+
+Numba uses reference-counting for garbage collection, a technique that
+requires cooperation by the compiler. The Numba IR encodes the locations
+where a decref must be inserted. These locations are determined by live
+variable analysis. The corresponding source code is the ``_insert_var_dels()``
+method in https://github.com/numba/numba/blob/main/numba/interpreter.py.
+
+
+In Python semantics, once a variable is defined inside a function, it is alive
+until the variable is explicitly deleted or the function scope ends.
+However, during compilation Numba analyzes the code to determine the minimum
+bound of the lifetime of each variable from its definition and usages.
+As soon as a variable is unreachable, a ``del`` instruction is inserted at the
+closest basic block (either at the start of the next block(s) or at the
+end of the current block). This means variables can be released earlier than in
+regular Python code.
+
+The behavior of the live variable analysis affects the memory usage of the
+compiled code. Internally, Numba does not differentiate between temporary
+variables and user variables. Since each operation generates at least one
+temporary variable, a function can accumulate a high number of temporary
+variables if they are not released as soon as possible.
+Our generator implementation can benefit from early releasing of variables,
+which reduces the size of the state to suspend at each yield point.
+
+
+Notes on behavior of the live variable analysis
+===============================================
+
+
+Variable deleted before definition
+----------------------------------
+
+(Related issue: https://github.com/numba/numba/pull/1738)
+
+When a variable's lifetime is confined within the loop body (its definition
+and usages do not escape the loop body), like:
+
+.. code-block:: python
+
+    def f(arr):
+        # BB 0
+        res = 0
+        # BB 1
+        for i in (0, 1):
+            # BB 2
+            t = arr[i]
+            if t[i] > 1:
+                # BB 3
+                res += t[i]
+        # BB 4
+        return res
+
+
+Variable ``t`` is never referenced outside of the loop.
+A ``del`` instruction is emitted for ``t`` at the head of the loop (BB 1)
+before the variable is defined. The reason is obvious once we know the
+control flow graph::
+
+             +------------------------------> BB 4
+             |
+             |
+    BB 0 --> BB 1 --> BB 2 ---> BB 3
+             ^        |         |
+             |        V         V
+             +--------+---------+
+
+
+Variable ``t`` is defined in BB 2. In BB 2, the evaluation of
+``t[i] > 1`` uses ``t``, which is the last use if execution takes the false
+branch and goes back to BB 1. In BB 3, ``t`` is only used in ``res += t[i]``,
+which is the last use if execution takes the true branch. Because BB 3, an
+outgoing branch of BB 2, uses ``t``, ``t`` must be deleted at the common
+predecessor. The closest point is BB 1, which does not have ``t`` defined
+from the incoming edge of BB 0.
+
+Alternatively, if ``t`` were deleted at BB 4, we would still have to delete
+the variable before its definition, because BB 4 can be executed without
+executing the loop body (BB 2 and BB 3), where the variable is defined.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/llvm_timings.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/llvm_timings.rst
new file mode 100644
index 000000000..f25a58451
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/llvm_timings.rst
@@ -0,0 +1,107 @@
+..
_developer-llvm-timings: + +==================== +Notes on timing LLVM +==================== + + +Getting LLVM Pass Timings +------------------------- + +The dispatcher stores LLVM pass timings in the dispatcher object metadata under +the ``llvm_pass_timings`` key when :envvar:`NUMBA_LLVM_PASS_TIMINGS` is +enabled or ``numba.config.LLVM_PASS_TIMINGS`` is set to truthy. +The timings information contains details on how much time +has been spent in each pass. The pass timings are also grouped by their purpose. +For example, there will be pass timings for function-level pre-optimizations, +module-level optimizations, and object code generation. + + +Code Example +~~~~~~~~~~~~ + +.. literalinclude:: ../../../numba/tests/doc_examples/test_llvm_pass_timings.py + :language: python + :caption: from ``test_pass_timings`` of ``numba/tests/doc_examples/test_llvm_pass_timings.py`` + :start-after: magictoken.ex_llvm_pass_timings.begin + :end-before: magictoken.ex_llvm_pass_timings.end + :dedent: 16 + :linenos: + +Example output: + +.. code-block:: text + + Printing pass timings for JITCodeLibrary('DocsLLVMPassTimings.test_pass_timings..foo') + Total time: 0.0376 + == #0 Function passes on '_ZN5numba5tests12doc_examples22test_llvm_pass_timings19DocsLLVMPassTimings17test_pass_timings12$3clocals$3e7foo$241Ex' + Percent: 4.8% + Total 0.0018s + Top timings: + 0.0015s ( 81.6%) SROA #3 + 0.0002s ( 9.3%) Early CSE #2 + 0.0001s ( 4.0%) Simplify the CFG #9 + 0.0000s ( 1.5%) Prune NRT refops #4 + 0.0000s ( 1.1%) Post-Dominator Tree Construction #5 + == #1 Function passes on '_ZN7cpython5numba5tests12doc_examples22test_llvm_pass_timings19DocsLLVMPassTimings17test_pass_timings12$3clocals$3e7foo$241Ex' + Percent: 0.8% + Total 0.0003s + Top timings: + 0.0001s ( 30.4%) Simplify the CFG #10 + 0.0001s ( 24.1%) Early CSE #3 + 0.0001s ( 17.8%) SROA #4 + 0.0000s ( 8.8%) Prune NRT refops #5 + 0.0000s ( 5.6%) Post-Dominator Tree Construction #6 + == #2 Function passes on 'cfunc._ZN5numba5tests12doc_examples22test_llvm_pass_timings19DocsLLVMPassTimings17test_pass_timings12$3clocals$3e7foo$241Ex' + Percent: 0.5% + Total 0.0002s + Top timings: + 0.0001s ( 27.7%) Early CSE #4 + 0.0001s ( 26.8%) Simplify the CFG #11 + 0.0000s ( 13.8%) Prune NRT refops #6 + 0.0000s ( 7.4%) Post-Dominator Tree Construction #7 + 0.0000s ( 6.7%) Dominator Tree Construction #29 + == #3 Module passes (cheap optimization for refprune) + Percent: 3.7% + Total 0.0014s + Top timings: + 0.0007s ( 52.0%) Combine redundant instructions + 0.0001s ( 5.4%) Function Integration/Inlining + 0.0001s ( 4.9%) Prune NRT refops #2 + 0.0001s ( 4.8%) Natural Loop Information + 0.0001s ( 4.6%) Post-Dominator Tree Construction #2 + == #4 Module passes (full optimization) + Percent: 43.9% + Total 0.0165s + Top timings: + 0.0032s ( 19.5%) Combine redundant instructions #9 + 0.0022s ( 13.5%) Combine redundant instructions #7 + 0.0010s ( 6.1%) Induction Variable Simplification + 0.0008s ( 4.8%) Unroll loops #2 + 0.0007s ( 4.5%) Loop Vectorization + == #5 Finalize object + Percent: 46.3% + Total 0.0174s + Top timings: + 0.0060s ( 34.6%) X86 DAG->DAG Instruction Selection #2 + 0.0019s ( 11.0%) Greedy Register Allocator #2 + 0.0013s ( 7.4%) Machine Instruction Scheduler #2 + 0.0012s ( 7.1%) Loop Strength Reduction + 0.0004s ( 2.3%) Induction Variable Users + + +API for custom analysis +~~~~~~~~~~~~~~~~~~~~~~~ + +It is possible to get more details then the summary text in the above example. 
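+
+For instance, a hedged sketch of pulling the recorded timings off a
+dispatcher (the ``get_metadata()`` call and the ``llvm_pass_timings`` key
+follow the description above; ``foo`` is a stand-in function and details may
+vary between Numba versions):
+
+.. code-block:: python
+
+    import numba
+    from numba import njit
+
+    numba.config.LLVM_PASS_TIMINGS = True  # must be truthy before compilation
+
+    @njit
+    def foo(n):
+        acc = 0
+        for i in range(n):
+            acc += i
+        return acc
+
+    foo(10)  # trigger compilation
+
+    sig = foo.signatures[0]
+    timings = foo.get_metadata(sig)['llvm_pass_timings']
+    print(timings.summary())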
+The pass timings are stored in a +:class:`numba.misc.llvm_pass_timings.PassTimingsCollection`, which contains +methods for accessing individual record for each pass. + +.. autoclass:: numba.misc.llvm_pass_timings.PassTimingsCollection + :members: get_total_time, list_longest_first, summary, __getitem__, __len__ + +.. autoclass:: numba.misc.llvm_pass_timings.ProcessedPassTimings + :members: get_raw_data, get_total_time, list_records, list_top, summary + +.. autoclass:: numba.misc.llvm_pass_timings.PassTimingRecord diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/mission.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/mission.rst new file mode 100644 index 000000000..07fb3c2d3 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/mission.rst @@ -0,0 +1,82 @@ +Numba Mission Statement +======================= + +Introduction +------------ + +This document is the mission statement for the Numba project. It exists to +provide a clear description of the purposes and goals of the project. As such, +this document provides background on Numba's users and use-cases, and outlines +the project's overall goals. + +This is a living document: + +=========================== ============= +The first revision date is: May 2022 +The last updated date is: May 2022 +The next review date is: November 2022 +=========================== ============= + +Background +---------- + +The Numba project provides tools to improve the performance of Python software. +It comprises numerous facilities including just-in-time (JIT) compilation, +extension points for library authors, and a compiler toolkit on which new +computational acceleration technologies can be explored and built. + +The range of use-cases and applications that can be targeted by Numba includes, +but is not limited to: + +* Scientific Computing +* Computationally intensive tasks +* Numerically oriented applications +* Data science utilities and programs + +The user base of Numba includes anyone needing to perform intensive +computational work, including users from a wide range of disciplines, examples +include: + +* The most common use case, a user wanting to JIT compile some numerical + functions. +* Users providing JIT accelerated libraries for domain specific use cases e.g. + scientific researchers. +* Users providing JIT accelerated libraries for use as part of the numerical + Python ecosystem. +* Those writing more advanced JIT accelerated libraries containing their own + domain specific data types etc. +* Compiler engineers who explore new compiler use-cases and/or need a custom + compiler. +* Hardware vendors looking to extend Numba to provide Python support for their + custom silicon or new hardware. + +Project Goals +------------- + +The primary aims of the Numba project are: + +* To make it easier for Python users to write high performance code. +* To have a core package with a well defined and pragmatically selected feature + scope that meets the needs of the user base without being overly complex. +* To provide a compiler toolkit for Python that is extensible and can be + customized to meet the needs of the user base. This comes with the expectation + that users potentially need to invest time and effort to extend and/or + customize the software themselves. +* To support both the Python core language/standard libraries and NumPy. +* To consistently produce high quality software: + + * Feature stability across versions. 
+  * Well established and tested public APIs.
+  * Clearly documented deprecation cycles.
+  * Internally stable code base.
+  * Externally tested release candidates.
+  * Regular releases with a predictable and published release cycle.
+  * Maintain suitable infrastructure for both testing and releasing, with as
+    much in public as feasible.
+
+* To make it as easy as possible for people to contribute.
+* To have a maintained public roadmap which will also include areas under
+  active development.
+* To have a governance document in place and working in practice.
+* To ensure that Numba receives timely updates for its core dependencies:
+  LLVM, NumPy and Python.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/numba-runtime.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/numba-runtime.rst
new file mode 100644
index 000000000..2eb290062
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/numba-runtime.rst
@@ -0,0 +1,200 @@
+.. _arch-numba-runtime:
+
+======================
+Notes on Numba Runtime
+======================
+
+
+The *Numba Runtime (NRT)* provides the language runtime to the *nopython mode*
+Python subset. NRT is a standalone C library with a Python binding. This
+allows :term:`NPM` runtime features to be used without the GIL. Currently, the
+only language feature implemented in NRT is memory management.
+
+
+Memory Management
+=================
+
+NRT implements memory management for :term:`NPM` code. It uses *atomic
+reference counting* for thread-safe, deterministic memory management. NRT
+maintains a separate ``MemInfo`` structure for storing information about each
+allocation.
+
+Cooperating with CPython
+------------------------
+
+For NRT to cooperate with CPython, the NRT Python binding provides adaptors
+for converting Python objects that export a memory region. When such an
+object is used as an argument to a :term:`NPM` function, a new ``MemInfo`` is
+created and it acquires a reference to the Python object. When a :term:`NPM`
+value is returned to the Python interpreter, the associated ``MemInfo``
+(if any) is checked. If the ``MemInfo`` references a Python object, the
+underlying Python object is released and returned instead. Otherwise, the
+``MemInfo`` is wrapped in a Python object and returned. Additional processing
+may be required depending on the type.
+
+The current implementation supports NumPy arrays and any buffer-exporting
+types.
+
+
+Compiler-side Cooperation
+-------------------------
+
+NRT reference counting requires the compiler to emit incref/decref operations
+according to the usage. When the reference count drops to zero, the compiler
+must call the destructor routine in NRT.
+
+
+.. _nrt-refct-opt-pass:
+
+Optimizations
+-------------
+
+The compiler is allowed to emit incref/decref operations naively. It relies
+on an optimization pass to remove redundant reference count operations.
+
+A new optimization pass, implemented in version 0.52.0, removes reference
+count operations that fall into the following four categories of control-flow
+structure---per basic-block, diamond, fanout, fanout+raise. See the
+documentation for :envvar:`NUMBA_LLVM_REFPRUNE_FLAGS` for their descriptions.
+
+The old optimization pass runs at block level to avoid control flow analysis.
+It depends on the LLVM function optimization passes to simplify the control
+flow, promote stack slots to registers, and simplify instructions. It works
+by matching and removing incref and decref pairs within each block.
The old pass can be
+enabled by setting :envvar:`NUMBA_LLVM_REFPRUNE_PASS` to `0`.
+
+Important assumptions
+---------------------
+
+Both the old (pre-0.52.0) and the new (post-0.52.0) optimization passes assume
+that the only function that can consume a reference is ``NRT_decref``.
+It is important that there are no other functions that consume references.
+Since the passes operate on LLVM IR, the "functions" here refer to any
+callee in an LLVM call instruction.
+
+To summarize, all functions exposed to the refcount optimization pass
+**must not** consume counted references unless they do so via ``NRT_decref``.
+
+
+Quirks of the old optimization pass
+-----------------------------------
+
+Since the pre-0.52.0 `refcount optimization pass `_
+requires the LLVM function optimization passes, it works on the LLVM IR as
+text. The optimized IR is then materialized again as a new LLVM in-memory
+bitcode object.
+
+
+Debugging Leaks
+---------------
+
+To debug reference leaks in NRT MemInfo, each MemInfo Python object has a
+``.refcount`` attribute for inspection. To get the MemInfo from an ndarray
+allocated by NRT, use the ``.base`` attribute.
+
+To debug memory leaks in NRT, the ``numba.core.runtime.rtsys`` defines
+``.get_allocation_stats()``. It returns a namedtuple containing the
+numbers of allocations and deallocations since the start of the program.
+Checking that the allocation and deallocation counters match is the
+simplest way to know if the NRT is leaking.
+
+
+Debugging Leaks in C
+--------------------
+
+The start of `numba/core/runtime/nrt.h
+`_
+has these lines:
+
+.. code-block:: C
+
+   /* Debugging facilities - enabled at compile-time */
+   /* #undef NDEBUG */
+   #if 0
+   #   define NRT_Debug(X) X
+   #else
+   #   define NRT_Debug(X) if (0) { X; }
+   #endif
+
+Undefining ``NDEBUG`` (uncomment the ``#undef NDEBUG`` line) enables the
+assertion checks in NRT.
+
+Enabling ``NRT_Debug`` (replace ``#if 0`` with ``#if 1``) turns on debug
+prints inside NRT.
+
+
+Recursion Support
+=================
+
+During the compilation of a pair of mutually recursive functions, one of the
+functions will contain unresolved symbol references since the compiler handles
+one function at a time. The memory for the unresolved symbols is allocated and
+initialized to the address of the *unresolved symbol abort* function
+(``nrt_unresolved_abort``) just before the machine code is
+generated by LLVM. These symbols are tracked and resolved as new functions are
+compiled. If a bug prevents the resolution of these symbols,
+the abort function will be called, raising a ``RuntimeError`` exception.
+
+The *unresolved symbol abort* function is defined in the NRT with a
+zero-argument signature. The caller is safe to call it with an arbitrary
+number of arguments. Therefore, it is safe to use in place of the intended
+callee.
+
+Using the NRT from C code
+=========================
+
+Externally compiled C code should use the ``NRT_api_functions`` struct as a
+function table to access the NRT API. The struct is defined in
+:ghfile:`numba/core/runtime/nrt_external.h`. Users can use the utility
+function ``numba.extending.include_path()`` to determine the include directory
+for Numba provided C headers.
+
+.. literalinclude:: ../../../numba/core/runtime/nrt_external.h
+   :language: C
+   :caption: `numba/core/runtime/nrt_external.h`
+
+Inside Numba compiled code, the ``numba.core.unsafe.nrt.NRT_get_api()``
+intrinsic can be used to obtain a pointer to the ``NRT_api_functions``.
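+
+As a hedged illustration (the shared library and its ``my_c_entry_point``
+function below are hypothetical; only ``NRT_get_api()`` itself comes from the
+text above), the resulting pointer can be forwarded to external C code
+through a ctypes-declared function in *nopython mode*:
+
+.. code-block:: python
+
+    import ctypes
+    from numba import njit
+    from numba.core.unsafe.nrt import NRT_get_api
+
+    # Hypothetical C library whose entry point expects the
+    # NRT_api_functions* declared in nrt_external.h.
+    lib = ctypes.CDLL('./libnrtdemo.so')
+    lib.my_c_entry_point.argtypes = [ctypes.c_void_p]
+    lib.my_c_entry_point.restype = None
+
+    @njit
+    def call_into_c():
+        # Obtain the NRT function table and hand it to the C side.
+        lib.my_c_entry_point(NRT_get_api())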
+
+Here is an example that uses the ``nrt_external.h``:
+
+.. code-block:: C
+
+    #include <stdlib.h>
+    #include "numba/core/runtime/nrt_external.h"
+
+    void my_dtor(void *ptr) {
+        free(ptr);
+    }
+
+    NRT_MemInfo* my_allocate(NRT_api_functions *nrt) {
+        /* heap allocate some memory */
+        void *data = malloc(10);
+        /* wrap the allocated memory; yield a new reference */
+        NRT_MemInfo *mi = nrt->manage_memory(data, my_dtor);
+        /* acquire reference */
+        nrt->acquire(mi);
+        /* release reference */
+        nrt->release(mi);
+        return mi;
+    }
+
+It is important to ensure that the NRT is initialized prior to making calls
+to it; calling ``numba.core.runtime.nrt.rtsys.initialize(context)`` from
+Python will have the desired effect. Similarly, the code snippet:
+
+.. code-block:: Python
+
+    from numba.core.registry import cpu_target  # Get the CPU target singleton
+    cpu_target.target_context  # Access the target_context property to initialize
+
+will achieve the same specifically for Numba's CPU target (the default).
+Failure to initialize the NRT will result in access violations, as function
+pointers for various internal atomic operations will be missing in the
+``NRT_MemSys`` struct.
+
+Future Plan
+===========
+
+The plan for NRT is to make it a standalone shared library that can be linked
+to Numba compiled code, including use within the Python interpreter and
+without the Python interpreter. To make that work, we will be doing some
+refactoring:
+
+* numba :term:`NPM` code references statically compiled code in "helperlib.c".
+  Those functions should be moved to NRT.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/release.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/release.rst
new file mode 100644
index 000000000..b547ee2bb
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/release.rst
@@ -0,0 +1,49 @@
+Numba Release Process
+=====================
+
+The goal of the Numba release process -- from a high level perspective -- is
+to publish source and binary artifacts that correspond to a given version
+number. This usually involves a sequence of individual tasks that must be
+performed in the correct order and with diligence. Numba and llvmlite are
+commonly released in lockstep since there is usually a one-to-one mapping
+between a Numba version and a corresponding llvmlite version.
+
+This section contains various notes and templates that can be used to create
+a Numba release checklist on the Numba GitHub issue tracker. This is an aid
+for the maintainers during the release process and helps to ensure that all
+tasks are completed in the correct order and that no tasks are accidentally
+omitted.
+
+If new or additional items do appear during release, please do remember to add
+them to the checklist templates. Also note that the release process itself is
+always a work in progress. This means that some of the information here may be
+outdated. If you notice this, please do remember to submit a pull-request to
+update this document.
+
+All release checklists are available as GitHub issue templates. To create a
+new release checklist, simply open a new issue and select the correct
+template.
+
+
+Primary Release Candidate Checklist
+-----------------------------------
+
+This is for the first/primary release candidate for a minor release, i.e. the
+first release of every series. It is special because, during this release,
+the release branch will have to be created. Release candidate indexing begins
+at 1.
+
+..
literalinclude:: ../../../.github/ISSUE_TEMPLATE/first_rc_checklist.md + :language: md + :lines: 9- + +`Open a primary release checklist `_. + +Subsequent Release Candidates, Final Releases and Patch Releases +---------------------------------------------------------------- + +Releases subsequent to the first release in a series usually involves a series +of cherry-picks, the recipe is therefore slightly different. + +.. literalinclude:: ../../../.github/ISSUE_TEMPLATE/sub_rc_checklist.md + :language: md + :lines: 9- + +`Open a subsequent release checklist `_. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/repomap.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/repomap.rst new file mode 100644 index 000000000..12bfd1b16 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/repomap.rst @@ -0,0 +1,582 @@ +A Map of the Numba Repository +============================= + +The Numba repository is quite large, and due to age has functionality spread +around many locations. To help orient developers, this document will try to +summarize where different categories of functionality can be found. + + +Support Files +------------- + +Build and Packaging +''''''''''''''''''' + +- :ghfile:`setup.py` - Standard Python distutils/setuptools script +- :ghfile:`MANIFEST.in` - Distutils packaging instructions +- :ghfile:`requirements.txt` - Pip package requirements, not used by conda +- :ghfile:`versioneer.py` - Handles automatic setting of version in + installed package from git tags +- :ghfile:`.flake8` - Preferences for code formatting. Files should be + fixed and removed from the exception list as time allows. +- :ghfile:`.pre-commit-config.yaml` - Configuration file for pre-commit hooks. +- :ghfile:`.readthedocs.yml` - Configuration file for Read the Docs. +- :ghfile:`buildscripts/condarecipe.local` - Conda build recipe +- :ghfile:`buildscripts/condarecipe_clone_icc_rt` - Recipe to build a + standalone icc_rt package. + + +Continuous Integration +'''''''''''''''''''''' +- :ghfile:`azure-pipelines.yml` - Azure Pipelines CI config (active: + Win/Mac/Linux) +- :ghfile:`buildscripts/azure/` - Azure Pipeline configuration for specific + platforms +- :ghfile:`buildscripts/appveyor/` - Appveyor build scripts +- :ghfile:`buildscripts/incremental/` - Generic scripts for building Numba + on various CI systems +- :ghfile:`codecov.yml` - Codecov.io coverage reporting + + +Documentation +''''''''''''' +- :ghfile:`LICENSE` - License for Numba +- :ghfile:`LICENSES.third-party` - License for third party code vendored + into Numba +- :ghfile:`README.rst` - README for repo, also uploaded to PyPI +- :ghfile:`CONTRIBUTING.md` - Documentation on how to contribute to project + (out of date, should be updated to point to Sphinx docs) +- :ghfile:`CHANGE_LOG` - History of Numba releases, also directly embedded + into Sphinx documentation +- :ghfile:`docs/` - Documentation source +- :ghfile:`docs/_templates/` - Directory for templates (to override defaults + with Sphinx theme) +- :ghfile:`docs/Makefile` - Used to build Sphinx docs with ``make`` +- :ghfile:`docs/source` - ReST source for Numba documentation +- :ghfile:`docs/_static/` - Static CSS and image assets for Numba docs +- :ghfile:`docs/gh-pages.py` - Utility script to update Numba docs (stored + as gh-pages) +- :ghfile:`docs/make.bat` - Not used (remove?) +- :ghfile:`docs/requirements.txt` - Pip package requirements for building docs + with Read the Docs. 
+- :ghfile:`numba/scripts/generate_lower_listing.py` - Dump all registered + implementations decorated with ``@lower*`` for reference + documentation. Currently misses implementations from the higher + level extension API. + + +Numba Source Code +----------------- + +Numba ships with both the source code and tests in one package. + +- :ghfile:`numba/` - all of the source code and tests + + +Public API +'''''''''' + +These define aspects of the public Numba interface. + +- :ghfile:`numba/core/decorators.py` - User-facing decorators for compiling + regular functions on the CPU +- :ghfile:`numba/core/extending.py` - Public decorators for extending Numba + (``overload``, ``intrinsic``, etc) + - :ghfile:`numba/experimental/structref.py` - Public API for defining a mutable struct +- :ghfile:`numba/core/ccallback.py` - ``@cfunc`` decorator for compiling + functions to a fixed C signature. Used to make callbacks. +- :ghfile:`numba/np/ufunc/decorators.py` - ufunc/gufunc compilation + decorators +- :ghfile:`numba/core/config.py` - Numba global config options and environment + variable handling +- :ghfile:`numba/core/annotations` - Gathering and printing type annotations of + Numba IR +- :ghfile:`numba/core/annotations/pretty_annotate.py` - Code highlighting of + Numba functions and types (both ANSI terminal and HTML) +- :ghfile:`numba/core/event.py` - A simple event system for applications to + listen to specific compiler events. + + +Dispatching +''''''''''' + +- :ghfile:`numba/core/dispatcher.py` - Dispatcher objects are compiled functions + produced by ``@jit``. A dispatcher has different implementations + for different type signatures. +- :ghfile:`numba/_dispatcher.cpp` - C++ dispatcher implementation (for speed on + common data types) +- :ghfile:`numba/core/retarget.py` - Support for dispatcher objects to switch + target via a specific with-context. + + +Compiler Pipeline +''''''''''''''''' + +- :ghfile:`numba/core/compiler.py` - Compiler pipelines and flags +- :ghfile:`numba/core/errors.py` - Numba exception and warning classes +- :ghfile:`numba/core/ir.py` - Numba IR data structure objects +- :ghfile:`numba/core/bytecode.py` - Bytecode parsing and function identity (??) 
+- :ghfile:`numba/core/interpreter.py` - Translate Python interpreter bytecode to + Numba IR +- :ghfile:`numba/core/analysis.py` - Utility functions to analyze Numba IR + (variable lifetime, prune branches, etc) +- :ghfile:`numba/core/dataflow.py` - Dataflow analysis for Python bytecode (used + in analysis.py) +- :ghfile:`numba/core/controlflow.py` - Control flow analysis of Numba IR and + Python bytecode +- :ghfile:`numba/core/typeinfer.py` - Type inference algorithm +- :ghfile:`numba/core/transforms.py` - Numba IR transformations +- :ghfile:`numba/core/rewrites` - Rewrite passes used by compiler +- :ghfile:`numba/core/rewrites/__init__.py` - Loads all rewrite passes so they + are put into the registry +- :ghfile:`numba/core/rewrites/registry.py` - Registry object for collecting + rewrite passes +- :ghfile:`numba/core/rewrites/ir_print.py` - Write print() calls into special + print nodes in the IR +- :ghfile:`numba/core/rewrites/static_raise.py` - Converts exceptions with + static arguments into a special form that can be lowered +- :ghfile:`numba/core/rewrites/static_getitem.py` - Rewrites getitem and setitem + with constant arguments to allow type inference +- :ghfile:`numba/core/rewrites/static_binop.py` - Rewrites binary operations + (specifically ``**``) with constant arguments so faster code can be + generated +- :ghfile:`numba/core/inline_closurecall.py` - Inlines body of closure functions + to call site. Support for array comprehensions, reduction inlining, + and stencil inlining. +- :ghfile:`numba/core/postproc.py` - Postprocessor for Numba IR that computes + variable lifetime, inserts del operations, and handles generators +- :ghfile:`numba/core/lowering.py` - General implementation of lowering Numba IR + to LLVM + :ghfile:`numba/core/environment.py` - Runtime environment object +- :ghfile:`numba/core/withcontexts.py` - General scaffolding for implementing + context managers in nopython mode, and the objectmode context + manager +- :ghfile:`numba/core/pylowering.py` - Lowering of Numba IR in object mode +- :ghfile:`numba/core/pythonapi.py` - LLVM IR code generation to interface with + CPython API +- :ghfile:`numba/core/targetconfig.py` - Utils for target configurations such + as compiler flags. + + +Type Management +''''''''''''''' + +- :ghfile:`numba/core/typeconv/` - Implementation of type casting and type + signature matching in both C++ and Python +- :ghfile:`numba/capsulethunk.h` - Used by typeconv +- :ghfile:`numba/core/types/` - definition of the Numba type hierarchy, used + everywhere in compiler to select implementations +- :ghfile:`numba/core/consts.py` - Constant inference (used to make constant + values available during codegen when possible) +- :ghfile:`numba/core/datamodel` - LLVM IR representations of data types in + different contexts +- :ghfile:`numba/core/datamodel/models.py` - Models for most standard types +- :ghfile:`numba/core/datamodel/registry.py` - Decorator to register new data + models +- :ghfile:`numba/core/datamodel/packer.py` - Pack typed values into a data + structure +- :ghfile:`numba/core/datamodel/testing.py` - Data model tests (this should + move??) +- :ghfile:`numba/core/datamodel/manager.py` - Map types to data models + + +Compiled Extensions +''''''''''''''''''' + +Numba uses a small amount of compiled C/C++ code for core +functionality, like dispatching and type matching where performance +matters, and it is more convenient to encapsulate direct interaction +with CPython APIs. 
+ +- :ghfile:`numba/_arraystruct.h` - Struct for holding NumPy array + attributes. Used in helperlib and the Numba Runtime. +- :ghfile:`numba/_helperlib.c` - C functions required by Numba compiled code + at runtime. Linked into ahead-of-time compiled modules +- :ghfile:`numba/_helpermod.c` - Python extension module with pointers to + functions from ``_helperlib.c`` and ``_npymath_exports.c`` +- :ghfile:`numba/_npymath_exports.c` - Export function pointer table to + NumPy C math functions +- :ghfile:`numba/_dynfuncmod.c` - Python extension module exporting + _dynfunc.c functionality +- :ghfile:`numba/_dynfunc.c` - C level Environment and Closure objects (keep + in sync with numba/target/base.py) +- :ghfile:`numba/mathnames.h` - Macros for defining names of math functions +- :ghfile:`numba/_pymodule.h` - C macros for Python 2/3 portable naming of C + API functions +- :ghfile:`numba/mviewbuf.c` - Handles Python memoryviews +- :ghfile:`numba/_typeof.{h,c}` - C implementation of type fingerprinting, + used by dispatcher +- :ghfile:`numba/_numba_common.h` - Portable C macro for marking symbols + that can be shared between object files, but not outside the + library. + + + +Misc Support +'''''''''''' + +- :ghfile:`numba/_version.py` - Updated by versioneer +- :ghfile:`numba/core/runtime` - Language runtime. Currently manages + reference-counted memory allocated on the heap by Numba-compiled + functions +- :ghfile:`numba/core/ir_utils.py` - Utility functions for working with Numba IR + data structures +- :ghfile:`numba/core/cgutils.py` - Utility functions for generating common code + patterns in LLVM IR +- :ghfile:`numba/core/utils.py` - Python 2 backports of Python 3 functionality + (also imports local copy of ``six``) +- :ghfile:`numba/core/overload_glue.py` - Functions for wrapping split typing + and lowering API use cases into overloads. +- :ghfile:`numba/misc/appdirs.py` - Vendored package for determining application + config directories on every platform +- :ghfile:`numba/core/compiler_lock.py` - Global compiler lock because Numba's + usage of LLVM is not thread-safe +- :ghfile:`numba/misc/special.py` - Python stub implementations of special Numba + functions (prange, gdb*) +- :ghfile:`numba/core/itanium_mangler.py` - Python implementation of Itanium C++ + name mangling +- :ghfile:`numba/misc/findlib.py` - Helper function for locating shared + libraries on all platforms +- :ghfile:`numba/core/debuginfo.py` - Helper functions to construct LLVM IR + debug + info +- :ghfile:`numba/core/unsafe/refcount.py` - Read reference count of object +- :ghfile:`numba/core/unsafe/eh.py` - Exception handling helpers +- :ghfile:`numba/core/unsafe/nrt.py` - Numba runtime (NRT) helpers +- :ghfile:`numba/cpython/unsafe/tuple.py` - Replace a value in a tuple slot +- :ghfile:`numba/np/unsafe/ndarray.py` - NumPy array helpers +- :ghfile:`numba/core/unsafe/bytes.py` - Copying and dereferencing data from + void pointers +- :ghfile:`numba/misc/dummyarray.py` - Used by GPU backends to hold array + information on the host, but not the data. +- :ghfile:`numba/core/callwrapper.py` - Handles argument unboxing and releasing + the GIL when moving from Python to nopython mode +- :ghfile:`numba/np/numpy_support.py` - Helper functions for working with NumPy + and translating Numba types to and from NumPy dtypes. 
+- :ghfile:`numba/core/tracing.py` - Decorator for tracing Python calls and + emitting log messages +- :ghfile:`numba/core/funcdesc.py` - Classes for describing function metadata + (used in the compiler) +- :ghfile:`numba/core/sigutils.py` - Helper functions for parsing and + normalizing Numba type signatures +- :ghfile:`numba/core/serialize.py` - Support for pickling compiled functions +- :ghfile:`numba/core/caching.py` - Disk cache for compiled functions +- :ghfile:`numba/np/npdatetime.py` - Helper functions for implementing NumPy + datetime64 support +- :ghfile:`numba/misc/llvm_pass_timings.py` - Helper to record timings of + LLVM passes. +- :ghfile:`numba/cloudpickle` - Vendored cloudpickle subpackage + +Core Python Data Types +'''''''''''''''''''''' + +- :ghfile:`numba/_hashtable.{h,c}` - Adaptation of the Python 3.7 hash table + implementation +- :ghfile:`numba/cext/dictobject.{h,c}` - C level implementation of typed + dictionary +- :ghfile:`numba/typed/dictobject.py` - Nopython mode wrapper for typed + dictionary +- :ghfile:`numba/cext/listobject.{h,c}` - C level implementation of typed list +- :ghfile:`numba/typed/listobject.py` - Nopython mode wrapper for typed list +- :ghfile:`numba/typed/typedobjectutils.py` - Common utilities for typed + dictionary and list +- :ghfile:`numba/cpython/unicode.py` - Unicode strings (Python 3.5 and later) +- :ghfile:`numba/typed` - Python interfaces to statically typed containers +- :ghfile:`numba/typed/typeddict.py` - Python interface to typed dictionary +- :ghfile:`numba/typed/typedlist.py` - Python interface to typed list +- :ghfile:`numba/experimental/jitclass` - Implementation of experimental JIT + compilation of Python classes +- :ghfile:`numba/core/generators.py` - Support for lowering Python generators + + +Math +'''' + +- :ghfile:`numba/_random.c` - Reimplementation of NumPy / CPython random + number generator +- :ghfile:`numba/_lapack.c` - Wrappers for calling BLAS and LAPACK functions + (requires SciPy) + + +ParallelAccelerator +''''''''''''''''''' + +Code transformation passes that extract parallelizable code from +a function and convert it into multithreaded gufunc calls. + +- :ghfile:`numba/parfors/parfor.py` - General ParallelAccelerator +- :ghfile:`numba/parfors/parfor_lowering.py` - gufunc lowering for + ParallelAccelerator +- :ghfile:`numba/parfors/array_analysis.py` - Array analysis passes used in + ParallelAccelerator + + +Stencil +''''''' + +Implementation of ``@stencil``: + +- :ghfile:`numba/stencils/stencil.py` - Stencil function decorator (implemented + without ParallelAccelerator) +- :ghfile:`numba/stencils/stencilparfor.py` - ParallelAccelerator implementation + of stencil + + +Debugging Support +''''''''''''''''' + +- :ghfile:`numba/misc/gdb_hook.py` - Hooks to jump into GDB from nopython + mode +- :ghfile:`numba/misc/cmdlang.gdb` - Commands to setup GDB for setting + explicit breakpoints from Python + + +Type Signatures (CPU) +''''''''''''''''''''' + +Some (usually older) Numba supported functionality separates the +declaration of allowed type signatures from the definition of +implementations. This package contains registries of type signatures +that must be matched during type inference. 
+ +- :ghfile:`numba/core/typing` - Type signature module +- :ghfile:`numba/core/typing/templates.py` - Base classes for type signature + templates +- :ghfile:`numba/core/typing/cmathdecl.py` - Python complex math (``cmath``) + module +- :ghfile:`numba/core/typing/bufproto.py` - Interpreting objects supporting the + buffer protocol +- :ghfile:`numba/core/typing/mathdecl.py` - Python ``math`` module +- :ghfile:`numba/core/typing/listdecl.py` - Python lists +- :ghfile:`numba/core/typing/builtins.py` - Python builtin global functions and + operators +- :ghfile:`numba/core/typing/randomdecl.py` - Python and NumPy ``random`` + modules +- :ghfile:`numba/core/typing/setdecl.py` - Python sets +- :ghfile:`numba/core/typing/npydecl.py` - NumPy ndarray (and operators), NumPy + functions +- :ghfile:`numba/core/typing/arraydecl.py` - Python ``array`` module +- :ghfile:`numba/core/typing/context.py` - Implementation of typing context + (class that collects methods used in type inference) +- :ghfile:`numba/core/typing/collections.py` - Generic container operations and + namedtuples +- :ghfile:`numba/core/typing/ctypes_utils.py` - Typing ctypes-wrapped function + pointers +- :ghfile:`numba/core/typing/enumdecl.py` - Enum types +- :ghfile:`numba/core/typing/cffi_utils.py` - Typing of CFFI objects +- :ghfile:`numba/core/typing/typeof.py` - Implementation of typeof operations + (maps Python object to Numba type) +- :ghfile:`numba/core/typing/asnumbatype.py` - Implementation of + ``as_numba_type`` operations (maps Python types to Numba type) +- :ghfile:`numba/core/typing/npdatetime.py` - Datetime dtype support for NumPy + arrays + + +Target Implementations (CPU) +'''''''''''''''''''''''''''' + +Implementations of Python / NumPy functions and some data models. +These modules are responsible for generating LLVM IR during lowering. +Note that some of these modules do not have counterparts in the typing +package because newer Numba extension APIs (like overload) allow +typing and implementation to be specified together. + +- :ghfile:`numba/core/cpu.py` - Context for code gen on CPU +- :ghfile:`numba/core/base.py` - Base class for all target contexts +- :ghfile:`numba/core/codegen.py` - Driver for code generation +- :ghfile:`numba/core/boxing.py` - Boxing and unboxing for most data + types +- :ghfile:`numba/core/intrinsics.py` - Utilities for converting LLVM + intrinsics to other math calls +- :ghfile:`numba/core/callconv.py` - Implements different calling + conventions for Numba-compiled functions +- :ghfile:`numba/core/options.py` - Container for options that control + lowering +- :ghfile:`numba/core/optional.py` - Special type representing value or + ``None`` +- :ghfile:`numba/core/registry.py` - Registry object for collecting + implementations for a specific target +- :ghfile:`numba/core/imputils.py` - Helper functions for lowering +- :ghfile:`numba/core/externals.py` - Registers external C functions + needed to link generated code +- :ghfile:`numba/core/fastmathpass.py` - Rewrite pass to add fastmath + attributes to function call sites and binary operations +- :ghfile:`numba/core/removerefctpass.py` - Rewrite pass to remove + unnecessary incref/decref pairs +- :ghfile:`numba/core/descriptors.py` - empty base class for all target + descriptors (is this needed?) 
+- :ghfile:`numba/cpython/builtins.py` - Python builtin functions and + operators +- :ghfile:`numba/cpython/cmathimpl.py` - Python complex math module +- :ghfile:`numba/cpython/enumimpl.py` - Enum objects +- :ghfile:`numba/cpython/hashing.py` - Hashing algorithms +- :ghfile:`numba/cpython/heapq.py` - Python ``heapq`` module +- :ghfile:`numba/cpython/iterators.py` - Iterable data types and iterators +- :ghfile:`numba/cpython/listobj.py` - Python lists +- :ghfile:`numba/cpython/mathimpl.py` - Python ``math`` module +- :ghfile:`numba/cpython/numbers.py` - Numeric values (int, float, etc) +- :ghfile:`numba/cpython/printimpl.py` - Print function +- :ghfile:`numba/cpython/randomimpl.py` - Python and NumPy ``random`` + modules +- :ghfile:`numba/cpython/rangeobj.py` - Python `range` objects +- :ghfile:`numba/cpython/slicing.py` - Slice objects, and index calculations + used in slicing +- :ghfile:`numba/cpython/setobj.py` - Python set type +- :ghfile:`numba/cpython/tupleobj.py` - Tuples (statically typed as + immutable struct) +- :ghfile:`numba/misc/cffiimpl.py` - CFFI functions +- :ghfile:`numba/misc/quicksort.py` - Quicksort implementation used with + list and array objects +- :ghfile:`numba/misc/mergesort.py` - Mergesort implementation used with + array objects +- :ghfile:`numba/np/arraymath.py` - Math operations on arrays (both + Python and NumPy) +- :ghfile:`numba/np/arrayobj.py` - Array operations (both NumPy and + buffer protocol) +- :ghfile:`numba/np/linalg.py` - NumPy linear algebra operations +- :ghfile:`numba/np/npdatetime.py` - NumPy datetime operations +- :ghfile:`numba/np/npyfuncs.py` - Kernels used in generating some + NumPy ufuncs +- :ghfile:`numba/np/npyimpl.py` - Implementations of most NumPy ufuncs +- :ghfile:`numba/np/polynomial.py` - ``numpy.roots`` function +- :ghfile:`numba/np/ufunc_db.py` - Big table mapping types to ufunc + implementations + + +Ufunc Compiler and Runtime +'''''''''''''''''''''''''' + +- :ghfile:`numba/np/ufunc` - ufunc compiler implementation +- :ghfile:`numba/np/ufunc/_internal.{h,c}` - Python extension module with + helper functions that use CPython & NumPy C API +- :ghfile:`numba/np/ufunc/_ufunc.c` - Used by `_internal.c` +- :ghfile:`numba/np/ufunc/deviceufunc.py` - Custom ufunc dispatch for + non-CPU targets +- :ghfile:`numba/np/ufunc/gufunc_scheduler.{h,cpp}` - Schedule work chunks + to threads +- :ghfile:`numba/np/ufunc/dufunc.py` - Special ufunc that can compile new + implementations at call time +- :ghfile:`numba/np/ufunc/ufuncbuilder.py` - Top-level orchestration of + ufunc/gufunc compiler pipeline +- :ghfile:`numba/np/ufunc/sigparse.py` - Parser for generalized ufunc + indexing signatures +- :ghfile:`numba/np/ufunc/parallel.py` - Codegen for ``parallel`` target +- :ghfile:`numba/np/ufunc/array_exprs.py` - Rewrite pass for turning array + expressions in regular functions into ufuncs +- :ghfile:`numba/np/ufunc/wrappers.py` - Wrap scalar function kernel with + loops +- :ghfile:`numba/np/ufunc/workqueue.{h,c}` - Threading backend based on + pthreads/Windows threads and queues +- :ghfile:`numba/np/ufunc/omppool.cpp` - Threading backend based on OpenMP +- :ghfile:`numba/np/ufunc/tbbpool.cpp` - Threading backend based on TBB + + + +Unit Tests (CPU) +'''''''''''''''' + +CPU unit tests (GPU target unit tests listed in later sections + +- :ghfile:`runtests.py` - Convenience script that launches test runner and + turns on full compiler tracebacks +- :ghfile:`.coveragerc` - Coverage.py configuration +- :ghfile:`numba/runtests.py` - Entry point to unittest 
runner +- :ghfile:`numba/testing/_runtests.py` - Implementation of custom test runner + command line interface +- :ghfile:`numba/tests/test_*` - Test cases +- :ghfile:`numba/tests/*_usecases.py` - Python functions compiled by some + unit tests +- :ghfile:`numba/tests/support.py` - Helper functions for testing and + special TestCase implementation +- :ghfile:`numba/tests/dummy_module.py` - Module used in + ``test_dispatcher.py`` +- :ghfile:`numba/tests/npyufunc` - ufunc / gufunc compiler tests +- :ghfile:`numba/testing` - Support code for testing +- :ghfile:`numba/testing/loader.py` - Find tests on disk +- :ghfile:`numba/testing/notebook.py` - Support for testing notebooks +- :ghfile:`numba/testing/main.py` - Numba test runner + + +Command Line Utilities +'''''''''''''''''''''' +- :ghfile:`bin/numba` - Command line stub, delegates to main in + ``numba_entry.py`` +- :ghfile:`numba/misc/numba_entry.py` - Main function for ``numba`` command line + tool +- :ghfile:`numba/pycc` - Ahead of time compilation of functions to shared + library extension +- :ghfile:`numba/pycc/__init__.py` - Main function for ``pycc`` command line + tool +- :ghfile:`numba/pycc/cc.py` - User-facing API for tagging functions to + compile ahead of time +- :ghfile:`numba/pycc/compiler.py` - Compiler pipeline for creating + standalone Python extension modules +- :ghfile:`numba/pycc/llvm_types.py` - Aliases to LLVM data types used by + ``compiler.py`` +- :ghfile:`numba/pycc/pycc` - Stub to call main function. Is this still + used? +- :ghfile:`numba/pycc/modulemixin.c` - C file compiled into every compiled + extension. Pulls in C source from Numba core that is needed to make + extension standalone +- :ghfile:`numba/pycc/platform.py` - Portable interface to platform-specific + compiler toolchains +- :ghfile:`numba/pycc/decorators.py` - Deprecated decorators for tagging + functions to compile. Use ``cc.py`` instead. + + +CUDA GPU Target +''''''''''''''' + +Note that the CUDA target does reuse some parts of the CPU target. + +- :ghfile:`numba/cuda/` - The implementation of the CUDA (NVIDIA GPU) target + and associated unit tests +- :ghfile:`numba/cuda/decorators.py` - Compiler decorators for CUDA kernels + and device functions +- :ghfile:`numba/cuda/dispatcher.py` - Dispatcher for CUDA JIT functions +- :ghfile:`numba/cuda/printimpl.py` - Special implementation of device printing +- :ghfile:`numba/cuda/libdevice.py` - Registers libdevice functions +- :ghfile:`numba/cuda/kernels/` - Custom kernels for reduction and transpose +- :ghfile:`numba/cuda/device_init.py` - Initializes the CUDA target when + imported +- :ghfile:`numba/cuda/compiler.py` - Compiler pipeline for CUDA target +- :ghfile:`numba/cuda/intrinsic_wrapper.py` - CUDA device intrinsics + (shuffle, ballot, etc) +- :ghfile:`numba/cuda/initialize.py` - Deferred initialization of the CUDA + device and subsystem. 
Called only when user imports ``numba.cuda`` +- :ghfile:`numba/cuda/simulator_init.py` - Initializes the CUDA simulator + subsystem (only when user requests it with env var) +- :ghfile:`numba/cuda/random.py` - Implementation of random number generator +- :ghfile:`numba/cuda/api.py` - User facing APIs imported into ``numba.cuda.*`` +- :ghfile:`numba/cuda/stubs.py` - Python placeholders for functions that + only can be used in GPU device code +- :ghfile:`numba/cuda/simulator/` - Simulate execution of CUDA kernels in + Python interpreter +- :ghfile:`numba/cuda/vectorizers.py` - Subclasses of ufunc/gufunc compilers + for CUDA +- :ghfile:`numba/cuda/args.py` - Management of kernel arguments, including + host<->device transfers +- :ghfile:`numba/cuda/target.py` - Typing and target contexts for GPU +- :ghfile:`numba/cuda/cudamath.py` - Type signatures for math functions in + CUDA Python +- :ghfile:`numba/cuda/errors.py` - Validation of kernel launch configuration +- :ghfile:`numba/cuda/nvvmutils.py` - Helper functions for generating + NVVM-specific IR +- :ghfile:`numba/cuda/testing.py` - Support code for creating CUDA unit + tests and capturing standard out +- :ghfile:`numba/cuda/cudadecl.py` - Type signatures of CUDA API (threadIdx, + blockIdx, atomics) in Python on GPU +- :ghfile:`numba/cuda/cudaimpl.py` - Implementations of CUDA API functions + on GPU +- :ghfile:`numba/cuda/codegen.py` - Code generator object for CUDA target +- :ghfile:`numba/cuda/cudadrv/` - Wrapper around CUDA driver API +- :ghfile:`numba/cuda/tests/` - CUDA unit tests, skipped when CUDA is not + detected +- :ghfile:`numba/cuda/tests/cudasim/` - Tests of CUDA simulator +- :ghfile:`numba/cuda/tests/nocuda/` - Tests for NVVM functionality when + CUDA not present +- :ghfile:`numba/cuda/tests/cudapy/` - Tests of compiling Python functions + for GPU +- :ghfile:`numba/cuda/tests/cudadrv/` - Tests of Python wrapper around CUDA + API + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/rewrites.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/rewrites.rst new file mode 100644 index 000000000..ff162c611 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/rewrites.rst @@ -0,0 +1,397 @@ +===================================================== +Using the Numba Rewrite Pass for Fun and Optimization +===================================================== + +Overview +======== + +This section introduces intermediate representation (IR) rewrites, and +how they can be used to implement optimizations. + +As discussed earlier in ":ref:`rewrite-typed-ir`", rewriting the Numba +IR allows us to perform optimizations that would be much more +difficult to perform at the lower LLVM level. Similar to the Numba +type and lowering subsystems, the rewrite subsystem is user +extensible. This extensibility affords Numba the possibility of +supporting a wide variety of domain-specific optimizations (DSO's). + +The remaining subsections detail the mechanics of implementing a +rewrite, registering a rewrite with the rewrite registry, and provide +examples of adding new rewrites, as well as internals of the array +expression optimization pass. We conclude by reviewing some use cases +exposed in the examples, as well as reviewing any points where +developers should take care. + + +Rewriting Passes +================ + +Rewriting passes have a simple :func:`~Rewrite.match` and +:func:`~Rewrite.apply` interface. 
The division between matching and +rewriting follows how one would define a term rewrite in declarative +domain-specific languages (DSLs). In such DSLs, one may write a +rewrite as follows:: + + <match> => <replacement> + + +The ``<match>`` and ``<replacement>`` symbols represent IR term +expressions, where the left-hand side presents a pattern to match, and +the right-hand side an IR term constructor to build upon matching. +Whenever the rewrite matches an IR pattern, any free variables in the +left-hand side are bound within a custom environment. When applied, +the rewrite uses the pattern matching environment to bind any free +variables in the right-hand side. + +As Python is not commonly used in a declarative capacity, Numba uses +object state to handle the transfer of information between the +matching and application steps. + + +The :class:`Rewrite` Base Class +------------------------------- + +.. class:: Rewrite + + The :class:`Rewrite` class simply defines an abstract base class + for Numba rewrites. Developers should define rewrites as + subclasses of this base type, overloading the + :func:`~Rewrite.match` and :func:`~Rewrite.apply` methods. + + .. attribute:: pipeline + + The pipeline attribute contains the + :class:`numba.compiler.Pipeline` instance that is currently + compiling the function under consideration for rewriting. + + .. method:: __init__(self, pipeline, *args, **kws) + + The base constructor for rewrites simply stashes its arguments + into attributes of the same name. Unless being used in + debugging or testing, rewrites should only be constructed by + the :class:`RewriteRegistry` in the + :func:`RewriteRegistry.apply` method, and the construction + interface should remain stable (though the pipeline will + commonly contain just about everything there is to know). + + .. method:: match(self, func_ir, block, typemap, calltypes) + + The :func:`~Rewrite.match` method takes four arguments other + than *self*: + + * *func_ir*: This is an instance of :class:`numba.ir.FunctionIR` for the + function being rewritten. + + * *block*: This is an instance of :class:`numba.ir.Block`. The + matching method should iterate over the instructions contained + in the :attr:`numba.ir.Block.body` member. + + * *typemap*: This is a Python :class:`dict` instance mapping + from symbol names in the IR, represented as strings, to Numba + types. + + * *calltypes*: This is another :class:`dict` instance mapping from + calls, represented as :class:`numba.ir.Expr` instances, to + their corresponding call site type signatures, represented as + a :class:`numba.typing.templates.Signature` instance. + + The :func:`~Rewrite.match` method should return a :class:`bool` + result. A :obj:`True` result should indicate that one or more + matches were found, and the :func:`~Rewrite.apply` method will + return a new replacement :class:`numba.ir.Block` instance. A + :obj:`False` result should indicate that no matches were found, and + subsequent calls to :func:`~Rewrite.apply` will return undefined + or invalid results. + + .. method:: apply(self) + + The :func:`~Rewrite.apply` method should only be invoked + following a successful call to :func:`~Rewrite.match`. This + method takes no additional parameters other than *self*, and + should return a replacement :class:`numba.ir.Block` instance. + + As mentioned above, the behavior of calling + :func:`~Rewrite.apply` is undefined unless + :func:`~Rewrite.match` has already been called and returned + :obj:`True`.
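+Before looking at subclassing in detail, here is an illustrative
+skeleton of the object-state pattern described above (not a real
+optimization; the ``_is_candidate`` and ``_rewrite_instr`` helpers are
+hypothetical placeholders)::
+
+    from numba.core import ir
+
+    class MyRewriteSketch:
+        def match(self, func_ir, block, typemap, calltypes):
+            # Stash everything apply() will need on the instance.
+            self.block = block
+            self.matches = [instr for instr in block.body
+                            if self._is_candidate(instr, typemap)]
+            return len(self.matches) > 0
+
+        def apply(self):
+            # Build a replacement block: rewrite matched instructions,
+            # copy the rest through unchanged.
+            new_block = ir.Block(self.block.scope, self.block.loc)
+            for instr in self.block.body:
+                if instr in self.matches:
+                    new_block.append(self._rewrite_instr(instr))
+                else:
+                    new_block.append(instr)
+            return new_block
+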
+ + +Subclassing :class:`Rewrite` +---------------------------- + +Before going into the expectations for the overloaded methods any +:class:`Rewrite` subclass must have, let's step back a minute to +review what is taking place here. By providing an extensible +compiler, Numba opens itself to user-defined code generators which may +be incomplete, or worse, incorrect. When a code generator goes awry, +it can cause abnormal program behavior or early termination. +User-defined rewrites add a new level of complexity because they must +not only generate correct code, but the code they generate should +ensure that the compiler does not get stuck in a match/apply loop. +Non-termination by the compiler will directly lead to non-termination +of user function calls. + +There are several ways to help ensure that a rewrite terminates: + +* *Typing*: A rewrite should generally attempt to decompose composite + types, and avoid composing new types. If the rewrite is matching a + specific type, changing expression types to a lower-level type will + ensure they will no longer match after the rewrite is applied. + +* *Special instructions*: A rewrite may synthesize custom operators or + use special functions in the target IR. This technique again + generates code that is no longer within the domain of the original + match, and the rewrite will terminate. + +In the ":ref:`case-study-array-expressions`" subsection, below, we'll +see how the array expression rewriter uses both of these techniques. + + +Overloading :func:`Rewrite.match` +--------------------------------- + +Every rewrite developer should seek to have their implementation of +:func:`~Rewrite.match` return a :obj:`False` value as quickly as +possible. Numba is a just-in-time compiler, and adding compilation +time ultimately adds to the user's run time. When a rewrite returns +:obj:`False` for a given block, the registry will no longer process that +block with that rewrite, and the compiler is that much closer to +proceeding to lowering. + +This need for timeliness has to be balanced against collecting the +necessary information to make a match for a rewrite. Rewrite +developers should be comfortable adding dynamic attributes to their +subclasses, and then having these new attributes guide construction of +the replacement basic block. + + +Overloading :func:`Rewrite.apply` +----------------------------------- + +The :func:`~Rewrite.apply` method should return a replacement +:class:`numba.ir.Block` instance to replace the basic block that +contained a match for the rewrite. As mentioned above, the IR built +by :func:`~Rewrite.apply` methods should preserve the semantics of the +user's code, but also seek to avoid generating another match for the +same rewrite or set of rewrites. + + +The Rewrite Registry +==================== + +When you want to include a rewrite in the rewrite pass, you should +register it with the rewrite registry. The :mod:`numba.rewrites` +module provides both the abstract base class and a class decorator for +hooking into the Numba rewrite subsystem. The following illustrates a +stub definition of a new rewrite:: + + from numba import rewrites + + @rewrites.register_rewrite + class MyRewrite(rewrites.Rewrite): + + def match(self, func_ir, block, typemap, calltypes): + raise NotImplementedError("FIXME") + + def apply(self): + raise NotImplementedError("FIXME") + + +Developers should note that using the class decorator as shown above +will register a rewrite at import time.
It is the developer's +responsibility to ensure their extensions are loaded before +compilation starts. + + +.. _`case-study-array-expressions`: + +Case study: Array Expressions +============================= + +This subsection looks at the array expression rewriter in more depth. +The array expression rewriter, and most of its support functionality, +are found in the :mod:`numba.npyufunc.array_exprs` module. The +rewriting pass itself is implemented in the :class:`RewriteArrayExprs` +class. In addition to the rewriter, the +:mod:`~numba.npyufunc.array_exprs` module includes a function for +lowering array expressions, +:func:`~numba.npyufunc.array_exprs._lower_array_expr`. The overall +optimization process is as follows: + +* :func:`RewriteArrayExprs.match`: The rewrite pass looks for two or + more array operations that form an array expression. + +* :func:`RewriteArrayExprs.apply`: Once an array expression is found, + the rewriter replaces the individual array operations with a new + kind of IR expression, the ``arrayexpr``. + +* :func:`numba.npyufunc.array_exprs._lower_array_expr`: During + lowering, the code generator calls + :func:`~numba.npyufunc.array_exprs._lower_array_expr` whenever it + finds an ``arrayexpr`` IR expression. + +More details on each step of the optimization are given below. + + +The :func:`RewriteArrayExprs.match` method +------------------------------------------ + +The array expression optimization pass starts by looking for array +operations, including calls to supported :class:`~numpy.ufunc`\'s and +user-defined :class:`~numba.DUFunc`\'s. Numba IR follows the +conventions of a static single assignment (SSA) language, meaning that +the search for array operators begins with looking for assignment +instructions. + +When the rewriting pass calls the :func:`RewriteArrayExprs.match` +method, it first checks to see if it can trivially reject the basic +block. If the method determines the block to be a candidate for +matching, it sets up the following state variables in the rewrite +object: + +* *crnt_block*: The current basic block being matched. + +* *typemap*: The *typemap* for the function being matched. + +* *matches*: A list of variable names that reference array expressions. + +* *array_assigns*: A map from assignment variable names to the actual + assignment instructions that define the given variable. + +* *const_assigns*: A map from assignment variable names to the + constant valued expression that defines the constant variable. + +At this point, the match method iterates over the assignment +instructions in the input basic block. For each assignment +instruction, the matcher looks for one of two things: + +* Array operations: If the right-hand side of the assignment + instruction is an expression, and the result of that expression is + an array type, the matcher checks to see if the expression is either + a known array operation, or a call to a universal function. If an + array operator is found, the matcher stores the left-hand variable + name and the whole instruction in the *array_assigns* member. + Finally, the matcher tests to see if any operands of the array + operation have also been identified as targets of other array + operations. If one or more operands are also targets of array + operations, then the matcher will also append the left-hand side + variable name to the *matches* member. + +* Constants: Constants (even scalars) can be operands to array + operations. 
Without worrying about whether the constant is part of an + array expression, the matcher stores constant names and values in + the *const_assigns* member. + +The end of the matching method simply checks for a non-empty *matches* +list, returning :obj:`True` if there were one or more matches, and +:obj:`False` when *matches* is empty. + + +The :func:`RewriteArrayExprs.apply` method +------------------------------------------ + +When one or more matching array expressions are found by +:func:`RewriteArrayExprs.match`, the rewriting pass will call +:func:`RewriteArrayExprs.apply`. The apply method works in two +passes. The first pass iterates over the matches found, and builds a +map from instructions in the old basic block to new instructions in +the new basic block. The second pass iterates over the instructions +in the old basic block, copying instructions that are not changed by +the rewrite, and replacing or deleting instructions that were +identified by the first pass. + +The :func:`RewriteArrayExprs._handle_matches` method implements the first +pass of the code generation portion of the rewrite. For each match, +this method builds a special IR expression that contains an expression +tree for the array expression. To compute the leaves of the +expression tree, the :func:`~RewriteArrayExprs._handle_matches` method +iterates over the operands of the identified root operation. If the +operand is another array operation, it is translated into an +expression sub-tree. If the operand is a constant, +:func:`~RewriteArrayExprs._handle_matches` copies the constant value. +Otherwise, the operand is marked as being used by an array expression. +As the method builds array expression nodes, it builds a map from old +instructions to new instructions (*replace_map*), as well as sets of +variables that may have moved (*used_vars*), and variables that should +be removed altogether (*dead_vars*). These three data structures are +returned to the calling :func:`RewriteArrayExprs.apply` method. + +The remaining part of the :func:`RewriteArrayExprs.apply` method +iterates over the instructions in the old basic block. For each +instruction, this method either replaces, deletes, or duplicates that +instruction based on the results of +:func:`RewriteArrayExprs._handle_matches`. The following list +describes how the optimization handles individual instructions: + +* When an instruction is an assignment, + :func:`~RewriteArrayExprs.apply` checks to see if it is in the + replacement instruction map. When an assignment instruction is found + in the instruction map, :func:`~RewriteArrayExprs.apply` must then + check to see if the replacement instruction is also in the replacement + map. The optimizer continues this check until it either arrives at a + :obj:`None` value or an instruction that isn't in the replacement map. + Instructions that have a replacement that is :obj:`None` are deleted. + Instructions that have a non-:obj:`None` replacement are replaced. + Assignment instructions not in the replacement map are appended to the + new basic block with no changes made. + +* When the instruction is a delete instruction, the rewrite checks to + see if it deletes a variable that may still be used by a later array + expression, or if it deletes a dead variable. Delete instructions for + used variables are added to a map of deferred delete instructions that + :func:`~RewriteArrayExprs.apply` uses to move them past any uses of + that variable.
The loop copies delete instructions for non-dead + variables, and ignores delete instructions for dead variables + (effectively removing them from the basic block). + +* All other instructions are appended to the new basic block. + +Finally, the :func:`~RewriteArrayExprs.apply` method returns the new +basic block for lowering. + + +The :func:`~numba.npyufunc.array_exprs._lower_array_expr` function +------------------------------------------------------------------ + +If we left things at just the rewrite, then the lowering stage of the +compiler would fail, complaining it doesn't know how to lower +``arrayexpr`` operations. We start by hooking a lowering function +into the target context whenever the :class:`RewriteArrayExprs` class +is instantiated by the compiler. This hook causes the lowering pass to +call :func:`~numba.npyufunc.array_exprs._lower_array_expr` whenever it +encounters an ``arrayexpr`` operator. + +This function has two steps: + +* Synthesize a Python function that implements the array expression: + This new Python function essentially behaves like a Numpy + :class:`~numpy.ufunc`, returning the result of the expression on + scalar values in the broadcasted array arguments. The lowering + function accomplishes this by translating from the array expression + tree into a Python AST. + +* Compile the synthetic Python function into a kernel: At this point, + the lowering function relies on existing code for lowering ufunc and + DUFunc kernels, calling + :func:`numba.targets.numpyimpl.numpy_ufunc_kernel` after defining + how to lower calls to the synthetic function. + +The end result is similar to loop lifting in Numba's object mode. + + +Conclusions and Caveats +======================= + +We have seen how to implement rewrites in Numba, starting with the +interface, and ending with an actual optimization. The key points of +this section are: + +* When writing a good plug-in, the matcher should try to get a + go/no-go result as soon as possible. + +* The rewrite application portion can be more computationally + expensive, but should still generate code that won't cause infinite + loops in the compiler. + +* We use object state to communicate any results of matching to the + rewrite application pass. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/stencil.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/stencil.rst new file mode 100644 index 000000000..f27447a7f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/stencil.rst @@ -0,0 +1,170 @@ +.. Copyright (c) 2017 Intel Corporation + SPDX-License-Identifier: BSD-2-Clause + +.. _arch-stencil: + +================= +Notes on stencils +================= + +Numba provides the :ref:`@stencil decorator ` to +represent stencil computations. This document explains how this +feature is implemented in the several different modes available in +Numba. Currently, calls to the stencil from non-jitted code are +supported, as well as calls from jitted code, either with or without +the :ref:`parallel=True ` option. + +The stencil decorator +===================== + +The stencil decorator itself just returns a ``StencilFunc`` object. +This object encapsulates the original stencil kernel function +as specified in the program and the options passed to the +stencil decorator. Also of note is that after the first compilation +of the stencil, the computed neighborhood of the stencil is +stored in the ``StencilFunc`` object in the ``neighborhood`` attribute.
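+For illustration, the cached neighborhood can be observed after the
+first call (a minimal sketch; the ``smooth`` kernel is our own
+example, not Numba API)::
+
+    import numpy as np
+    from numba import stencil
+
+    @stencil
+    def smooth(a):
+        # relative indexing into the input array
+        return 0.5 * (a[-1] + a[1])
+
+    x = np.arange(10.0)
+    y = smooth(x)                # first call compiles the stencil
+    print(smooth.neighborhood)   # now populated, e.g. ((-1, 1),)
+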
+ +Handling the three modes +======================== + +As mentioned above, Numba supports the calling of stencils +from inside or outside a ``@jit`` compiled function, with or +without the :ref:`parallel=True ` option. + +Outside jit context +------------------- + +``StencilFunc`` overrides the ``__call__`` method so that calls +to ``StencilFunc`` objects execute the stencil:: + + def __call__(self, *args, **kwargs): + result = kwargs.get('out') + + new_stencil_func = self._stencil_wrapper(result, None, *args) + + if result is None: + return new_stencil_func.entry_point(*args) + else: + return new_stencil_func.entry_point(*args, result) + +First, the presence of the optional :ref:`out ` +parameter is checked. If it is present then the output array is +stored in ``result``. Then, the call to ``_stencil_wrapper`` +generates the stencil function given the result and argument types +and finally the generated stencil function is executed and its result +returned. + +Jit without ``parallel=True`` +----------------------------- + +When constructed, a ``StencilFunc`` inserts itself into the typing +context's set of user functions and provides the ``_type_me`` +callback. In this way, the standard Numba compiler is able to +determine the output type and signature of a ``StencilFunc``. +Each ``StencilFunc`` maintains a cache of previously seen combinations +of input argument types and keyword types. If previously seen, +the ``StencilFunc`` returns the computed signature. If not previously +computed, the ``StencilFunc`` computes the return type of the stencil +by running the Numba compiler frontend on the stencil kernel and +then performing type inference on the :term:`Numba IR` (IR) to get the scalar +return type of the kernel. From that, a Numpy array type is constructed +whose element type matches that scalar return type. + +After computing the signature of the stencil for a previously +unseen combination of input and keyword types, the ``StencilFunc`` +then :ref:`creates the stencil function <arch-stencil-create-function>` itself. +``StencilFunc`` then installs the new stencil function's definition +in the target context so that jitted code is able to call it. + +Thus, in this mode, the generated stencil function is a stand-alone +function called like a normal function from within jitted code. + +Jit with ``parallel=True`` +-------------------------- + +When calling a ``StencilFunc`` from a jitted context with ``parallel=True``, +a separate stencil function as generated by :ref:`arch-stencil-create-function` +is not used. Instead, `parfors` (:ref:`parallel-accelerator`) are +created within the current function that implements the stencil. +This code again starts with the stencil kernel and does a similar kernel +size computation but then, rather than using standard Python looping syntax, +corresponding `parfors` are created so that the execution of the stencil +will take place in parallel. + +The stencil to `parfor` translations can also be selectively disabled +by setting ``parallel={'stencil': False}``, among other sub-options +described in :ref:`parallel-accelerator`. + +.. _arch-stencil-create-function: + +Creating the stencil function +============================= + +Conceptually, a stencil function is created from the user-specified +stencil kernel by adding looping code around the kernel, transforming +the relative kernel indices into absolute array indices based on the +loop indices, and replacing the kernel's ``return`` statement with +a statement to assign the computed value into the output array.
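+For intuition, a 1-D kernel such as ``0.5 * (a[-1] + a[1])`` is
+conceptually turned into something like the following hand-written
+approximation (a sketch, not the actual generated code)::
+
+    import numpy as np
+
+    def stencil_func_sketch(a):
+        out = np.zeros_like(a)
+        # Loop bounds are shrunk by the kernel size so that the
+        # boundary of the output array is left untouched.
+        for i in range(1, a.shape[0] - 1):
+            out[i] = 0.5 * (a[i - 1] + a[i + 1])
+        return out
+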
+ +To accomplish this transformation, first, a copy of the stencil +kernel IR is created so that subsequent modifications of the IR +for different stencil signatures will not affect each other. + +Then, an approach similar to how GUFuncs are created for `parfors` +is employed. In a text buffer, a Python function is created with +a unique name. The input array parameter is added to the function +definition and if the ``out`` argument type is present then an +``out`` parameter is added to the stencil function definition. +If the ``out`` argument is not present then an output array is +first created with ``numpy.zeros`` having the same shape as the +input array. + +The kernel is then analyzed to compute the stencil size and the +shape of the boundary (or the ``neighborhood`` stencil decorator +argument is used for this purpose if present). +Then, one ``for`` loop for each dimension of the input array is +added to the stencil function definition. The range of each +loop is controlled by the stencil kernel size previously computed +so that the boundary of the output image is not modified but instead +left as is. The body of the innermost ``for`` loop is a single +``sentinel`` statement that is easily recognized in the IR. +A call to ``exec`` with the text buffer is used to force the +stencil function into existence and an ``eval`` is used to get +access to the corresponding function on which ``run_frontend`` is +used to get the stencil function IR. + +Various renaming and relabeling operations are performed on the stencil function +IR and the kernel IR so that the two can be combined without conflict. +The relative indices in the kernel IR (i.e., ``getitem`` calls) are +replaced with expressions where the corresponding loop index variables +are added to the relative indices. The ``return`` statement in the +kernel IR is replaced with a ``setitem`` for the corresponding element +in the output array. +The stencil function IR is then scanned for the sentinel and the +sentinel is replaced with the modified kernel IR. + +Next, ``compile_ir`` is used to compile the combined stencil function +IR. The resulting compile result is cached in the ``StencilFunc`` so that +other calls to the same stencil do not need to undertake this process +again. + +Exceptions raised +================= + +Various checks are performed during stencil compilation to make sure +that user-specified options do not conflict with each other or with +other runtime parameters. For example, if the user has manually +specified a ``neighborhood`` to the stencil decorator, the length of +that neighborhood must match the dimensionality of the input array. +If this is not the case, a ``ValueError`` is raised. + +If the neighborhood has not been specified then it must be inferred +and a requirement to infer the kernel is that all indices are constant +integers. If they are not, a ``ValueError`` is raised indicating that +kernel indices may not be non-constant. + +Finally, the stencil implementation detects the output array type +by running Numba type inference on the stencil kernel. If the +return type of this kernel does not match the type of the value +passed to the ``cval`` stencil decorator option then a ``ValueError`` +is raised.
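+For example, the dimensionality check described above is expected to
+trigger like this (a sketch with a deliberate mismatch)::
+
+    import numpy as np
+    from numba import stencil
+
+    @stencil(neighborhood=((-1, 1),))    # declares a 1-D neighborhood
+    def kernel(a):
+        return a[-1] + a[1]
+
+    kernel(np.ones((4, 4)))    # 2-D input: should raise ValueError
+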
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/target_extension.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/target_extension.rst new file mode 100644 index 000000000..bebd574ad --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/target_extension.rst @@ -0,0 +1,61 @@ +========================== +Notes on Target Extensions +========================== + +.. warning:: All features and APIs described in this page are in-development and + may change at any time without deprecation notices being issued. + + +Inheriting compiler flags from the caller +========================================= + +Compiler flags, i.e. options such as ``fastmath``, ``nrt`` in +``@jit(nrt=True, fastmath=True)`` are specified per-function but their +effects are not well-defined---some flags affect the entire callgraph, some +flags affect only the current function. Sometimes it is necessary for callees +to inherit flags from the caller; for example the ``fastmath`` flag should be +infectious. + +To address the problem, the following are needed: + +1. Better definitions for the semantics of compiler flags. Preferably, all flags should + limit their effect to the current function. (TODO) +2. Allow compiler flags to be inherited from the caller. (Done) +3. Consider compiler flags in function resolution. (TODO) + +:class:`numba.core.targetconfig.ConfigStack` is used to propagate the compiler flags +throughout the compiler. At the start of the compilation, the flags are pushed +into the ``ConfigStack``, which maintains a thread-local stack for the +compilation. Thus, callees can check the flags in the caller. + +.. autoclass:: numba.core.targetconfig.ConfigStack + :members: + +Compiler flags +-------------- + +`Compiler flags`_ are defined as a subclass of ``TargetConfig``: + +.. _Compiler flags: https://github.com/numba/numba/blob/7e8538140ce3f8d01a5273a39233b5481d8b20b1/numba/core/compiler.py#L39 + +.. autoclass:: numba.core.targetconfig.TargetConfig + :members: + + +These are internal compiler flags and they are different from the user-facing +options used in the jit decorators. + +Internally, `the user-facing options are mapped to the internal compiler flags `_ +by :class:`numba.core.options.TargetOptions`. Each target can override the +default compiler flags and control the flag inheritance in +``TargetOptions.finalize``. `The CPU target overrides it. +`_ + +.. autoclass:: numba.core.options.TargetOptions + :members: finalize + + +In :meth:`numba.core.options.TargetOptions.finalize`, +use :meth:`numba.core.targetconfig.TargetConfig.inherit_if_not_set` +to request a compiler flag from the caller if it is not set for the current +function. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/threading_implementation.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/threading_implementation.rst new file mode 100644 index 000000000..487bc2894 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/developer/threading_implementation.rst @@ -0,0 +1,249 @@ +========================================= +Notes on Numba's threading implementation +========================================= + +The execution of the work presented by the Numba ``parallel`` targets is +undertaken by the Numba threading layer. Practically, the "threading layer" +is a Numba built-in library that can perform the required concurrent execution.
+At the time of writing there are three threading layers available, each +implemented via a different lower level native threading library. More +information on the threading layers and appropriate selection of a threading +layer for a given application/system can be found in the +:ref:`threading layer documentation `. + +The pertinent information to note for the following sections is that the +function in the threading library that performs the parallel execution is the +``parallel_for`` function. The job of this function is to both orchestrate and +execute the parallel tasks. + +The relevant source files referenced in this document are + +- ``numba/np/ufunc/tbbpool.cpp`` +- ``numba/np/ufunc/omppool.cpp`` +- ``numba/np/ufunc/workqueue.c`` + + These files contain the TBB, OpenMP, and workqueue threadpool + implementations, respectively. Each includes the functions + ``set_num_threads()``, ``get_num_threads()``, and ``get_thread_id()``, as + well as the relevant logic for thread masking in its respective + scheduler. Note that the basic thread local variable logic is duplicated in + each of these files, and not shared between them. + +- ``numba/np/ufunc/parallel.py`` + + This file contains the Python and JIT compatible wrappers for + ``set_num_threads()``, ``get_num_threads()``, and ``get_thread_id()``, as + well as the code that loads the above libraries into Python and launches the + threadpool. + +- ``numba/parfors/parfor_lowering.py`` + + This file contains the main logic for generating code for the parallel + backend. The thread mask is accessed in this file in the code that generates + scheduler code, and passed to the relevant backend scheduler function (see + below). + +Thread masking +-------------- + +As part of its design, Numba never launches new threads beyond the threads +that are launched initially with ``numba.np.ufunc.parallel._launch_threads()`` +when the first parallel execution is run. This is due to the way threads were +already implemented in Numba prior to thread masking being implemented. This +restriction was retained to keep the design simple, although it could be removed +in the future. Consequently, it's possible to programmatically set the number +of threads, but only to less than or equal to the total number that have +already been launched. This is done by "masking" out unused threads, causing +them to do no work. For example, on a 16 core machine, if the user were to +call ``set_num_threads(4)``, Numba would always have 16 threads present, but +12 of them would sit idle for parallel computations. A further call to +``set_num_threads(16)`` would cause those same threads to do work in later +computations. + +:ref:`Thread masking ` was added to make +it possible for a user to programmatically alter the number of threads +performing work in the threading layer. Thread masking proved challenging to +implement as it required the development of a programming model that is suitable +for users, easy to reason about, and safe to implement, with +consistent behavior across the various threading layers. + +Programming model +~~~~~~~~~~~~~~~~~ + +The programming model chosen is similar to that found in OpenMP. The reasons +for this choice were that it is familiar to a lot of users, restricted in +scope and also simple.
The number of threads in use is specified by calling +``set_num_threads`` and the number of threads in use can be queried by calling +``get_num_threads``. These two functions are synonymous with their OpenMP +counterparts (with the above restriction that the mask must be less than or +equal to the number of launched threads). The execution semantics are also +similar to OpenMP in that once a parallel region is launched, altering the +thread mask has no impact on the currently executing region, but will have an +impact on parallel regions executed subsequently. + +The Implementation +~~~~~~~~~~~~~~~~~~ + +So as to place no further restrictions on user code other than those that +already existed in the threading layer libraries, careful consideration of the +design of thread masking was required. The "thread mask" cannot be stored in a +global value as concurrent use of the threading layer may result in classic +forms of race conditions on the value itself. Numerous designs were discussed +involving various types of mutex on such a global value, all of which were +eventually broken through thought experiment alone. It eventually transpired +that, following some OpenMP implementations, the "thread mask" is best +implemented as a ``thread local``. This means each thread that executes a Numba +parallel function will have a thread local storage (TLS) slot that contains the +value of the thread mask to use when scheduling threads in the ``parallel_for`` +function. + +The above notion of TLS use for a thread mask is relatively easy to implement: +``get_num_threads`` and ``set_num_threads`` simply need to address the TLS slot +in a given threading layer. This also means that the execution schedule for a +parallel region can be derived from a run time call to ``get_num_threads``. This +is achieved via a well-known and relatively easy-to-implement pattern: a ``C`` +library function is registered and wrapped in the internal Numba +implementation. + +In addition to satisfying the original upfront thread masking requirements, a +few more complicated scenarios needed consideration as follows. + +Nested parallelism +****************** + +In all threading layers a "main thread" will invoke the ``parallel_for`` +function and then in the parallel region, depending on the threading layer, +some number of additional threads will assist in doing the actual work. +If the work contains a call to another parallel function (i.e. nested +parallelism) it is necessary for the thread making the call to know what the +"thread mask" of the main thread is so that it can propagate it into the +``parallel_for`` call it makes when executing the nested parallel function. +The implementation of this behavior is threading layer specific but the general +principle is for the "main thread" to always "send" the value of the thread mask +from its TLS slot to all threads in the threading layer that are active in the +parallel region. These active threads then update their TLS slots with this +value prior to performing any work.
The net result of this implementation detail +is that: + +* thread masks correctly propagate into nested functions +* it's still possible for each thread in a parallel region to safely have a + different mask with which to call nested functions; if it's not set explicitly, + the inherited mask from the "main thread" is used +* threading layers which have dynamic scheduling with threads potentially + joining and leaving the active pool during a ``parallel_for`` execution are + successfully accommodated +* any "main thread" thread mask is entirely decoupled from the in-flux nature + of the thread masks of the threads in the active thread pool + +Python threads independently invoking parallel functions +******************************************************** + +The threading layer launch sequence is heavily guarded to ensure that the +launch is both thread and process safe and run once per process. In a system +with numerous Python ``threading`` module threads all using Numba, the first +thread through the launch sequence will get its thread mask set appropriately, +but no further threads can run the launch sequence. This means that other +threads will need their initial thread mask set some other way. This is +achieved when ``get_num_threads`` is called and no thread mask is present; in +this case, the thread mask will be set to the default. In the implementation, +"no thread mask is present" is represented by the value ``-1`` and the "default +thread mask" (unset) is represented by the value ``0``. The implementation also +immediately calls ``set_num_threads(NUMBA_NUM_THREADS)`` after doing this, so +if either ``-1`` or ``0`` is encountered as a result from ``get_num_threads()`` it +indicates a bug in the above processes. + +OS ``fork()`` calls +******************* + +The use of TLS was also in part driven by Linux (the most popular +platform for Numba use by far) having a ``fork(2, 3P)`` call that will do TLS +propagation into child processes, see ``clone(2)``\ 's ``CLONE_SETTLS``. + +Thread ID +********* + +A private ``get_thread_id()`` function was added to each threading backend, +which returns a unique ID for each thread. This can be accessed from Python by +``numba.np.ufunc.parallel._get_thread_id()`` (it can also be used inside a +JIT compiled function). The thread ID function is useful for testing that the +thread masking behavior is correct, but it should not be used outside of the +tests. For example, one can call ``set_num_threads(4)`` and then collect all +unique ``_get_thread_id()``\ s in a parallel region to verify that only 4 +threads are run. + +Caveats +~~~~~~~ + +Some caveats to be aware of when testing thread masking: + +- The TBB backend may choose to schedule fewer than the given mask number of + threads. Thus a test such as the one described above may return fewer than 4 + unique threads. + +- The workqueue backend is not threadsafe, so attempts to do multithreading + nested parallelism with it may result in deadlocks or other undefined + behavior. The workqueue backend will raise a SIGABRT signal if it detects + nested parallelism. + +- Certain backends may reuse the main thread for computation, but this + behavior shouldn't be relied upon (for instance, if propagating exceptions). + +Use in Code Generation +~~~~~~~~~~~~~~~~~~~~~~ + +The general pattern for using ``get_num_threads`` in code generation is + +.. 
code:: python + + from llvmlite import ir as llvmir + + # builder, context, cgutils and types are assumed to be in scope + # in the surrounding lowering code + get_num_threads = cgutils.get_or_insert_function(builder.module, + llvmir.FunctionType(llvmir.IntType(types.intp.bitwidth), []), + name="get_num_threads") + + num_threads = builder.call(get_num_threads, []) + + with cgutils.if_unlikely(builder, builder.icmp_signed('<=', num_threads, + num_threads.type(0))): + cgutils.printf(builder, "num_threads: %d\n", num_threads) + context.call_conv.return_user_exc(builder, RuntimeError, + ("Invalid number of threads. " + "This likely indicates a bug in Numba.",)) + + # Pass num_threads through to the appropriate backend function here + +See the code in ``numba/parfors/parfor_lowering.py``. + +The guard against ``num_threads`` being <= 0 is not strictly necessary, but it +can protect against accidentally incorrect behavior in case the thread masking +logic contains a bug. + +The ``num_threads`` variable should be passed through to the appropriate +backend function, such as ``do_scheduling`` or ``parallel_for``. If it's used +in some way other than passing it through to the backend function, the above +considerations should be taken into account to ensure the use of the +``num_threads`` variable is safe. It would probably be better to keep such +logic in the threading backends, rather than trying to do it in code +generation. + +.. _chunk-details-label: + +Parallel Chunksize Details +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are some cases in which the actual parallel work chunk sizes may differ +from the chunk size requested through :func:`numba.set_parallel_chunksize`. +First, if the number of required chunks based on the specified chunk size +is less than the number of configured threads then Numba will use all of the configured +threads to execute the parallel region. In this case, the actual chunk size will be +less than the requested chunk size. Second, due to truncation, in cases where the +iteration count is slightly less than a multiple of the chunk size +(e.g., 14 iterations and a specified chunk size of 5), the actual chunk size will be +larger than the specified chunk size. As in the given example, the number of chunks +would be 2 and the actual chunk size would be 7 (i.e. 14 / 2). Lastly, since Numba +divides an N-dimensional iteration space into N-dimensional (hyper)rectangular chunks, +it may be the case there are not N integer factors whose product is equal to the chunk +size. In this case, some chunks will have an area/volume larger than the chunk size +whereas others will be less than the specified chunk size. + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/entrypoints.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/entrypoints.rst new file mode 100644 index 000000000..143c2e090 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/entrypoints.rst @@ -0,0 +1,65 @@ +Registering Extensions with Entry Points +======================================== + +Often, third party packages will have a user-facing API as well as define +extensions to the Numba compiler. In those situations, the new types and +overloads can be registered with Numba when the package is imported by the user. +However, there are situations where a Numba extension would not normally be +imported directly by the user, but must still be registered with the Numba +compiler. An example of this is the `numba-scipy +<https://github.com/numba/numba-scipy>`_ package, which adds support for some +SciPy functions to Numba.
The end user does not need to ``import +numba_scipy`` to enable compiler support for SciPy; the extension only needs +to be installed in the Python environment. + +Numba discovers extensions using the `entry points +`_ +feature of ``setuptools``. This allows a Python package to register an +initializer function that will be called before ``numba`` compiles for the +first time. The delay ensures that the cost of importing extensions is +deferred until it is necessary. + + +Adding Support for the "Init" Entry Point +----------------------------------------- + +A package can register an initialization function with Numba by adding the +``entry_points`` argument to the ``setup()`` function call in ``setup.py``: + +.. code-block:: python + + setup( + ..., + entry_points={ + "numba_extensions": [ + "init = numba_scipy:_init_extension", + ], + }, + ... + ) + +Numba currently only looks for the ``init`` entry point in the +``numba_extensions`` group. The entry point should be a function (any name, +as long as it matches what is listed in ``setup.py``) that takes no arguments, +and the return value is ignored. This function should register types, +overloads, or call other Numba extension APIs. The order of initialization of +extensions is undefined. + +Testing your Entry Point +------------------------ + +Numba loads all entry points when the first function is compiled. To test your +entry point, it is not sufficient to just ``import numba``; you have to define +and run a small function, like this: + +.. code-block:: python + + import numba; numba.njit(lambda x: x + 1)(123) + +It is not necessary to import your module: entry points are identified by the +``entry_points.txt`` file in your library's ``*.egg-info`` directory. + +The ``setup.py build`` command does not create eggs, but ``setup.py sdist`` +(for testing in a local directory) and ``setup.py install`` do. All entry points +registered in eggs that are on the Python path are loaded. Be sure to check for +stale ``entry_points.txt`` when debugging. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/high-level.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/high-level.rst new file mode 100644 index 000000000..4e4877336 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/high-level.rst @@ -0,0 +1,254 @@ + +.. _high-level-extending: + +High-level extension API +======================== + +This extension API is exposed through the :mod:`numba.extending` module. + +To aid debugging extensions to Numba, it's recommended to set the following +environment variable:: + + NUMBA_CAPTURED_ERRORS="new_style" + +This makes it easy to differentiate between errors in implementation and +acceptable errors that can take part in e.g. type inference. For more +information see :envvar:`NUMBA_CAPTURED_ERRORS`. + +Implementing functions +---------------------- + +The ``@overload`` decorator allows you to implement arbitrary functions +for use in :term:`nopython mode` functions. The function decorated with +``@overload`` is called at compile-time with the *types* of the function's +runtime arguments. It should return a callable representing the +*implementation* of the function for the given types. The returned +implementation is compiled by Numba as if it were a normal function +decorated with ``@jit``. Additional options to ``@jit`` can be passed as +a dictionary using the ``jit_options`` argument.
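+A hedged sketch of forwarding such options (``clip`` and its overload
+are our own example names, not part of Numba)::
+
+    from numba.extending import overload
+
+    def clip(x, lo, hi):
+        # pure-Python reference implementation
+        return max(lo, min(x, hi))
+
+    @overload(clip, jit_options={'fastmath': True})
+    def clip_overload(x, lo, hi):
+        # the returned implementation is compiled as if decorated
+        # with @jit(fastmath=True)
+        def impl(x, lo, hi):
+            return max(lo, min(x, hi))
+        return impl
+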
+ +For example, let's pretend Numba doesn't support the :func:`len` function +on tuples yet. Here is how to implement it using ``@overload``:: + + from numba import types + from numba.extending import overload + + @overload(len) + def tuple_len(seq): + if isinstance(seq, types.BaseTuple): + n = len(seq) + def len_impl(seq): + return n + return len_impl + + +You might wonder, what happens if :func:`len()` is called with something +other than a tuple? If a function decorated with ``@overload`` doesn't +return anything (i.e. returns None), other definitions are tried until +one succeeds. Therefore, multiple libraries may overload :func:`len()` +for different types without conflicting with each other. + +Implementing methods +-------------------- + +The ``@overload_method`` decorator similarly allows implementing a +method on a type well-known to Numba. + +.. autofunction:: numba.core.extending.overload_method + +Implementing classmethods +------------------------- + +The ``@overload_classmethod`` decorator similarly allows implementing a +classmethod on a type well-known to Numba. + +.. autofunction:: numba.core.extending.overload_classmethod + + +Implementing attributes +----------------------- + +The ``@overload_attribute`` decorator allows implementing a data +attribute (or property) on a type. Only reading the attribute is +possible; writable attributes are only supported through the +:ref:`low-level API <low-level-extending>`. + +The following example implements the :attr:`~numpy.ndarray.nbytes` attribute +on Numpy arrays:: + + @overload_attribute(types.Array, 'nbytes') + def array_nbytes(arr): + def get(arr): + return arr.size * arr.itemsize + return get + +.. _cython-support: + +Importing Cython Functions +-------------------------- + +The function ``get_cython_function_address`` obtains the address of a +C function in a Cython extension module. The address can be used to +access the C function via a :func:`ctypes.CFUNCTYPE` callback, thus +allowing use of the C function inside a Numba jitted function. For +example, suppose that you have the file ``foo.pyx``:: + + from libc.math cimport exp + + cdef api double myexp(double x): + return exp(x) + +You can access ``myexp`` from Numba in the following way:: + + import ctypes + from numba.extending import get_cython_function_address + + addr = get_cython_function_address("foo", "myexp") + functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double) + myexp = functype(addr) + +The function ``myexp`` can now be used inside jitted functions, for +example:: + + @njit + def double_myexp(x): + return 2*myexp(x) + +One caveat is that if your function uses Cython's fused types, then +the function's name will be mangled. To find out the mangled name of +your function you can check the extension module's ``__pyx_capi__`` +attribute. + +Implementing intrinsics +----------------------- + +The ``@intrinsic`` decorator is used for marking a function *func* as typing and +implementing the function in ``nopython`` mode using the +`llvmlite IRBuilder API `_. +This is an escape hatch for expert users to build custom LLVM IR that will be +inlined into the caller; there is no safety net! + +The first argument to *func* is the typing context. The rest of the arguments +correspond to the types of the arguments of the decorated function. These arguments +are also used as the formal arguments of the decorated function. If *func* has +the signature ``foo(typing_context, arg0, arg1)``, the decorated function will +have the signature ``foo(arg0, arg1)``.
+ +The return values of *func* should be a 2-tuple of the expected type signature +and a code-generation function that will be passed to +:func:`~numba.targets.imputils.lower_builtin`. For an unsupported operation, +return ``None``. + +Here is an example that casts any integer to a byte pointer:: + + from numba import types + from numba.extending import intrinsic + + @intrinsic + def cast_int_to_byte_ptr(typingctx, src): + # check for accepted types + if isinstance(src, types.Integer): + # create the expected type signature + result_type = types.CPointer(types.uint8) + sig = result_type(types.uintp) + # defines the custom code generation + def codegen(context, builder, signature, args): + # llvm IRBuilder code here + [src] = args + rtype = signature.return_type + llrtype = context.get_value_type(rtype) + return builder.inttoptr(src, llrtype) + return sig, codegen + +It may be used as follows:: + + from numba import njit + + @njit('void(int64)') + def foo(x): + y = cast_int_to_byte_ptr(x) + + foo.inspect_types() + +and the output of ``.inspect_types()`` demonstrates the cast (note the +``uint8*``):: + + def foo(x): + + # x = arg(0, name=x) :: int64 + # $0.1 = global(cast_int_to_byte_ptr: <intrinsic cast_int_to_byte_ptr>) :: Function(<intrinsic cast_int_to_byte_ptr>) + # $0.3 = call $0.1(x, func=$0.1, args=[Var(x, check_intrin.py (24))], kws=(), vararg=None) :: (uint64,) -> uint8* + # del x + # del $0.1 + # y = $0.3 :: uint8* + # del y + # del $0.3 + # $const0.4 = const(NoneType, None) :: none + # $0.5 = cast(value=$const0.4) :: none + # del $const0.4 + # return $0.5 + + y = cast_int_to_byte_ptr(x) + + +Implementing mutable structures +------------------------------- + +.. warning:: This is an experimental feature; the API may change without warning. + +The ``numba.experimental.structref`` module provides utilities for defining +mutable pass-by-reference structures, a ``StructRef``. The following example +demonstrates how to define a basic mutable structure: + +Defining a StructRef +'''''''''''''''''''' + +.. literalinclude:: ../../../numba/tests/doc_examples/test_structref_usage.py + :language: python + :caption: from ``numba/tests/doc_examples/test_structref_usage.py`` + :start-after: magictoken.ex_structref_type_definition.begin + :end-before: magictoken.ex_structref_type_definition.end + :dedent: 0 + :linenos: + +The following demonstrates using the above mutable struct definition: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_structref_usage.py + :language: python + :caption: from ``test_type_definition`` of ``numba/tests/doc_examples/test_structref_usage.py`` + :start-after: magictoken.ex_structref_type_definition_test.begin + :end-before: magictoken.ex_structref_type_definition_test.end + :dedent: 8 + :linenos: + + +Defining a method on StructRef +'''''''''''''''''''''''''''''' + +Methods and attributes can be attached using ``@overload_*`` as shown in the +previous sections. + +The following demonstrates the use of ``@overload_method`` to insert a +method for instances of ``MyStructType``: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_structref_usage.py + :language: python + :caption: from ``test_overload_method`` of ``numba/tests/doc_examples/test_structref_usage.py`` + :start-after: magictoken.ex_structref_method.begin + :end-before: magictoken.ex_structref_method.end + :dedent: 8 + :linenos: + + +``numba.experimental.structref`` API Reference +'''''''''''''''''''''''''''''''''''''''''''''' + +.. 
automodule:: numba.experimental.structref + :members: + +Determining if a function is already wrapped by a ``jit`` family decorator +-------------------------------------------------------------------------- + +The following function is provided for this purpose. + +.. automethod:: numba.extending.is_jitted diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/index.rst new file mode 100644 index 000000000..fb6cd5160 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/index.rst @@ -0,0 +1,30 @@ + +Extending Numba +=============== + +.. module:: numba.extending + +This chapter describes how to extend Numba to make it recognize and support +additional operations, functions or types. Numba provides two categories +of APIs to this end: + +* The high-level APIs provide abstracted entry points which are sufficient + for simple uses. They require little knowledge of Numba's internal + compilation chain. + +* The low-level APIs reflect Numba's internal compilation chain and allow + flexible interaction with its various layers, but require more effort + and experience with Numba internals. + +It may be helpful for readers of this chapter to also read some of the +documents in the :doc:`developer manual <../developer/index>`, especially +the :doc:`architecture document <../developer/architecture>`. + + +.. toctree:: + high-level.rst + low-level.rst + interval-example.rst + overloading-guide.rst + entrypoints.rst + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/interval-example.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/interval-example.rst new file mode 100644 index 000000000..e561ee9bd --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/interval-example.rst @@ -0,0 +1,345 @@ + +Example: an interval type +========================= + +We will extend the Numba frontend to support a class that it does not +currently recognize, so as to allow: + +* Passing an instance of the class to a Numba function +* Accessing attributes of the class in a Numba function +* Constructing and returning a new instance of the class from a Numba function + +(all the above in :term:`nopython mode`) + +We will mix APIs from the :ref:`high-level extension API <high-level-extending>` +and the :ref:`low-level extension API <low-level-extending>`, depending on what is +available for a given task. + +The starting point for our example is the following pure Python class:: + + class Interval(object): + """ + A half-open interval on the real number line. + """ + def __init__(self, lo, hi): + self.lo = lo + self.hi = hi + + def __repr__(self): + return 'Interval(%f, %f)' % (self.lo, self.hi) + + @property + def width(self): + return self.hi - self.lo + + +Extending the typing layer +"""""""""""""""""""""""""" + +Creating a new Numba type +------------------------- + +As the ``Interval`` class is not known to Numba, we must create a new Numba +type to represent instances of it. Numba does not deal with Python types +directly: it has its own type system that allows a different level of +granularity as well as various meta-information not available with regular +Python types.
+ +We first create a type class ``IntervalType`` and, since we don't need the +type to be parametric, we instantiate a single type instance ``interval_type``:: + + from numba import types + + class IntervalType(types.Type): + def __init__(self): + super(IntervalType, self).__init__(name='Interval') + + interval_type = IntervalType() + + +Type inference for Python values +-------------------------------- + +In itself, creating a Numba type doesn't do anything. We must teach Numba +how to infer some Python values as instances of that type. In this example, +it is trivial: any instance of the ``Interval`` class should be treated as +belonging to the type ``interval_type``:: + + from numba.extending import typeof_impl + + @typeof_impl.register(Interval) + def typeof_interval(val, c): + return interval_type + +Function arguments and global values will thus be recognized as belonging +to ``interval_type`` whenever they are instances of ``Interval``. + + +Type inference for Python annotations +------------------------------------- + +While ``typeof`` is used to infer the Numba type of Python objects, +``as_numba_type`` is used to infer the Numba type of Python types. For simple +cases, we can simply register that the Python type ``Interval`` corresponds with +the Numba type ``interval_type``:: + + from numba.extending import as_numba_type + + as_numba_type.register(Interval, interval_type) + +Note that ``as_numba_type`` is only used to infer types from type annotations at +compile time. The ``typeof`` registry above is used to infer the type of +objects at runtime. + + +Type inference for operations +----------------------------- + +We want to be able to construct interval objects from Numba functions, so +we must teach Numba to recognize the two-argument ``Interval(lo, hi)`` +constructor. The arguments should be floating-point numbers:: + + from numba.extending import type_callable + + @type_callable(Interval) + def type_interval(context): + def typer(lo, hi): + if isinstance(lo, types.Float) and isinstance(hi, types.Float): + return interval_type + return typer + + +The :func:`type_callable` decorator specifies that the decorated function +should be invoked when running type inference for the given callable object +(here the ``Interval`` class itself). The decorated function must simply +return a typer function that will be called with the argument types. The +reason for this seemingly convoluted setup is for the typer function to have +*exactly* the same signature as the typed callable. This allows handling +keyword arguments correctly. + +The *context* argument received by the decorated function is useful in +more sophisticated cases where computing the callable's return type +requires resolving other types. + + +Extending the lowering layer +"""""""""""""""""""""""""""" + +We have finished teaching Numba about our type inference additions. +We must now teach Numba how to actually generate code and data for +the new operations. + + +Defining the data model for native intervals +-------------------------------------------- + +As a general rule, :term:`nopython mode` does not work on Python objects +as they are generated by the CPython interpreter. The representations +used by the interpreter are far too inefficient for fast native code. +Each type supported in :term:`nopython mode` therefore has to define +a tailored native representation, also called a *data model*. + +A common case of data model is an immutable struct-like data model that +is akin to a C ``struct``.
Our interval datatype conveniently falls in +that category, and here is a possible data model for it:: + + from numba.extending import models, register_model + + @register_model(IntervalType) + class IntervalModel(models.StructModel): + def __init__(self, dmm, fe_type): + members = [ + ('lo', types.float64), + ('hi', types.float64), + ] + models.StructModel.__init__(self, dmm, fe_type, members) + + +This instructs Numba that values of type ``IntervalType`` (or any instance +thereof) are represented as a structure of two fields ``lo`` and ``hi``, +each of them a double-precision floating-point number (``types.float64``). + +.. note:: + Mutable types need more sophisticated data models to be able to + persist their values after modification. They typically cannot be + stored and passed on the stack or in registers like immutable types do. + + +Exposing data model attributes +------------------------------ + +We want the data model attributes ``lo`` and ``hi`` to be exposed under +the same names for use in Numba functions. Numba provides a convenience +function to do exactly that:: + + from numba.extending import make_attribute_wrapper + + make_attribute_wrapper(IntervalType, 'lo', 'lo') + make_attribute_wrapper(IntervalType, 'hi', 'hi') + +This will expose the attributes in read-only mode. As mentioned above, +writable attributes don't fit in this model. + + +Exposing a property +------------------- + +As the ``width`` property is computed rather than stored in the structure, +we cannot simply expose it like we did for ``lo`` and ``hi``. We have to +re-implement it explicitly:: + + from numba.extending import overload_attribute + + @overload_attribute(IntervalType, "width") + def get_width(interval): + def getter(interval): + return interval.hi - interval.lo + return getter + +You might ask why we didn't need to expose a type inference hook for this +attribute? The answer is that ``@overload_attribute`` is part of the +high-level API: it combines type inference and code generation in a +single API. + + +Implementing the constructor +---------------------------- + +Now we want to implement the two-argument ``Interval`` constructor:: + + from numba.extending import lower_builtin + from numba.core import cgutils + + @lower_builtin(Interval, types.Float, types.Float) + def impl_interval(context, builder, sig, args): + typ = sig.return_type + lo, hi = args + interval = cgutils.create_struct_proxy(typ)(context, builder) + interval.lo = lo + interval.hi = hi + return interval._getvalue() + + +There is a bit more going on here. ``@lower_builtin`` decorates the +implementation of the given callable or operation (here the ``Interval`` +constructor) for some specific argument types. This allows defining +type-specific implementations of a given operation, which is important +for heavily overloaded functions such as :func:`len`. + +``types.Float`` is the class of all floating-point types (``types.float64`` +is an instance of ``types.Float``). It is generally more future-proof +to match argument types on their class rather than on specific instances +(however, when *returning* a type -- chiefly during the type inference +phase --, you must usually return a type instance). + +``cgutils.create_struct_proxy()`` and ``interval._getvalue()`` are a bit +of boilerplate due to how Numba passes values around. Values are passed +as instances of :class:`llvmlite.ir.Value`, which can be too limited: +LLVM structure values especially are quite low-level. 
A struct proxy +is a temporary wrapper around a LLVM structure value allowing to easily +get or set members of the structure. The ``_getvalue()`` call simply +gets the LLVM value out of the wrapper. + + +Boxing and unboxing +------------------- + +If you try to use an ``Interval`` instance at this point, you'll certainly +get the error *"cannot convert Interval to native value"*. This is because +Numba doesn't yet know how to make a native interval value from a Python +``Interval`` instance. Let's teach it how to do it:: + + from numba.extending import unbox, NativeValue + + @unbox(IntervalType) + def unbox_interval(typ, obj, c): + """ + Convert a Interval object to a native interval structure. + """ + lo_obj = c.pyapi.object_getattr_string(obj, "lo") + hi_obj = c.pyapi.object_getattr_string(obj, "hi") + interval = cgutils.create_struct_proxy(typ)(c.context, c.builder) + interval.lo = c.pyapi.float_as_double(lo_obj) + interval.hi = c.pyapi.float_as_double(hi_obj) + c.pyapi.decref(lo_obj) + c.pyapi.decref(hi_obj) + is_error = cgutils.is_not_null(c.builder, c.pyapi.err_occurred()) + return NativeValue(interval._getvalue(), is_error=is_error) + +*Unbox* is the other name for "convert a Python object to a native value" +(it fits the idea of a Python object as a sophisticated box containing +a simple native value). The function returns a ``NativeValue`` object +which gives its caller access to the computed native value, the error bit +and possibly other information. + +The snippet above makes abundant use of the ``c.pyapi`` object, which +gives access to a subset of the +`Python interpreter's C API `_. +Note the use of ``c.pyapi.err_occurred()`` to detect any errors that +may have happened when unboxing the object (try passing ``Interval('a', 'b')`` +for example). + +We also want to do the reverse operation, called *boxing*, so as to return +interval values from Numba functions:: + + from numba.extending import box + + @box(IntervalType) + def box_interval(typ, val, c): + """ + Convert a native interval structure to an Interval object. + """ + interval = cgutils.create_struct_proxy(typ)(c.context, c.builder, value=val) + lo_obj = c.pyapi.float_from_double(interval.lo) + hi_obj = c.pyapi.float_from_double(interval.hi) + class_obj = c.pyapi.unserialize(c.pyapi.serialize_object(Interval)) + res = c.pyapi.call_function_objargs(class_obj, (lo_obj, hi_obj)) + c.pyapi.decref(lo_obj) + c.pyapi.decref(hi_obj) + c.pyapi.decref(class_obj) + return res + + +Using it +"""""""" + +:term:`nopython mode` functions are now able to make use of Interval objects +and the various operations you have defined on them. 
You can try for
+example the following functions::
+
+    from numba import jit
+
+    @jit(nopython=True)
+    def inside_interval(interval, x):
+        return interval.lo <= x < interval.hi
+
+    @jit(nopython=True)
+    def interval_width(interval):
+        return interval.width
+
+    @jit(nopython=True)
+    def sum_intervals(i, j):
+        return Interval(i.lo + j.lo, i.hi + j.hi)
+
+
+Conclusion
+""""""""""
+
+We have shown how to do the following tasks:
+
+* Define a new Numba type class by subclassing the ``Type`` class
+* Define a singleton Numba type instance for a non-parametric type
+* Teach Numba how to infer the Numba type of Python values of a certain class,
+  using ``typeof_impl.register``
+* Teach Numba how to infer the Numba type of the Python type itself, using
+  ``as_numba_type.register``
+* Define the data model for a Numba type using ``StructModel``
+  and ``register_model``
+* Implement a boxing function for a Numba type using the ``@box`` decorator
+* Implement an unboxing function for a Numba type using the ``@unbox`` decorator
+  and the ``NativeValue`` class
+* Type and implement a callable using the ``@type_callable`` and
+  ``@lower_builtin`` decorators
+* Expose a read-only structure attribute using the ``make_attribute_wrapper``
+  convenience function
+* Implement a read-only property using the ``@overload_attribute`` decorator
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/low-level.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/low-level.rst
new file mode 100644
index 000000000..8eba72b3e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/low-level.rst
@@ -0,0 +1,194 @@
+
+.. _low-level-extending:
+
+Low-level extension API
+=======================
+
+This extension API is available through the :mod:`numba.extending` module.
+It allows you to hook directly into the Numba compilation chain. As such,
+it distinguishes between several compilation phases:
+
+* The :term:`typing` phase deduces the types of variables in a compiled
+  function by looking at the operations performed.
+
+* The :term:`lowering` phase converts high-level Python operations into
+  low-level LLVM code. This phase exploits the typing information derived
+  by the typing phase.
+
+* *Boxing* and *unboxing* convert Python objects into native values, and
+  vice-versa. They occur at the boundaries of calling a Numba function
+  from the Python interpreter.
+
+
+Typing
+------
+
+.. XXX the API described here can be insufficient for some use cases.
+   Should we describe the whole templates menagerie?
+
+Type inference -- or simply *typing* -- is the process of assigning
+Numba types to all values involved in a function, so as to enable
+efficient code generation. Broadly speaking, typing comes in two flavours:
+typing plain Python *values* (e.g. function arguments or global variables)
+and typing *operations* (or *functions*) on known value types.
+
+.. decorator:: typeof_impl.register(cls)
+
+   Register the decorated function as typing Python values of class *cls*.
+   The decorated function will be called with the signature ``(val, c)``
+   where *val* is the Python value being typed and *c* is a context
+   object.
+
+
+.. decorator:: type_callable(func)
+
+   Register the decorated function as typing the callable *func*.
+   *func* can be either an actual Python callable or a string denoting
+   an operation internally known to Numba (for example ``'getitem'``).
+   The decorated function is called with a single *context* argument
+   and must return a typer function. The typer function should have
+   the same signature as the function being typed, and it is called
+   with the Numba *types* of the function arguments; it should return
+   either the Numba type of the function's return value, or ``None``
+   if inference failed.
+
+.. function:: as_numba_type.register(py_type, numba_type)
+
+   Register that the Python type *py_type* corresponds with the Numba type
+   *numba_type*. This can be used to register a new type or overwrite the
+   existing default (e.g. to treat ``float`` as ``numba.float32`` instead of
+   ``numba.float64``).
+
+.. decorator:: as_numba_type.register
+
+   Register the decorated function as a type inference function used by
+   ``as_numba_type`` when trying to infer the Numba type of a Python type.
+   The decorated function is called with a single *py_type* argument
+   and returns either a corresponding Numba type, or None if it cannot infer
+   that *py_type*.
+
+
+Lowering
+--------
+
+The following decorators all take a type specification of some kind.
+A type specification is usually a type class (such as ``types.Float``)
+or a specific type instance (such as ``types.float64``). Some values
+have a special meaning:
+
+* ``types.Any`` matches any type; this allows doing your own dispatching
+  inside the implementation
+
+* ``types.VarArg(<type>)`` matches any number of arguments of the
+  given type; it can only appear as the last type specification when
+  describing a function's arguments.
+
+A *context* argument in the following APIs is a target context providing
+various utility methods for code generation (such as creating a constant,
+converting from one type to another, looking up the implementation of a
+specific function, etc.). A *builder* argument is a
+:class:`llvmlite.ir.IRBuilder` instance for the LLVM code being generated.
+
+A *signature* is an object specifying the concrete type of an operation.
+The ``args`` attribute of the signature is a tuple of the argument types.
+The ``return_type`` attribute of the signature is the type that the
+operation should return.
+
+.. note::
+   Numba always reasons on Numba types, but the values being passed
+   around during lowering are LLVM values: they don't hold the required
+   type information, which is why Numba types are passed explicitly too.
+
+   LLVM has its own, very low-level type system: you can access the LLVM
+   type of a value by looking up its ``.type`` attribute.
+
+
+Native operations
+'''''''''''''''''
+
+.. decorator:: lower_builtin(func, typespec, ...)
+
+   Register the decorated function as implementing the callable *func*
+   for the arguments described by the given Numba *typespecs*.
+   As with :func:`type_callable`, *func* can be either an actual Python
+   callable or a string denoting an operation internally known to Numba
+   (for example ``'getitem'``).
+
+   The decorated function is called with four arguments
+   ``(context, builder, sig, args)``. ``sig`` is the concrete signature
+   the callable is being invoked with. ``args`` is a tuple of the values
+   of the arguments the callable is being invoked with; each value in
+   ``args`` corresponds to a type in ``sig.args``. The function
+   must return a value compatible with the type ``sig.return_type``.
+
+.. decorator:: lower_getattr(typespec, name)
+
+   Register the decorated function as implementing the attribute *name*
+   of the given *typespec*. The decorated function is called with four
+   arguments ``(context, builder, typ, value)``.
*typ* is the concrete + type the attribute is being looked up on. *value* is the value the + attribute is being looked up on. + +.. decorator:: lower_getattr_generic(typespec) + + Register the decorated function as a fallback for attribute lookup + on a given *typespec*. Any attribute that does not have a corresponding + :func:`lower_getattr` declaration will go through + :func:`lower_getattr_generic`. The decorated function is called with + five arguments ``(context, builder, typ, value, name)``. *typ* + and *value* are as in :func:`lower_getattr`. *name* is the name + of the attribute being looked up. + +.. decorator:: lower_cast(fromspec, tospec) + + Register the decorated function as converting from types described by + *fromspec* to types described by *tospec*. The decorated function + is called with five arguments ``(context, builder, fromty, toty, value)``. + *fromty* and *toty* are the concrete types being converted from and to, + respectively. *value* is the value being converted. The function + must return a value compatible with the type ``toty``. + + +Constants +''''''''' + +.. decorator:: lower_constant(typespec) + + Register the decorated function as implementing the creation of + constants for the Numba *typespec*. The decorated function + is called with four arguments ``(context, builder, ty, pyval)``. + *ty* is the concrete type to create a constant for. *pyval* + is the Python value to convert into a LLVM constant. + The function must return a value compatible with the type ``ty``. + + +Boxing and unboxing +''''''''''''''''''' + +In these functions, *c* is a convenience object with several attributes: + +* its ``context`` attribute is a target context as above +* its ``builder`` attribute is a :class:`llvmlite.ir.IRBuilder` as above +* its ``pyapi`` attribute is an object giving access to a subset of the + `Python interpreter's C API `_ + +An object, as opposed to a native value, is a ``PyObject *`` pointer. +Such pointers can be produced or processed by the methods in the ``pyapi`` +object. + +.. decorator:: box(typespec) + + Register the decorated function as boxing values matching the *typespec*. + The decorated function is called with three arguments ``(typ, val, c)``. + *typ* is the concrete type being boxed. *val* is the value being + boxed. The function should return a Python object, or NULL to signal + an error. + +.. decorator:: unbox(typespec) + + Register the decorated function as unboxing values matching the *typespec*. + The decorated function is called with three arguments ``(typ, obj, c)``. + *typ* is the concrete type being unboxed. *obj* is the Python object + (a ``PyObject *`` pointer, in C terms) being unboxed. The function + should return a ``NativeValue`` object giving the unboxing result value + and an optional error bit. 
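+
+As a worked example of the constant path described above, here is a minimal
+sketch of a ``lower_constant`` implementation for the ``IntervalType`` from
+the :doc:`interval example <interval-example>`, assuming the struct data
+model defined there::
+
+    from numba import types
+    from numba.core import cgutils
+    from numba.extending import lower_constant
+
+    @lower_constant(IntervalType)
+    def constant_interval(context, builder, ty, pyval):
+        # build the native struct value field by field from the Python object
+        interval = cgutils.create_struct_proxy(ty)(context, builder)
+        interval.lo = context.get_constant(types.float64, pyval.lo)
+        interval.hi = context.get_constant(types.float64, pyval.hi)
+        return interval._getvalue()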
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/mynorm.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/mynorm.py
new file mode 100644
index 000000000..884634ea3
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/mynorm.py
@@ -0,0 +1,75 @@
+import numpy as np
+from numba import njit, types
+from numba.extending import overload, register_jitable
+from numba.core.errors import TypingError
+
+import scipy.linalg
+
+
+@register_jitable
+def _oneD_norm_2(a):
+    # re-usable implementation of the 2-norm
+    val = np.abs(a)
+    return np.sqrt(np.sum(val * val))
+
+
+@overload(scipy.linalg.norm)
+def jit_norm(a, ord=None):
+    if isinstance(ord, types.Optional):
+        ord = ord.type
+    # Reject types for ord other than integer, floating-point or None
+    if not isinstance(ord, (types.Integer, types.Float, types.NoneType)):
+        raise TypingError("'ord' must be either integer or floating-point")
+    # Reject non-ndarray types
+    if not isinstance(a, types.Array):
+        raise TypingError("Only accepts NumPy ndarray")
+    # Reject ndarrays with non integer or floating-point dtype
+    if not isinstance(a.dtype, (types.Integer, types.Float)):
+        raise TypingError("Only integer and floating point types accepted")
+    # Reject ndarrays with unsupported dimensionality
+    if not (0 <= a.ndim <= 2):
+        raise TypingError('3D and beyond are not allowed')
+    # Implementation for scalars/0d-arrays: an implementation function must
+    # be returned here (as in the other branches); the norm is the absolute value
+    elif a.ndim == 0:
+        def _norm_0D(a, ord=None):
+            return abs(a.item())
+        return _norm_0D
+    # Implementation for vectors
+    elif a.ndim == 1:
+        def _oneD_norm_x(a, ord=None):
+            if ord == 2 or ord is None:
+                return _oneD_norm_2(a)
+            elif ord == np.inf:
+                return np.max(np.abs(a))
+            elif ord == -np.inf:
+                return np.min(np.abs(a))
+            elif ord == 0:
+                return np.sum(a != 0)
+            elif ord == 1:
+                return np.sum(np.abs(a))
+            else:
+                return np.sum(np.abs(a)**ord)**(1. / ord)
+        return _oneD_norm_x
+    # Implementation for matrices
+    elif a.ndim == 2:
+        def _two_D_norm_2(a, ord=None):
+            return _oneD_norm_2(a.ravel())
+        return _two_D_norm_2
+
+
+if __name__ == "__main__":
+    @njit
+    def use(a, ord=None):
+        # simple test function to check that the overload works
+        return scipy.linalg.norm(a, ord)
+
+    # spot check for vectors
+    a = np.arange(10)
+    print(use(a))
+    print(scipy.linalg.norm(a))
+
+    # spot check for matrices
+    b = np.arange(9).reshape((3, 3))
+    print(use(b))
+    print(scipy.linalg.norm(b))
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/overloading-guide.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/overloading-guide.rst
new file mode 100644
index 000000000..a5645067e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/overloading-guide.rst
@@ -0,0 +1,192 @@
+
+.. _overloading-guide:
+
+==============================
+A guide to using ``@overload``
+==============================
+
+
+As mentioned in the :ref:`high-level extension API <high-level-extending>`, you
+can use the ``@overload`` decorator to create a Numba implementation of a
+function that can be used in :term:`nopython mode` functions. A common use case
+is to re-implement NumPy functions so that they can be called in ``@jit``
+decorated code. This section discusses how and when to use the ``@overload``
+decorator and what contributing such a function to the Numba code base might
+entail. This should help you get started when needing to use the ``@overload``
+decorator or when attempting to contribute new functions to Numba itself.
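+
+Before diving into the worked example below, here is a minimal sketch of the
+basic pattern (the module-level function ``clamp`` is hypothetical, purely for
+illustration)::
+
+    from numba import njit
+    from numba.extending import overload
+
+    def clamp(x, lo, hi):
+        # plain Python fallback, used when called from uncompiled code
+        return min(max(x, lo), hi)
+
+    @overload(clamp)
+    def ol_clamp(x, lo, hi):
+        # typing scope: runs once per distinct set of argument types
+        def impl(x, lo, hi):
+            return min(max(x, lo), hi)
+        return impl
+
+    @njit
+    def use_clamp(x):
+        return clamp(x, 0.0, 1.0)
+
+    print(use_clamp(1.7))   # -> 1.0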
+
+The ``@overload`` decorator and its variants are useful when you have a
+third-party library that you do not control and you wish to provide
+Numba-compatible implementations for specific functions from that library.
+
+Concrete Example
+================
+
+Let's assume that you are working on a minimization algorithm that makes use of
+|scipy.linalg.norm|_ to find different vector norms and the `Frobenius
+norm <https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm>`_ for matrices.
+You know that only integer and real numbers will be involved. (While this may
+sound like an artificial example, especially because a Numba implementation of
+``numpy.linalg.norm`` exists, it is largely pedagogical and serves to
+illustrate how and when to use ``@overload``.)
+
+.. |scipy.linalg.norm| replace:: ``scipy.linalg.norm``
+.. _scipy.linalg.norm: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.norm.html
+
+The skeleton might look something like this::
+
+    def algorithm():
+        # setup
+        v = ...
+        while True:
+            # take a step
+            d = scipy.linalg.norm(v)
+            if d < tolerance:
+                break
+
+Now, let's further assume that you have heard of Numba and you now wish to use
+it to accelerate your function. However, after adding the ``jit(nopython=True)``
+decorator, Numba complains that ``scipy.linalg.norm`` isn't supported. From
+looking at the documentation, you realize that a norm is probably fairly easy
+to implement using NumPy. A good starting point is the following template.
+
+.. literalinclude:: template.py
+
+After some deliberation and tinkering, you end up with the following code:
+
+.. literalinclude:: mynorm.py
+
+As you can see, the implementation only supports what you need right now:
+
+* Only integer and floating-point types are supported
+* All vector norms
+* Only the Frobenius norm for matrices
+* Code sharing between vector and matrix implementations using
+  ``@register_jitable``
+* Norms are implemented using NumPy syntax. (This is possible because
+  Numba is very aware of NumPy and many functions are supported.)
+
+So what actually happens here? The ``overload`` decorator registers a suitable
+implementation for ``scipy.linalg.norm`` in case a call to this is encountered
+in code that is being JIT-compiled, for example when you decorate your
+``algorithm`` function with ``@jit(nopython=True)``. In that case, the function
+``jit_norm`` will be called with the currently encountered types and will then
+return either ``_oneD_norm_x`` in the vector case or ``_two_D_norm_2`` in the
+matrix case.
+
+You can download the example code here: :download:`mynorm.py <./mynorm.py>`
+
+Implementing ``@overload`` for NumPy functions
+==============================================
+
+Numba supports NumPy through the provision of ``@jit`` compatible
+re-implementations of NumPy functions. In such cases ``@overload`` is a very
+convenient option for writing such implementations, however there are a few
+additional things to watch out for.
+
+* The Numba implementation should match the NumPy implementation as closely as
+  feasible with respect to accepted types, arguments, raised exceptions and
+  algorithmic complexity (Big-O / Landau order).
+
+* When implementing supported argument types, bear in mind that, due to
+  duck typing, NumPy does tend to accept a multitude of argument types beyond
+  NumPy arrays such as scalar, list, tuple, set, iterator, generator etc.
+  You will need to account for that during type inference and subsequently as
+  part of the tests.
+ +* A NumPy function may return a scalar, array or a data structure + which matches one of its inputs, you need to be aware of type + unification problems and dispatch to appropriate implementations. For + example, |np.corrcoef|_ may return an array or a scalar depending on its + inputs. + +.. |np.corrcoef| replace:: ``np.corrcoef`` +.. _np.corrcoef: https://docs.scipy.org/doc/numpy/reference/generated/numpy.corrcoef.html + +* If you are implementing a new function, you should always update the + `documentation + `_. + The sources can be found in ``docs/source/reference/numpysupported.rst``. Be + sure to mention any limitations that your implementation has, e.g. no support + for the ``axis`` keyword. + +* When writing tests for the functionality itself, it's useful to include + handling of non-finite values, arrays with different shapes and layouts, + complex inputs, scalar inputs, inputs with types for which support is not + documented (e.g. a function which the NumPy docs say requires a float or int + input might also 'work' if given a bool or complex input). + +* When writing tests for exceptions, for example if adding tests to + ``numba/tests/test_np_functions.py``, you may encounter the following error + message: + + .. code:: + + ====================================================================== + FAIL: test_foo (numba.tests.test_np_functions.TestNPFunctions) + ---------------------------------------------------------------------- + Traceback (most recent call last): + File "/numba/numba/tests/support.py", line 645, in tearDown + self.memory_leak_teardown() + File "/numba/numba/tests/support.py", line 619, in memory_leak_teardown + self.assert_no_memory_leak() + File "/numba/numba/tests/support.py", line 628, in assert_no_memory_leak + self.assertEqual(total_alloc, total_free) + AssertionError: 36 != 35 + + This occurs because raising exceptions from jitted code leads to reference + leaks. Ideally, you will place all exception testing in a separate test + method and then add a call in each test to ``self.disable_leak_check()`` to + disable the leak-check (inherit from ``numba.tests.support.TestCase`` to make + that available). + +* For many of the functions that are available in NumPy, there are + corresponding methods defined on the NumPy ``ndarray`` type. For example, the + function ``repeat`` is available as a NumPy module level function and a + member function on the ``ndarray`` class. + + .. code:: python + + import numpy as np + a = np.arange(10) + # function + np.repeat(a, 10) + # method + a.repeat(10) + + Once you have written the function implementation, you can easily use + ``@overload_method`` and reuse it. Just be sure to check that NumPy doesn't + diverge in the implementations of its function/method. + + As an example, the ``repeat`` function/method: + + .. code:: python + + @extending.overload_method(types.Array, 'repeat') + def array_repeat(a, repeats): + def array_repeat_impl(a, repeat): + # np.repeat has already been overloaded + return np.repeat(a, repeat) + + return array_repeat_impl + +* If you need to create ancillary functions, for example to re-use a small + utility function or to split your implementation across functions for the + sake of readability, you can make use of the ``@register_jitable`` decorator. + This will make those functions available from within your ``@jit`` and + ``@overload`` decorated functions. 
+ +* The Numba continuous integration (CI) set up tests a wide variety of NumPy + versions, you'll sometimes be alerted to a change in behaviour from some + previous NumPy version. If you can find supporting evidence in the NumPy + change log / repository, then you'll need to decide whether to create + branches and attempt to replicate the logic across versions, or use a version + gate (with associated wording in the documentation) to advertise that Numba + replicates NumPy from some particular version onwards. + +* You can look at the Numba source code for inspiration, many of the overloaded + NumPy functions and methods are in ``numba/targets/arrayobj.py``. Below, you + will find a list of implementations to look at that are well implemented in + terms of accepted types and test coverage. + + * ``np.repeat`` diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/template.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/template.py new file mode 100644 index 000000000..19b98cc01 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/extending/template.py @@ -0,0 +1,21 @@ +# Declare that function `myfunc` is going to be overloaded (have a +# substitutable Numba implementation) +@overload(myfunc) +# Define the overload function with formal arguments +# these arguments must be matched in the inner function implementation +def jit_myfunc(arg0, arg1, arg2, ...): + # This scope is for typing, access is available to the *type* of all + # arguments. This information can be used to change the behaviour of the + # implementing function and check that the types are actually supported + # by the implementation. + + print(arg0) # this will show the Numba type of arg0 + + # This is the definition of the function that implements the `myfunc` work. + # It does whatever algorithm is needed to implement myfunc. + def myfunc_impl(arg0, arg1, arg2, ...): # match arguments to jit_myfunc + # < Implementation goes here > + return # whatever needs to be returned by the algorithm + + # return the implementation + return myfunc_impl diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/glossary.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/glossary.rst new file mode 100644 index 000000000..35b690ca3 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/glossary.rst @@ -0,0 +1,106 @@ + +Glossary +======== + +.. glossary:: + + ahead-of-time compilation + AOT compilation + AOT + Compilation of a function in a separate step before running the + program code, producing an on-disk binary object which can be distributed + independently. This is the traditional kind of compilation known + in languages such as C, C++ or Fortran. + + bytecode + Python bytecode + The original form in which Python functions are executed. Python + bytecode describes a stack-machine executing abstract (untyped) + operations using operands from both the function stack and the + execution environment (e.g. global variables). + + compile-time constant + An expression whose value Numba can infer and freeze at compile-time. + Global variables and closure variables are compile-time constants. + + just-in-time compilation + JIT compilation + JIT + Compilation of a function at execution time, as opposed to + :term:`ahead-of-time compilation`. + + JIT function + Shorthand for "a function :term:`JIT-compiled ` with Numba using + the :ref:`@jit ` decorator." 
+ + loop-lifting + loop-jitting + A feature of compilation in :term:`object mode` where a loop can be + automatically extracted and compiled in :term:`nopython mode`. This + allows functions with operations unsupported in nopython mode to see + significant performance improvements if they contain loops with only + nopython-supported operations. + + lowering + The act of translating :term:`Numba IR` into LLVM IR. The term + "lowering" stems from the fact that LLVM IR is low-level and + machine-specific while Numba IR is high-level and abstract. + + NPM + nopython mode + A Numba compilation mode that generates code that does not access the + Python C API. This compilation mode produces the highest performance + code, but requires that the native types of all values in the function + can be :term:`inferred `. Unless otherwise instructed, + the ``@jit`` decorator will automatically fall back to :term:`object + mode` if nopython mode cannot be used. + + Numba IR + Numba intermediate representation + A representation of a piece of Python code which is more amenable + to analysis and transformations than the original Python + :term:`bytecode`. + + object mode + A Numba compilation mode that generates code that handles all values + as Python objects and uses the Python C API to perform all operations + on those objects. Code compiled in object mode will often run + no faster than Python interpreted code, unless the Numba compiler can + take advantage of :term:`loop-jitting`. + + ``OptionalType`` + An ``OptionalType`` is effectively a type union of a ``type`` and ``None``. + They typically occur in practice due to a variable being set to ``None`` + and then in a branch the variable being set to some other value. It's + often not possible at compile time to determine if the branch will execute + so to permit :term:`type inference` to complete, the type of the variable + becomes the union of a ``type`` (from the value) and ``None``, + i.e. ``OptionalType(type)``. + + type inference + The process by which Numba determines the specialized types of all + values within a function being compiled. Type inference can fail + if arguments or globals have Python types unknown to Numba, or if + functions are used that are not recognized by Numba. Successful + type inference is a prerequisite for compilation in + :term:`nopython mode`. + + typing + The act of running :term:`type inference` on a value or operation. + + ufunc + A NumPy `universal function `_. + Numba can create new compiled ufuncs with + the :ref:`@vectorize ` decorator. + + reflection + In numba, when a mutable container is passed as argument to a nopython + function from the Python interpreter, the container object and all its + contained elements are converted into nopython values. To match the + semantics of Python, any mutation on the container inside the nopython + function must be visible in the Python interpreter. To do so, Numba + must update the container and its elements and convert them back into + Python objects during the transition back into the interpreter. + + Not to be confused with Python's "reflection" in the context of binary + operators (see https://docs.python.org/3.5/reference/datamodel.html). diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/index.rst new file mode 100644 index 000000000..fa757b5a8 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/index.rst @@ -0,0 +1,37 @@ +.. 
Numba documentation master file, created by
+   sphinx-quickstart on Tue Dec 30 11:55:40 2014.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Numba documentation
+===================
+
+This is the Numba documentation. Unless you are already acquainted
+with Numba, we suggest you start with the :doc:`User manual <user/index>`.
+
+
+.. toctree::
+   :caption: For all users
+   :maxdepth: 2
+
+   user/index.rst
+   reference/index.rst
+
+
+.. toctree::
+   :caption: For CUDA users
+   :maxdepth: 2
+
+   cuda/index.rst
+   cuda-reference/index.rst
+
+
+.. toctree::
+   :caption: For advanced users & developers
+   :maxdepth: 2
+
+   extending/index.rst
+   developer/index.rst
+   proposals/index.rst
+   glossary.rst
+   release-notes.rst
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/cfunc.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/cfunc.rst
new file mode 100644
index 000000000..7650dfb6d
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/cfunc.rst
@@ -0,0 +1,147 @@
+============================
+NBEP 4: Defining C callbacks
+============================
+
+:Author: Antoine Pitrou
+:Date: April 2016
+:Status: Draft
+
+
+Interfacing with some native libraries (for example written in C
+or C++) can necessitate writing native callbacks to provide business logic
+to the library. Some Python-facing libraries may also provide the
+alternative of passing a ctypes-wrapped native callback instead of a
+Python callback for better performance. A simple example is the
+``scipy.integrate`` package where the user passes the function to be
+integrated as a callback.
+
+Users of those libraries may want to benefit from the performance advantage
+of running purely native code, while writing their code in Python.
+This proposal outlines a scheme to provide such a functionality in
+Numba.
+
+
+Basic usage
+===========
+
+We propose adding a new decorator, ``@cfunc``, importable from the main
+package. This decorator allows defining a callback as in the following
+example::
+
+    from numba import cfunc
+    from numba.types import float64
+
+    # A callback with the C signature `double(double)`
+
+    @cfunc(float64(float64), nopython=True)
+    def integrand(x):
+        return 1 / x
+
+
+The ``@cfunc`` decorator returns a "C function" object holding the
+resources necessary to run the given compiled function (for example its
+LLVM module). This object has several attributes and methods:
+
+* the ``ctypes`` attribute is a ctypes function object representing
+  the native function.
+
+* the ``address`` attribute is the address of the native function code, as
+  an integer (note this can also be computed from the ``ctypes`` attribute).
+
+* the ``native_name`` attribute is the symbol under which the function
+  can be looked up inside the current process.
+
+* the ``inspect_llvm()`` method returns the IR for the LLVM module
+  in which the function is compiled. It is expected that the ``native_name``
+  attribute corresponds to the function's name in the LLVM IR.
+
+The general signature of the decorator is ``cfunc(signature, **options)``.
+
+The ``signature`` must specify the argument types and return type of the
+function using Numba types. Contrary to ``@jit``, the return type cannot
+be omitted.
+
+The ``options`` are keyword-only parameters specifying compilation options.
+We are expecting that the standard ``@jit`` options (``nopython``,
+``forceobj``, ``cache``) can be made to work with ``@cfunc``.
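+
+As a usage sketch (assuming the decorator behaves as proposed), the ``ctypes``
+attribute could be handed directly to a library that accepts ctypes callbacks,
+such as ``scipy.integrate``::
+
+    import scipy.integrate as si
+
+    def do_integrate(func):
+        # integrate the given callback from 1 to 2
+        return si.quad(func, 1, 2)
+
+    # pass the compiled callback, not the pure Python function
+    result, abserr = do_integrate(integrand.ctypes)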
+ + +Calling from Numba-compiled functions +------------------------------------- + +While the intended use is to pass a callback's address to foreign C +code expecting a function pointer, it should be made possible to call +the C callback from a Numba-compiled function. + + +Passing array data +================== + +Native platform ABIs as used by C or C++ don't have the notion of a shaped +array as in Numpy. One common solution is to pass a raw data pointer and +one or several size arguments (depending on dimensionality). Numba must +provide a way to rebuild an array view of this data inside the callback. + +:: + + from numba import cfunc, carray + from numba.types import float64, CPointer, void, intp + + # A callback with the C signature `void(double *, double *, size_t)` + + @cfunc(void(CPointer(float64), CPointer(float64), intp)) + def invert(in_ptr, out_ptr, n): + in_ = carray(in_ptr, (n,)) + out = carray(out_ptr, (n,)) + for i in range(n): + out[i] = 1 / in_[i] + + +The ``carray`` function takes ``(pointer, shape, dtype)`` arguments +(``dtype`` being optional) and returns a C-layout array view over the +data *pointer*, with the given *shape* and *dtype*. *pointer* must +be a ctypes pointer object (not a Python integer). The array's +dimensionality corresponds to the *shape* tuple's length. If *dtype* +is not given, the array's dtype corresponds to the *pointer*'s pointee +type. + +The ``farray`` function is similar except that it returns a F-layout +array view. + + +Error handling +============== + +There is no standard mechanism in C for error reporting. Unfortunately, +Numba currently doesn't handle ``try..except`` blocks, which makes it more +difficult for the user to implement the required error reporting scheme. +The current stance of this proposal is to let users guard against invalid +arguments where necessary, and do whatever is required to inform the caller +of the error. + +Based on user feedback, we can later add support for some error reporting +schemes, such as returning an integer error code depending on whether an +exception was raised, or setting ``errno``. + + +Deferred topics +=============== + +Ahead-of-Time compilation +------------------------- + +This proposal doesn't make any provision for AOT compilation of C callbacks. +It would probably necessitate a separate API (a new method on the +``numba.pycc.CC`` object), and the implementation would require exposing +a subset of the C function object's functionality from the compiled C +extension module. + +Opaque data pointers +-------------------- + +Some libraries allow passing an opaque data pointer (``void *``) to a +user-provided callback, to provide any required context for execution +of the callback. Taking advantage of this functionality would require +adding specific support in Numba, for example the ability to do generic +conversion from ``types.voidptr`` and to take the address of a +Python-facing ``jitclass`` instance. 
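+
+Returning to the "Passing array data" example above, the ``invert`` callback
+could be exercised from Python through its ``ctypes`` attribute (a sketch,
+again assuming the behaviour proposed in this document)::
+
+    import ctypes
+    import numpy as np
+
+    inp = np.array([1.0, 2.0, 4.0])
+    out = np.empty_like(inp)
+
+    c_double_p = ctypes.POINTER(ctypes.c_double)
+    invert.ctypes(inp.ctypes.data_as(c_double_p),
+                  out.ctypes.data_as(c_double_p),
+                  inp.size)
+    # out now holds [1.0, 0.5, 0.25]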
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/extension-points.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/extension-points.rst new file mode 100644 index 000000000..89197dd17 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/extension-points.rst @@ -0,0 +1,414 @@ +======================== +NBEP 2: Extension points +======================== + +:Author: Antoine Pitrou +:Date: July 2015 +:Status: Draft + + +Implementing new types or functions in Numba requires hooking into +various mechanisms along the compilation chain (and potentially +outside of it). This document aims, first, at examining the +current ways of doing so and, second, at making proposals to make +extending easier. + +If some of the proposals are implemented, we should first strive +to use and exercise them internally, before exposing the APIs to the +public. + +.. note:: + This document doesn't cover CUDA or any other non-CPU backend. + + +High-level API +============== + +There is currently no high-level API, making some use cases more +complicated than they should be. + +Proposed changes +---------------- + +Dedicated module +'''''''''''''''' + +We propose the addition of a ``numba.extending`` module exposing the main +APIs useful for extending Numba. + +Implementing a function +''''''''''''''''''''''' + +We propose the addition of a ``@overload`` decorator allowing the +implementation of a given function for use in :term:`nopython mode`. +The overloading function has the same formal signature as the implemented +function, and receives the actual argument types. It should return a +Python function implementing the overloaded function for the given types. + +The following example implements :func:`numpy.where` with +this approach. + +.. literalinclude:: np-where-override.py + +It is also possible to implement functions already known to Numba, to +support additional types. The following example implements the +built-in function :func:`len` for tuples with this approach:: + + @overload(len) + def tuple_len(x): + if isinstance(x, types.BaseTuple): + # The tuple length is known at compile-time, so simply reify it + # as a constant. + n = len(x) + def len_impl(x): + return n + return len_impl + + +Implementing an attribute +''''''''''''''''''''''''' + +We propose the addition of a ``@overload_attribute`` decorator allowing +the implementation of an attribute getter for use in :term:`nopython mode`. + +The following example implements the ``.nbytes`` attribute on Numpy arrays:: + + @overload_attribute(types.Array, 'nbytes') + def array_nbytes(arr): + def get(arr): + return arr.size * arr.itemsize + return get + +.. note:: + The overload_attribute() signature allows for expansion to also define + setters and deleters, by letting the decorated function return a + ``getter, setter, deleter`` tuple instead of a single ``getter``. + + +Implementing a method +''''''''''''''''''''' + +We propose the addition of a ``@overload_method`` decorator allowing the +implementation of an instance method for use in :term:`nopython mode`. 
+
+The following example implements the ``.take()`` method on Numpy arrays::
+
+    @overload_method(types.Array, 'take')
+    def array_take(arr, indices):
+        if isinstance(indices, types.Array):
+            def take_impl(arr, indices):
+                n = indices.shape[0]
+                res = np.empty(n, arr.dtype)
+                for i in range(n):
+                    res[i] = arr[indices[i]]
+                return res
+            return take_impl
+
+
+Exposing a structure member
+'''''''''''''''''''''''''''
+
+We propose the addition of a ``make_attribute_wrapper()`` function exposing
+an internal field as a visible read-only attribute, for those types backed
+by a ``StructModel`` data model.
+
+For example, assuming ``PdIndexType`` is the Numba type of pandas indices,
+here is how to expose the underlying Numpy array as a ``._data`` attribute::
+
+    @register_model(PdIndexType)
+    class PdIndexModel(models.StructModel):
+        def __init__(self, dmm, fe_type):
+            members = [
+                ('values', fe_type.as_array),
+                ]
+            models.StructModel.__init__(self, dmm, fe_type, members)
+
+    make_attribute_wrapper(PdIndexType, 'values', '_data')
+
+
+Typing
+======
+
+Numba types
+-----------
+
+Numba's standard types are declared in :mod:`numba.types`. To declare
+a new type, one subclasses the base :class:`Type` class or one of its
+existing abstract subclasses, and implements the required functionality.
+
+Proposed changes
+''''''''''''''''
+
+No change required.
+
+
+Type inference on values
+------------------------
+
+Values of a new type need to be type-inferred if they can appear as
+function arguments or constants. The core machinery is in
+:mod:`numba.typing.typeof`.
+
+In the common case where some Python class or classes map exclusively
+to the new type, one can extend a generic function to dispatch on said
+classes, e.g.::
+
+    from numba.typing.typeof import typeof_impl
+
+    @typeof_impl.register(MyClass)
+    def _typeof_myclass(val, c):
+        if "some condition":
+            return MyType(...)
+
+The ``typeof_impl`` specialization must return a Numba type instance,
+or None if the value failed typing.
+
+(when one controls the class being type-inferred, an alternative
+to ``typeof_impl`` is to define a ``_numba_type_`` property on the class)
+
+In the rarer case where the new type can denote various Python classes
+that are impossible to enumerate, one must insert a manual check in the
+fallback implementation of the ``typeof_impl`` generic function.
+
+Proposed changes
+''''''''''''''''
+
+Allow people to define a generic hook without monkeypatching the
+fallback implementation.
+
+
+Fast path for type inference on function arguments
+--------------------------------------------------
+
+Optionally, one may want to allow a new type to participate in the
+fast type resolution (written in C code) to minimize function call
+overhead when a JIT-compiled function is called with the new type.
+One must then insert the required checks and implementation in
+the ``_typeof.c`` file, presumably inside the ``compute_fingerprint()``
+function.
+
+Proposed changes
+''''''''''''''''
+
+None. Adding generic hooks to C code embedded in a C Python extension
+is too delicate a change.
+
+
+Type inference on operations
+----------------------------
+
+Values resulting from various operations (function calls, operators, etc.)
+are typed using a set of helpers called "templates". One can define a
+new template by subclassing one of the existing base classes and implementing
+the desired inference mechanism. The template is explicitly registered
+with the type inference machinery using a decorator.
+ +The :class:`ConcreteTemplate` base class allows one to define inference as +a set of supported signatures for a given operation. The following example +types the modulo operator:: + + @builtin + class BinOpMod(ConcreteTemplate): + key = "%" + cases = [signature(op, op, op) + for op in sorted(types.signed_domain)] + cases += [signature(op, op, op) + for op in sorted(types.unsigned_domain)] + cases += [signature(op, op, op) for op in sorted(types.real_domain)] + +(note that type *instances* are used in the signatures, severely +limiting the amount of genericity that can be expressed) + +The :class:`AbstractTemplate` base class allows to define inference +programmatically, giving it full flexibility. Here is a simplistic +example of how tuple indexing (i.e. the ``__getitem__`` operator) can +be expressed:: + + @builtin + class GetItemUniTuple(AbstractTemplate): + key = "getitem" + + def generic(self, args, kws): + tup, idx = args + if isinstance(tup, types.UniTuple) and isinstance(idx, types.Integer): + return signature(tup.dtype, tup, idx) + + +The :class:`AttributeTemplate` base class allows to type the attributes +and methods of a given type. Here is an example, typing the ``.real`` +and ``.imag`` attributes of complex numbers:: + + @builtin_attr + class ComplexAttribute(AttributeTemplate): + key = types.Complex + + def resolve_real(self, ty): + return ty.underlying_float + + def resolve_imag(self, ty): + return ty.underlying_float + +.. note:: + :class:`AttributeTemplate` only works for getting attributes. Setting + an attribute's value is hardcoded in :mod:`numba.typeinfer`. + +The :class:`CallableTemplate` base class offers an easier way to parse +flexible function signatures, by letting one define a callable that has +the same definition as the function being typed. For example, here is how +one could hypothetically type Python's ``sorted`` function if Numba supported +lists:: + + @builtin + class Sorted(CallableTemplate): + key = sorted + + def generic(self): + def typer(iterable, key=None, reverse=None): + if reverse is not None and not isinstance(reverse, types.Boolean): + return + if key is not None and not isinstance(key, types.Callable): + return + if not isinstance(iterable, types.Iterable): + return + return types.List(iterable.iterator_type.yield_type) + + return typer + +(note you can return just the function's return type instead of the +full signature) + +Proposed changes +'''''''''''''''' + +Naming of the various decorators is quite vague and confusing. We propose +renaming ``@builtin`` to ``@infer``, ``@builtin_attr`` to ``@infer_getattr`` +and ``builtin_global`` to ``infer_global``. + +The two-step declaration for global values is a bit verbose, we propose +simplifying it by allowing the use of ``infer_global`` as a decorator:: + + @infer_global(len) + class Len(AbstractTemplate): + key = len + + def generic(self, args, kws): + assert not kws + (val,) = args + if isinstance(val, (types.Buffer, types.BaseTuple)): + return signature(types.intp, val) + +The class-based API can feel clumsy, we can add a functional API for +some of the template kinds: + +.. 
code-block:: python
+
+   @type_callable(sorted)
+   def type_sorted(context):
+       def typer(iterable, key=None, reverse=None):
+           # [same function as above]
+
+       return typer
+
+
+Code generation
+===============
+
+Concrete representation of values of a Numba type
+-------------------------------------------------
+
+Any concrete Numba type must be able to be represented in LLVM form
+(for variable storage, argument passing, etc.). One defines that
+representation by implementing a datamodel class and registering it
+with a decorator. Datamodel classes for standard types are defined
+in :mod:`numba.datamodel.models`.
+
+Proposed changes
+''''''''''''''''
+
+No change required.
+
+Conversion between types
+------------------------
+
+Implicit conversion between Numba types is currently implemented as a
+monolithic sequence of choices and type checks in the
+:meth:`BaseContext.cast` method. To add a new implicit conversion, one
+appends a type-specific check in that method.
+
+Boolean evaluation is a special case of implicit conversion (the
+destination type being :class:`types.Boolean`).
+
+.. note::
+   Explicit conversion is seen as a regular operation, e.g. a constructor
+   call.
+
+Proposed changes
+''''''''''''''''
+
+Add a generic function for implicit conversion, with multiple dispatch
+based on the source and destination types. Here is an example showing
+how to write a float-to-integer conversion::
+
+   @lower_cast(types.Float, types.Integer)
+   def float_to_integer(context, builder, fromty, toty, val):
+       lty = context.get_value_type(toty)
+       if toty.signed:
+           return builder.fptosi(val, lty)
+       else:
+           return builder.fptoui(val, lty)
+
+
+Implementation of an operation
+------------------------------
+
+Other operations are implemented and registered using a set of generic
+functions and decorators. For example, here is how lookup for the ``.ndim``
+attribute on Numpy arrays is implemented::
+
+   @builtin_attr
+   @impl_attribute(types.Kind(types.Array), "ndim", types.intp)
+   def array_ndim(context, builder, typ, value):
+       return context.get_constant(types.intp, typ.ndim)
+
+And here is how calling ``len()`` on a tuple value is implemented::
+
+   @builtin
+   @implement(types.len_type, types.Kind(types.BaseTuple))
+   def tuple_len(context, builder, sig, args):
+       tupty, = sig.args
+       retty = sig.return_type
+       return context.get_constant(retty, len(tupty.types))
+
+Proposed changes
+''''''''''''''''
+
+Review and streamline the API. Drop the requirement to write
+``types.Kind(...)`` explicitly. Remove the separate ``@implement``
+decorator and rename ``@builtin`` to ``@lower_builtin``, ``@builtin_attr``
+to ``@lower_getattr``, etc.
+
+Add decorators to implement ``setattr()`` operations, named
+``@lower_setattr`` and ``@lower_setattr_generic``.
+
+
+Conversion from / to Python objects
+-----------------------------------
+
+Some types need to be converted from or to Python objects, if they can
+be passed as function arguments or returned from a function. The
+corresponding boxing and unboxing operations are implemented using
+a generic function. The implementations for standard Numba types
+are in :mod:`numba.targets.boxing`.
For example, here is the boxing +implementation for a boolean value:: + + @box(types.Boolean) + def box_bool(c, typ, val): + longval = c.builder.zext(val, c.pyapi.long) + return c.pyapi.bool_from_long(longval) + +Proposed changes +'''''''''''''''' + +Change the implementation signature from ``(c, typ, val)`` to +``(typ, val, c)``, to match the one chosen for the ``typeof_impl`` +generic function. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/external-memory-management.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/external-memory-management.rst new file mode 100644 index 000000000..38878d670 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/external-memory-management.rst @@ -0,0 +1,922 @@ +.. _nbep-7: + +=============================================== +NBEP 7: CUDA External Memory Management Plugins +=============================================== + +:Author: Graham Markall, NVIDIA +:Contributors: Thomson Comer, Peter Entschev, Leo Fang, John Kirkham, Keith Kraus +:Date: March 2020 +:Status: Final + +Background and goals +-------------------- + +The :ref:`CUDA Array Interface ` enables sharing of data +between different Python libraries that access CUDA devices. However, each +library manages its own memory distinctly from the others. For example: + + +* `Numba `_ internally manages memory for the creation + of device and mapped host arrays. +* `The RAPIDS libraries `_ (cuDF, cuML, etc.) use the `Rapids + Memory Manager `_ for allocating device + memory. +* `CuPy `_ includes a `memory pool + implementation `_ + for both device and pinned memory. + +The goal of this NBEP is to describe a plugin interface that enables Numba's +internal memory management to be replaced with an external memory manager by the +user. When the plugin interface is in use, Numba no longer directly allocates or +frees any memory when creating arrays, but instead requests allocations and +frees through the external manager. + +Requirements +------------ + +Provide an *External Memory Manager (EMM)* interface in Numba. + + +* When the EMM is in use, Numba will make all memory allocation using the EMM. + It will never directly call functions such as ``CuMemAlloc``\ , ``cuMemFree``\ , etc. +* When not using an *External Memory Manager (EMM)*\ , Numba's present behaviour + is unchanged (at the time of writing, the current version is the 0.48 + release). + +If an EMM is to be used, it will entirely replace Numba's internal memory +management for the duration of program execution. An interface for setting the +memory manager will be provided. + +Device vs. Host memory +^^^^^^^^^^^^^^^^^^^^^^^ + +An EMM will always take responsibility for the management of device memory. +However, not all CUDA memory management libraries also support managing host +memory, so a facility for Numba to continue the management of host memory +whilst ceding control of device memory to the EMM will be provided. + +Deallocation strategies +^^^^^^^^^^^^^^^^^^^^^^^ + +Numba's internal memory management uses a :ref:`deallocation strategy +` designed to increase efficiency by deferring +deallocations until a significant quantity are pending. It also provides a +mechanism for preventing deallocations entirely during critical sections, using +the :func:`~numba.cuda.defer_cleanup` context manager. + + +* When the EMM is not in use, the deallocation strategy and operation of + ``defer_cleanup`` remain unchanged. 
+* When the EMM is in use, the deallocation strategy is implemented by the EMM, + and Numba's internal deallocation mechanism is not used. For example: + + * A similar strategy to Numba's could be implemented by the EMM, or + * Deallocated memory might immediately be returned to a memory pool. + +* The ``defer_cleanup`` context manager may behave differently with an EMM - an + EMM should be accompanied by documentation of the behaviour of the + ``defer_cleanup`` context manager when it is in use. + + * For example, a pool allocator could always immediately return memory to a + pool even when the context manager is in use, but could choose + not to free empty pools until ``defer_cleanup`` is not in use. + +Management of other objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In addition to memory, Numba manages the allocation and deallocation of +:ref:`events `, :ref:`streams `, and modules (a module is a +compiled object, which is generated from ``@cuda.jit``\ -ted functions). The +management of streams, events, and modules should be unchanged by the presence +or absence of an EMM. + +Asynchronous allocation / deallocation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An asynchronous memory manager might provide the facility for an allocation or +free to take a CUDA stream and execute asynchronously. For freeing, this is +unlikely to cause issues since it operates at a layer beneath Python, but for +allocations this could be problematic if the user tries to then launch a kernel +on the default stream from this asynchronous memory allocation. + +The interface described in this proposal will not be required to support +asynchronous allocation and deallocation, and as such these use cases will not +be considered further. However, nothing in this proposal should preclude the +straightforward addition of asynchronous operations in future versions of the +interface. + +Non-requirements +^^^^^^^^^^^^^^^^ + +In order to minimise complexity and constrain this proposal to a reasonable +scope, the following will not be supported: + + +* Using different memory manager implementations for different contexts. All + contexts will use the same memory manager implementation - either the Numba + internal implementation or an external implementation. +* Changing the memory manager once execution has begun. It is not practical to + change the memory manager and retain all allocations. Cleaning up the entire + state and then changing to a different memory allocator (rather than starting + a new process) appears to be a rather niche use case. +* Any changes to the ``__cuda_array_interface__`` to further define its semantics, + e.g. for acquiring / releasing memory as discussed in `Numba Issue + #4886 `_ - these are independent, + and can be addressed as part of separate proposals. +* Managed memory / UVM is not supported. At present Numba does not support UVM - + see `Numba Issue #4362 `_ for + discussion of support. + +Interface for Plugin developers +------------------------------- + +New classes and functions will be added to ``numba.cuda.cudadrv.driver``: + +* ``BaseCUDAMemoryManager`` and ``HostOnlyCUDAMemoryManager``\ : base classes for + EMM plugin implementations. +* ``set_memory_manager``: a method for registering an external memory manager with + Numba. + +These will be exposed through the public API, in the ``numba.cuda`` module. 
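+
+For example, registering a hypothetical plugin class ``MyEMMPlugin``
+(implemented as described in the following sections) would look like this
+sketch, which should run before any CUDA work is done::
+
+    from numba import cuda
+    from myemm import MyEMMPlugin  # hypothetical plugin module
+
+    cuda.set_memory_manager(MyEMMPlugin)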
+Additionally, some classes that are already part of the `driver` module will
+be exposed as part of the public API:
+
+* ``MemoryPointer``: used to encapsulate information about a pointer to
+  device memory.
+* ``MappedMemory``: used to hold information about host memory that is
+  mapped into the device address space (a subclass of ``MemoryPointer``).
+* ``PinnedMemory``: used to hold information about host memory that is
+  pinned (a subclass of ``mviewbuf.MemAlloc``, a class internal to Numba).
+
+As an alternative to calling the ``set_memory_manager`` function, an
+environment variable can be used to set the memory manager. The value of the
+environment variable should be the name of the module containing the memory
+manager in its global scope, named ``_numba_memory_manager``:
+
+.. code-block::
+
+   export NUMBA_CUDA_MEMORY_MANAGER="<module>"
+
+When this variable is set, Numba will automatically use the memory manager
+from the specified module. Calls to ``set_memory_manager`` will issue a
+warning, but otherwise be ignored.
+
+Plugin Base Classes
+^^^^^^^^^^^^^^^^^^^
+
+An EMM plugin is implemented by inheriting from the ``BaseCUDAMemoryManager``
+class, which is defined as:
+
+.. code-block:: python
+
+   class BaseCUDAMemoryManager(object, metaclass=ABCMeta):
+       @abstractmethod
+       def memalloc(self, size):
+           """
+           Allocate on-device memory in the current context. Arguments:
+
+           - `size`: Size of allocation in bytes
+
+           Returns: a `MemoryPointer` to the allocated memory.
+           """
+
+       @abstractmethod
+       def memhostalloc(self, size, mapped, portable, wc):
+           """
+           Allocate pinned host memory. Arguments:
+
+           - `size`: Size of the allocation in bytes
+           - `mapped`: Whether the allocated memory should be mapped into
+             the CUDA address space.
+           - `portable`: Whether the memory will be considered pinned by all
+             contexts, and not just the calling context.
+           - `wc`: Whether to allocate the memory as write-combined.
+
+           Returns a `MappedMemory` or `PinnedMemory` instance that owns the
+           allocated memory, depending on whether the region was mapped into
+           device memory.
+           """
+
+       @abstractmethod
+       def mempin(self, owner, pointer, size, mapped):
+           """
+           Pin a region of host memory that is already allocated. Arguments:
+
+           - `owner`: An object owning the memory - e.g. a `DeviceNDArray`.
+           - `pointer`: The pointer to the beginning of the region to pin.
+           - `size`: The size of the region to pin.
+           - `mapped`: Whether the region should also be mapped into device
+             memory.
+
+           Returns a `MappedMemory` or `PinnedMemory` instance that refers
+           to the allocated memory, depending on whether the region was
+           mapped into device memory.
+           """
+
+       @abstractmethod
+       def initialize(self):
+           """
+           Perform any initialization required for the EMM plugin to be
+           ready to use.
+           """
+
+       @abstractmethod
+       def get_memory_info(self):
+           """
+           Returns (free, total) memory in bytes in the context.
+           """
+
+       @abstractmethod
+       def get_ipc_handle(self, memory):
+           """
+           Return an `IpcHandle` from a GPU allocation. Arguments:
+
+           - `memory`: A `MemoryPointer` for which the IPC handle should be
+             created.
+           """
+
+       @abstractmethod
+       def reset(self):
+           """
+           Clear up all memory allocated in this context.
+           """
+
+       @abstractmethod
+       def defer_cleanup(self):
+           """
+           Returns a context manager that ensures the implementation of
+           deferred cleanup whilst it is active.
+ """ + + @property + @abstractmethod + def interface_version(self): + """ + Returns an integer specifying the version of the EMM Plugin interface + supported by the plugin implementation. Should always return 1 for + implementations described in this proposal. + """ + +All of the methods of an EMM plugin are called from within Numba - they never +need to be invoked directly by a Numba user. + +The ``initialize`` method is called by Numba prior to any memory allocations +being requested. This gives the EMM an opportunity to initialize any data +structures, etc., that it needs for its normal operations. The method may be +called multiple times during the lifetime of the program - subsequent calls +should not invalidate or reset the state of the EMM. + +The ``memalloc``\ , ``memhostalloc``\ , and ``mempin`` methods are called when Numba +requires an allocation of device or host memory, or pinning of host memory. +Device memory should always be allocated in the current context. + +``get_ipc_handle`` is called when an IPC handle for an array is required. Note +that there is no method for closing an IPC handle - this is because the +``IpcHandle`` object constructed by ``get_ipc_handle`` contains a ``close()`` method +as part of its definition in Numba, which closes the handle by calling +``cuIpcCloseMemHandle``. It is expected that this is sufficient for general use +cases, so no facility for customising the closing of IPC handles is provided by +the EMM Plugin interface. + +``get_memory_info`` may be called at any time after ``initialize``. + +``reset`` is called as part of resetting a context. Numba does not normally call +reset spontaneously, but it may be called at the behest of the user. Calls to +``reset`` may even occur before ``initialize`` is called, so the plugin should be +robust against this occurrence. + +``defer_cleanup`` is called when the ``numba.cuda.defer_cleanup`` context manager +is used from user code. + +``interface_version`` is called by Numba when the memory manager is set, to +ensure that the version of the interface implemented by the plugin is +compatible with the version of Numba in use. + +Representing pointers +^^^^^^^^^^^^^^^^^^^^^ + +Device Memory +~~~~~~~~~~~~~ + +The ``MemoryPointer`` class is used to represent a pointer to memory. Whilst there +are various details of its implementation, the only aspect relevant to EMM +plugin development is its initialization. The ``__init__`` method has the +following interface: + +.. code-block:: python + + class MemoryPointer: + def __init__(self, context, pointer, size, owner=None, finalizer=None): + + +* ``context``\ : The context in which the pointer was allocated. +* ``pointer``\ : A ``ctypes`` pointer (e.g. ``ctypes.c_uint64``\ ) holding the address of + the memory. +* ``size``\ : The size of the allocation in bytes. +* ``owner``\ : The owner is sometimes set by the internals of the class, or used for + Numba's internal memory management, but need not be provided by the writer of + an EMM plugin - the default of ``None`` should always suffice. +* ``finalizer``\ : A method that is called when the last reference to the + ``MemoryPointer`` object is released. Usually this will make a call to the + external memory management library to inform it that the memory is no longer + required, and that it could potentially be freed (though the EMM is not + required to free it immediately). 
+
+Host Memory
+~~~~~~~~~~~
+
+Memory mapped into the CUDA address space (which is created when the
+``memhostalloc`` or ``mempin`` methods are called with ``mapped=True``) is
+managed using the ``MappedMemory`` class:
+
+.. code-block:: python
+
+   class MappedMemory(AutoFreePointer):
+       def __init__(self, context, pointer, size, owner, finalizer=None):
+
+* ``context``: The context in which the pointer was allocated.
+* ``pointer``: A ``ctypes`` pointer (e.g. ``ctypes.c_void_p``) holding the
+  address of the allocated memory.
+* ``size``: The size of the allocated memory in bytes.
+* ``owner``: A Python object that owns the memory, e.g. a ``DeviceNDArray``
+  instance.
+* ``finalizer``: A method that is called when the last reference to the
+  ``MappedMemory`` object is released. For example, this method could call
+  ``cuMemFreeHost`` on the pointer to deallocate the memory immediately.
+
+Note that the inheritance from ``AutoFreePointer`` is an implementation
+detail and need not concern the developer of an EMM plugin -
+``MemoryPointer`` is higher in the MRO of ``MappedMemory``.
+
+Memory that is only in the host address space and has been pinned is
+represented with the ``PinnedMemory`` class:
+
+.. code-block:: python
+
+   class PinnedMemory(mviewbuf.MemAlloc):
+       def __init__(self, context, pointer, size, owner, finalizer=None):
+
+* ``context``: The context in which the pointer was allocated.
+* ``pointer``: A ``ctypes`` pointer (e.g. ``ctypes.c_void_p``) holding the
+  address of the pinned memory.
+* ``size``: The size of the pinned region in bytes.
+* ``owner``: A Python object that owns the memory, e.g. a ``DeviceNDArray``
+  instance.
+* ``finalizer``: A method that is called when the last reference to the
+  ``PinnedMemory`` object is released. This method could e.g. call
+  ``cuMemHostUnregister`` on the pointer to unpin the memory immediately.
+
+Providing device memory management only
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some external memory managers will support management of on-device memory
+but not host memory. To make it easy to implement an EMM plugin using one of
+these managers, Numba will provide a memory manager class with
+implementations of the ``memhostalloc`` and ``mempin`` methods. An abridged
+definition of this class follows:
+
+.. code-block:: python
+
+   class HostOnlyCUDAMemoryManager(BaseCUDAMemoryManager):
+       # Unimplemented methods:
+       #
+       # - memalloc
+       # - get_memory_info
+
+       def memhostalloc(self, size, mapped, portable, wc):
+           # Implemented.
+
+       def mempin(self, owner, pointer, size, mapped):
+           # Implemented.
+
+       def initialize(self):
+           # Implemented.
+           #
+           # Must be called by any subclass when its initialize() method is
+           # called.
+
+       def reset(self):
+           # Implemented.
+           #
+           # Must be called by any subclass when its reset() method is
+           # called.
+
+       def defer_cleanup(self):
+           # Implemented.
+           #
+           # Must be called by any subclass when its defer_cleanup() method
+           # is called.
+
+A class can subclass the ``HostOnlyCUDAMemoryManager`` and then it only
+needs to add implementations of methods for on-device memory. Any subclass
+must observe the following rules:
+
+* If the subclass implements ``__init__``, then it must also call
+  ``HostOnlyCUDAMemoryManager.__init__``, as this is used to initialize some
+  of its data structures (``self.allocations`` and ``self.deallocations``).
+* The subclass must implement ``memalloc`` and ``get_memory_info``.
+* The ``initialize`` and ``reset`` methods perform initialisation of
+  structures used by the ``HostOnlyCUDAMemoryManager``.
+
+  * If the subclass has nothing to do on initialisation (possibly) or reset
+    (unlikely) then it need not implement these methods.
+  * However, if it does implement these methods then it must also call the
+    methods from ``HostOnlyCUDAMemoryManager`` in its own implementations.
+
+* Similarly, if ``defer_cleanup`` is implemented, it should enter the
+  context provided by ``HostOnlyCUDAMemoryManager.defer_cleanup()`` prior to
+  ``yield``-ing (or in the ``__enter__`` method) and release it prior to
+  exiting (or in the ``__exit__`` method).
+
+Import order
+^^^^^^^^^^^^
+
+The order in which Numba and the library implementing an EMM Plugin are
+imported should not matter. For example, if ``rmm`` were to implement and
+register an EMM Plugin, then:
+
+.. code-block:: python
+
+   from numba import cuda
+   import rmm
+
+and
+
+.. code-block:: python
+
+   import rmm
+   from numba import cuda
+
+are equivalent - this is because Numba does not initialize CUDA or allocate
+any memory until the first call to a CUDA function - neither instantiating
+and registering an EMM plugin, nor importing ``numba.cuda`` causes a call to
+a CUDA function.
+
+Numba as a Dependency
+^^^^^^^^^^^^^^^^^^^^^
+
+Adding the implementation of an EMM Plugin to a library naturally makes
+Numba a dependency of the library where it may not have been previously. In
+order to make the dependency optional, if this is desired, one might
+conditionally instantiate and register the EMM Plugin like:
+
+.. code-block:: python
+
+   try:
+       import numba
+       from mylib.numba_utils import MyNumbaMemoryManager
+       numba.cuda.cudadrv.driver.set_memory_manager(MyNumbaMemoryManager)
+   except ImportError:
+       print("Numba not importable - not registering EMM Plugin")
+
+so that ``mylib.numba_utils``, which contains the implementation of the EMM
+Plugin, is only imported if Numba is already present. If Numba is not
+available, then ``mylib.numba_utils`` (which necessarily imports ``numba``)
+will never be imported.
+
+It is recommended that any library with an EMM Plugin tests in at least some
+environments with Numba installed (exercising the EMM Plugin) as well as in
+some environments without Numba, to avoid introducing an accidental Numba
+dependency.
+
+Example implementation - A RAPIDS Memory Manager (RMM) Plugin
+-------------------------------------------------------------
+
+An implementation of an EMM plugin within the `Rapids Memory Manager (RMM)
+<https://github.com/rapidsai/rmm>`_ is sketched out in this section. This is
+intended to show an overview of the implementation in order to support the
+descriptions above and to illustrate how the plugin interface can be used -
+different choices may be made for a production-ready implementation.
+
+The plugin implementation consists of additions to `python/rmm/rmm.py
+<https://github.com/rapidsai/rmm/blob/master/python/rmm/rmm.py>`_:
+
+.. code-block:: python
+
+   # New imports:
+   from contextlib import contextmanager
+   # RMM already has Numba as a dependency, so these imports need not be
+   # guarded by a check for the presence of numba.
+   from numba.cuda import (HostOnlyCUDAMemoryManager, MemoryPointer,
+                           IpcHandle, set_memory_manager)
+
+
+   # New class implementing the EMM Plugin:
+   class RMMNumbaManager(HostOnlyCUDAMemoryManager):
+       def memalloc(self, size):
+           # Allocates device memory using RMM functions. The finalizer for
+           # the allocated memory calls back to RMM to free the memory.
+           stream = 0  # Allocations are made on the default stream
+           addr = librmm.rmm_alloc(size, stream)
+           ctx = cuda.current_context()
+           ptr = ctypes.c_uint64(int(addr))
+           finalizer = _make_finalizer(addr, stream)
+           return MemoryPointer(ctx, ptr, size, finalizer=finalizer)
+
+       def get_ipc_handle(self, memory):
+           """
+           Get an IPC handle for the memory with offset modified by the RMM
+           memory pool.
+           """
+           # This is a functional implementation that illustrates what
+           # get_ipc_handle needs to do, but it is not a very "clean"
+           # implementation - it relies on borrowing bits of Numba internals
+           # to initialise ipchandle.
+           #
+           # A more polished implementation might make use of additional
+           # functions in the RMM C++ layer for initialising IPC handles,
+           # and not use any Numba internals.
+           ipchandle = (ctypes.c_byte * 64)()  # IPC handle is 64 bytes
+           cuda.cudadrv.memory.driver_funcs.cuIpcGetMemHandle(
+               ctypes.byref(ipchandle),
+               memory.owner.handle,
+           )
+           source_info = cuda.current_context().device.get_device_identity()
+           ptr = memory.device_ctypes_pointer.value
+           offset = librmm.rmm_getallocationoffset(ptr, 0)
+           return IpcHandle(memory, ipchandle, memory.size, source_info,
+                            offset=offset)
+
+       def get_memory_info(self):
+           # Returns a tuple of (free, total) using RMM functionality.
+           return get_info()  # Function defined in rmm.py
+
+       def initialize(self):
+           # Nothing is required to initialize RMM here, but this method is
+           # added to illustrate that the super() method should also be
+           # called.
+           super().initialize()
+
+       @contextmanager
+       def defer_cleanup(self):
+           # Does nothing to defer cleanup - a full implementation may
+           # choose to implement a different policy.
+           with super().defer_cleanup():
+               yield
+
+       @property
+       def interface_version(self):
+           # As required by the specification
+           return 1
+
+   # The existing _make_finalizer function is used by RMMNumbaManager:
+   def _make_finalizer(handle, stream):
+       """
+       Factory to make the finalizer function.
+       We need to bind *handle* and *stream* into the actual finalizer,
+       which takes no args.
+       """
+
+       def finalizer():
+           """
+           Invoked when the MemoryPointer is freed
+           """
+           librmm.rmm_free(handle, stream)
+
+       return finalizer
+
+   # Utility function to register `RMMNumbaManager` as an EMM:
+   def use_rmm_for_numba():
+       set_memory_manager(RMMNumbaManager)
+
+   # To support `NUMBA_CUDA_MEMORY_MANAGER=rmm`:
+   _numba_memory_manager = RMMNumbaManager
+
+Example usage
+^^^^^^^^^^^^^
+
+A simple example that configures Numba to use RMM for memory management and
+creates a device array is as follows:
+
+.. code-block:: python
+
+   # example.py
+   import rmm
+   import numpy as np
+
+   from numba import cuda
+
+   rmm.use_rmm_for_numba()
+
+   a = np.zeros(10)
+   d_a = cuda.to_device(a)
+   del d_a
+   print(rmm.csv_log())
+
+Running this should result in output similar to the following:
+
+.. code-block::
+
+   Event Type,Device ID,Address,Stream,Size (bytes),Free Memory,Total Memory,Current Allocs,Start,End,Elapsed,Location
+   Alloc,0,0x7fae06600000,0,80,0,0,1,1.10549,1.1074,0.00191666,/numba/numba/cuda/cudadrv/driver.py:683
+   Free,0,0x7fae06600000,0,0,0,0,0,1.10798,1.10921,0.00122238,/numba/numba/utils.py:678
+
+Note that there is some scope for improvement in RMM for detecting the line
+number at which the allocation / free occurred, but this is outside the
+scope of the example in this proposal.
+
+Setting the memory manager through the environment
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Rather than calling ``rmm.use_rmm_for_numba()`` in the example above, the
+memory manager could also be set to use RMM globally with an environment
+variable, so the Python interpreter is invoked to run the example as:
+
+.. code-block::
+
+   NUMBA_CUDA_MEMORY_MANAGER="rmm.RMMNumbaManager" python example.py
+
+Numba internal changes
+----------------------
+
+This section is intended primarily for Numba developers - those with an
+interest in the external interface for implementing EMM plugins may choose
+to skip over this section.
+
+Current model / implementation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+At present, memory management is implemented in the
+:class:`~numba.cuda.cudadrv.driver.Context` class. It maintains lists of
+allocations and deallocations:
+
+* ``allocations`` is a ``numba.core.utils.UniqueDict``, created at context
+  creation time.
+* ``deallocations`` is an instance of the ``_PendingDeallocs`` class, and is
+  created when ``Context.prepare_for_use()`` is called.
+
+These are used to track allocations and deallocations of:
+
+* Device memory
+* Pinned memory
+* Mapped memory
+* Streams
+* Events
+* Modules
+
+The ``_PendingDeallocs`` class implements the deferred deallocation
+strategy - cleanup functions (such as ``cuMemFree``) for the items above are
+added to its list of pending deallocations by the finalizers of objects
+representing allocations. These finalizers are run when the objects owning
+them are garbage-collected by the Python interpreter. When the addition of a
+new cleanup function to the deallocation list causes the number or size of
+pending deallocations to exceed a configured ratio, the ``_PendingDeallocs``
+object runs deallocators for all items it knows about and then clears its
+internal pending list.
+
+See :ref:`deallocation-behavior` for more details of this implementation.
+
+Proposed changes
+^^^^^^^^^^^^^^^^
+
+This section outlines the major changes that will be made to support the EMM
+plugin interface - there will be various small changes to other parts of
+Numba that will be required in order to adapt to these changes; an
+exhaustive list of these is not provided.
+
+Context changes
+~~~~~~~~~~~~~~~
+
+The ``numba.cuda.cudadrv.driver.Context`` class will no longer directly
+allocate and free memory. Instead, the context will hold a reference to a
+memory manager instance, and its memory allocation methods will call into
+the memory manager, e.g.:
+
+.. code-block:: python
+
+   def memalloc(self, size):
+       return self.memory_manager.memalloc(size)
+
+   def memhostalloc(self, size, mapped=False, portable=False, wc=False):
+       return self.memory_manager.memhostalloc(size, mapped, portable, wc)
+
+   def mempin(self, owner, pointer, size, mapped=False):
+       if mapped and not self.device.CAN_MAP_HOST_MEMORY:
+           raise CudaDriverError("%s cannot map host memory" % self.device)
+       return self.memory_manager.mempin(owner, pointer, size, mapped)
+
+   def prepare_for_use(self):
+       self.memory_manager.initialize()
+
+   def get_memory_info(self):
+       return self.memory_manager.get_memory_info()
+
+   def get_ipc_handle(self, memory):
+       return self.memory_manager.get_ipc_handle(memory)
+
+   def reset(self):
+       # ... Already-extant reset logic, plus:
+       self.memory_manager.reset()
+
+The ``memory_manager`` member is initialised when the context is created.
+
+The ``memunpin`` method (not shown above but currently present in the
+``Context`` class) has never been implemented - it presently raises a
+``NotImplementedError``. This method is arguably unneeded - pinned memory is
+unpinned by its finalizer, and unpinning before a finalizer runs would
+invalidate the state of ``PinnedMemory`` objects for which references are
+still held. It is proposed that this method be removed when making the other
+changes to the ``Context`` class.
+
+The ``Context`` class will still instantiate ``self.allocations`` and
+``self.deallocations`` as before - these will still be used by the context
+to manage the allocations and deallocations of events, streams, and modules,
+which are not handled by the EMM plugin.
+
+New components of the ``driver`` module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* ``BaseCUDAMemoryManager``: An abstract class, as defined in the plugin
+  interface above.
+* ``HostOnlyCUDAMemoryManager``: A subclass of ``BaseCUDAMemoryManager``,
+  with the logic from ``Context.memhostalloc`` and ``Context.mempin`` moved
+  into it. This class will also create its own ``allocations`` and
+  ``deallocations`` members, similarly to how the ``Context`` class creates
+  them. These are used to manage the allocations and deallocations of pinned
+  and mapped host memory.
+* ``NumbaCUDAMemoryManager``: A subclass of ``HostOnlyCUDAMemoryManager``,
+  which also contains an implementation of ``memalloc`` based on that
+  presently existing in the ``Context`` class. This is the default memory
+  manager, and its use preserves the behaviour of Numba prior to the
+  addition of the EMM plugin interface - that is, all memory allocation and
+  deallocation for Numba arrays is handled within Numba.
+
+  * This class shares the ``allocations`` and ``deallocations`` members with
+    its parent class ``HostOnlyCUDAMemoryManager``, and it uses these for
+    the management of device memory that it allocates.
+
+* The ``set_memory_manager`` function, which sets a global pointing to the
+  memory manager class. This global initially holds
+  ``NumbaCUDAMemoryManager`` (the default).
+
+Staged IPC
+~~~~~~~~~~
+
+Staged IPC should not take ownership of the memory that it allocates. When
+the default internal memory manager is in use, the memory allocated for the
+staging array is already owned. When an EMM plugin is in use, it is not
+legitimate to take ownership of the memory.
+
+This change can be made by applying the following small patch, which has
+been tested to have no effect on the CUDA test suite:
+
+.. code-block:: diff
+
+   diff --git a/numba/cuda/cudadrv/driver.py b/numba/cuda/cudadrv/driver.py
+   index 7832955..f2c1352 100644
+   --- a/numba/cuda/cudadrv/driver.py
+   +++ b/numba/cuda/cudadrv/driver.py
+   @@ -922,7 +922,11 @@ class _StagedIpcImpl(object):
+            with cuda.gpus[srcdev.id]:
+                impl.close()
+
+   -        return newmem.own()
+   +        return newmem
+
+Testing
+~~~~~~~
+
+Alongside the addition of appropriate tests for new functionality, there
+will be some refactoring of existing tests required, but these changes are
+not substantial. Tests of the deallocation strategy (e.g.
+``TestDeallocation``, ``TestDeferCleanup``) will need to be modified to
+ensure that they are examining the correct set of deallocations. When an EMM
+plugin is in use, they will need to be skipped.
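+
+A sketch of how such a skip might be expressed (assuming, hypothetically,
+that the ``NUMBA_CUDA_MEMORY_MANAGER`` setting is mirrored in Numba's config
+module, with ``'default'`` denoting the internal manager):
+
+.. code-block:: python
+
+   import unittest
+
+   from numba.core import config
+
+   def skip_with_emm_plugin(reason="EMM plugin in use"):
+       """Decorator for tests that examine Numba-internal deallocations."""
+       emm_active = getattr(config, 'CUDA_MEMORY_MANAGER', 'default') != 'default'
+       return unittest.skipIf(emm_active, reason)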
+
+Prototyping / experimental implementation
+-----------------------------------------
+
+Some prototype / experimental implementations have been produced to guide
+the designs presented in this document. The current implementations can be
+found in:
+
+* Numba branch: https://github.com/gmarkall/numba/tree/grm-numba-nbep-7.
+* RMM branch: https://github.com/gmarkall/rmm/tree/grm-numba-nbep-7.
+* CuPy implementation:
+  https://github.com/gmarkall/nbep-7/blob/master/nbep7/cupy_mempool.py -
+  uses an unmodified CuPy.
+
+  * See the `CuPy memory management docs
+    <https://docs-cupy.chainer.org/en/latest/reference/memory.html>`_.
+
+Current implementation status
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+RMM Plugin
+~~~~~~~~~~
+
+For a minimal example, a simple allocation and free using RMM works as
+expected. For the example code (similar to the RMM example above):
+
+.. code-block:: python
+
+   import rmm
+   import numpy as np
+
+   from numba import cuda
+
+   rmm.use_rmm_for_numba()
+
+   a = np.zeros(10)
+   d_a = cuda.to_device(a)
+   del d_a
+   print(rmm.csv_log())
+
+We see the following output:
+
+.. code-block::
+
+   Event Type,Device ID,Address,Stream,Size (bytes),Free Memory,Total Memory,Current Allocs,Start,End,Elapsed,Location
+   Alloc,0,0x7f96c7400000,0,80,0,0,1,1.13396,1.13576,0.00180059,/numba/numba/cuda/cudadrv/driver.py:686
+   Free,0,0x7f96c7400000,0,0,0,0,0,1.13628,1.13723,0.000956004,/numba/numba/utils.py:678
+
+This output is similar to the expected output from the example usage
+presented above (though note that the pointer addresses and timestamps vary
+compared to the example), and provides some validation of the example use
+case.
+
+CuPy Plugin
+~~~~~~~~~~~
+
+.. code-block:: python
+
+   from nbep7.cupy_mempool import use_cupy_mm_for_numba
+   import numpy as np
+
+   from numba import cuda
+
+   use_cupy_mm_for_numba()
+
+   a = np.zeros(10)
+   d_a = cuda.to_device(a)
+   del d_a
+
+The prototype CuPy plugin has somewhat primitive logging, so we see the
+output:
+
+.. code-block::
+
+   Allocated 80 bytes at 7f004d400000
+   Freeing 80 bytes at 7f004d400000
+
+Numba CUDA Unit tests
+^^^^^^^^^^^^^^^^^^^^^
+
+As well as providing correct execution of a simple example, all relevant
+Numba CUDA unit tests also pass with the prototype branch, for both the
+internal memory manager and the RMM EMM Plugin.
+
+RMM
+~~~
+
+The unit test suite can be run with the RMM EMM Plugin with:
+
+.. code-block::
+
+   NUMBA_CUDA_MEMORY_MANAGER=rmm python -m numba.runtests numba.cuda.tests
+
+A summary of the unit test suite output is:
+
+.. code-block::
+
+   Ran 564 tests in 142.211s
+
+   OK (skipped=11)
+
+When running with the built-in Numba memory management, the output is:
+
+.. code-block::
+
+   Ran 564 tests in 133.396s
+
+   OK (skipped=5)
+
+i.e. the changes for using an external memory manager do not break the
+built-in Numba memory management. There are an additional 6 skipped tests,
+from:
+
+* ``TestDeallocation``: skipped as it specifically tests Numba's internal
+  deallocation strategy.
+* ``TestDeferCleanup``: skipped as it specifically tests Numba's
+  implementation of deferred cleanup.
+* ``TestCudaArrayInterface.test_ownership``: skipped as Numba does not own
+  memory when an EMM Plugin is used, but ownership is assumed by this test
+  case.
+
+CuPy
+~~~~
+
+The test suite can be run with the CuPy plugin using:
+
+.. code-block::
+
+   NUMBA_CUDA_MEMORY_MANAGER=nbep7.cupy_mempool python -m numba.runtests numba.cuda.tests
+
+This plugin implementation is presently more primitive than the RMM
+implementation, and results in some errors with the unit test suite:
+
+.. code-block::
+
+   Ran 564 tests in 111.699s
+
+   FAILED (errors=8, skipped=11)
+
+The 8 errors are due to a lack of implementation of ``get_ipc_handle`` in
+the CuPy EMM Plugin implementation. It is expected that this implementation
+will be re-visited and completed so that CuPy can be used stably as an
+allocator for Numba in the future.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/index.rst
new file mode 100644
index 000000000..33efe8d53
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/index.rst
@@ -0,0 +1,35 @@
+===========================
+Numba Enhancement Proposals
+===========================
+
+Numba Enhancement Proposals (not really abbreviated "NEPs", since "NEP"
+is already taken by the Numpy project) describe proposed changes to Numba.
+They are modeled on Python Enhancement Proposals (PEPs) and Numpy Enhancement
+Proposals, and are typically written up when important changes
+(behavioural changes, feature additions...) to Numba are proposed.
+
+This page provides an overview of all proposals, making only a distinction
+between the ones that have been implemented and those that have not been
+implemented.
+
+Implemented proposals
+---------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   integer-typing.rst
+   external-memory-management.rst
+
+Other proposals
+---------------
+
+.. toctree::
+   :maxdepth: 1
+
+   extension-points.rst
+   jit-classes.rst
+   cfunc.rst
+   type-inference.rst
+   typing_recursion.rst
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/integer-typing.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/integer-typing.rst
new file mode 100644
index 000000000..9093d7e65
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/integer-typing.rst
@@ -0,0 +1,186 @@
+.. _nbep-1:
+
+=================================
+NBEP 1: Changes in integer typing
+=================================
+
+:Author: Antoine Pitrou
+:Date: July 2015
+:Status: Final
+
+
+Current semantics
+=================
+
+Type inference of integers in Numba currently has some subtleties
+and some corner cases. The simple case is when some variable has an obvious
+Numba type (for example because it is the result of a constructor call to a
+Numpy scalar type such as ``np.int64``). That case suffers no ambiguity.
+
+The less simple case is when a variable doesn't bear such explicit
+information. This can happen because it is inferred from a built-in Python
+``int`` value, or from an arithmetic operation between two integers, or
+other cases yet. Then Numba has a number of rules to infer the resulting
+Numba type, especially its signedness and bitwidth.
+
+Currently, the generic case could be summarized as: *start small,
+grow bigger as required*. Concretely:
+
+1. Each constant or pseudo-constant is inferred using the *smallest signed
+   integer type* that can correctly represent it (or, possibly, ``uint64``
+   for positive integers between ``2**63`` and ``2**64 - 1``).
+2. The result of an operation is typed so as to ensure safe representation
+   in the face of overflow and other magnitude increases (for example,
+   ``int32 + int32`` would be typed ``int64``).
+3. As an exception, a Python ``int`` used as function argument is always
+   typed ``intp``, a pointer-size integer.
+   This is to avoid the proliferation of compiled specializations, as
+   otherwise various integer bitwidths in input arguments may produce
+   multiple signatures.
+
+.. note::
+   The second rule above (the "respect magnitude increases" rule)
+   reproduces Numpy's behaviour with arithmetic on scalar values.
+   Numba, however, has different implementation and performance constraints
+   than Numpy scalars.
+
+   It is worth noting, by the way, that Numpy arrays do not implement
+   said rule (i.e. ``array(int32) + array(int32)`` is typed
+   ``array(int32)``, not ``array(int64)``), probably because this makes
+   performance more controllable.
+
+This has several non-obvious side-effects:
+
+1. It is difficult to predict the precise type of a value inside a function,
+   after several operations. The basic operands in an expression tree
+   may for example be ``int8`` but the end result may be ``int64``. Whether
+   this is desirable or not is an open question; it is good for correctness,
+   but potentially bad for performance.
+
+2. In trying to follow the correctness-over-predictability rule, some values
+   can actually leave the integer realm. For example, ``int64 + uint64``
+   is typed ``float64`` in order to avoid magnitude losses (but incidentally
+   will lose precision on large integer values...), again following Numpy's
+   semantics for scalars. This is usually not intended by the user.
+
+3. More complicated scenarios can produce unexpected errors at the type
+   unification stage. An example is at `Github issue 1299
+   <https://github.com/numba/numba/issues/1299>`_, the gist of which is
+   reproduced here::
+
+      @jit(nopython=True)
+      def f():
+          variable = 0
+          for i in range(1):
+              variable = variable + 1
+          return np.arange(variable)
+
+   At the time of this writing, this fails compiling, on a 64-bit system,
+   with the error::
+
+      numba.errors.TypingError: Failed at nopython (nopython frontend)
+      Can't unify types of variable '$48.4': $48.4 := {array(int32, 1d, C), array(int64, 1d, C)}
+
+   People familiar with Numba's type unification system can understand why.
+   But the user is caught in mystery.
+
+
+Proposal: predictable width-conserving typing
+=============================================
+
+We propose to turn the current typing philosophy on its head. Instead
+of "*start small and grow as required*", we propose "*start big and keep
+the width unchanged*".
+
+Concretely:
+
+1. The typing of Python ``int`` values used as function arguments doesn't
+   change, as it works satisfyingly and doesn't surprise the user.
+
+2. The typing of integer *constants* (and pseudo-constants) changes to match
+   the typing of integer arguments. That is, every non-explicitly typed
+   integer constant is typed ``intp``, the pointer-sized integer; except for
+   the rare cases where ``int64`` (on 32-bit systems) or ``uint64`` is
+   required.
+
+3. Operations on integers promote bitwidth to ``intp``, if smaller,
+   otherwise they don't promote. For example, on a 32-bit machine,
+   ``int8 + int8`` is typed ``int32``, as is ``int32 + int32``. However,
+   ``int64 + int64`` is typed ``int64``.
+
+4. Furthermore, mixed operations between signed and unsigned fall back to
+   signed, while following the same bitwidth rule. For example, on a
+   32-bit machine, ``int8 + uint16`` is typed ``int32``, as is
+   ``uint32 + int32``.
+
+
+Proposal impact
+===============
+
+Semantics
+---------
+
+With this proposal, the semantics become clearer, as revisiting the earlier
+problematic example shows.
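+Under the proposed rules, every assignment to ``variable`` carries the same
+type: the constant ``0`` is typed ``intp``, and ``intp + intp`` stays
+``intp``, so no conflicting array types reach the unification stage::
+
+    @jit(nopython=True)
+    def f():
+        variable = 0                  # constant typed intp
+        for i in range(1):
+            variable = variable + 1   # intp + intp -> intp
+        return np.arange(variable)    # always array(intp, 1d, C)
+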
+Regardless of whether the arguments and constants of a function were
+explicitly typed or not, the results of various expressions at any point in
+the function have easily predictable types.
+
+When using built-in Python ``int``, the user gets acceptable magnitude
+(32 or 64 bits depending on the system's bitness), and the type remains
+the same across all computations.
+
+When explicitly using smaller bitwidths, intermediate results don't
+suffer from magnitude loss, since their bitwidth is promoted to ``intp``.
+
+There is also less potential for annoyances with the type unification
+system as demonstrated above. The user would have to force several
+different types to be faced with such an error.
+
+One potential cause for concern is the discrepancy with Numpy's scalar
+semantics; but at the same time this brings Numba scalar semantics closer
+to array semantics (both Numba's and Numpy's), which seems a desirable
+outcome as well.
+
+It is worth pointing out that some sources of integer numbers, such
+as the ``range()`` built-in, always yield 32-bit integers or larger.
+This proposal could be an opportunity to standardize them on ``intp``.
+
+Performance
+-----------
+
+Except in trivial cases, it seems unlikely that the current "best fit"
+behaviour for integer constants really brings a performance benefit. After
+all, most integers in Numba code would either be stored in arrays (with
+well-known types, chosen by the user) or be used as indices, where an
+``int8`` is highly unlikely to fare better than an ``intp`` (actually, it
+may be worse, if LLVM isn't able to optimize away the required
+sign-extension).
+
+As a side note, the default use of ``intp`` rather than ``int64``
+ensures that 32-bit systems won't suffer from poor arithmetic performance.
+
+Implementation
+--------------
+
+Optimistically, this proposal may simplify some Numba internals a bit.
+Or, at least, it doesn't threaten to make them significantly more
+complicated.
+
+Limitations
+-----------
+
+This proposal doesn't really solve the combination of signed and unsigned
+integers. It is geared mostly at solving the bitwidth issues, which are
+a somewhat common cause of pain for users. Unsigned integers are in
+practice very uncommon in Numba-compiled code, except when explicitly
+asked for, and therefore much less of a pain point.
+
+On the bitwidth front, 32-bit systems could still show discrepancies based
+on the values of constants: if a constant is too large to fit in 32 bits,
+it is typed ``int64``, which propagates through other computations.
+This would be reminiscent of the current behaviour, but rarer and much
+more controlled.
+
+Long-term horizon
+-----------------
+
+While we believe this proposal makes Numba's behaviour more regular and more
+predictable, it also pulls it further from general compatibility with pure
+Python semantics, where users can assume arbitrary-precision integers
+without any truncation issues.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/jit-classes.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/jit-classes.rst
new file mode 100644
index 000000000..2cd33d42c
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/jit-classes.rst
@@ -0,0 +1,231 @@
+===================
+NBEP 3: JIT Classes
+===================
+
+:Author: Siu Kwan Lam
+:Date: Dec 2015
+:Status: Draft
+
+Introduction
+============
+
+Numba does not yet support user-defined classes.
+Classes provide useful abstraction and promote modularity when used
+right. In the simplest sense, a class specifies the set of data and
+operations as attributes and methods, respectively.
+A class instance is an instantiation of that class.
+This proposal will focus on supporting this simple use case of
+classes--with just attributes and methods. Other features, such as class
+methods, static methods, and inheritance, are deferred to another proposal,
+but we believe these features can be easily implemented given the foundation
+described here.
+
+
+Proposal: jit-classes
+=====================
+
+A jit-class is more restricted than a Python class.
+We will focus on the following operations on a class and its instance:
+
+* Instantiation: create an instance of a class using the class object as the
+  constructor: ``cls(*args, **kwargs)``
+* Destruction: remove resources allocated during instantiation and release
+  all references to other objects.
+* Attribute access: loading and storing attributes using ``instance.attr``
+  syntax.
+* Method access: loading methods using ``instance.method`` syntax.
+
+With these operations, a class object (not the instance) does not need to be
+materialized. Using the class object as a constructor is fully resolved (a
+runtime implementation is picked) during the typing phase in the compiler.
+This means **a class object will not be first class**. On the other hand,
+implementing a first-class class object will require an "interface" type,
+or the type of class.
+
+The instantiation of a class will allocate resources for storing the data
+attributes. This is described in the "Storage model" section. Methods are
+never stored in the instance. They are information attached to the class.
+Since a class object only exists in the type domain, the methods will also
+be fully resolved at the typing phase. Again, Numba does not have
+first-class function values, and each function type maps uniquely to each
+function implementation (this needs to be changed to support function values
+as arguments).
+
+A class instance can contain other NRT reference-counted objects as
+attributes. To properly clean up an instance, a destructor is called when
+the reference count of the instance drops to zero. This is described in the
+"Reference count and destructor" section.
+
+Storage model
+~~~~~~~~~~~~~
+
+For compatibility with C, attributes are stored in a simple plain-old-data
+structure. Each attribute is stored in a user-defined order in a padded
+(for proper alignment), contiguous memory region. An instance that contains
+three fields of int32, float32, complex64 will be compatible with the
+following C structure::
+
+      struct {
+          int32 field0;
+          float32 field1;
+          complex64 field2;
+      };
+
+This will also be compatible with an aligned NumPy structured dtype.
+
+
+Methods
+~~~~~~~
+
+Methods are regular functions that can be bound to an instance.
+They can be compiled as regular functions by Numba.
+The operation ``getattr(instance, name)`` (getting an attribute ``name``
+from ``instance``) binds the instance to the requested method at runtime.
+
+
+The special ``__init__`` method is also handled like a regular function.
+
+
+``__del__`` is not supported at this time.
+
+
+Reference count and destructor
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An instance of a jit-class is reference-counted by NRT. Since it may contain
+other NRT-tracked objects, it must call a destructor when its reference
+count drops to zero.
+The destructor will decrement the reference count of all attributes by one.
+
+At this time, there is no support for a user-defined ``__del__`` method.
+
+Proper cleanup for cyclic references is not handled at this time.
+Cycles will cause memory leaks.
+
+Type inference
+~~~~~~~~~~~~~~
+
+So far we have not described the types of the attributes or the methods.
+Type information is necessary to materialize the instance (e.g. allocate the
+storage). The simplest way is to let the user provide the type of each
+attribute as well as the ordering; for instance::
+
+      dct = OrderedDict()
+      dct['x'] = int32
+      dct['y'] = float32
+
+Allowing the user to supply an ordered dictionary will provide the name,
+ordering and types of the attributes. However, this statically typed
+semantic is not as flexible as the Python semantic, which behaves like a
+generic class.
+
+Inferring the types of attributes is difficult. In a previous attempt to
+implement JIT classes, the ``__init__`` method was specialized to capture
+the types stored into the attributes. Since the method can contain arbitrary
+logic, the problem can become a dependent typing problem if types are
+assigned conditionally depending on the value. (Very few languages implement
+dependent typing, and those that do are mostly theorem provers.)
+
+Example: typing function using an OrderedDict
+---------------------------------------------
+
+.. code-block:: python
+
+   spec = OrderedDict()
+   spec['x'] = numba.int32
+   spec['y'] = numba.float32
+
+   @jitclass(spec)
+   class Vec(object):
+       def __init__(self, x, y):
+           self.x = x
+           self.y = y
+
+       def add(self, dx, dy):
+           self.x += dx
+           self.y += dy
+
+Example: typing function using a list of 2-tuples
+-------------------------------------------------
+
+.. code-block:: python
+
+   spec = [('x', numba.int32),
+           ('y', numba.float32)]
+
+   @jitclass(spec)
+   class Vec(object):
+       ...
+
+Creating multiple jitclasses from a single class object
+-------------------------------------------------------
+
+The `jitclass(spec)` decorator creates a new jitclass type even when applied
+to the same class object and the same type specification.
+
+.. code-block:: python
+
+   class Vec(object):
+       ...
+
+   Vec1 = jitclass(spec)(Vec)
+   Vec2 = jitclass(spec)(Vec)
+   # Vec1 and Vec2 are two different jitclass types
+
+Usage from the Interpreter
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When constructing a new instance of a jitclass, a "box" is created that
+wraps the underlying jitclass instance from Numba. Attributes and methods
+are accessible from the interpreter. The actual implementation will be in
+Numba-compiled code. Any Python object is converted to its native
+representation for consumption in Numba. Similarly, the returned value is
+converted to its Python representation. As a result, there may be overhead
+in manipulating jitclass instances in the interpreter. This overhead is
+minimal and should be easily amortized by more efficient computation in the
+compiled methods.
+
+Support for property, staticmethod and classmethod
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The use of ``property`` is accepted for getters and setters only. Deleters
+are not supported.
+
+The use of ``staticmethod`` is not supported.
+
+The use of ``classmethod`` is not supported.
+
+Inheritance
+~~~~~~~~~~~
+
+Class inheritance is not considered in this proposal. The only accepted base
+class for a jitclass is `object`.
+
+Supported targets
+~~~~~~~~~~~~~~~~~
+
+Only the CPU target (including the parallel target) is supported.
+GPU (e.g. CUDA and HSA) targets are supported via an immutable version of
+the jitclass instance, which will be described in a separate NBEP.
+
+
+Other properties
+~~~~~~~~~~~~~~~~
+
+Given:
+
+.. code-block:: python
+
+   spec = [('x', numba.int32),
+           ('y', numba.float32)]
+
+   @jitclass(spec)
+   class Vec(object):
+       ...
+
+* ``isinstance(Vec(1, 2), Vec)`` is True.
+* ``type(Vec(1, 2))`` may not be ``Vec``.
+
+Future enhancements
+~~~~~~~~~~~~~~~~~~~
+
+This proposal has only described the basic semantics and functionality of a
+jitclass. Additional features will be described in future enhancement
+proposals.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/np-where-override.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/np-where-override.py
new file mode 100644
index 000000000..109ba6a43
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/np-where-override.py
@@ -0,0 +1,51 @@
+import numpy as np
+
+from numba.core import types
+from numba.extending import overload
+
+@overload(np.where)
+def where(cond, x, y):
+    """
+    Implement np.where().
+    """
+    # Choose implementation based on argument types.
+    if isinstance(cond, types.Array):
+        # Array where() => return an array of the same shape
+        if all(ty.layout == 'C' for ty in (cond, x, y)):
+            def where_impl(cond, x, y):
+                """
+                Fast implementation for C-contiguous arrays
+                """
+                shape = cond.shape
+                if x.shape != shape or y.shape != shape:
+                    raise ValueError("all inputs should have the same shape")
+                res = np.empty_like(x)
+                cf = cond.flat
+                xf = x.flat
+                yf = y.flat
+                rf = res.flat
+                for i in range(cond.size):
+                    rf[i] = xf[i] if cf[i] else yf[i]
+                return res
+        else:
+            def where_impl(cond, x, y):
+                """
+                Generic implementation for other arrays
+                """
+                shape = cond.shape
+                if x.shape != shape or y.shape != shape:
+                    raise ValueError("all inputs should have the same shape")
+                res = np.empty_like(x)
+                for idx, c in np.ndenumerate(cond):
+                    res[idx] = x[idx] if c else y[idx]
+                return res
+
+    else:
+        def where_impl(cond, x, y):
+            """
+            Scalar where() => return a 0-dim array
+            """
+            scal = x if cond else y
+            return np.full_like(scal, scal)
+
+    return where_impl
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/recursion_callstack.svg b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/recursion_callstack.svg
new file mode 100644
index 000000000..7e23f30d5
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/recursion_callstack.svg
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/type-inference.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/type-inference.rst
new file mode 100644
index 000000000..24f3b262b
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/type-inference.rst
@@ -0,0 +1,124 @@
+======================
+NBEP 5: Type Inference
+======================
+
+:Author: Siu Kwan Lam
+:Date: Sept 2016
+:Status: Draft
+
+
+This document describes the current type inference implementation in numba.
+
+
+Introduction
+============
+
+Numba uses type information to ensure that every variable in the user code
+can be correctly lowered (translated into a low-level representation). The
+type of a variable describes the set of valid operations and available
+attributes. Resolving this information during compilation avoids the
+overhead of type checking and dispatching at runtime.
+However, Python is dynamically typed and the user does not declare variable
+types. Since type information is absent, we use type inference to
+reconstruct the missing information.
+
+
+Numba Type Semantics
+====================
+
+Type inference operates on :term:`Numba IR`, a mostly
+static-single-assignment (SSA) encoding of the Python bytecode.
+Conceptually, all intermediate values in the Python code are explicitly
+assigned to a variable in the IR. Numba enforces that each IR variable has
+exactly one type. A user variable (from the Python source code) can be
+mapped to multiple variables in the IR. They are *versions* of a variable.
+Each time a user variable is assigned to, a new version is created. From
+that point, all subsequent references will use the new version. The user
+variable *evolves* as the function logic updates its type. Merge points
+(e.g. the block following an if-else, the loop body, etc.) in the control
+flow need extra care. At each merge point, a new version is implicitly
+created to merge the different variable versions from the incoming paths.
+The merging of the variable versions may translate into an implicit cast.
+
+Numba uses function overloading to emulate Python duck-typing. The type of a
+function can contain multiple call signatures that accept different argument
+types and yield different return types. The process of deciding the best
+signature for an overloaded function is called *overload resolution*.
+Numba partially implements the C++ overload resolution scheme
+(`ISOCPP`_ 13.3 Overload Resolution). The scheme uses a "best fit" algorithm
+by ranking each argument symmetrically. The five possible rankings in
+increasing order of penalty are:
+
+* *Exact*: the expected type is the same as the actual type.
+* *Promotion*: the actual type can be upcast to the expected type by
+  extending the precision without changing the behavior.
+* *Safe conversion*: the actual type can be cast to the expected type by
+  changing the type without losing information.
+* *Unsafe conversion*: the actual type can be cast to the expected type by
+  changing the type or downcasting the type even if it is imprecise.
+* *No match*: no valid operation can convert the actual type to the expected
+  type.
+
+It is possible to have an ambiguous resolution. For example, a function with
+signatures ``(int16, int32)`` and ``(int32, int16)`` can become ambiguous if
+presented with the argument types ``(int32, int32)``, because demoting
+either argument to ``int16`` is equally "fit". Fortunately, numba can
+usually resolve such ambiguity by compiling a new version with the exact
+signature ``(int32, int32)``. When compilation is disabled and there are
+multiple signatures with equal fit, an exception is raised.
+
+Type Inference
+==============
+
+The type inference in numba has three important components---type
+variables, the constraint network, and the typing context.
+
+* The *typing context* provides all the type information and typing-related
+  operations, including the logic for type unification and the logic for
+  typing of global and constant values. It defines the semantics of the
+  language that can be compiled by numba.
+
+* A *type variable* holds the type of each variable (in the Numba IR).
+  Conceptually, it is initialized to the universal type and, as it is
+  re-assigned, it stores a common type by unifying the new type with the
+  existing type. The common type must be able to represent values of the
+  new type and the existing type.
+  Type conversion is applied as necessary, and precision loss is accepted
+  for usability reasons.
+
+* The *constraint network* is a dependency graph built from the IR. Each
+  node represents an operation in the Numba IR and updates at least one type
+  variable. There may be cycles due to loops in user code.
+
+The type inference process starts by seeding the argument types. These
+initial types are propagated in the constraint network, which eventually
+fills all the type variables. Due to cycles in the network, the process
+repeats until all type variables converge or it fails with undecidable
+types.
+
+Type unification always returns a more "general" (quoted because unsafe
+conversion is allowed) type. Types will converge to the least "general" type
+that can represent all possible values that the variable can hold. Since
+unification will never move down the type hierarchy and there is a single
+top type, the universal type---``object``, the type inference is guaranteed
+to converge.
+
+A failure in type inference can be caused by two reasons. The first reason
+is user error due to incorrect use of a type. This type of error will also
+trigger an exception in regular Python execution. The second reason is the
+use of an unsupported feature, where the code is otherwise valid in regular
+Python execution. Upon an error, the type inference will set all types to
+the object type. As a result, numba will fall back to *object mode*.
+
+Since functions can be overloaded, the type inference needs to decide the
+type signature used at each call site. The overload resolution is applied to
+all known overload versions of the callee function, described in
+*call-templates*. A call-template can either be concrete or abstract. A
+concrete call-template defines a fixed list of all possible signatures. An
+abstract call-template defines the logic to compute the accepted signature,
+and is used to implement generic functions.
+
+Numba-compiled functions are generic functions due to their ability to
+compile new versions. When one sees a new set of argument types, it triggers
+type inference to validate and determine the return type. When there are
+nested calls of numba-compiled functions, each call site triggers type
+inference. This poses a problem for recursive functions, because type
+inference will also be triggered recursively. Currently, simple single
+recursion is supported if the signature is annotated by the user, which
+avoids unbounded recursion in type inference that would never terminate.
+
+.. _ISOCPP: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4296.pdf
\ No newline at end of file
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/typing_recursion.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/typing_recursion.rst
new file mode 100644
index 000000000..a33a3a4b5
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/proposals/typing_recursion.rst
@@ -0,0 +1,129 @@
+========================
+NBEP 6: Typing Recursion
+========================
+
+:Author: Siu Kwan Lam
+:Date: Sept 2016
+:Status: Draft
+
+Introduction
+============
+
+This document proposes an enhancement to the type inference algorithm to
+support recursion without explicitly annotating the function signature.
+As a result, the proposal enables numba to type-infer both self-recursive
+and mutually recursive functions under some limitations. In practice, these
+limitations can be easily overcome by specifying a compilation order.
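+
+For reference, the kind of explicit annotation that this proposal removes
+the need for looks like the following sketch (a self-recursive function
+compiled with a user-supplied signature)::
+
+    from numba import njit
+
+    @njit("int64(int64)")      # explicit signature permits self-recursion
+    def fact(n):
+        if n <= 1:
+            return 1
+        return n * fact(n - 1)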
+
+
+The Current State
+=================
+
+Recursion support in numba is currently limited to self-recursion with an
+explicit type annotation for the function. This limitation comes from the
+inability to determine the return type of a recursive call. This is because
+the callee is either the current function (for self-recursion) or a parent
+function (for mutual recursion), and its type inference process has been
+suspended while waiting for the function-type of its callee. This results in
+the formation of a cyclic dependency. For example, given a function
+``foo()`` that calls ``bar()``, which in turn calls ``foo()``::
+
+    def foo(x):
+        if x > 0:
+            return bar(x)
+        else:
+            return 1
+
+    def bar(x):
+        return foo(x - 1)
+
+
+The type inference process of ``foo()`` depends on that of ``bar()``,
+which depends on ``foo()``. Therefore ``foo()`` depends on itself and the
+type inference algorithm cannot terminate.
+
+
+The Solution
+============
+
+The proposed solution has two components:
+
+1. The introduction of a compile-time *callstack* that tracks the functions
+   being compiled.
+2. The allowance of a partial type inference on functions by leveraging the
+   return type on non-recursive control-flow paths.
+
+The compile-time callstack stores typing information of the functions being
+compiled. Like an ordinary callstack, it pushes a new record every time a
+function is "called". Since this occurs at compile-time, a "call" triggers
+a compilation of the callee.
+
+To detect recursion, the compile-time callstack is searched bottom-up
+(the stack grows downward) for a record that matches the callee.
+As the record contains a reference to the type inference state,
+the type inference process can be resumed to determine the return type.
+
+Recall that the type inference process cannot be resumed normally because of
+the cyclic dependency of the return type. In practice, we can assume that a
+useful program must have a terminating condition, a path that does not
+recurse. So, the type inference process can make an initial guess for the
+return type at the recursive call by using the return type determined by
+the non-recursive paths. This allows type information to propagate on the
+recursive paths to generate the final return type, which is used to refine
+the type information in the subsequent iteration of the type inference
+process.
+
+The following figure illustrates the compile-time callstack when the
+compiler reaches the recursive call to ``foo()`` from ``bar()``:
+
+.. image:: recursion_callstack.svg
+   :width: 400px
+
+At this time, the type inference process of ``foo()`` is suspended and that
+of ``bar()`` is active. The compiler can see that the callee is already
+being compiled by searching the callstack. Knowing that it is a recursive
+call, the compiler can resume the type-inference on ``foo()`` by ignoring
+the paths that contain recursive calls. This means only the ``else`` branch
+is considered, and we can easily tell that ``foo()`` returns an ``int`` in
+this case. The compiler will then set the initial return type of ``foo()``
+and ``bar()`` to ``int``. The subsequent type propagation can use this
+information to complete the type inference of both functions, unifying the
+return types of all returning paths.
+
+
+Limitations
+===========
+
+For the proposed type inference algorithm to terminate, it assumes that
+at least one of the control paths leads to a return statement without
+undertaking a recursive call.
Should this not be the case, the algorithm will raise an
+exception indicating a potential runaway recursion.
+
+For example::
+
+    @jit
+    def first(x):
+        # The recursing call must have a path that is non-recursing.
+        if x > 0:
+            return second(x)
+        else:
+            return 1
+
+    @jit
+    def second(x):
+        return third(x)
+
+    @jit
+    def third(x):
+        return first(x - 1)
+
+
+The ``first()`` function must be compiled first for the type inference
+algorithm to complete successfully. Compiling any other function first will
+lead to a failure in type inference. The type inference algorithm will treat it
+as a runaway recursion due to the lack of a non-recursive exit in the recursive
+callee.
+
+For example, compiling ``second()`` first will move the recursive call to
+``first()``. When the compiler tries to resume the type inference process of
+``second()``, it will fail to find a non-recursive path.
+
+This is a small limitation and can be overcome easily by restructuring the code
+or by precompiling the functions in a specific order.
+ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/aot-compilation.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/aot-compilation.rst new file mode 100644 index 000000000..a6d56ee34 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/aot-compilation.rst @@ -0,0 +1,76 @@ +.. _aot-compilation:
+
+Ahead-of-Time compilation
+=========================
+
+.. currentmodule:: numba.pycc
+
+.. class:: CC(extension_name, source_module=None)
+
+   An object used to generate compiled extensions from Numba-compiled
+   Python functions. *extension_name* is the name of the extension
+   to be generated. *source_module* is the Python module
+   containing the functions; if ``None``, it is inferred by examining
+   the call stack.
+
+   :class:`CC` instances have the following attributes and methods:
+
+   .. attribute:: name
+
+      (read-only attribute) The name of the extension module to be generated.
+
+   .. attribute:: output_dir
+
+      (read-write attribute) The directory the extension module will be
+      written into. By default it is the directory the *source_module* is
+      located in.
+
+   .. attribute:: output_file
+
+      (read-write attribute) The name of the file the extension module will
+      be written to. By default this follows the Python naming convention
+      for the current platform.
+
+   .. attribute:: target_cpu
+
+      (read-write attribute) The name of the CPU model to generate code for.
+      This will select the appropriate instruction set extensions. By
+      default, a generic CPU is selected in order to produce portable code.
+
+      Recognized names for this attribute depend on the current architecture
+      and LLVM version. If you have LLVM installed, ``llc -mcpu=help``
+      will give you a list. Examples on x86-64 are ``"ivybridge"``,
+      ``"haswell"``, ``"skylake"`` or ``"broadwell"``. You can also give
+      the value ``"host"`` which will select the current host CPU.
+
+   .. attribute:: verbose
+
+      (read-write attribute) If true, print out information while
+      compiling the extension. False by default.
+
+   .. decorator:: export(exported_name, sig)
+
+      Mark the decorated function for compilation with the signature *sig*.
+      The compiled function will be exposed as *exported_name* in the
+      generated extension module.
+
+      All exported names within a given :class:`CC` instance must be
+      distinct, otherwise an exception is raised.
+
+   ..
method:: compile() + + Compile all exported functions and generate the extension module + as specified by :attr:`output_dir` and :attr:`output_file`. + + .. method:: distutils_extension(**kwargs) + + Return a :py:class:`distutils.core.Extension` instance allowing + to integrate generation of the extension module in a conventional + ``setup.py``-driven build process. The optional *kwargs* let you + pass optional parameters to the :py:class:`~distutils.core.Extension` + constructor. + + In this mode of operation, it is not necessary to call :meth:`compile` + yourself. Also, :attr:`output_dir` and :attr:`output_file` will be + ignored. + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/deprecation.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/deprecation.rst new file mode 100644 index 000000000..4271f0e70 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/deprecation.rst @@ -0,0 +1,312 @@ +.. _deprecation: + +=================== +Deprecation Notices +=================== + +This section contains information about deprecation of behaviours, features and +APIs that have become undesirable/obsolete. Any information about the schedule +for their deprecation and reasoning behind the changes, along with examples, is +provided. However, first is a small section on how to suppress deprecation +warnings that may be raised from Numba so as to prevent warnings propagating +into code that is consuming Numba. + +Suppressing Deprecation warnings +================================ +All Numba deprecations are issued via ``NumbaDeprecationWarning`` or +``NumbaPendingDeprecationWarning`` s, to suppress the reporting of +these the following code snippet can be used:: + + from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning + import warnings + + warnings.simplefilter('ignore', category=NumbaDeprecationWarning) + warnings.simplefilter('ignore', category=NumbaPendingDeprecationWarning) + +The ``action`` used above is ``'ignore'``, other actions are available, see +`The Warnings Filter `_ +documentation for more information. + +.. note:: It is **strongly recommended** that applications and libraries which + choose to suppress these warnings should pin their Numba dependency + to a suitable version because their users will no longer be aware of + the coming incompatibility. + +Deprecation of reflection for List and Set types +================================================ +Reflection (:term:`reflection`) is the jargon used in Numba to describe the +process of ensuring that changes made by compiled code to arguments that are +mutable Python container data types are visible in the Python interpreter when +the compiled function returns. Numba has for some time supported reflection of +``list`` and ``set`` data types and it is support for this reflection that +is scheduled for deprecation with view to replace with a better implementation. + +Reason for deprecation +---------------------- +First recall that for Numba to be able to compile a function in ``nopython`` +mode all the variables must have a concrete type ascertained through type +inference. In simple cases, it is clear how to reflect changes to containers +inside ``nopython`` mode back to the original Python containers. However, +reflecting changes to complex data structures with nested container types (for +example, lists of lists of integers) quickly becomes impossible to do +efficiently and consistently. 
After a number of years of experience with this
+problem, it is clear that providing this behaviour is both fraught with
+difficulty and often leads to code which does not have good performance (all
+reflected data has to go through special APIs to convert the data to native
+formats at call time and then back to CPython formats at return time). As a
+result of this, the sheer number of reported problems in the issue tracker, and
+how well a new approach that was taken with ``typed.Dict`` (typed dictionaries)
+has gone, the core developers have decided to deprecate the noted ``reflection``
+behaviour.
+
+
+Example(s) of the impact
+------------------------
+
+At present only a warning of the upcoming change is issued. In future, code
+such as::
+
+    from numba import njit
+
+    @njit
+    def foo(x):
+        x.append(10)
+
+    a = [1, 2, 3]
+    foo(a)
+
+will require adjustment to use a ``typed.List`` instance; this typed container
+is analogous to the :ref:`feature-typed-dict`. An example of translating the
+above is::
+
+    from numba import njit
+    from numba.typed import List
+
+    @njit
+    def foo(x):
+        x.append(10)
+
+    a = [1, 2, 3]
+    typed_a = List()
+    [typed_a.append(x) for x in a]
+    foo(typed_a)
+
+For more information about ``typed.List`` see :ref:`feature-typed-list`. Further
+usability enhancements for this feature were made in the 0.47.0 release
+cycle.
+
+Schedule
+--------
+This feature will be removed with respect to this schedule:
+
+* Pending-deprecation warnings will be issued in version 0.44.0
+* Prominent notice will be given for a minimum of two releases prior to full
+  removal.
+
+Recommendations
+---------------
+Projects that need/rely on the deprecated behaviour should pin their dependency
+on Numba to a version prior to removal of this behaviour, or consider following
+replacement instructions that will be issued outlining how to adjust to the
+change.
+
+Expected Replacement
+--------------------
+As noted above, ``typed.List`` will be used to permit similar functionality to
+reflection in the case of ``list`` s; a ``typed.Set`` will provide the
+equivalent for ``set`` (not implemented yet!). The advantages of this approach
+are:
+
+* Because the containers are typed, type inference has to work less hard.
+* Nested containers (containers of containers of ...) are more easily
+  supported.
+* Performance penalties currently incurred translating data to/from native
+  formats are largely avoided.
+* Numba's ``typed.Dict`` will be able to use these containers as values.
+
+
+Deprecation of :term:`object mode` `fall-back` behaviour when using ``@jit``
+============================================================================
+The ``numba.jit`` decorator has for a long time followed the behaviour of first
+attempting to compile the decorated function in :term:`nopython mode` and,
+should this compilation fail, `falling back` and trying again to compile, this
+time in :term:`object mode`. It is this `fall-back` behaviour which is being
+deprecated, the result of which will be that ``numba.jit`` will by default
+compile in :term:`nopython mode` and :term:`object mode` compilation will
+become `opt-in` only.
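+
+Under the new behaviour the mode has to be chosen explicitly. A minimal sketch
+(illustrative only) of opting in to each mode::
+
+    from numba import jit
+
+    # nopython mode, explicitly requested (equivalent to @njit)
+    @jit(nopython=True)
+    def add(a, b):
+        return a + b
+
+    # object mode, explicitly requested instead of relying on fall-back
+    @jit(forceobj=True)
+    def greet(name):
+        return "hello, " + name
+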
+
+
+Reason for deprecation
+----------------------
+The `fall-back` has repeatedly caused confusion for users, as seemingly
+innocuous changes in user code can lead to drastic performance changes: code
+which may have once compiled in :term:`nopython mode` may silently switch to
+compiling in :term:`object mode`, e.g.::
+
+    from numba import jit
+
+    @jit
+    def foo():
+        l = []
+        for x in range(10):
+            l.append(x)
+        return l
+
+    foo()
+
+    assert foo.nopython_signatures # this was compiled in nopython mode
+
+    @jit
+    def bar():
+        l = []
+        for x in range(10):
+            l.append(x)
+        return reversed(l) # innocuous change, but no reversed support in nopython mode
+
+    bar()
+
+    assert not bar.nopython_signatures # this was not compiled in nopython mode
+
+Another reason to remove the `fall-back` is that it is confusing for the
+compiler engineers developing Numba: it causes internal state problems that
+are really hard to debug, and it makes manipulating the compiler pipelines
+incredibly challenging.
+
+Further, it has long been considered best practice that the ``nopython``
+keyword argument to the ``numba.jit`` decorator is set to
+``True`` and that any user effort spent should go into making code work in this
+mode, as there's very little gain if it does not. The result is that, as Numba
+has evolved, the amount of use :term:`object mode` gets in practice, and its
+general utility, have decreased. It can be noted that there are some minor
+improvements available through the notion of :term:`loop-lifting`; the cases of
+this being used in practice are, however, rare, and are often a legacy of older
+versions of Numba in which such behaviour was better accommodated and the use
+of ``@jit`` with `fall-back` was recommended.
+
+
+Example(s) of the impact
+------------------------
+At present a warning of the upcoming change is issued if ``@jit`` decorated code
+uses the `fall-back` compilation path. In future, code such as::
+
+    @jit
+    def bar():
+        l = []
+        for x in range(10):
+            l.append(x)
+        return reversed(l)
+
+    bar()
+
+will simply not compile; a ``TypingError`` will be raised.
+
+Schedule
+--------
+This feature will be removed with respect to this schedule:
+
+* Deprecation warnings will be issued in version 0.44.0
+* Prominent notice will be given for a minimum of two releases prior to full
+  removal.
+
+Recommendations
+---------------
+Projects that need/rely on the deprecated behaviour should pin their dependency
+on Numba to a version prior to removal of this behaviour. Alternatively, to
+accommodate the scheduled deprecations, users with code compiled at present with
+``@jit`` can supply the ``nopython=True`` keyword argument; if the code
+continues to compile, then it is already ready for this change. If the code
+does not compile, continue using the ``@jit`` decorator without
+``nopython=True`` and profile the performance of the function. Then remove the
+decorator and again check the performance of the function. If there is no
+benefit to having the ``@jit`` decorator present, consider removing it! If there
+is benefit to having the ``@jit`` decorator present, then to be future proof
+supply the keyword argument ``forceobj=True`` to ensure the function is always
+compiled in :term:`object mode`.
+
+
+..
_deprecation-strict-strides: + + +Deprecation of eager compilation of CUDA device functions +========================================================= + +In future versions of Numba, the ``device`` kwarg to the ``@cuda.jit`` decorator +will be obviated, and whether a device function or global kernel is compiled will +be inferred from the context. With respect to kernel / device functions and lazy +/ eager compilation, four cases were handled: + +1. ``device=True``, eager compilation with a signature provided +2. ``device=False``, eager compilation with a signature provided +3. ``device=True``, lazy compilation with no signature +4. ``device=False``, lazy compilation with no signature + +The latter two cases can be differentiated without the ``device`` kwarg, because +it can be inferred from the calling context - if the call is from the host, then +a global kernel should be compiled, and if the call is from a kernel or another +device function, then a device function should be compiled. + +The first two cases cannot be differentiated in the absence of the ``device`` +kwarg - without it, it will not be clear from a signature alone whether a device +function or global kernel should be compiled. In order to resolve this, device +functions will no longer be eagerly compiled. When a signature is provided to a +device function, it will only be used to enforce the types of arguments that +the function accepts. + +.. note:: + + In previous releases this notice stated that support for providing + signatures to device functions would be removed completely - however, this + precludes the common use case of enforcing the types that can be passed to a + device function (and the automatic insertion of casts that it implies) so + this notice has been updated to retain support for passing signatures. + + +Schedule +-------- + +- In Numba 0.54: Eager compilation of device functions will be deprecated. +- In Numba 0.55: Eager compilation of device functions will be unsupported and + the provision of signatures for device functions will only enforce casting. + + +Deprecation and removal of ``numba.core.base.BaseContext.add_user_function()`` +============================================================================== + +``add_user_function()`` offered the same functionality as +``insert_user_function()``, only with a check that the function has already +been inserted at least once. It is now removed as it was no longer used +internally and it was expected that it was not used externally. + +Recommendations +--------------- + +Replace any uses of ``add_user_function()`` with ``insert_user_function()``. + +Schedule +-------- + +- In Numba 0.55: ``add_user_function()`` was deprecated. +- In Numba 0.56: ``add_user_function()`` was removed. + + +Deprecation and removal of CUDA Toolkits < 10.2 and devices with CC < 5.3 +========================================================================= + +- Support for CUDA toolkits less than 10.2 was deprecated and removed. +- Support for devices with Compute Capability < 5.3 is deprecated and will be + removed in the future. + + +Recommendations +--------------- + +- For devices of Compute Capability 3.0 and 3.2, Numba 0.55.1 or earlier will + be required. +- CUDA toolkit 10.2 or later (ideally 11.2 or later) should be installed. + +Schedule +-------- + +- In Numba 0.55.1: support for CC < 5.3 and CUDA toolkits < 10.2 was deprecated. +- In Numba 0.56: support for CC < 3.5 and CUDA toolkits < 10.2 was removed. +- In Numba 0.57: support for CC < 5.3 will be removed. 
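+
+To check whether a device in the current system falls under this schedule, a
+quick sketch (illustrative only; assumes a working CUDA installation)::
+
+    from numba import cuda
+
+    # compute_capability is a (major, minor) tuple for the current device.
+    cc = cuda.get_current_device().compute_capability
+    if cc < (5, 3):
+        print("Support for this device is deprecated or removed:", cc)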
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/envvars.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/envvars.rst new file mode 100644 index 000000000..ec28ed816 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/envvars.rst @@ -0,0 +1,589 @@ +.. _numba-envvars: + +Environment variables +===================== + +.. note:: This section relates to environment variables that impact Numba's + runtime, for compile time environment variables see + :ref:`numba-source-install-env_vars`. + +Numba allows its behaviour to be changed through the use of environment +variables. Unless otherwise mentioned, those variables have integer values and +default to zero. + +For convenience, Numba also supports the use of a configuration file to persist +configuration settings. Note: To use this feature ``pyyaml`` must be installed. + +The configuration file must be named ``.numba_config.yaml`` and be present in +the directory from which the Python interpreter is invoked. The configuration +file, if present, is read for configuration settings before the environment +variables are searched. This means that the environment variable settings will +override the settings obtained from a configuration file (the configuration file +is for setting permanent preferences whereas the environment variables are for +ephemeral preferences). + +The format of the configuration file is a dictionary in ``YAML`` format that +maps the environment variables below (without the ``NUMBA_`` prefix) to a +desired value. For example, to permanently switch on developer mode +(``NUMBA_DEVELOPER_MODE`` environment variable) and control flow graph printing +(``NUMBA_DUMP_CFG`` environment variable), create a configuration file with the +contents:: + + developer_mode: 1 + dump_cfg: 1 + +This can be especially useful in the case of wanting to use a set color scheme +based on terminal background color. For example, if the terminal background +color is black, the ``dark_bg`` color scheme would be well suited and can be set +for permanent use by adding:: + + color_scheme: dark_bg + +Jit flags +--------- + +These variables globally override flags to the :func:`~numba.jit` decorator. + +.. envvar:: NUMBA_BOUNDSCHECK + + If set to 0 or 1, globally disable or enable bounds checking, respectively. + The default if the variable is not set or set to an empty string is to use + the ``boundscheck`` flag passed to the :func:`~numba.jit` decorator for a + given function. See the documentation of :ref:`@jit + ` for more information. + + Note, due to limitations in numba, the bounds checking currently produces + exception messages that do not match those from NumPy. If you set + ``NUMBA_FULL_TRACEBACKS=1``, the full exception message with the axis, + index, and shape information will be printed to the terminal. + +Debugging +--------- + +These variables influence what is printed out during compilation of +:term:`JIT functions `. + +.. envvar:: NUMBA_DEVELOPER_MODE + + If set to non-zero, developer mode produces full tracebacks and disables + help instructions. Default is zero. + +.. envvar:: NUMBA_FULL_TRACEBACKS + + If set to non-zero, enable full tracebacks when an exception occurs. + Defaults to the value set by `NUMBA_DEVELOPER_MODE`. + +.. envvar:: NUMBA_SHOW_HELP + + If set to non-zero, show resources for getting help. Default is zero. + +.. 
envvar:: NUMBA_CAPTURED_ERRORS
+
+   Alters the way in which Numba captures and handles exceptions that do not
+   inherit from ``numba.core.errors.NumbaError`` during compilation (e.g.
+   standard Python exceptions). This does not impact runtime exception
+   handling. Valid values are:
+
+   - ``"old_style"`` (default): this is the exception handling behaviour that
+     is present in Numba versions <= 0.54.x. Numba will capture and wrap all
+     errors occurring in compilation and, depending on the compilation phase,
+     they will likely materialize as part of the message in a ``TypingError``
+     or a ``LoweringError``.
+   - ``"new_style"``: this will treat any exception that does not inherit from
+     ``numba.core.errors.NumbaError`` **and** is raised during compilation as a
+     "hard error", i.e. the exception will propagate and compilation will halt.
+     The purpose of this new style is to differentiate between intentionally
+     raised exceptions and those which occur due to mistakes. For example, if
+     an ``AttributeError`` occurs in the typing of an ``@overload`` function,
+     under this new behaviour it is assumed that this is a mistake in the
+     implementation and compilation will halt due to this exception. This
+     behaviour will eventually become the default.
+
+.. envvar:: NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING
+
+   If set to non-zero, error message highlighting is disabled. This is useful
+   for running the test suite on CI systems.
+
+.. envvar:: NUMBA_COLOR_SCHEME
+
+   Alters the color scheme used in error reporting (requires the ``colorama``
+   package to be installed to work). Valid values are:
+
+   - ``no_color`` No color added, just bold font weighting.
+   - ``dark_bg`` Suitable for terminals with a dark background.
+   - ``light_bg`` Suitable for terminals with a light background.
+   - ``blue_bg`` Suitable for terminals with a blue background.
+   - ``jupyter_nb`` Suitable for use in Jupyter Notebooks.
+
+   *Default value:* ``no_color``. The type of the value is ``string``.
+
+.. envvar:: NUMBA_HIGHLIGHT_DUMPS
+
+   If set to non-zero and ``pygments`` is installed, syntax highlighting is
+   applied to Numba IR, LLVM IR and assembly dumps. Default is zero.
+
+.. envvar:: NUMBA_DISABLE_PERFORMANCE_WARNINGS
+
+   If set to non-zero, the issuing of performance warnings is disabled. Default
+   is zero.
+
+.. envvar:: NUMBA_DEBUG
+
+   If set to non-zero, print out all possible debugging information during
+   function compilation. Finer-grained control can be obtained using other
+   variables below.
+
+.. envvar:: NUMBA_DEBUG_FRONTEND
+
+   If set to non-zero, print out debugging information during operation
+   of the compiler frontend, up to and including generation of the Numba
+   Intermediate Representation.
+
+.. envvar:: NUMBA_DEBUGINFO
+
+   If set to non-zero, enable debug info for the full application by setting
+   the default value of the ``debug`` option in ``jit``. Beware that
+   enabling debug info significantly increases the memory consumption
+   for each compiled function.
+   Defaults to the value of `NUMBA_ENABLE_PROFILING`.
+
+.. envvar:: NUMBA_EXTEND_VARIABLE_LIFETIMES
+
+   If set to non-zero, extend the lifetime of variables to the end of the block
+   in which their lifetime ends. This is particularly useful in conjunction
+   with :envvar:`NUMBA_DEBUGINFO` as it helps with introspection of values.
+   Default is zero.
+
+.. envvar:: NUMBA_GDB_BINARY
+
+   Set the ``gdb`` binary for use in Numba's ``gdb`` support.
This takes one of + two forms: 1) a path and full name of the binary to explicitly express + which binary to use 2) just the name of the binary and the current path will + be searched using the standard path resolution rules. For example: + ``/path/from/root/to/binary/name_of_gdb_binary`` or + ``custom_gdb_binary_name``. This is to permit the use of a ``gdb`` from a + non-default location with a non-default name. The default value is ``gdb``. + +.. envvar:: NUMBA_DEBUG_TYPEINFER + + If set to non-zero, print out debugging information about type inference. + +.. envvar:: NUMBA_ENABLE_PROFILING + + Enables JIT events of LLVM in order to support profiling of jitted functions. + This option is automatically enabled under certain profilers. + +.. envvar:: NUMBA_TRACE + + If set to non-zero, trace certain function calls (function entry and exit + events, including arguments and return values). + +.. envvar:: NUMBA_CHROME_TRACE + + If defined, chrome tracing is enabled and this variable specifies the filepath + of the chrome tracing json file output. The emitted file can be opened by + a Chromium-based browser using the profile viewer at `chrome://tracing/`. + + .. warning:: This feature is not supported in multi-process applications. + +.. envvar:: NUMBA_DUMP_BYTECODE + + If set to non-zero, print out the Python :py:term:`bytecode` of + compiled functions. + +.. envvar:: NUMBA_DUMP_CFG + + If set to non-zero, print out information about the Control Flow Graph + of compiled functions. + +.. envvar:: NUMBA_DUMP_IR + + If set to non-zero, print out the Numba Intermediate Representation + of compiled functions. + + +.. envvar:: NUMBA_DUMP_SSA + + If set to non-zero, print out the Numba Intermediate Representation of + compiled functions after conversion to Static Single Assignment (SSA) form. + +.. envvar:: NUMBA_DEBUG_PRINT_AFTER + + Dump the Numba IR after declared pass(es). This is useful for debugging IR + changes made by given passes. Accepted values are: + + * Any pass name (as given by the ``.name()`` method on the class) + * Multiple pass names as a comma separated list, i.e. ``"foo_pass,bar_pass"`` + * The token ``"all"``, which will print after all passes. + + The default value is ``"none"`` so as to prevent output. + +.. envvar:: NUMBA_DUMP_ANNOTATION + + If set to non-zero, print out types annotations for compiled functions. + +.. envvar:: NUMBA_DUMP_LLVM + + Dump the unoptimized LLVM assembly source of compiled functions. + Unoptimized code is usually very verbose; therefore, + :envvar:`NUMBA_DUMP_OPTIMIZED` is recommended instead. + +.. envvar:: NUMBA_DUMP_FUNC_OPT + + Dump the LLVM assembly source after the LLVM "function optimization" + pass, but before the "module optimization" pass. This is useful mostly + when developing Numba itself, otherwise use :envvar:`NUMBA_DUMP_OPTIMIZED`. + +.. envvar:: NUMBA_DUMP_OPTIMIZED + + Dump the LLVM assembly source of compiled functions after all + optimization passes. The output includes the raw function as well as + its CPython-compatible wrapper (whose name begins with ``wrapper.``). + Note that the function is often inlined inside the wrapper, as well. + +.. envvar:: NUMBA_DEBUG_ARRAY_OPT + + Dump debugging information related to the processing associated with + the ``parallel=True`` jit decorator option. + +.. envvar:: NUMBA_DEBUG_ARRAY_OPT_RUNTIME + + Dump debugging information related to the runtime scheduler associated + with the ``parallel=True`` jit decorator option. + +.. 
envvar:: NUMBA_DEBUG_ARRAY_OPT_STATS + + Dump statistics about how many operators/calls are converted to + parallel for-loops and how many are fused together, which are associated + with the ``parallel=True`` jit decorator option. + +.. envvar:: NUMBA_PARALLEL_DIAGNOSTICS + + If set to an integer value between 1 and 4 (inclusive) diagnostic information + about parallel transforms undertaken by Numba will be written to STDOUT. The + higher the value set the more detailed the information produced. + +.. envvar:: NUMBA_DUMP_ASSEMBLY + + Dump the native assembly code of compiled functions. + +.. envvar:: NUMBA_LLVM_PASS_TIMINGS + + Set to ``1`` to enable recording of pass timings in LLVM; + e.g. ``NUMBA_LLVM_PASS_TIMINGS=1``. + See :ref:`developer-llvm-timings`. + + *Default value*: ``0`` (Off) + +.. seealso:: + :ref:`numba-troubleshooting` and :ref:`architecture`. + + +Compilation options +------------------- + +.. envvar:: NUMBA_OPT + + The optimization level; this option is passed straight to LLVM. + + *Default value:* 3 + +.. envvar:: NUMBA_LOOP_VECTORIZE + + If set to non-zero, enable LLVM loop vectorization. + + *Default value:* 1 (except on 32-bit Windows) + +.. envvar:: NUMBA_SLP_VECTORIZE + + If set to non-zero, enable LLVM superword-level parallelism vectorization. + + *Default value:* 1 + +.. envvar:: NUMBA_ENABLE_AVX + + If set to non-zero, enable AVX optimizations in LLVM. This is disabled + by default on Sandy Bridge and Ivy Bridge architectures as it can sometimes + result in slower code on those platforms. + +.. envvar:: NUMBA_DISABLE_INTEL_SVML + + If set to non-zero and Intel SVML is available, the use of SVML will be + disabled. + +.. envvar:: NUMBA_DISABLE_JIT + + Disable JIT compilation entirely. The :func:`~numba.jit` decorator acts + as if it performs no operation, and the invocation of decorated functions + calls the original Python function instead of a compiled version. This + can be useful if you want to run the Python debugger over your code. + +.. envvar:: NUMBA_CPU_NAME +.. envvar:: NUMBA_CPU_FEATURES + + Override CPU and CPU features detection. + By setting ``NUMBA_CPU_NAME=generic``, a generic CPU model is picked + for the CPU architecture and the feature list (``NUMBA_CPU_FEATURES``) + defaults to empty. CPU features must be listed with the format + ``+feature1,-feature2`` where ``+`` indicates enable and ``-`` indicates + disable. For example, ``+sse,+sse2,-avx,-avx2`` enables SSE and SSE2, and + disables AVX and AVX2. + + These settings are passed to LLVM for configuring the compilation target. + To get a list of available options, use the ``llc`` commandline tool + from LLVM, for example:: + + llc -march=x86 -mattr=help + + + .. tip:: To force all caching functions (``@jit(cache=True)``) to emit + portable code (portable within the same architecture and OS), + simply set ``NUMBA_CPU_NAME=generic``. + +.. envvar:: NUMBA_FUNCTION_CACHE_SIZE + + Override the size of the function cache for retaining recently + deserialized functions in memory. In systems like + `Dask `_, it is common for functions to be deserialized + multiple times. Numba will cache functions as long as there is a + reference somewhere in the interpreter. This cache size variable controls + how many functions that are no longer referenced will also be retained, + just in case they show up in the future. The implementation of this is + not a true LRU, but the large size of the cache should be sufficient for + most situations. + + Note: this is unrelated to the compilation cache. 
+
+   *Default value:* 128
+
+.. envvar:: NUMBA_LLVM_REFPRUNE_PASS
+
+   Turns on the LLVM-pass-level reference-count pruning pass and disables the
+   regex-based implementation in Numba.
+
+   *Default value:* 1 (On)
+
+.. envvar:: NUMBA_LLVM_REFPRUNE_FLAGS
+
+   When ``NUMBA_LLVM_REFPRUNE_PASS`` is on, this allows configuration
+   of subpasses in the reference-count pruning LLVM pass.
+
+   Valid values are any combination of the below, separated by `,`
+   (case-insensitive):
+
+   - ``all``: enable all subpasses.
+   - ``per_bb``: enable per-basic-block level pruning, which is the same as
+     the old regex-based implementation.
+   - ``diamond``: enable inter-basic-block pruning for the diamond-shape
+     pattern, i.e. a single-entry single-exit CFG subgraph that has an incref
+     in the entry and a corresponding decref in the exit.
+   - ``fanout``: enable inter-basic-block pruning for the fanout pattern,
+     i.e. a single-entry multiple-exit CFG subgraph where the entry has an
+     incref and every exit has a corresponding decref.
+   - ``fanout_raise``: same as ``fanout`` but allows subgraph exit nodes that
+     raise an exception and have no corresponding decref.
+
+   For example, ``all`` is the same as
+   ``per_bb, diamond, fanout, fanout_raise``.
+
+   *Default value:* "all"
+
+
+.. _numba-envvars-caching:
+
+Caching options
+---------------
+
+Options for the compilation cache.
+
+.. envvar:: NUMBA_DEBUG_CACHE
+
+   If set to non-zero, print out information about operation of the
+   :ref:`JIT compilation cache `.
+
+.. envvar:: NUMBA_CACHE_DIR
+
+   Override the location of the cache directory. If defined, this should be
+   a valid directory path.
+
+   If not defined, Numba picks the cache directory in the following order:
+
+   1. In-tree cache. Put the cache next to the corresponding source file under
+      a ``__pycache__`` directory following how ``.pyc`` files are stored.
+   2. User-wide cache. Put the cache in the user's application directory using
+      ``appdirs.user_cache_dir`` from the
+      `Appdirs package <https://github.com/ActiveState/appdirs>`_.
+   3. IPython cache. Put the cache in an IPython-specific application
+      directory.
+      Stores are made under ``numba_cache`` in the directory returned by
+      ``IPython.paths.get_ipython_cache_dir()``.
+
+   Also see :ref:`docs on cache sharing ` and
+   :ref:`docs on cache clearing `
+
+
+.. _numba-envvars-gpu-support:
+
+GPU support
+-----------
+
+.. envvar:: NUMBA_DISABLE_CUDA
+
+   If set to non-zero, disable CUDA support.
+
+.. envvar:: NUMBA_FORCE_CUDA_CC
+
+   If set, force the CUDA compute capability to the given version (a
+   string of the type ``major.minor``), regardless of attached devices.
+
+.. envvar:: NUMBA_CUDA_DEFAULT_PTX_CC
+
+   The default compute capability (a string of the type ``major.minor``) to
+   target when compiling to PTX using ``cuda.compile_ptx``. The default is
+   5.2, which is the lowest non-deprecated compute capability in the most
+   recent version of the CUDA toolkit supported (10.2 at present).
+
+.. envvar:: NUMBA_ENABLE_CUDASIM
+
+   If set, don't compile and execute code for the GPU, but use the CUDA
+   Simulator instead. For debugging purposes.
+
+
+.. envvar:: NUMBA_CUDA_ARRAY_INTERFACE_SYNC
+
+   Whether to synchronize on streams provided by objects imported using the
+   CUDA Array Interface. This defaults to 1. If set to 0, then no
+   synchronization takes place, and the user of Numba (and other CUDA
+   libraries) is responsible for ensuring correctness with respect to
+   synchronization on streams.
+
+.. envvar:: NUMBA_CUDA_LOG_LEVEL
+
+   For debugging purposes.
If no other logging is configured, the value of this + variable is the logging level for CUDA API calls. The default value is + ``CRITICAL`` - to trace all API calls on standard error, set this to + ``DEBUG``. + +.. envvar:: NUMBA_CUDA_LOG_API_ARGS + + By default the CUDA API call logs only give the names of functions called. + Setting this variable to 1 also includes the values of arguments to Driver + API calls in the logs. + +.. envvar:: NUMBA_CUDA_DRIVER + + Path of the directory in which the CUDA driver libraries are to be found. + Normally this should not need to be set as Numba can locate the driver in + standard locations. However, this variable can be used if the driver is in a + non-standard location. + +.. envvar:: NUMBA_CUDA_LOG_SIZE + + Buffer size for logs produced by CUDA driver API operations. This defaults + to 1024 and should not normally need to be modified - however, if an error + in an API call produces a large amount of output that appears to be + truncated (perhaps due to multiple long function names, for example) then + this variable can be used to increase the buffer size and view the full + error message. + +.. envvar:: NUMBA_CUDA_VERBOSE_JIT_LOG + + Whether the CUDA driver should produce verbose log messages. Defaults to 1, + indicating that verbose messaging is enabled. This should not need to be + modified under normal circumstances. + +.. envvar:: NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM + + When set to 1, the default stream is the per-thread default stream. When set + to 0, the default stream is the legacy default stream. This defaults to 0, + for the legacy default stream. See `Stream Synchronization Behavior + `_ + for an explanation of the legacy and per-thread default streams. + + This variable only takes effect when using Numba's internal CUDA bindings; + when using the NVIDIA bindings, use the environment variable + ``CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM`` instead. + + .. seealso:: + + The `Default Stream section + `_ + in the NVIDIA Bindings documentation. + +.. envvar:: NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS + + Enable warnings if the grid size is too small relative to the number of + streaming multiprocessors (SM). This option is on by default (default value is 1). + + The heuristic checked is whether ``gridsize < 2 * (number of SMs)``. NOTE: The absence of + a warning does not imply a good gridsize relative to the number of SMs. Disabling + this warning will reduce the number of CUDA API calls (during JIT compilation), as the + heuristic needs to check the number of SMs available on the device in the + current context. + +.. envvar:: NUMBA_CUDA_WARN_ON_IMPLICIT_COPY + + Enable warnings if a kernel is launched with host memory which forces a copy to and + from the device. This option is on by default (default value is 1). + +.. envvar:: NUMBA_CUDA_USE_NVIDIA_BINDING + + When set to 1, Numba will attempt to use the `NVIDIA CUDA Python binding + `_ to make calls to the driver API + instead of using its own ctypes binding. This defaults to 0 (off), as the + NVIDIA binding is currently missing support for Per-Thread Default + Streams and the profiler APIs. + +.. envvar:: NUMBA_CUDA_INCLUDE_PATH + + The location of the CUDA include files. This is used when linking CUDA C/C++ + sources to Python kernels, and needs to be correctly set for CUDA includes to + be available to linked C/C++ sources. On Linux, it defaults to + ``/usr/local/cuda/include``. On Windows, the default is + ``$env:CUDA_PATH\include``. + + +Threading Control +----------------- + +.. 
envvar:: NUMBA_NUM_THREADS
+
+   If set, the number of threads in the thread pool for the parallel CPU target
+   will take this value. Must be greater than zero. This value is independent
+   of ``OMP_NUM_THREADS`` and ``MKL_NUM_THREADS``.
+
+   *Default value:* The number of CPU cores on the system as determined at run
+   time. This can be accessed via :obj:`numba.config.NUMBA_DEFAULT_NUM_THREADS`.
+
+   See also the section on :ref:`setting_the_number_of_threads` for
+   information on how to set the number of threads at runtime.
+
+.. envvar:: NUMBA_THREADING_LAYER
+
+   This environment variable controls the library used for concurrent execution
+   by the CPU parallel targets (``@vectorize(target='parallel')``,
+   ``@guvectorize(target='parallel')`` and ``@njit(parallel=True)``). The
+   variable type is string and by default is ``default``, which will select a
+   threading layer based on what is available in the runtime. The valid values
+   are (for more information about these see
+   :ref:`the threading layer documentation `):
+
+   * ``default`` - select a threading layer based on what is available in the
+     current runtime.
+   * ``safe`` - select a threading layer that is both fork and thread safe
+     (requires the TBB package).
+   * ``forksafe`` - select a threading layer that is fork safe.
+   * ``threadsafe`` - select a threading layer that is thread safe.
+   * ``tbb`` - a threading layer backed by Intel TBB.
+   * ``omp`` - a threading layer backed by OpenMP.
+   * ``workqueue`` - a simple built-in work-sharing task scheduler.
+
+.. envvar:: NUMBA_THREADING_LAYER_PRIORITY
+
+   This environment variable controls the order in which the libraries used
+   for concurrent execution by the CPU parallel targets
+   (``@vectorize(target='parallel')``, ``@guvectorize(target='parallel')``
+   and ``@njit(parallel=True)``) are prioritized for use. The variable type is
+   string and by default is ``tbb omp workqueue``, with priority taken from
+   position in the string, leftmost being highest. Valid values are any
+   permutation of the three choices (for more information about
+   these see :ref:`the threading layer documentation `.) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/fpsemantics.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/fpsemantics.rst new file mode 100644 index 000000000..7973d4e50 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/fpsemantics.rst @@ -0,0 +1,81 @@ +
+Floating-point pitfalls
+=======================
+
+Precision and accuracy
+----------------------
+
+For some operations, Numba may use a different algorithm than Python or
+Numpy. The results may not be bit-by-bit compatible. The difference
+should generally be small and within reasonable expectations. However,
+small accumulated differences might produce large differences at the end,
+especially if a divergent function is involved.
+
+Math library implementations
+''''''''''''''''''''''''''''
+
+Numba supports a variety of platforms and operating systems, each of which
+has its own math library implementation (referred to as ``libm`` from here
+on). The majority of math functions included in ``libm`` have specific
+requirements as set out by the IEEE 754 standard (like ``sin()``, ``exp()``
+etc.), but each implementation may have bugs. Thus, on some platforms
+Numba has to exercise special care in order to work around known ``libm``
+issues.
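+
+To make the "not bit-by-bit compatible" point concrete, the following sketch
+(illustrative only, and showing an algorithmic difference rather than a
+``libm`` issue) compares a simple compiled loop against NumPy's pairwise
+summation; the two typically differ by a small rounding amount::
+
+    import numpy as np
+    from numba import njit
+
+    @njit
+    def naive_sum(a):
+        # Sequential accumulation; NumPy's a.sum() uses pairwise summation,
+        # so the rounding behaviour differs slightly.
+        s = 0.0
+        for x in a:
+            s += x
+        return s
+
+    a = np.random.random(10**6)
+    print(naive_sum(a) - a.sum())  # usually tiny, but rarely exactly zero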
+ +Another typical problem is when an operating system's ``libm`` function +set is incomplete and needs to be supplemented by additional functions. +These are provided with reference to the IEEE 754 and C99 standards +and are often implemented in Numba in a manner similar to equivalent +CPython functions. + +Linear algebra +'''''''''''''' + +Numpy forces some linear algebra operations to run in double-precision mode +even when a ``float32`` input is given. Numba will always observe +the input's precision, and invoke single-precision linear algebra routines +when all inputs are ``float32`` or ``complex64``. + +The implementations of the ``numpy.linalg`` routines in Numba only support the +floating point types that are used in the LAPACK functions that provide +the underlying core functionality. As a result only ``float32``, ``float64``, +``complex64`` and ``complex128`` types are supported. If a user has e.g. an +``int32`` type, an appropriate type conversion must be performed to a +floating point type prior to its use in these routines. The reason for this +decision is to essentially avoid having to replicate type conversion choices +made in Numpy and to also encourage the user to choose the optimal floating +point type for the operation they are undertaking. + + +Mixed-types operations +'''''''''''''''''''''' + +Numpy will most often return a ``float64`` as a result of a computation +with mixed integer and floating-point operands (a typical example is the +power operator ``**``). Numba by contrast will select the highest precision +amongst the floating-point operands, so for example ``float32 ** int32`` +will return a ``float32``, regardless of the input values. This makes +performance characteristics easier to predict, but you should explicitly +cast the input to ``float64`` if you need the extra precision. + + +.. _ufunc-fpu-errors: + +Warnings and errors +------------------- + +When calling a :term:`ufunc` created with :func:`~numba.vectorize`, +Numpy will determine whether an error occurred by examining the FPU +error word. It may then print out a warning or raise an exception +(such as ``RuntimeWarning: divide by zero encountered``), +depending on the current error handling settings. + +Depending on how LLVM optimized the ufunc's code, however, some spurious +warnings or errors may appear. If you get caught by this issue, we +recommend you call :func:`numpy.seterr` to change Numpy's error handling +settings, or the :class:`numpy.errstate` context manager to switch them +temporarily:: + + with np.errstate(all='ignore'): + x = my_ufunc(y) + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/index.rst new file mode 100644 index 000000000..e099d2a31 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/index.rst @@ -0,0 +1,16 @@ + +Reference Manual +================ + +.. 
toctree:: + + types.rst + jit-compilation.rst + aot-compilation.rst + utils.rst + envvars.rst + pysupported.rst + numpysupported.rst + pysemantics.rst + fpsemantics.rst + deprecation.rst diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/jit-compilation.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/jit-compilation.rst new file mode 100644 index 000000000..ac67a593b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/jit-compilation.rst @@ -0,0 +1,572 @@ +Just-in-Time compilation +======================== + + +JIT functions +------------- + +.. _jit-decorator: + +.. decorator:: numba.jit(signature=None, nopython=False, nogil=False, cache=False, forceobj=False, parallel=False, error_model='python', fastmath=False, locals={}, boundscheck=False) + + Compile the decorated function on-the-fly to produce efficient machine + code. All parameters are optional. + + If present, the *signature* is either a single signature or a list of + signatures representing the expected :ref:`numba-types` of function + arguments and return values. Each signature can be given in several + forms: + + * A tuple of :ref:`numba-types` arguments (for example + ``(numba.int32, numba.double)``) representing the types of the + function's arguments; Numba will then infer an appropriate return + type from the arguments. + * A call signature using :ref:`numba-types`, specifying both return + type and argument types. This can be given in intuitive form + (for example ``numba.void(numba.int32, numba.double)``). + * A string representation of one of the above, for example + ``"void(int32, double)"``. All type names used in the string are assumed + to be defined in the ``numba.types`` module. + + *nopython* and *nogil* are boolean flags. *locals* is a mapping of + local variable names to :ref:`numba-types`. + + This decorator has several modes of operation: + + * If one or more signatures are given in *signature*, a specialization is + compiled for each of them. Calling the decorated function will then try + to choose the best matching signature, and raise a :class:`TypeError` if + no appropriate conversion is available for the function arguments. If + converting succeeds, the compiled machine code is executed with the + converted arguments and the return value is converted back according to + the signature. + + * If no *signature* is given, the decorated function implements + lazy compilation. Each call to the decorated function will try to + re-use an existing specialization if it exists (for example, a call + with two integer arguments may re-use a specialization for argument + types ``(numba.int64, numba.int64)``). If no suitable specialization + exists, a new specialization is compiled on-the-fly, stored for later + use, and executed with the converted arguments. + + If true, *nopython* forces the function to be compiled in :term:`nopython + mode`. If not possible, compilation will raise an error. + + If true, *forceobj* forces the function to be compiled in :term:`object + mode`. Since object mode is slower than nopython mode, this is mostly + useful for testing purposes. + + If true, *nogil* tries to release the :py:term:`global interpreter lock` + inside the compiled function. The GIL will only be released if Numba can + compile the function in :term:`nopython mode`, otherwise a compilation + warning will be printed. + + .. 
_jit-decorator-cache: + + If true, *cache* enables a file-based cache to shorten compilation times + when the function was already compiled in a previous invocation. + The cache is maintained in the ``__pycache__`` subdirectory of + the directory containing the source file; if the current user is not + allowed to write to it, though, it falls back to a platform-specific + user-wide cache directory (such as ``$HOME/.cache/numba`` on Unix + platforms). + + .. _jit-decorator-parallel: + + If true, *parallel* enables the automatic parallelization of a number of + common NumPy constructs as well as the fusion of adjacent parallel + operations to maximize cache locality. + + The *error_model* option controls the divide-by-zero behavior. + Setting it to 'python' causes divide-by-zero to raise exception like CPython. + Setting it to 'numpy' causes divide-by-zero to set the result to *+/-inf* or + *nan*. + + Not all functions can be cached, since some functionality cannot be + always persisted to disk. When a function cannot be cached, a + warning is emitted. + + .. _jit-decorator-fastmath: + + If true, *fastmath* enables the use of otherwise unsafe floating point + transforms as described in the + `LLVM documentation `_. + Further, if :ref:`Intel SVML ` is installed faster but less + accurate versions of some math intrinsics are used (answers to within + ``4 ULP``). + + .. _jit-decorator-boundscheck: + + If true, *boundscheck* enables bounds checking for array indices. Out of + bounds accesses will raise IndexError. The default is to not do bounds + checking. If bounds checking is disabled, out of bounds accesses can + produce garbage results or segfaults. However, enabling bounds checking + will slow down typical functions, so it is recommended to only use this + flag for debugging. You can also set the `NUMBA_BOUNDSCHECK` environment + variable to 0 or 1 to globally override this flag. + + The *locals* dictionary may be used to force the :ref:`numba-types` + of particular local variables, for example if you want to force the + use of single precision floats at some point. In general, we recommend + you let Numba's compiler infer the types of local variables by itself. + + Here is an example with two signatures:: + + @jit(["int32(int32)", "float32(float32)"], nopython=True) + def f(x): ... + + Not putting any parentheses after the decorator is equivalent to calling + the decorator without any arguments, i.e.:: + + @jit + def f(x): ... + + is equivalent to:: + + @jit() + def f(x): ... + + The decorator returns a :class:`Dispatcher` object. + + .. note:: + If no *signature* is given, compilation errors will be raised when + the actual compilation occurs, i.e. when the function is first called + with some given argument types. + + .. note:: + Compilation can be influenced by some dedicated :ref:`numba-envvars`. + + +Generated JIT functions +----------------------- + +.. decorator:: numba.generated_jit(nopython=False, nogil=False, cache=False, forceobj=False, locals={}) + + Like the :func:`~numba.jit` decorator, but calls the decorated function at + compile-time, passing the *types* of the function's arguments. + The decorated function must return a callable which will be compiled as + the function's implementation for those types, allowing flexible kinds of + specialization. + + The :func:`~numba.generated_jit` decorator returns a :class:`Dispatcher` object. + + +Dispatcher objects +------------------ + +.. 
class:: Dispatcher
+
+   The class of objects created by calling :func:`~numba.jit` or
+   :func:`~numba.generated_jit`. You shouldn't try to create such an object
+   in any other way. Calling a Dispatcher object calls the compiled
+   specialization for the arguments with which it is called, letting it
+   act as an accelerated replacement for the Python function which was
+   compiled.
+
+   In addition, Dispatcher objects have the following methods and attributes:
+
+   .. attribute:: py_func
+
+      The pure Python function which was compiled.
+
+   .. method:: inspect_types(file=None, pretty=False)
+
+      Print out a listing of the function source code annotated line-by-line
+      with the corresponding Numba IR, and the inferred types of the various
+      variables. If *file* is specified, printing is done to that file
+      object, otherwise to ``sys.stdout``. If *pretty* is set to True, then
+      colored ANSI output will be produced in a terminal and HTML in a
+      notebook.
+
+      .. seealso:: :ref:`architecture`
+
+   .. method:: inspect_llvm(signature=None)
+
+      Return a dictionary keying compiled function signatures to the
+      human-readable LLVM IR generated for the function. If the signature
+      keyword is specified, a string corresponding to that individual
+      signature is returned.
+
+   .. method:: inspect_asm(signature=None)
+
+      Return a dictionary keying compiled function signatures to the
+      human-readable native assembly code for the function. If the
+      signature keyword is specified, a string corresponding to that
+      individual signature is returned.
+
+   .. method:: inspect_cfg(signature=None, show_wrapped)
+
+      Return a dictionary keying compiled function signatures to the
+      control-flow graph objects for the function. If the signature keyword is
+      specified, a string corresponding to that individual signature is
+      returned.
+
+      The control-flow graph objects can be stringified (``str`` or ``repr``)
+      to get the textual representation of the graph in DOT format. Or, use
+      its ``.display(filename=None, view=False)`` method to plot the graph.
+      The *filename* option can be set to a specific path for the rendered
+      output to write to. If the *view* option is True, the plot is opened by
+      the system default application for the image format (PDF). In an IPython
+      notebook, the returned object is plotted inline.
+
+      Usage::
+
+          @jit
+          def foo():
+              ...
+
+          # opens the CFG in the system default application
+          foo.inspect_cfg(foo.signatures[0]).display(view=True)
+
+
+   .. method:: inspect_disasm_cfg(signature=None)
+
+      Return a dictionary keying compiled function signatures to the
+      control-flow graph of the disassembly of the underlying compiled ``ELF``
+      object. If the signature keyword is specified, a control-flow graph
+      corresponding to that individual signature is returned. This function is
+      execution-environment aware and will produce SVG output in Jupyter
+      notebooks and ASCII in terminals.
+ + Example:: + + @njit + def foo(x): + if x < 3: + return x + 1 + return x + 2 + + foo(10) + + print(foo.inspect_disasm_cfg(signature=foo.signatures[0])) + + Gives:: + + [0x08000040]> # method.__main__.foo_241_long_long (int64_t arg1, int64_t arg3); + ─────────────────────────────────────────────────────────────────────┐ + │ 0x8000040 │ + │ ; arg3 ; [02] -r-x section size 279 named .text │ + │ ;-- section..text: │ + │ ;-- .text: │ + │ ;-- __main__::foo$241(long long): │ + │ ;-- rip: │ + │ 25: method.__main__.foo_241_long_long (int64_t arg1, int64_t arg3); │ + │ ; arg int64_t arg1 @ rdi │ + │ ; arg int64_t arg3 @ rdx │ + │ ; 2 │ + │ cmp rdx, 2 │ + │ jg 0x800004f │ + └─────────────────────────────────────────────────────────────────────┘ + f t + │ │ + │ └──────────────────────────────┐ + └──┐ │ + │ │ + ┌─────────────────────────┐ ┌─────────────────────────┐ + │ 0x8000046 │ │ 0x800004f │ + │ ; arg3 │ │ ; arg3 │ + │ inc rdx │ │ add rdx, 2 │ + │ ; arg3 │ │ ; arg3 │ + │ mov qword [rdi], rdx │ │ mov qword [rdi], rdx │ + │ xor eax, eax │ │ xor eax, eax │ + │ ret │ │ ret │ + └─────────────────────────┘ └─────────────────────────┘ + + .. method:: recompile() + + Recompile all existing signatures. This can be useful for example if + a global or closure variable was frozen by your function and its value + in Python has changed. Since compiling isn't cheap, this is mainly + for testing and interactive use. + + .. method:: parallel_diagnostics(signature=None, level=1) + + Print parallel diagnostic information for the given signature. If no + signature is present it is printed for all known signatures. ``level`` is + used to adjust the verbosity, ``level=1`` (default) is minimum verbosity, + levels 2, 3, and 4 provide increasing levels of verbosity. + + .. method:: get_metadata(signature=None) + + Obtain the compilation metadata for a given signature. This is useful for + developers of Numba and Numba extensions. + + +Vectorized functions (ufuncs and DUFuncs) +----------------------------------------- + +.. decorator:: numba.vectorize(*, signatures=[], identity=None, nopython=True, target='cpu', forceobj=False, cache=False, locals={}) + + Compile the decorated function and wrap it either as a `NumPy + ufunc`_ or a Numba :class:`~numba.DUFunc`. The optional + *nopython*, *forceobj* and *locals* arguments have the same meaning + as in :func:`numba.jit`. + + *signatures* is an optional list of signatures expressed in the + same form as in the :func:`numba.jit` *signature* argument. If + *signatures* is non-empty, then the decorator will compile the user + Python function into a NumPy ufunc. If no *signatures* are given, + then the decorator will wrap the user Python function in a + :class:`~numba.DUFunc` instance, which will compile the user + function at call time whenever NumPy can not find a matching loop + for the input arguments. *signatures* is required if *target* is + ``"parallel"``. + + *identity* is the identity (or unit) value of the function being + implemented. Possible values are 0, 1, None, and the string + ``"reorderable"``. The default is None. Both None and + ``"reorderable"`` mean the function has no identity value; + ``"reorderable"`` additionally specifies that reductions along multiple + axes can be reordered. + + If there are several *signatures*, they must be ordered from the more + specific to the least specific. Otherwise, NumPy's type-based + dispatching may not work as expected. 
For example, the following is
+   wrong::
+
+      @vectorize(["float64(float64)", "float32(float32)"])
+      def f(x): ...
+
+   as running it over a single-precision array will choose the ``float64``
+   version of the compiled function, leading to much less efficient
+   execution. The correct invocation is::
+
+      @vectorize(["float32(float32)", "float64(float64)"])
+      def f(x): ...
+
+   *target* is a string specifying the backend target; available values are
+   "cpu", "parallel", and "cuda". To use a multithreaded version, change the
+   target to "parallel" (which requires signatures to be specified)::
+
+      @vectorize(["float64(float64)", "float32(float32)"], target='parallel')
+      def f(x): ...
+
+   For the CUDA target, use "cuda"::
+
+      @vectorize(["float64(float64)", "float32(float32)"], target='cuda')
+      def f(x): ...
+
+   The compiled function can be cached to reduce future compilation time.
+   It is enabled by setting *cache* to True. Only the "cpu" and "parallel"
+   targets support caching.
+
+
+.. decorator:: numba.guvectorize(signatures, layout, *, identity=None, nopython=True, target='cpu', forceobj=False, cache=False, locals={})
+
+   Generalized version of :func:`numba.vectorize`. While
+   :func:`numba.vectorize` will produce a simple ufunc whose core
+   functionality (the function you are decorating) operates on scalar
+   operands and returns a scalar value, :func:`numba.guvectorize`
+   allows you to create a `NumPy ufunc`_ whose core function takes array
+   arguments of various dimensions.
+
+   The additional argument *layout* is a string specifying, in symbolic
+   form, the dimensionality and size relationship of the argument types
+   and return types. For example, a matrix multiplication will have
+   a layout string of ``"(m,n),(n,p)->(m,p)"``. Its definition might
+   be (function body omitted)::
+
+      @guvectorize(["void(float64[:,:], float64[:,:], float64[:,:])"],
+                   "(m,n),(n,p)->(m,p)")
+      def f(a, b, result):
+          """Fill in the *result* matrix such that result := a * b"""
+          ...
+
+   If one of the arguments should be a scalar, the corresponding layout
+   specification is ``()`` and the argument will really be given to
+   you as a zero-dimension array (you have to dereference it to get the
+   scalar value). For example, a one-dimension moving average
+   with a parameterizable window width may have a layout string of ``"(n),()->(n)"``.
+
+   Note that any output will be given to you preallocated as an additional
+   function argument: your code has to fill it with the appropriate values
+   for the function you are implementing.
+
+   If your function doesn't take an output array, you should omit the "arrow"
+   in the layout string (e.g. ``"(n),(n)"``). When doing this, it is important
+   to be aware that changes to the input arrays cannot always be relied on to be
+   visible outside the execution of the ufunc, as NumPy may pass in temporary
+   arrays as inputs (for example, if a cast is required).
+
+   .. seealso::
+      Specification of the `layout string
+      <http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html>`_
+      as supported by NumPy. Note that NumPy uses the term "signature",
+      which we unfortunately use for something else.
+
+   The compiled function can be cached to reduce future compilation time.
+   It is enabled by setting *cache* to True. Only the "cpu" and "parallel"
+   targets support caching.
+
+.. _NumPy ufunc: http://docs.scipy.org/doc/numpy/reference/ufuncs.html
+
+.. class:: numba.DUFunc
+
+   The class of objects created by calling :func:`numba.vectorize`
+   with no signatures.
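+
+   For instance, a minimal sketch of the call-time compilation behavior
+   described below (the exact loops compiled depend on the argument types
+   used)::
+
+      from numba import vectorize
+
+      @vectorize
+      def add(a, b):
+          return a + b
+
+      add(1, 2)         # compiles an integer loop at first call
+      add(1.0, 2.5)     # a float call compiles an additional loop
+      print(add.types)  # lists the loops compiled so far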
+
+   DUFunc instances should behave similarly to NumPy
+   :class:`~numpy.ufunc` objects with one important difference:
+   call-time loop generation. When calling a ufunc, NumPy looks at
+   the existing loops registered for that ufunc, and will raise a
+   :class:`~python.TypeError` if it cannot find a loop to which it can
+   safely cast the inputs. When calling a DUFunc, Numba
+   delegates the call to NumPy. If the NumPy ufunc call fails, then
+   Numba attempts to build a new loop for the given input types, and
+   calls the ufunc again. If this second call attempt fails or a
+   compilation error occurs, then DUFunc passes along the exception to
+   the caller.
+
+   .. seealso::
+
+      The ":ref:`dynamic-universal-functions`" section in the user's
+      guide demonstrates the call-time behavior of
+      :class:`~numba.DUFunc`, and discusses the impact of call order
+      on how Numba generates the underlying :class:`~numpy.ufunc`.
+
+   .. attribute:: ufunc
+
+      The actual NumPy :class:`~numpy.ufunc` object being built by the
+      :class:`~numba.DUFunc` instance. Note that the
+      :class:`~numba.DUFunc` object maintains several important data
+      structures required for proper ufunc functionality (specifically
+      the dynamically compiled loops). Users should not pass the
+      :class:`~numpy.ufunc` value around without ensuring the
+      underlying :class:`~numba.DUFunc` will not be garbage collected.
+
+   .. attribute:: nin
+
+      The number of DUFunc (ufunc) inputs. See `ufunc.nin`_.
+
+   .. attribute:: nout
+
+      The number of DUFunc outputs. See `ufunc.nout`_.
+
+   .. attribute:: nargs
+
+      The total number of possible DUFunc arguments (should be
+      :attr:`~numba.DUFunc.nin` + :attr:`~numba.DUFunc.nout`).
+      See `ufunc.nargs`_.
+
+   .. attribute:: ntypes
+
+      The number of input types supported by the DUFunc. See
+      `ufunc.ntypes`_.
+
+   .. attribute:: types
+
+      A list of the supported types given as strings. See
+      `ufunc.types`_.
+
+   .. attribute:: identity
+
+      The identity value when using the ufunc as a reduction. See
+      `ufunc.identity`_.
+
+   .. method:: reduce(A, *, axis, dtype, out, keepdims)
+
+      Reduces *A*\'s dimension by one by applying the DUFunc along one
+      axis. See `ufunc.reduce`_.
+
+   .. method:: accumulate(A, *, axis, dtype, out)
+
+      Accumulate the result of applying the operator to all elements.
+      See `ufunc.accumulate`_.
+
+   .. method:: reduceat(A, indices, *, axis, dtype, out)
+
+      Performs a (local) reduce with specified slices over a single
+      axis. See `ufunc.reduceat`_.
+
+   .. method:: outer(A, B)
+
+      Apply the ufunc to all pairs (*a*, *b*) with *a* in *A*, and *b*
+      in *B*. See `ufunc.outer`_.
+
+   .. method:: at(A, indices, *, B)
+
+      Performs an unbuffered in-place operation on operand *A* for
+      elements specified by *indices*. If you are using NumPy 1.7 or
+      earlier, this method will not be present. See `ufunc.at`_.
+
+
+.. note::
+   Vectorized functions can, in rare circumstances, show
+   unexpected warnings or errors.
+
+
+.. _`ufunc.nin`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nin.html#numpy.ufunc.nin
+
+.. _`ufunc.nout`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nout.html#numpy.ufunc.nout
+
+.. _`ufunc.nargs`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.nargs.html#numpy.ufunc.nargs
+
+.. _`ufunc.ntypes`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.ntypes.html#numpy.ufunc.ntypes
+
+.. _`ufunc.types`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.types.html#numpy.ufunc.types
+
+..
_`ufunc.identity`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.identity.html#numpy.ufunc.identity + +.. _`ufunc.reduce`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.reduce.html#numpy.ufunc.reduce + +.. _`ufunc.accumulate`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.accumulate.html#numpy.ufunc.accumulate + +.. _`ufunc.reduceat`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.reduceat.html#numpy.ufunc.reduceat + +.. _`ufunc.outer`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.outer.html#numpy.ufunc.outer + +.. _`ufunc.at`: http://docs.scipy.org/doc/numpy/reference/generated/numpy.ufunc.at.html#numpy.ufunc.at + + +C callbacks +----------- + +.. decorator:: numba.cfunc(signature, nopython=False, cache=False, locals={}) + + Compile the decorated function on-the-fly to produce efficient machine + code. The compiled code is wrapped in a thin C callback that makes it + callable using the natural C ABI. + + The *signature* is a single signature representing the signature of the + C callback. It must have the same form as in :func:`~numba.jit`. + The decorator does not check that the types in the signature have + a well-defined representation in C. + + *nopython* and *cache* are boolean flags. *locals* is a mapping of + local variable names to :ref:`numba-types`. They all have the same + meaning as in :func:`~numba.jit`. + + The decorator returns a :class:`CFunc` object. + + .. note:: + C callbacks currently do not support :term:`object mode`. + + +.. class:: CFunc + + The class of objects created by :func:`~numba.cfunc`. :class:`CFunc` + objects expose the following attributes and methods: + + .. attribute:: address + + The address of the compiled C callback, as an integer. + + .. attribute:: cffi + + A `cffi`_ function pointer instance, to be passed as an argument to + `cffi`_-wrapped functions. The pointer's type is ``void *``, so + only minimal type checking will happen when passing it to `cffi`_. + + .. attribute:: ctypes + + A :mod:`ctypes` callback instance, as if it were created using + :func:`ctypes.CFUNCTYPE`. + + .. attribute:: native_name + + The name of the compiled C callback. + + .. method:: inspect_llvm() + + Return the human-readable LLVM IR generated for the C callback. + :attr:`native_name` is the name under which this callback is defined + in the IR. + + +.. _cffi: https://cffi.readthedocs.org/ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/numpysupported.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/numpysupported.rst new file mode 100644 index 000000000..54060fd90 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/numpysupported.rst @@ -0,0 +1,925 @@ + +.. _numpy-support: + +======================== +Supported NumPy features +======================== + +One objective of Numba is having a seamless integration with `NumPy`_. +NumPy arrays provide an efficient storage method for homogeneous sets of +data. NumPy dtypes provide type information useful when compiling, and +the regular, structured storage of potentially large amounts of data +in memory provides an ideal memory layout for code generation. Numba +excels at generating code that executes on top of NumPy arrays. + +NumPy support in Numba comes in many forms: + +* Numba understands calls to NumPy `ufuncs`_ and is able to generate + equivalent native code for many of them. + +* NumPy arrays are directly supported in Numba. 
Access to NumPy arrays
+  is very efficient, as indexing is lowered to direct memory accesses
+  when possible.
+
+* Numba is able to generate `ufuncs`_ and `gufuncs`_. This means that it
+  is possible to implement ufuncs and gufuncs within Python, getting
+  speeds comparable to that of ufuncs/gufuncs implemented in C extension
+  modules using the NumPy C API.
+
+.. _NumPy: http://www.numpy.org/
+.. _ufuncs: http://docs.scipy.org/doc/numpy/reference/ufuncs.html
+.. _gufuncs: http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html
+
+The following sections focus on the NumPy features supported in
+:term:`nopython mode`, unless otherwise stated.
+
+
+Scalar types
+============
+
+Numba supports the following NumPy scalar types:
+
+* **Integers**: all integers of either signedness, and any width up to 64 bits
+* **Booleans**
+* **Real numbers:** single-precision (32-bit) and double-precision (64-bit) reals
+* **Complex numbers:** single-precision (2x32-bit) and double-precision (2x64-bit) complex numbers
+* **Datetimes and timestamps:** of any unit
+* **Character sequences** (but no operations are available on them)
+* **Structured scalars:** structured scalars made of any of the types above and arrays of the types above
+
+The following scalar types and features are not supported:
+
+* **Arbitrary Python objects**
+* **Half-precision and extended-precision** real and complex numbers
+* **Nested structured scalars:** the fields of structured scalars may not contain other structured scalars
+
+The operations supported on NumPy scalars are almost the same as on the
+equivalent built-in types such as ``int`` or ``float``. You can use a type's
+constructor to convert from a different type or width. In addition you can use
+the ``view(np.<dtype>)`` method to bitcast all ``int`` and ``float`` types
+within the same width. However, you must define the scalar using a NumPy
+constructor within a jitted function. For example, the following will work:
+
+.. code:: pycon
+
+   >>> import numpy as np
+   >>> from numba import njit
+   >>> @njit
+   ... def bitcast():
+   ...     i = np.int64(-1)
+   ...     print(i.view(np.uint64))
+   ...
+   >>> bitcast()
+   18446744073709551615
+
+
+Whereas the following will not work:
+
+
+.. code:: pycon
+
+   >>> import numpy as np
+   >>> from numba import njit
+   >>> @njit
+   ... def bitcast(i):
+   ...     print(i.view(np.uint64))
+   ...
+   >>> bitcast(np.int64(-1))
+   ---------------------------------------------------------------------------
+   TypingError                               Traceback (most recent call last)
+   ...
+   TypingError: Failed in nopython mode pipeline (step: ensure IR is legal prior to lowering)
+   'view' can only be called on NumPy dtypes, try wrapping the variable with 'np.<dtype>()'
+
+   File "<stdin>", line 3:
+   def bitcast(i):
+       print(i.view(np.uint64))
+
+Structured scalars support attribute getting and setting, as well as
+member lookup using constant strings. Strings stored in a local or global tuple
+are considered constant strings and can be used for member lookup.
+
+
+.. literalinclude:: ../../../numba/tests/doc_examples/test_rec_array.py
+   :language: python
+   :start-after: magictoken.ex_rec_arr_const_index.begin
+   :end-before: magictoken.ex_rec_arr_const_index.end
+   :dedent: 8
+
+It is also possible to use local or global tuples together with ``literal_unroll``:
+
+..
literalinclude:: ../../../numba/tests/doc_examples/test_rec_array.py
+   :language: python
+   :start-after: magictoken.ex_rec_arr_lit_unroll_index.begin
+   :end-before: magictoken.ex_rec_arr_lit_unroll_index.end
+   :dedent: 8
+
+
+Record subtyping
+----------------
+.. warning::
+   This is an experimental feature.
+
+Numba allows width subtyping of structured scalars.
+For example, ``dtype([('a', 'f8'), ('b', 'i8')])`` will be considered a
+subtype of ``dtype([('a', 'f8')])``, because the second is a strict subset of
+the first, i.e. field ``a`` is of the same type and is in the same position in
+both types. The subtyping relationship will matter in cases where compilation
+for a certain input is not allowed, but the input is a subtype of another,
+allowed type.
+
+.. code-block:: python
+
+   import numpy as np
+   from numba import njit, typeof
+   from numba.core import types
+   record1 = np.array([1], dtype=[('a', 'f8')])[0]
+   record2 = np.array([(2,3)], dtype=[('a', 'f8'), ('b', 'f8')])[0]
+
+   @njit(types.float64(typeof(record1)))
+   def foo(rec):
+       return rec['a']
+
+   foo(record1)
+   foo(record2)
+
+Without subtyping the last line would fail. With subtyping, no new compilation
+will be triggered, but the compiled function for ``record1`` will be used for
+``record2``.
+
+.. seealso::
+   `NumPy scalars <http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html>`_
+   reference.
+
+
+Array types
+===========
+
+`NumPy arrays <http://docs.scipy.org/doc/numpy/reference/arrays.ndarray.html>`_
+of any of the scalar types above are supported, regardless of the shape
+or layout.
+
+Array access
+------------
+
+Arrays support normal iteration. Full basic indexing and slicing is
+supported. A subset of advanced indexing is also supported: only one
+advanced index is allowed, and it has to be a one-dimensional array
+(it can be combined with an arbitrary number of basic indices as well).
+
+.. seealso::
+   `NumPy indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_
+   reference.
+
+
+.. _structured-array-access:
+
+Structured array access
+-----------------------
+
+Numba presently supports accessing fields of individual elements in structured
+arrays by attribute as well as by getting and setting. This goes slightly
+beyond the NumPy API, which only allows accessing fields by getting and
+setting. For example:
+
+.. code:: python
+
+   from numba import njit
+   import numpy as np
+
+   record_type = np.dtype([("ival", np.int32), ("fval", np.float64)], align=True)
+
+   def f(rec):
+       value = 2.5
+       rec[0].ival = int(value)
+       rec[0].fval = value
+       return rec
+
+   arr = np.ones(1, dtype=record_type)
+
+   cfunc = njit(f)
+
+   # Works
+   print(cfunc(arr))
+
+   # Does not work
+   print(f(arr))
+
+The above code results in the output:
+
+.. code:: none
+
+   [(2, 2.5)]
+   Traceback (most recent call last):
+     File "repro.py", line 22, in <module>
+       print(f(arr))
+     File "repro.py", line 9, in f
+       rec[0].ival = int(value)
+   AttributeError: 'numpy.void' object has no attribute 'ival'
+
+The Numba-compiled version of the function executes, but the pure Python
+version raises an error because of the unsupported use of attribute access.
+
+.. note::
+   This behavior will eventually be deprecated and removed.
+ +Attributes +---------- + +The following attributes of NumPy arrays are supported: + +* :attr:`~numpy.ndarray.dtype` +* :attr:`~numpy.ndarray.flags` +* :attr:`~numpy.ndarray.flat` +* :attr:`~numpy.ndarray.itemsize` +* :attr:`~numpy.ndarray.ndim` +* :attr:`~numpy.ndarray.shape` +* :attr:`~numpy.ndarray.size` +* :attr:`~numpy.ndarray.strides` +* :attr:`~numpy.ndarray.T` +* :attr:`~numpy.ndarray.real` +* :attr:`~numpy.ndarray.imag` + +The ``flags`` object +'''''''''''''''''''' + +The object returned by the :attr:`~numpy.ndarray.flags` attribute supports +the ``contiguous``, ``c_contiguous`` and ``f_contiguous`` attributes. + +The ``flat`` object +''''''''''''''''''' + +The object returned by the :attr:`~numpy.ndarray.flat` attribute supports +iteration and indexing, but be careful: indexing is very slow on +non-C-contiguous arrays. + +The ``real`` and ``imag`` attributes +'''''''''''''''''''''''''''''''''''' + +NumPy supports these attributes regardless of the dtype but Numba chooses to +limit their support to avoid potential user error. For numeric dtypes, +Numba follows NumPy's behavior. The :attr:`~numpy.ndarray.real` attribute +returns a view of the real part of the complex array and it behaves as an identity +function for other numeric dtypes. The :attr:`~numpy.ndarray.imag` attribute +returns a view of the imaginary part of the complex array and it returns a zero +array with the same shape and dtype for other numeric dtypes. For non-numeric +dtypes, including all structured/record dtypes, using these attributes will +result in a compile-time (`TypingError`) error. This behavior differs from +NumPy's but it is chosen to avoid the potential confusion with field names that +overlap these attributes. + +Calculation +----------- + +The following methods of NumPy arrays are supported in their basic form +(without any optional arguments): + +* :meth:`~numpy.ndarray.all` +* :meth:`~numpy.ndarray.any` +* :meth:`~numpy.ndarray.clip` +* :meth:`~numpy.ndarray.conj` +* :meth:`~numpy.ndarray.conjugate` +* :meth:`~numpy.ndarray.cumprod` +* :meth:`~numpy.ndarray.cumsum` +* :meth:`~numpy.ndarray.max` +* :meth:`~numpy.ndarray.mean` +* :meth:`~numpy.ndarray.min` +* :meth:`~numpy.ndarray.nonzero` +* :meth:`~numpy.ndarray.prod` +* :meth:`~numpy.ndarray.std` +* :meth:`~numpy.ndarray.take` +* :meth:`~numpy.ndarray.var` + +The corresponding top-level NumPy functions (such as :func:`numpy.prod`) +are similarly supported. + +Other methods +------------- + +The following methods of NumPy arrays are supported: + +* :meth:`~numpy.ndarray.argmax` (``axis`` keyword argument supported). +* :meth:`~numpy.ndarray.argmin` (``axis`` keyword argument supported). +* :meth:`~numpy.ndarray.argsort` (``kind`` key word argument supported for + values ``'quicksort'`` and ``'mergesort'``) +* :meth:`~numpy.ndarray.astype` (only the 1-argument form) +* :meth:`~numpy.ndarray.copy` (without arguments) +* :meth:`~numpy.ndarray.dot` (only the 1-argument form) +* :meth:`~numpy.ndarray.flatten` (no order argument; 'C' order only) +* :meth:`~numpy.ndarray.item` (without arguments) +* :meth:`~numpy.ndarray.itemset` (only the 1-argument form) +* :meth:`~numpy.ndarray.ptp` (without arguments) +* :meth:`~numpy.ndarray.ravel` (no order argument; 'C' order only) +* :meth:`~numpy.ndarray.repeat` (no axis argument) +* :meth:`~numpy.ndarray.reshape` (only the 1-argument form) +* :meth:`~numpy.ndarray.sort` (without arguments) +* :meth:`~numpy.ndarray.sum` (with or without the ``axis`` and/or ``dtype`` + arguments.) 
+ + * ``axis`` only supports ``integer`` values. + * If the ``axis`` argument is a compile-time constant, all valid values + are supported. + An out-of-range value will result in a ``LoweringError`` at compile-time. + * If the ``axis`` argument is not a compile-time constant, only values + from 0 to 3 are supported. + An out-of-range value will result in a runtime exception. + * All numeric ``dtypes`` are supported in the ``dtype`` parameter. + ``timedelta`` arrays can be used as input arrays but ``timedelta`` is not + supported as ``dtype`` parameter. + * When a ``dtype`` is given, it determines the type of the internal + accumulator. When it is not, the selection is made automatically based on + the input array's ``dtype``, mostly following the same rules as NumPy. + However, on 64-bit Windows, Numba uses a 64-bit accumulator for integer + inputs (``int64`` for ``int32`` inputs and ``uint64`` for ``uint32`` + inputs), while NumPy would use a 32-bit accumulator in those cases. + + +* :meth:`~numpy.ndarray.transpose` +* :meth:`~numpy.ndarray.view` (only the 1-argument form) +* :meth:`~numpy.ndarray.__contains__` + +Where applicable, the corresponding top-level NumPy functions (such as +:func:`numpy.argmax`) are similarly supported. + +.. warning:: + Sorting may be slightly slower than NumPy's implementation. + + +Functions +========= + +Linear algebra +-------------- + +Basic linear algebra is supported on 1-D and 2-D contiguous arrays of +floating-point and complex numbers: + +* :func:`numpy.dot` +* :func:`numpy.kron` ('C' and 'F' order only) +* :func:`numpy.outer` +* :func:`numpy.trace` (only the first argument). +* :func:`numpy.vdot` +* On Python 3.5 and above, the matrix multiplication operator from + :pep:`465` (i.e. ``a @ b`` where ``a`` and ``b`` are 1-D or 2-D arrays). +* :func:`numpy.linalg.cholesky` +* :func:`numpy.linalg.cond` (only non string values in ``p``). +* :func:`numpy.linalg.det` +* :func:`numpy.linalg.eig` (only running with data that does not cause a domain + change is supported e.g. real input -> real + output, complex input -> complex output). +* :func:`numpy.linalg.eigh` (only the first argument). +* :func:`numpy.linalg.eigvals` (only running with data that does not cause a + domain change is supported e.g. real input -> real output, + complex input -> complex output). +* :func:`numpy.linalg.eigvalsh` (only the first argument). +* :func:`numpy.linalg.inv` +* :func:`numpy.linalg.lstsq` +* :func:`numpy.linalg.matrix_power` +* :func:`numpy.linalg.matrix_rank` +* :func:`numpy.linalg.norm` (only the 2 first arguments and only non string + values in ``ord``). +* :func:`numpy.linalg.pinv` +* :func:`numpy.linalg.qr` (only the first argument). +* :func:`numpy.linalg.slogdet` +* :func:`numpy.linalg.solve` +* :func:`numpy.linalg.svd` (only the 2 first arguments). + +.. note:: + The implementation of these functions needs SciPy to be installed. 
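+
+For instance, a minimal sketch using :func:`numpy.linalg.solve` (the function
+name ``solve_system`` is illustrative; SciPy must be installed, per the note
+above):
+
+.. code-block:: python
+
+   import numpy as np
+   from numba import njit
+
+   @njit
+   def solve_system(a, b):
+       # contiguous floating-point arrays, as required for linear algebra
+       return np.linalg.solve(a, b)
+
+   a = np.array([[3.0, 1.0], [1.0, 2.0]])
+   b = np.array([9.0, 8.0])
+   print(solve_system(a, b))  # [2. 3.]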
+ +Reductions +---------- + +The following reduction functions are supported: + +* :func:`numpy.diff` (only the 2 first arguments) +* :func:`numpy.median` (only the first argument) +* :func:`numpy.nancumprod` (only the first argument) +* :func:`numpy.nancumsum` (only the first argument) +* :func:`numpy.nanmax` (only the first argument) +* :func:`numpy.nanmean` (only the first argument) +* :func:`numpy.nanmedian` (only the first argument) +* :func:`numpy.nanmin` (only the first argument) +* :func:`numpy.nanpercentile` (only the 2 first arguments, complex dtypes + unsupported) +* :func:`numpy.nanquantile` (only the 2 first arguments, complex dtypes + unsupported) +* :func:`numpy.nanprod` (only the first argument) +* :func:`numpy.nanstd` (only the first argument) +* :func:`numpy.nansum` (only the first argument) +* :func:`numpy.nanvar` (only the first argument) +* :func:`numpy.percentile` (only the 2 first arguments, complex dtypes + unsupported) +* :func:`numpy.quantile` (only the 2 first arguments, complex dtypes + unsupported) + +Other functions +--------------- + +The following top-level functions are supported: + +* :func:`numpy.allclose` +* :func:`numpy.append` +* :func:`numpy.arange` +* :func:`numpy.argsort` (``kind`` key word argument supported for values + ``'quicksort'`` and ``'mergesort'``) +* :func:`numpy.argwhere` +* :func:`numpy.array` (only the 2 first arguments) +* :func:`numpy.array_equal` +* :func:`numpy.array_split` +* :func:`numpy.asarray` (only the 2 first arguments) +* :func:`numpy.asarray_chkfinite` (only the 2 first arguments) +* :func:`numpy.asfarray` +* :func:`numpy.asfortranarray` (only the first argument) +* :func:`numpy.atleast_1d` +* :func:`numpy.atleast_2d` +* :func:`numpy.atleast_3d` +* :func:`numpy.bartlett` +* :func:`numpy.bincount` +* :func:`numpy.blackman` +* :func:`numpy.broadcast_to` (only the 2 first arguments) +* :func:`numpy.broadcast_arrays` (only the first argument) +* :func:`numpy.broadcast_shapes` +* :func:`numpy.column_stack` +* :func:`numpy.concatenate` +* :func:`numpy.convolve` (only the 2 first arguments) +* :func:`numpy.copy` (only the first argument) +* :func:`numpy.corrcoef` (only the 3 first arguments, requires SciPy) +* :func:`numpy.correlate` (only the 2 first arguments) +* :func:`numpy.count_nonzero` (axis only supports scalar values) +* :func:`numpy.cov` (only the 5 first arguments) +* :func:`numpy.cross` (only the 2 first arguments; at least one of the input + arrays should have ``shape[-1] == 3``) + + * If ``shape[-1] == 2`` for both inputs, please replace your + :func:`numpy.cross` call with :func:`numba.np.extensions.cross2d`. 
+ +* :func:`numpy.delete` (only the 2 first arguments) +* :func:`numpy.diag` +* :func:`numpy.digitize` +* :func:`numpy.dstack` +* :func:`numpy.dtype` (only the first argument) +* :func:`numpy.ediff1d` +* :func:`numpy.empty` (only the 2 first arguments) +* :func:`numpy.empty_like` (only the 2 first arguments) +* :func:`numpy.expand_dims` +* :func:`numpy.extract` +* :func:`numpy.eye` +* :func:`numpy.fill_diagonal` +* :func:`numpy.flatten` (no order argument; 'C' order only) +* :func:`numpy.flatnonzero` +* :func:`numpy.flip` (no axis argument) +* :func:`numpy.fliplr` +* :func:`numpy.flipud` +* :func:`numpy.frombuffer` (only the 2 first arguments) +* :func:`numpy.full` (only the 3 first arguments) +* :func:`numpy.full_like` (only the 3 first arguments) +* :func:`numpy.hamming` +* :func:`numpy.hanning` +* :func:`numpy.histogram` (only the 3 first arguments) +* :func:`numpy.hstack` +* :func:`numpy.identity` +* :func:`numpy.kaiser` +* :func:`numpy.iscomplex` +* :func:`numpy.iscomplexobj` +* :func:`numpy.isneginf` +* :func:`numpy.isposinf` +* :func:`numpy.isreal` +* :func:`numpy.isrealobj` +* :func:`numpy.isscalar` +* :func:`numpy.interp` (only the 3 first arguments) +* :func:`numpy.intersect1d` (only first 2 arguments, ar1 and ar2) +* :func:`numpy.linspace` (only the 3-argument form) +* :func:`numpy.logspace` (only the 3 first arguments) +* :class:`numpy.ndenumerate` +* :class:`numpy.ndindex` +* :class:`numpy.nditer` (only the first argument) +* :func:`numpy.ones` (only the 2 first arguments) +* :func:`numpy.ones_like` (only the 2 first arguments) +* :func:`numpy.partition` (only the 2 first arguments) +* :func:`numpy.ptp` (only the first argument) +* :func:`numpy.ravel` (no order argument; 'C' order only) +* :func:`numpy.repeat` (no axis argument) +* :func:`numpy.reshape` (no order argument; 'C' order only) +* :func:`numpy.roll` (only the 2 first arguments; second argument ``shift`` + must be an integer) +* :func:`numpy.roots` +* :func:`numpy.rot90` (only the 2 first arguments) +* :func:`numpy.round_` +* :func:`numpy.searchsorted` (only the 3 first arguments) +* :func:`numpy.select` (only using homogeneous lists or tuples for the first + two arguments, condlist and choicelist). Additionally, these two arguments + can only contain arrays (unlike NumPy that also accepts tuples). +* :func:`numpy.shape` +* :func:`numpy.sinc` +* :func:`numpy.sort` (no optional arguments, quicksort accepts + multi-dimensional array and sorts its last axis). 
+* :func:`numpy.split` +* :func:`numpy.stack` +* :func:`numpy.swapaxes` +* :func:`numpy.take` (only the 2 first arguments) +* :func:`numpy.take_along_axis` (the axis argument must be a literal value) +* :func:`numpy.transpose` +* :func:`numpy.trapz` (only the 3 first arguments) +* :func:`numpy.tri` (only the 3 first arguments; third argument ``k`` must be an integer) +* :func:`numpy.tril` (second argument ``k`` must be an integer) +* :func:`numpy.tril_indices` (all arguments must be integer) +* :func:`numpy.tril_indices_from` (second argument ``k`` must be an integer) +* :func:`numpy.triu` (second argument ``k`` must be an integer) +* :func:`numpy.triu_indices` (all arguments must be integer) +* :func:`numpy.triu_indices_from` (second argument ``k`` must be an integer) +* :func:`numpy.unique` (only the first argument) +* :func:`numpy.vander` +* :func:`numpy.vstack` +* :func:`numpy.where` +* :func:`numpy.zeros` (only the 2 first arguments) +* :func:`numpy.zeros_like` (only the 2 first arguments) + +The following constructors are supported, both with a numeric input (to +construct a scalar) or a sequence (to construct an array): + +* :class:`numpy.bool_` +* :class:`numpy.complex64` +* :class:`numpy.complex128` +* :class:`numpy.float32` +* :class:`numpy.float64` +* :class:`numpy.int8` +* :class:`numpy.int16` +* :class:`numpy.int32` +* :class:`numpy.int64` +* :class:`numpy.intc` +* :class:`numpy.intp` +* :class:`numpy.uint8` +* :class:`numpy.uint16` +* :class:`numpy.uint32` +* :class:`numpy.uint64` +* :class:`numpy.uintc` +* :class:`numpy.uintp` + +The following machine parameter classes are supported, with all purely numerical +attributes: + +* :class:`numpy.iinfo` +* :class:`numpy.finfo` (``machar`` attribute not supported) +* :class:`numpy.MachAr` (with no arguments to the constructor) + + +Literal arrays +-------------- + +.. XXX should this part of the user's guide? + +Neither Python nor Numba has actual array literals, but you can construct +arbitrary arrays by calling :func:`numpy.array` on a nested tuple:: + + a = numpy.array(((a, b, c), (d, e, f))) + +(nested lists are not yet supported by Numba) + + +Modules +======= + +.. _numpy-random: + +``random`` +---------- + +Generator Objects +''''''''''''''''' +Numba supports :py:class:`numpy.random.Generator()` objects. As of version 0.56, users can pass +individual NumPy :py:class:`Generator` objects into Numba functions and use their +methods inside the functions. The same algorithms are used as NumPy for +random number generation hence maintaining parity between the random +number generated using NumPy and Numba under identical arguments +(also the same documentation notes as NumPy :py:class:`Generator` methods apply). +The current Numba support for :py:class:`Generator` is not thread-safe, hence we +do not recommend using :py:class:`Generator` methods in methods with parallel +execution logic. + +.. note:: + NumPy's :py:class:`Generator` objects rely on :py:class:`BitGenerator` to manage state + and generate the random bits, which are then transformed into random + values from useful distributions. Numba will ``unbox`` the :py:class:`Generator` objects + and will maintain a reference to the underlying :py:class:`BitGenerator` objects using NumPy's + ``ctypes`` interface bindings. Hence :py:class:`Generator` objects can cross the JIT boundary + and their functions be used within Numba-Jit code. 
Note that since only references
+   to :py:class:`BitGenerator` objects are maintained, any change to the state
+   of a particular :py:class:`Generator` object outside Numba code would
+   affect the state of the :py:class:`Generator` inside the Numba code.
+
+.. literalinclude:: ../../../numba/tests/doc_examples/test_numpy_generators.py
+   :language: python
+   :start-after: magictoken.npgen_usage.begin
+   :end-before: magictoken.npgen_usage.end
+   :dedent: 8
+
+The following :py:class:`Generator` methods are supported:
+
+* :func:`numpy.random.Generator().random()`
+
+RandomState and legacy Random number generation
+'''''''''''''''''''''''''''''''''''''''''''''''
+
+Numba supports top-level functions from the
+`numpy.random <http://docs.scipy.org/doc/numpy/reference/routines.random.html>`_
+module, but does not allow you to create individual RandomState instances.
+The same algorithms are used as for the standard ``random`` module
+(and therefore the same notes apply),
+but with an independent internal state: seeding or drawing numbers from
+one generator won't affect the other.
+
+The following functions are supported.
+
+Initialization
+''''''''''''''
+
+* :func:`numpy.random.seed`: with an integer argument only
+
+.. warning::
+   Calling :func:`numpy.random.seed` from interpreted code (including from :term:`object mode`
+   code) will seed the NumPy random generator, not the Numba random generator.
+   To seed the Numba random generator, see the example below.
+
+.. code-block:: python
+
+   from numba import njit
+   import numpy as np
+
+   @njit
+   def seed(a):
+       np.random.seed(a)
+
+   @njit
+   def rand():
+       return np.random.rand()
+
+
+   # Incorrect seeding
+   np.random.seed(1234)
+   print(rand())
+
+   np.random.seed(1234)
+   print(rand())
+
+   # Correct seeding
+   seed(1234)
+   print(rand())
+
+   seed(1234)
+   print(rand())
+
+
+Simple random data
+''''''''''''''''''
+
+* :func:`numpy.random.rand`
+* :func:`numpy.random.randint` (only the first two arguments)
+* :func:`numpy.random.randn`
+* :func:`numpy.random.random`
+* :func:`numpy.random.random_sample`
+* :func:`numpy.random.ranf`
+* :func:`numpy.random.sample`
+
+Permutations
+''''''''''''
+
+* :func:`numpy.random.choice`: the optional *p* argument (probabilities
+  array) is not supported
+* :func:`numpy.random.permutation`
+* :func:`numpy.random.shuffle`: the sequence argument must be a one-dimension
+  NumPy array or buffer-providing object (such as a :class:`bytearray`
+  or :class:`array.array`)
+
+Distributions
+'''''''''''''
+
+The following functions support all arguments:
+ +* :func:`numpy.random.beta` +* :func:`numpy.random.binomial` +* :func:`numpy.random.chisquare` +* :func:`numpy.random.dirichlet` +* :func:`numpy.random.exponential` +* :func:`numpy.random.f` +* :func:`numpy.random.gamma` +* :func:`numpy.random.geometric` +* :func:`numpy.random.gumbel` +* :func:`numpy.random.hypergeometric` +* :func:`numpy.random.laplace` +* :func:`numpy.random.logistic` +* :func:`numpy.random.lognormal` +* :func:`numpy.random.logseries` +* :func:`numpy.random.multinomial` +* :func:`numpy.random.negative_binomial` +* :func:`numpy.random.noncentral_chisquare` +* :func:`numpy.random.normal` +* :func:`numpy.random.pareto` +* :func:`numpy.random.poisson` +* :func:`numpy.random.power` +* :func:`numpy.random.rayleigh` +* :func:`numpy.random.standard_cauchy` +* :func:`numpy.random.standard_exponential` +* :func:`numpy.random.standard_gamma` +* :func:`numpy.random.standard_normal` +* :func:`numpy.random.standard_t` +* :func:`numpy.random.triangular` +* :func:`numpy.random.uniform` +* :func:`numpy.random.vonmises` +* :func:`numpy.random.wald` +* :func:`numpy.random.weibull` +* :func:`numpy.random.zipf` + +.. note:: + Calling :func:`numpy.random.seed` from non-Numba code (or from + :term:`object mode` code) will seed the NumPy random generator, not the + Numba random generator. + +.. note:: + Since version 0.28.0, the generator is thread-safe and fork-safe. Each + thread and each process will produce independent streams of random numbers. + + +``stride_tricks`` +----------------- + +The following function from the :mod:`numpy.lib.stride_tricks` module +is supported: + +* :func:`~numpy.lib.stride_tricks.as_strided` (the *strides* argument + is mandatory, the *subok* argument is not supported) + +.. _supported_ufuncs: + +Standard ufuncs +=============== + +One objective of Numba is having all the +`standard ufuncs in NumPy `_ +understood by Numba. When a supported ufunc is found when compiling a +function, Numba maps the ufunc to equivalent native code. This allows the +use of those ufuncs in Numba code that gets compiled in :term:`nopython mode`. + +Limitations +----------- + +Right now, only a selection of the standard ufuncs work in :term:`nopython mode`. +Following is a list of the different standard ufuncs that Numba is aware of, +sorted in the same way as in the NumPy documentation. 
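+
+For illustration, a minimal sketch (the function name ``rms`` is
+illustrative) using supported ufuncs from the tables below inside a jitted
+function:
+
+.. code-block:: python
+
+   import numpy as np
+   from numba import njit
+
+   @njit
+   def rms(a):
+       # np.sqrt is a supported ufunc and maps to native code here
+       return np.sqrt(np.mean(a * a))
+
+   print(rms(np.arange(5.0)))  # sqrt(30/5) ~= 2.449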
+ + +Math operations +--------------- + +============== ============= =============== + UFUNC MODE +-------------- ------------------------------ + name object mode nopython mode +============== ============= =============== + add Yes Yes + subtract Yes Yes + multiply Yes Yes + divide Yes Yes + logaddexp Yes Yes + logaddexp2 Yes Yes + true_divide Yes Yes + floor_divide Yes Yes + negative Yes Yes + power Yes Yes + float_power Yes Yes + remainder Yes Yes + mod Yes Yes + fmod Yes Yes + divmod (*) Yes Yes + abs Yes Yes + absolute Yes Yes + fabs Yes Yes + rint Yes Yes + sign Yes Yes + conj Yes Yes + exp Yes Yes + exp2 Yes Yes + log Yes Yes + log2 Yes Yes + log10 Yes Yes + expm1 Yes Yes + log1p Yes Yes + sqrt Yes Yes + square Yes Yes + cbrt Yes Yes + reciprocal Yes Yes + conjugate Yes Yes + gcd Yes Yes + lcm Yes Yes +============== ============= =============== + +(\*) not supported on timedelta types + +Trigonometric functions +----------------------- + +============== ============= =============== + UFUNC MODE +-------------- ------------------------------ + name object mode nopython mode +============== ============= =============== + sin Yes Yes + cos Yes Yes + tan Yes Yes + arcsin Yes Yes + arccos Yes Yes + arctan Yes Yes + arctan2 Yes Yes + hypot Yes Yes + sinh Yes Yes + cosh Yes Yes + tanh Yes Yes + arcsinh Yes Yes + arccosh Yes Yes + arctanh Yes Yes + deg2rad Yes Yes + rad2deg Yes Yes + degrees Yes Yes + radians Yes Yes +============== ============= =============== + + +Bit-twiddling functions +----------------------- + +============== ============= =============== + UFUNC MODE +-------------- ------------------------------ + name object mode nopython mode +============== ============= =============== + bitwise_and Yes Yes + bitwise_or Yes Yes + bitwise_xor Yes Yes + bitwise_not Yes Yes + invert Yes Yes + left_shift Yes Yes + right_shift Yes Yes +============== ============= =============== + + +Comparison functions +-------------------- + +============== ============= =============== + UFUNC MODE +-------------- ------------------------------ + name object mode nopython mode +============== ============= =============== + greater Yes Yes + greater_equal Yes Yes + less Yes Yes + less_equal Yes Yes + not_equal Yes Yes + equal Yes Yes + logical_and Yes Yes + logical_or Yes Yes + logical_xor Yes Yes + logical_not Yes Yes + maximum Yes Yes + minimum Yes Yes + fmax Yes Yes + fmin Yes Yes +============== ============= =============== + + +Floating functions +------------------ + +============== ============= =============== + UFUNC MODE +-------------- ------------------------------ + name object mode nopython mode +============== ============= =============== + isfinite Yes Yes + isinf Yes Yes + isnan Yes Yes + signbit Yes Yes + copysign Yes Yes + nextafter Yes Yes + modf Yes No + ldexp Yes (*) Yes + frexp Yes No + floor Yes Yes + ceil Yes Yes + trunc Yes Yes + spacing Yes Yes +============== ============= =============== + +(\*) not supported on windows 32 bit + + +Datetime functions +------------------ + +============== ============= =============== + UFUNC MODE +-------------- ------------------------------ + name object mode nopython mode +============== ============= =============== + isnat Yes Yes +============== ============= =============== diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysemantics.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysemantics.rst new file mode 100644 index 000000000..296d76b08 --- /dev/null +++ 
b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysemantics.rst
@@ -0,0 +1,88 @@
+.. _pysemantics:
+
+Deviations from Python Semantics
+================================
+
+Bounds Checking
+---------------
+
+By default, instead of causing an :class:`IndexError`, accessing an
+out-of-bounds index of an array in a Numba-compiled function will return
+invalid values or lead to an access violation error (it's reading from
+invalid memory locations). Bounds checking can be enabled on a specific
+function via the ``boundscheck`` option of the jit decorator. Additionally,
+the :envvar:`NUMBA_BOUNDSCHECK` environment variable
+can be set to 0 or 1 to globally override this flag.
+
+.. note::
+   Bounds checking will slow down typical functions so it is recommended to only
+   use this flag for debugging purposes.
+
+Exceptions and Memory Allocation
+--------------------------------
+
+Due to limitations in the current compiler when handling exceptions, memory
+allocated (almost always NumPy arrays) within a function that raises an
+exception will **leak**. This is a known issue that will be fixed, but in the
+meantime, it is best to do memory allocation outside of functions that can
+also raise exceptions.
+
+Integer width
+-------------
+
+While Python has arbitrary-sized integers, integers in Numba-compiled
+functions get a fixed size through :term:`type inference` (usually,
+the size of a machine integer). This means that arithmetic
+operations can wrap around, overflow, or produce undefined results.
+
+Type inference can be overridden by an explicit type specification,
+if fine-grained control of integer width is desired.
+
+.. seealso::
+   *Enhancement proposal 1: Changes in integer typing*
+
+
+Boolean inversion
+-----------------
+
+Calling the bitwise complement operator (the ``~`` operator) on a Python
+boolean returns an integer, while the same operator on a NumPy boolean
+returns another boolean::
+
+   >>> ~True
+   -2
+   >>> ~np.bool_(True)
+   False
+
+Numba follows the NumPy semantics.
+
+
+Global and closure variables
+----------------------------
+
+In :term:`nopython mode`, global and closure variables are *frozen* by
+Numba: a Numba-compiled function sees the value of those variables at the
+time the function was compiled. Also, it is not possible to change their
+values from the function.
+
+Numba **may or may not** copy global variables referenced inside a compiled
+function. Small global arrays are copied for potential compiler optimization,
+with an immutability assumption. However, large global arrays are not copied,
+to conserve memory. The definition of "small" and "large" may change.
+
+
+Zero initialization of variables
+--------------------------------
+
+Numba does not track variable liveness at runtime. For simplicity of
+implementation, all variables are zero-initialized. Example::
+
+   from numba import njit
+
+   @njit
+   def foo():
+       for i in range(0):
+           pass
+       print(i)  # will print 0 and not raise UnboundLocalError
+
+   foo()
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysupported.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysupported.rst
new file mode 100644
index 000000000..f0a6e45d0
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/pysupported.rst
@@ -0,0 +1,1284 @@
+..
_pysupported:
+
+=========================
+Supported Python features
+=========================
+
+Apart from the :ref:`pysupported-language` part below, which applies to both
+:term:`object mode` and :term:`nopython mode`, this page only lists the
+features supported in :term:`nopython mode`.
+
+.. warning::
+   Numba behavior differs from Python semantics in some situations. We
+   strongly advise reviewing :ref:`pysemantics` to become familiar with these
+   differences.
+
+
+.. _pysupported-language:
+
+Language
+========
+
+Constructs
+----------
+
+Numba strives to support as much of the Python language as possible, but
+some language features are not available inside Numba-compiled functions.
+Below is a quick reference for the support level of Python constructs.
+
+
+**Supported** constructs:
+
+- conditional branch: ``if .. elif .. else``
+- loops: ``while``, ``for .. in``, ``break``, ``continue``
+- basic generator: ``yield``
+- assertion: ``assert``
+
+**Partially supported** constructs:
+
+- exceptions: ``try .. except``, ``raise``, ``else`` and ``finally``
+  (See details in this :ref:`section <pysupported-exception-handling>`)
+
+- context manager:
+  ``with`` (only supports ``numba.objmode()``)
+
+- list comprehension (see details in this
+  :ref:`section <pysupported-comprehension>`)
+
+**Unsupported** constructs:
+
+- async features: ``async with``, ``async for`` and ``async def``
+- class definition: ``class`` (except for ``@jitclass``)
+- set, dict and generator comprehensions
+- generator delegation: ``yield from``
+
+Functions
+---------
+
+Function calls
+''''''''''''''
+
+Numba supports function calls using positional and named arguments, as well
+as arguments with default values and ``*args`` (note the argument for
+``*args`` can only be a tuple, not a list). Explicit ``**kwargs`` are
+not supported.
+
+Function calls to locally defined inner functions are supported as long as
+they can be fully inlined.
+
+Functions as arguments
+''''''''''''''''''''''
+
+Functions can be passed as arguments into another function, but they cannot
+be returned. For example:
+
+.. code-block:: python
+
+   from numba import jit
+
+   @jit
+   def add1(x):
+       return x + 1
+
+   @jit
+   def bar(fn, x):
+       return fn(x)
+
+   @jit
+   def foo(x):
+       return bar(add1, x)
+
+   # Passing add1 within numba compiled code.
+   print(foo(1))
+   # Passing add1 into bar from interpreted code
+   print(bar(add1, 1))
+
+.. note:: Numba does not handle function objects as real objects. Once a
+          function is assigned to a variable, the variable cannot be
+          re-assigned to a different function.
+
+
+Inner function and closure
+'''''''''''''''''''''''''''
+
+Numba now supports inner functions as long as they are non-recursive
+and only called locally, but not passed as an argument or returned as
+a result. The use of closure variables (variables defined in outer scopes)
+within an inner function is also supported.
+
+Recursive calls
+'''''''''''''''
+
+Most recursive call patterns are supported. The only restriction is that the
+recursive callee must have a control-flow path that returns without recursing.
+Numba is able to type-infer recursive functions without specifying the function
+type signature (which is required in Numba 0.28 and earlier).
+Recursive calls can even call into a different overload of the function.
+
+.. XXX add reference to NBEP
+
+Generators
+----------
+
+Numba supports generator functions and is able to compile them in
+:term:`object mode` and :term:`nopython mode`. The returned generator
+can be used both from Numba-compiled code and from regular Python code.
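+
+For example, a minimal sketch of a compiled generator consumed from regular
+Python code:
+
+.. code-block:: python
+
+   from numba import njit
+
+   @njit
+   def countdown(n):
+       while n > 0:
+           yield n
+           n -= 1
+
+   print(list(countdown(3)))  # [3, 2, 1]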
+
+Coroutine features of generators are not supported (i.e. the
+:meth:`generator.send`, :meth:`generator.throw`, :meth:`generator.close`
+methods).
+
+.. _pysupported-exception-handling:
+
+Exception handling
+------------------
+
+``raise`` statement
+'''''''''''''''''''
+
+The ``raise`` statement is only supported in the following forms:
+
+* ``raise SomeException``
+* ``raise SomeException()``: in :term:`nopython mode`, constructor
+  arguments must be :term:`compile-time constants <compile-time constant>`
+
+It is currently unsupported to re-raise an exception created in compiled code.
+
+``try .. except``
+'''''''''''''''''
+
+The ``try .. except`` construct is partially supported. The following forms
+are supported:
+
+* the *bare* except that captures all exceptions:
+
+  .. code-block:: python
+
+     try:
+         ...
+     except:
+         ...
+
+* using exactly the ``Exception`` class in the ``except`` clause:
+
+  .. code-block:: python
+
+     try:
+         ...
+     except Exception:
+         ...
+
+  This will match any exception that is a subclass of ``Exception`` as
+  expected. Currently, instances of ``Exception`` and its subclasses are the
+  only kind of exception that can be raised in compiled code.
+
+.. warning:: Numba currently masks signals like ``KeyboardInterrupt`` and
+   ``SystemExit``. These signaling exceptions are ignored during the execution of
+   Numba compiled code. The Python interpreter will handle them as soon as
+   the control is returned to it.
+
+Currently, exception objects are not materialized inside compiled functions.
+As a result, it is not possible to store an exception object into a user
+variable or to re-raise an exception. With this limitation, the only realistic
+use-case would look like:
+
+.. code-block:: python
+
+   try:
+       do_work()
+   except Exception:
+       handle_error_case()
+       return error_code
+
+``try .. except .. else .. finally``
+''''''''''''''''''''''''''''''''''''
+
+The ``else`` block and the ``finally`` block of a ``try .. except`` are
+supported:
+
+.. code-block:: python
+
+   >>> @jit(nopython=True)
+   ... def foo():
+   ...     try:
+   ...         print('main block')
+   ...     except Exception:
+   ...         print('handler block')
+   ...     else:
+   ...         print('else block')
+   ...     finally:
+   ...         print('final block')
+   ...
+   >>> foo()
+   main block
+   else block
+   final block
+
+The ``try .. finally`` construct without the ``except`` clause is also
+supported.
+
+.. _pysupported-builtin-types:
+
+Built-in types
+==============
+
+int, bool
+---------
+
+Arithmetic operations as well as truth values are supported.
+
+The following attributes and methods are supported:
+
+* ``.conjugate()``
+* ``.real``
+* ``.imag``
+
+float, complex
+--------------
+
+Arithmetic operations as well as truth values are supported.
+
+The following attributes and methods are supported:
+
+* ``.conjugate()``
+* ``.real``
+* ``.imag``
+
+str
+---
+
+Numba supports (Unicode) strings in Python 3. Strings can be passed into
+:term:`nopython mode` as arguments, as well as constructed and returned from
+:term:`nopython mode`. As in Python, slices (even of length 1) return a new,
+reference-counted string. Optimized code paths for efficiently accessing
+single characters may be introduced in the future.
+
+The in-memory representation is the same as was introduced in Python 3.4, with
+each string having a tag to indicate whether the string is using a 1, 2, or 4
+byte character width in memory. When strings of different encodings are
+combined (as in concatenation), the resulting string automatically uses the
+larger character width of the two input strings.
String slices also use the
+same character width as the original string, even if the slice could be
+represented with a narrower character width. (These details are invisible to
+the user, of course.)
+
+The following constructors, functions, attributes and methods are currently
+supported:
+
+* ``str(int)``
+* ``len()``
+* ``+`` (concatenation of strings)
+* ``*`` (repetition of strings)
+* ``in``, ``.contains()``
+* ``==``, ``<``, ``<=``, ``>``, ``>=`` (comparison)
+* ``.capitalize()``
+* ``.casefold()``
+* ``.center()``
+* ``.count()``
+* ``.endswith()``
+* ``.expandtabs()``
+* ``.find()``
+* ``.index()``
+* ``.isalnum()``
+* ``.isalpha()``
+* ``.isdecimal()``
+* ``.isdigit()``
+* ``.isidentifier()``
+* ``.islower()``
+* ``.isnumeric()``
+* ``.isprintable()``
+* ``.isspace()``
+* ``.istitle()``
+* ``.isupper()``
+* ``.join()``
+* ``.ljust()``
+* ``.lower()``
+* ``.lstrip()``
+* ``.partition()``
+* ``.replace()``
+* ``.rfind()``
+* ``.rindex()``
+* ``.rjust()``
+* ``.rpartition()``
+* ``.rsplit()``
+* ``.rstrip()``
+* ``.split()``
+* ``.splitlines()``
+* ``.startswith()``
+* ``.strip()``
+* ``.swapcase()``
+* ``.title()``
+* ``.upper()``
+* ``.zfill()``
+
+Regular string literals (e.g. ``"ABC"``) as well as f-strings without format specs
+(e.g. ``f"ABC_{a+1}"``)
+that only use string and integer variables (types with ``str()`` overload)
+are supported in :term:`nopython mode`.
+
+Additional operations as well as support for Python 2 strings / Python 3 bytes
+will be added in a future version of Numba. Python 2 Unicode objects will
+likely never be supported.
+
+.. warning::
+   The performance of some operations is known to be slower than the CPython
+   implementation. These include substring search (``in``, ``.contains()``
+   and ``find()``) and string creation (like ``.split()``). Improving the
+   string performance is an ongoing task, but the speed of CPython is
+   unlikely to be surpassed for basic string operations in isolation.
+   Numba is most successfully used for larger algorithms that happen to
+   involve strings, where basic string operations are not the bottleneck.
+
+
+tuple
+-----
+
+Tuple support is categorised into two categories based on the contents of a
+tuple. The first category is homogeneous tuples: tuples where the type of all
+the values in the tuple is the same. The second is heterogeneous tuples:
+tuples where the types of the values are different.
+
+.. note::
+
+   The ``tuple()`` constructor itself is NOT supported.
+
+homogeneous tuples
+------------------
+
+An example of a homogeneous tuple:
+
+.. code-block:: python
+
+   homogeneous_tuple = (1, 2, 3, 4)
+
+The following operations are supported on homogeneous tuples:
+
+* Tuple construction.
+* Tuple unpacking.
+* Comparison between tuples.
+* Iteration and indexing.
+* Addition (concatenation) between tuples.
+* Slicing tuples with a constant slice.
+* The ``index`` method on tuples.
+
+heterogeneous tuples
+--------------------
+
+An example of a heterogeneous tuple:
+
+.. code-block:: python
+
+   heterogeneous_tuple = (1, 2j, 3.0, "a")
+
+The following operations are supported on heterogeneous tuples:
+
+* Comparison between tuples.
+* Indexing using an index value that is a compile time constant
+  e.g. ``mytuple[7]``, where ``7`` is evidently a constant.
+* Iteration over a tuple (requires experimental :func:`literal_unroll` feature,
+  see below).
+
+.. warning::
+   The following feature (:func:`literal_unroll`) is experimental and was added
+   in version 0.47.
+
+To permit iteration over a heterogeneous tuple the special function
+:func:`numba.literal_unroll` must be used. This function has no effect other
+than to act as a token to permit the use of this feature. Example use:
+
+.. code-block:: python
+
+   from numba import njit, literal_unroll
+
+   @njit
+   def foo():
+       heterogeneous_tuple = (1, 2j, 3.0, "a")
+       for i in literal_unroll(heterogeneous_tuple):
+           print(i)
+
+.. warning::
+   The following restrictions apply to the use of :func:`literal_unroll`:
+
+   * :func:`literal_unroll` can only be used on tuples and constant lists of
+     compile time constants, e.g. ``[1, 2j, 3, "a"]``, provided the list is
+     not mutated.
+   * The only supported use pattern for :func:`literal_unroll` is loop
+     iteration.
+   * Only one :func:`literal_unroll` call is permitted per loop nest (i.e.
+     nested heterogeneous tuple iteration loops are forbidden).
+   * The usual type inference/stability rules still apply.
+
+A more involved use of :func:`literal_unroll` might be type-specific dispatch;
+recall that string and integer literal values are considered their own type.
+For example:
+
+.. code-block:: python
+
+   from numba import njit, types, literal_unroll
+   from numba.extending import overload
+
+   def dt(x):
+       # dummy function to overload
+       pass
+
+   @overload(dt, inline='always')
+   def ol_dt(li):
+       if isinstance(li, types.StringLiteral):
+           value = li.literal_value
+           if value == "apple":
+               def impl(li):
+                   return 1
+           elif value == "orange":
+               def impl(li):
+                   return 2
+           elif value == "banana":
+               def impl(li):
+                   return 3
+           return impl
+       elif isinstance(li, types.IntegerLiteral):
+           value = li.literal_value
+           if value == 0xca11ab1e:
+               def impl(li):
+                   # capture the dispatcher literal value
+                   return 0x5ca1ab1e + value
+               return impl
+
+   @njit
+   def foo():
+       acc = 0
+       for t in literal_unroll(('apple', 'orange', 'banana', 3390155550)):
+           acc += dt(t)
+       return acc
+
+   print(foo())
+
+
+list
+----
+
+
+.. warning::
+   As of version 0.45.x the internal implementation for the list datatype in
+   Numba is changing. Until recently, only a single implementation of the list
+   datatype was available, the so-called *reflected-list* (see below).
+   However, it was scheduled for deprecation from version 0.44.0 onwards due
+   to its limitations. As of version 0.45.0 a new implementation, the
+   so-called *typed-list* (see below), is available as an experimental
+   feature. For more information, please see: :ref:`deprecation`.
+
+Creating and returning lists from JIT-compiled functions is supported,
+as well as all methods and operations. Lists must be strictly homogeneous:
+Numba will reject any list containing objects of different types, even if
+the types are compatible (for example, ``[1, 2.5]`` is rejected as it
+contains an :class:`int` and a :class:`float`).
+
+For example, to create a list of arrays::
+
+   In [1]: from numba import njit
+
+   In [2]: import numpy as np
+
+   In [3]: @njit
+      ...: def foo(x):
+      ...:     lst = []
+      ...:     for i in range(x):
+      ...:         lst.append(np.arange(i))
+      ...:     return lst
+      ...:
+
+   In [4]: foo(4)
+   Out[4]: [array([], dtype=int64), array([0]), array([0, 1]), array([0, 1, 2])]
+
+
+.. _feature-reflected-list:
+
+List Reflection
+'''''''''''''''
+
+In nopython mode, Numba does not operate on Python objects. ``list`` objects
+are compiled into an internal representation. Any ``list`` arguments must be
+converted into this representation on the way in to nopython mode and their
+contained elements must be restored in the original Python objects via a
+process called :term:`reflection`.
Reflection is required to maintain the same +semantics as found in regular Python code. However, the reflection process +can be expensive for large lists and it is not supported for lists that contain +reflected data types. Users cannot use list-of-list as an argument because +of this limitation. + +.. note:: + When passing a list into a JIT-compiled function, any modifications + made to the list will not be visible to the Python interpreter until + the function returns. (A limitation of the reflection process.) + +.. warning:: + List sorting currently uses a quicksort algorithm, which has different + performance characteristics than the algorithm used by Python. + +.. _feature-list-initial-value: + +Initial Values +'''''''''''''' +.. warning:: + This is an experimental feature! + +Lists that: + +* Are constructed using the square braces syntax +* Have values of a literal type + +will have their initial value stored in the ``.initial_value`` property on the +type so as to permit inspection of these values at compile time. If required, +to force value-based dispatch, the :ref:`literally ` +function will accept such a list. + +Example: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py + :language: python + :caption: from ``test_ex_initial_value_list_compile_time_consts`` of ``numba/tests/doc_examples/test_literal_container_usage.py`` + :start-after: magictoken.test_ex_initial_value_list_compile_time_consts.begin + :end-before: magictoken.test_ex_initial_value_list_compile_time_consts.end + :dedent: 12 + :linenos: + +.. _feature-typed-list: + +Typed List +'''''''''' + +.. note:: + ``numba.typed.List`` is an experimental feature; if you encounter any bugs in + functionality or suffer from unexpectedly bad performance, please report + this, ideally by opening an issue on the Numba issue tracker. + +As of version 0.45.0 a new implementation of the list data type is available, +the so-called *typed-list*. This is a compiled, library-backed, type-homogeneous +list data type that is an improvement over the *reflected-list* mentioned +above. Additionally, lists can now be arbitrarily nested. Since the +implementation is considered experimental, you will need to import it +explicitly from the `numba.typed` module:: + + In [1]: from numba.typed import List + + In [2]: from numba import njit + + In [3]: @njit + ...: def foo(l): + ...: l.append(23) + ...: return l + ...: + + In [4]: mylist = List() + + In [5]: mylist.append(1) + + In [6]: foo(mylist) + Out[6]: ListType[int64]([1, 23]) + + +.. note:: + As the typed-list stabilizes it will fully replace the reflected-list and the + constructors `[]` and `list()` will create a typed-list instead of a + reflected one. + + +Here's an example using ``List()`` to create ``numba.typed.List`` inside a +jit-compiled function and letting the compiler infer the item type: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_list_usage.py + :language: python + :caption: from ``ex_inferred_list_jit`` of ``numba/tests/doc_examples/test_typed_list_usage.py`` + :start-after: magictoken.ex_inferred_list_jit.begin + :end-before: magictoken.ex_inferred_list_jit.end + :dedent: 12 + :linenos: + +Here's an example of using ``List()`` to create a ``numba.typed.List`` outside of +a jit-compiled function and then using it as an argument to a jit-compiled +function: + +.. 
literalinclude:: ../../../numba/tests/doc_examples/test_typed_list_usage.py + :language: python + :caption: from ``ex_inferred_list`` of ``numba/tests/doc_examples/test_typed_list_usage.py`` + :start-after: magictoken.ex_inferred_list.begin + :end-before: magictoken.ex_inferred_list.end + :dedent: 12 + :linenos: + +Finally, here's an example of using a nested `List()`: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_list_usage.py + :language: python + :caption: from ``ex_nested_list`` of ``numba/tests/doc_examples/test_typed_list_usage.py`` + :start-after: magictoken.ex_nested_list.begin + :end-before: magictoken.ex_nested_list.end + :dedent: 12 + :linenos: + +.. _feature-literal-list: + +Literal List +'''''''''''' + +.. warning:: + This is an experimental feature! + +Numba supports the use of literal lists containing any values, for example:: + + l = ['a', 1, 2j, np.zeros(5,)] + +the predominant use of these lists is for use as a configuration object. +The lists appear as a ``LiteralList`` type which inherits from ``Literal``, as a +result the literal values of the list items are available at compile time. +For example: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py + :language: python + :caption: from ``test_ex_literal_list`` of ``numba/tests/doc_examples/test_literal_container_usage.py`` + :start-after: magictoken.test_ex_literal_list.begin + :end-before: magictoken.test_ex_literal_list.end + :dedent: 12 + :linenos: + +Important things to note about these kinds of lists: + +#. They are immutable, use of mutating methods e.g. ``.pop()`` will result in + compilation failure. Read-only static access and read only methods are + supported e.g. ``len()``. +#. Dynamic access of items is not possible, e.g. ``some_list[x]``, for a + value ``x`` which is not a compile time constant. This is because it's + impossible to statically determine the type of the item being accessed. +#. Inside the compiler, these lists are actually just tuples with some extra + things added to make them look like they are lists. +#. They cannot be returned to the interpreter from a compiled function. + +.. _pysupported-comprehension: + +List comprehension +'''''''''''''''''' + +Numba supports list comprehension. For example:: + + + In [1]: from numba import njit + + In [2]: @njit + ...: def foo(x): + ...: return [[i for i in range(n)] for n in range(x)] + ...: + + In [3]: foo(3) + Out[3]: [[], [0], [0, 1]] + + +.. note:: + Prior to version 0.39.0, Numba did not support the creation of nested lists. + + +Numba also supports "array comprehension" that is a list comprehension +followed immediately by a call to :func:`numpy.array`. The following +is an example that produces a 2D Numpy array:: + + from numba import jit + import numpy as np + + @jit(nopython=True) + def f(n): + return np.array([ [ x * y for x in range(n) ] for y in range(n) ]) + +In this case, Numba is able to optimize the program to allocate and +initialize the result array directly without allocating intermediate +list objects. Therefore, the nesting of list comprehension here is +not a problem since a multi-dimensional array is being created here +instead of a nested list. + +Additionally, Numba supports parallel array comprehension when combined +with the :ref:`parallel_jit_option` option on CPUs. + +set +--- + +All methods and operations on sets are supported in JIT-compiled functions. 
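+ +For illustration, here is a minimal sketch (not taken from the Numba test suite; the +function name is hypothetical) that builds a homogeneous set of integers inside a +JIT-compiled function:: + + from numba import njit + + @njit + def count_distinct(values): + seen = set() # item type is inferred from the values added below + for v in values: + seen.add(v) + return len(seen) + + print(count_distinct((1, 2, 2, 3, 1))) # prints 3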
+ +Sets must be strictly homogeneous: Numba will reject any set containing +objects of different types, even if the types are compatible (for example, +``{1, 2.5}`` is rejected as it contains a :class:`int` and a :class:`float`). +The use of reference counted types, e.g. strings, in sets is unsupported. + +.. note:: + When passing a set into a JIT-compiled function, any modifications + made to the set will not be visible to the Python interpreter until + the function returns. + +.. _feature-typed-dict: + +Typed Dict +---------- + +.. warning:: + ``numba.typed.Dict`` is an experimental feature. The API may change + in the future releases. + +.. note:: + ``dict()`` was not supported in versions prior to 0.44. Currently, calling + ``dict()`` translates to calling ``numba.typed.Dict()``. + +Numba only supports the use of ``dict()`` without any arguments. Such use is +semantically equivalent to ``{}`` and ``numba.typed.Dict()``. It will create +an instance of ``numba.typed.Dict`` where the key-value types will be later +inferred by usage. + +Numba does not fully support the Python ``dict`` because it is an untyped +container that can have any Python types as members. To generate efficient +machine code, Numba needs the keys and the values of the dictionary to have +fixed types, declared in advance. To achieve this, Numba has a typed dictionary, +``numba.typed.Dict``, for which the type-inference mechanism must be able to +infer the key-value types by use, or the user must explicitly declare the +key-value type using the ``Dict.empty()`` constructor method. +This typed dictionary has the same API as the Python ``dict``, it implements +the ``collections.MutableMapping`` interface and is usable in both interpreted +Python code and JIT-compiled Numba functions. +Because the typed dictionary stores keys and values in Numba's native, +unboxed data layout, passing a Numba dictionary into nopython mode has very low +overhead. However, this means that using a typed dictionary from the Python +interpreter is slower than a regular dictionary because Numba has to box and +unbox key and value objects when getting or setting items. + +An important difference of the typed dictionary in comparison to Python's +``dict`` is that **implicit casting** occurs when a key or value is stored. +As a result the *setitem* operation may fail should the type-casting fail. + +It should be noted that the Numba typed dictionary is implemented using the same +algorithm as the CPython 3.7 dictionary. As a consequence, the typed dictionary +is ordered and has the same collision resolution as the CPython implementation. + +Further to the above in relation to type specification, there are limitations +placed on the types that can be used as keys and/or values in the typed +dictionary, most notably the Numba ``Set`` and ``List`` types are currently +unsupported. Acceptable key/value types include but are not limited to: unicode +strings, arrays (value only), scalars, tuples. It is expected that these +limitations will be relaxed as Numba continues to improve. + +Here's an example of using ``dict()`` and ``{}`` to create ``numba.typed.Dict`` +instances and letting the compiler infer the key-value types: + +.. 
literalinclude:: ../../../numba/tests/doc_examples/test_typed_dict_usage.py + :language: python + :caption: from ``test_ex_inferred_dict_njit`` of ``numba/tests/doc_examples/test_typed_dict_usage.py`` + :start-after: magictoken.ex_inferred_dict_njit.begin + :end-before: magictoken.ex_inferred_dict_njit.end + :dedent: 12 + :linenos: + +Here's an example of creating a ``numba.typed.Dict`` instance from interpreted +code and using the dictionary in jit code: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_dict_usage.py + :language: python + :caption: from ``test_ex_typed_dict_from_cpython`` of ``numba/tests/doc_examples/test_typed_dict_usage.py`` + :start-after: magictoken.ex_typed_dict_from_cpython.begin + :end-before: magictoken.ex_typed_dict_from_cpython.end + :dedent: 12 + :linenos: + +Here's an example of creating a ``numba.typed.Dict`` instance from jit code and +using the dictionary in interpreted code: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_typed_dict_usage.py + :language: python + :caption: from ``test_ex_typed_dict_njit`` of ``numba/tests/doc_examples/test_typed_dict_usage.py`` + :start-after: magictoken.ex_typed_dict_njit.begin + :end-before: magictoken.ex_typed_dict_njit.end + :dedent: 12 + :linenos: + +It should be noted that ``numba.typed.Dict`` is not thread-safe. +Specifically, functions which modify a dictionary from multiple +threads will potentially corrupt memory, causing a +range of possible failures. However, the dictionary can be safely read from +multiple threads as long as the contents of the dictionary do not +change during the parallel access. + +Dictionary comprehension +'''''''''''''''''''''''' + +Numba supports dictionary comprehension under the assumption that a +``numba.typed.Dict`` instance can be created from the comprehension. For +example:: + + In [1]: from numba import njit + + In [2]: @njit + ...: def foo(n): + ...: return {i: i**2 for i in range(n)} + ...: + + In [3]: foo(3) + Out[3]: DictType[int64,int64]({0: 0, 1: 1, 2: 4}) + +.. _feature-dict-initial-value: + +Initial Values +'''''''''''''' +.. warning:: + This is an experimental feature! + +Typed dictionaries that: + +* Are constructed using the curly braces syntax +* Have literal string keys +* Have values of a literal type + +will have their initial value stored in the ``.initial_value`` property on the +type so as to permit inspection of these values at compile time. If required, +to force value based dispatch the :ref:`literally ` +function will accept a typed dictionary. + +Example: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py + :language: python + :caption: from ``test_ex_initial_value_dict_compile_time_consts`` of ``numba/tests/doc_examples/test_literal_container_usage.py`` + :start-after: magictoken.test_ex_initial_value_dict_compile_time_consts.begin + :end-before: magictoken.test_ex_initial_value_dict_compile_time_consts.end + :dedent: 12 + :linenos: + +.. _feature-literal-str-key-dict: + +Heterogeneous Literal String Key Dictionary +------------------------------------------- + +.. warning:: + This is an experimental feature! + +Numba supports the use of statically declared string key to any value +dictionaries, for example:: + + d = {'a': 1, 'b': 'data', 'c': 2j} + +the predominant use of these dictionaries is to orchestrate advanced compilation +dispatch or as a container for use as a configuration object. 
The dictionaries +appear as a ``LiteralStrKeyDict`` type which inherits from ``Literal``; as a +result the literal values of the keys and the types of the items are available +at compile time. For example: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_literal_container_usage.py + :language: python + :caption: from ``test_ex_literal_dict_compile_time_consts`` of ``numba/tests/doc_examples/test_literal_container_usage.py`` + :start-after: magictoken.test_ex_literal_dict_compile_time_consts.begin + :end-before: magictoken.test_ex_literal_dict_compile_time_consts.end + :dedent: 12 + :linenos: + +Important things to note about these kinds of dictionaries: + +#. They are immutable, use of mutating methods e.g. ``.pop()`` will result in + compilation failure. Read-only static access and read only methods are + supported e.g. ``len()``. +#. Dynamic access of items is not possible, e.g. ``some_dictionary[x]``, for a + value ``x`` which is not a compile time constant. This is because it's + impossible to statically determine the type of the item being accessed. +#. Inside the compiler, these dictionaries are actually just named tuples with + some extra things added to make them look like they are dictionaries. +#. They cannot be returned to the interpreter from a compiled function. +#. The ``.keys()``, ``.values()`` and ``.items()`` methods all functionally + operate but return tuples as opposed to iterables. + +None +---- + +The None value is supported for identity testing (when using an +:class:`~numba.optional` type). + + +bytes, bytearray, memoryview +---------------------------- + +The :class:`bytearray` type and, on Python 3, the :class:`bytes` type +support indexing, iteration and retrieving the len(). + +The :class:`memoryview` type supports indexing, slicing, iteration, +retrieving the len(), and also the following attributes: + +* :attr:`~memoryview.contiguous` +* :attr:`~memoryview.c_contiguous` +* :attr:`~memoryview.f_contiguous` +* :attr:`~memoryview.itemsize` +* :attr:`~memoryview.nbytes` +* :attr:`~memoryview.ndim` +* :attr:`~memoryview.readonly` +* :attr:`~memoryview.shape` +* :attr:`~memoryview.strides` + + +Built-in functions +================== + +The following built-in functions are supported: + +.. warning:: + Support for ``isinstance`` is an experimental feature. This feature is + automatically enabled by simply using ``isinstance`` in JIT compiled code. + +* :func:`abs` +* :class:`bool` +* :func:`chr` +* :class:`complex` +* :func:`divmod` +* :func:`enumerate` +* :func:`filter` +* :class:`float` +* :func:`hash` (see :ref:`pysupported-hashing` below) +* :class:`int`: only the one-argument form +* :func:`iter`: only the one-argument form +* :func:`isinstance` (experimental support only) +* :func:`len` +* :func:`min` +* :func:`map` +* :func:`max` +* :func:`next`: only the one-argument form +* :func:`ord` +* :func:`print`: only numbers and strings; no ``file`` or ``sep`` argument +* :class:`range`: The only permitted use of range is as a callable function + (cannot pass range as an argument to a jitted function or return a range from + a jitted function). +* :func:`round` +* :func:`sorted`: the ``key`` argument is not supported +* :func:`sum` +* :func:`type`: only the one-argument form, and only on some types + (e.g. numbers and named tuples) +* :func:`zip` + +.. 
_pysupported-hashing: + +Hashing +------- + +The :func:`hash` built-in is supported and produces hash values for all +supported hashable types with the following Python version specific behavior: + +Under Python 3, hash values computed by Numba will exactly match those computed +in CPython under the condition that the :attr:`sys.hash_info.algorithm` is +``siphash24`` (default). + +The ``PYTHONHASHSEED`` environment variable influences the hashing behavior in +precisely the manner described in the CPython documentation. + + +Standard library modules +======================== + +``array`` +--------- + +Limited support for the :class:`array.array` type is provided through +the buffer protocol. Indexing, iteration and taking the len() is supported. +All type codes are supported except for ``"u"``. + +``cmath`` +--------- + +The following functions from the :mod:`cmath` module are supported: + +* :func:`cmath.acos` +* :func:`cmath.acosh` +* :func:`cmath.asin` +* :func:`cmath.asinh` +* :func:`cmath.atan` +* :func:`cmath.atanh` +* :func:`cmath.cos` +* :func:`cmath.cosh` +* :func:`cmath.exp` +* :func:`cmath.isfinite` +* :func:`cmath.isinf` +* :func:`cmath.isnan` +* :func:`cmath.log` +* :func:`cmath.log10` +* :func:`cmath.phase` +* :func:`cmath.polar` +* :func:`cmath.rect` +* :func:`cmath.sin` +* :func:`cmath.sinh` +* :func:`cmath.sqrt` +* :func:`cmath.tan` +* :func:`cmath.tanh` + +``collections`` +--------------- + +Named tuple classes, as returned by :func:`collections.namedtuple`, are +supported in the same way regular tuples are supported. Attribute access +and named parameters in the constructor are also supported. + +Creating a named tuple class inside Numba code is *not* supported; the class +must be created at the global level. + +.. _ctypes-support: + +``ctypes`` +---------- + +Numba is able to call ctypes-declared functions with the following argument +and return types: + +* :class:`ctypes.c_int8` +* :class:`ctypes.c_int16` +* :class:`ctypes.c_int32` +* :class:`ctypes.c_int64` +* :class:`ctypes.c_uint8` +* :class:`ctypes.c_uint16` +* :class:`ctypes.c_uint32` +* :class:`ctypes.c_uint64` +* :class:`ctypes.c_float` +* :class:`ctypes.c_double` +* :class:`ctypes.c_void_p` + +``enum`` +-------- + +Both :class:`enum.Enum` and :class:`enum.IntEnum` subclasses are supported. 
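+ +For example, a minimal sketch (the enum and function are hypothetical, not part of +the Numba documentation examples) of branching on an :class:`enum.IntEnum` member +inside a JIT-compiled function:: + + from enum import IntEnum + + from numba import njit + + class Colour(IntEnum): + RED = 1 + GREEN = 2 + + @njit + def is_red(colour): + # members of the same enum can be compared in nopython mode + return colour == Colour.RED + + print(is_red(Colour.RED)) # True + print(is_red(Colour.GREEN)) # False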
+ +``math`` +-------- + +The following functions from the :mod:`math` module are supported: + +* :func:`math.acos` +* :func:`math.acosh` +* :func:`math.asin` +* :func:`math.asinh` +* :func:`math.atan` +* :func:`math.atan2` +* :func:`math.atanh` +* :func:`math.ceil` +* :func:`math.copysign` +* :func:`math.cos` +* :func:`math.cosh` +* :func:`math.degrees` +* :func:`math.erf` +* :func:`math.erfc` +* :func:`math.exp` +* :func:`math.expm1` +* :func:`math.fabs` +* :func:`math.floor` +* :func:`math.frexp` +* :func:`math.gamma` +* :func:`math.gcd` +* :func:`math.hypot` +* :func:`math.isfinite` +* :func:`math.isinf` +* :func:`math.isnan` +* :func:`math.ldexp` +* :func:`math.lgamma` +* :func:`math.log` +* :func:`math.log10` +* :func:`math.log1p` +* :func:`math.pow` +* :func:`math.radians` +* :func:`math.sin` +* :func:`math.sinh` +* :func:`math.sqrt` +* :func:`math.tan` +* :func:`math.tanh` +* :func:`math.trunc` + +``operator`` +------------ + +The following functions from the :mod:`operator` module are supported: + +* :func:`operator.add` +* :func:`operator.and_` +* :func:`operator.eq` +* :func:`operator.floordiv` +* :func:`operator.ge` +* :func:`operator.gt` +* :func:`operator.iadd` +* :func:`operator.iand` +* :func:`operator.ifloordiv` +* :func:`operator.ilshift` +* :func:`operator.imatmul` (Python 3.5 and above) +* :func:`operator.imod` +* :func:`operator.imul` +* :func:`operator.invert` +* :func:`operator.ior` +* :func:`operator.ipow` +* :func:`operator.irshift` +* :func:`operator.isub` +* :func:`operator.itruediv` +* :func:`operator.ixor` +* :func:`operator.le` +* :func:`operator.lshift` +* :func:`operator.lt` +* :func:`operator.matmul` (Python 3.5 and above) +* :func:`operator.mod` +* :func:`operator.mul` +* :func:`operator.ne` +* :func:`operator.neg` +* :func:`operator.not_` +* :func:`operator.or_` +* :func:`operator.pos` +* :func:`operator.pow` +* :func:`operator.rshift` +* :func:`operator.sub` +* :func:`operator.truediv` +* :func:`operator.xor` + +``functools`` +------------- + +The :func:`functools.reduce` function is supported but the `initializer` +argument is required. + +.. _pysupported-random: + +``random`` +---------- + +Numba supports top-level functions from the :mod:`random` module, but does +not allow you to create individual Random instances. A Mersenne-Twister +generator is used, with a dedicated internal state. It is initialized at +startup with entropy drawn from the operating system. + +* :func:`random.betavariate` +* :func:`random.expovariate` +* :func:`random.gammavariate` +* :func:`random.gauss` +* :func:`random.getrandbits`: number of bits must not be greater than 64 +* :func:`random.lognormvariate` +* :func:`random.normalvariate` +* :func:`random.paretovariate` +* :func:`random.randint` +* :func:`random.random` +* :func:`random.randrange` +* :func:`random.seed`: with an integer argument only +* :func:`random.shuffle`: the sequence argument must be a one-dimension + Numpy array or buffer-providing object (such as a :class:`bytearray` + or :class:`array.array`); the second (optional) argument is not supported +* :func:`random.uniform` +* :func:`random.triangular` +* :func:`random.vonmisesvariate` +* :func:`random.weibullvariate` + +.. warning:: + Calling :func:`random.seed` from non-Numba code (or from :term:`object mode` + code) will seed the Python random generator, not the Numba random generator. + To seed the Numba random generator, see the example below. + +.. 
code-block:: python + + from numba import njit + import random + + @njit + def seed(a): + random.seed(a) + + @njit + def rand(): + return random.random() + + + # Incorrect seeding + random.seed(1234) + print(rand()) + + random.seed(1234) + print(rand()) + + # Correct seeding + seed(1234) + print(rand()) + + seed(1234) + print(rand()) + + +.. note:: + Since version 0.28.0, the generator is thread-safe and fork-safe. Each + thread and each process will produce independent streams of random numbers. + +.. seealso:: + Numba also supports most additional distributions from the :ref:`Numpy + random module `. + +``heapq`` +--------- + +The following functions from the :mod:`heapq` module are supported: + +* :func:`heapq.heapify` +* :func:`heapq.heappop` +* :func:`heapq.heappush` +* :func:`heapq.heappushpop` +* :func:`heapq.heapreplace` +* :func:`heapq.nlargest` : first two arguments only +* :func:`heapq.nsmallest` : first two arguments only + +Note: the heap must be seeded with at least one value to allow its type to be +inferred; heap items are assumed to be homogeneous in type. + + +Third-party modules +=================== + +.. I put this here as there's only one module (apart from Numpy), otherwise + it should be a separate page. + +.. _cffi-support: + +``cffi`` +-------- + +Similarly to ctypes, Numba is able to call into `cffi`_-declared external +functions, using the following C types and any derived pointer types: + +* :c:type:`char` +* :c:type:`short` +* :c:type:`int` +* :c:type:`long` +* :c:type:`long long` +* :c:type:`unsigned char` +* :c:type:`unsigned short` +* :c:type:`unsigned int` +* :c:type:`unsigned long` +* :c:type:`unsigned long long` +* :c:type:`int8_t` +* :c:type:`uint8_t` +* :c:type:`int16_t` +* :c:type:`uint16_t` +* :c:type:`int32_t` +* :c:type:`uint32_t` +* :c:type:`int64_t` +* :c:type:`uint64_t` +* :c:type:`float` +* :c:type:`double` +* :c:type:`ssize_t` +* :c:type:`size_t` +* :c:type:`void` + +The ``from_buffer()`` method of ``cffi.FFI`` and ``CompiledFFI`` objects is +supported for passing Numpy arrays and other buffer-like objects. Only +*contiguous* arguments are accepted. The argument to ``from_buffer()`` +is converted to a raw pointer of the appropriate C type (for example a +``double *`` for a ``float64`` array). + +Additional type mappings for the conversion from a buffer to the appropriate C +type may be registered with Numba. This may include struct types, though it is +only permitted to call functions that accept pointers to structs - passing a +struct by value is unsupported. For registering a mapping, use: + +.. function:: numba.core.typing.cffi_utils.register_type(cffi_type, numba_type) + +Out-of-line cffi modules must be registered with Numba prior to the use of any +of their functions from within Numba-compiled functions: + +.. function:: numba.core.typing.cffi_utils.register_module(mod) + + Register the cffi out-of-line module ``mod`` with Numba. + +Inline cffi modules require no registration. + +.. _cffi: https://cffi.readthedocs.org/ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/types.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/types.rst new file mode 100644 index 000000000..75ff34320 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/types.rst @@ -0,0 +1,375 @@ +.. 
_numba-types: + +==================== +Types and signatures +==================== + +Rationale +========= + +As an optimizing compiler, Numba needs to decide on the type of each +variable to generate efficient machine code. Python's standard types +are not precise enough for that, so we had to develop our own fine-grained +type system. + +You will encounter Numba types mainly when trying to inspect the results +of Numba's type inference, for :ref:`debugging ` or +:ref:`educational ` purposes. However, you need to use +types explicitly if compiling code :ref:`ahead-of-time `. + + +Signatures +========== + +A signature specifies the type of a function. Exactly which kind +of signature is allowed depends on the context (:term:`AOT` or :term:`JIT` +compilation), but signatures always involve some representation of Numba +types to specify the concrete types for the function's arguments and, +if required, the function's return type. + +An example function signature would be the string ``"f8(i4, i4)"`` +(or the equivalent ``"float64(int32, int32)"``) which specifies a +function taking two 32-bit integers and returning a double-precision float. + + +Basic types +=========== + +The most basic types can be expressed through simple expressions. The +symbols below refer to attributes of the main ``numba`` module (so if +you read "boolean", it means that symbol can be accessed as ``numba.boolean``). +Many types are available both as a canonical name and a shorthand alias, +following NumPy's conventions. + +Numbers +------- + +The following table contains the elementary numeric types currently defined +by Numba and their aliases. + +=================== ========= =================================== +Type name(s) Shorthand Comments +=================== ========= =================================== +boolean b1 represented as a byte +uint8, byte u1 8-bit unsigned byte +uint16 u2 16-bit unsigned integer +uint32 u4 32-bit unsigned integer +uint64 u8 64-bit unsigned integer + +int8, char i1 8-bit signed byte +int16 i2 16-bit signed integer +int32 i4 32-bit signed integer +int64 i8 64-bit signed integer + +intc -- C int-sized integer +uintc -- C int-sized unsigned integer +intp -- pointer-sized integer +uintp -- pointer-sized unsigned integer +ssize_t -- C ssize_t +size_t -- C size_t + +float32 f4 single-precision floating-point number +float64, double f8 double-precision floating-point number + +complex64 c8 single-precision complex number +complex128 c16 double-precision complex number +=================== ========= =================================== + +Arrays +------ + +The easy way to declare :class:`~numba.types.Array` types is to subscript an +elementary type according to the number of dimensions. For example a +1-dimension single-precision array:: + + >>> numba.float32[:] + array(float32, 1d, A) + +or a 3-dimension array of the same underlying type:: + + >>> numba.float32[:, :, :] + array(float32, 3d, A) + +This syntax defines array types with no particular layout (producing code +that accepts both non-contiguous and contiguous arrays), but you can +specify a particular contiguity by using the ``::1`` index either at +the beginning or the end of the index specification:: + + >>> numba.float32[::1] + array(float32, 1d, C) + >>> numba.float32[:, :, ::1] + array(float32, 3d, C) + >>> numba.float32[::1, :, :] + array(float32, 3d, F) + +Functions +--------- + +.. warning:: + The feature of considering functions as first-class type objects is + under development. 
+ +Functions are often considered as certain transformations of +input arguments to output values. Within Numba :term:`JIT` compiled +functions, the functions can also be considered as objects, that is, +functions can be passed around as arguments or return values, or used +as items in sequences, in addition to being callable. + +First-class function support is enabled for all Numba :term:`JIT` +compiled functions and Numba ``cfunc`` compiled functions except when: + +- using a non-CPU compiler, +- the compiled function is a Python generator, +- the compiled function has Omitted arguments, +- or the compiled function returns Optional value. + +To disable first-class function support, use ``no_cfunc_wrapper=True`` +decorator option. + +For instance, consider an example where the Numba :term:`JIT` compiled +function applies user-specified functions as a composition to an input +argument:: + + >>> @numba.njit + ... def composition(funcs, x): + ... r = x + ... for f in funcs[::-1]: + ... r = f(r) + ... return r + ... + >>> @numba.cfunc("double(double)") + ... def a(x): + ... return x + 1.0 + ... + >>> @numba.njit + ... def b(x): + ... return x * x + ... + >>> composition((a, b), 0.5), 0.5 ** 2 + 1 + (1.25, 1.25) + >>> composition((b, a, b, b, a), 0.5), b(a(b(b(a(0.5))))) + (36.75390625, 36.75390625) + +Here, ``cfunc`` compiled functions ``a`` and ``b`` are considered as +first-class function objects because these are passed in to the Numba +:term:`JIT` compiled function ``composition`` as arguments, that is, the +``composition`` is :term:`JIT` compiled independently from its argument function +objects (that are collected in the input argument ``funcs``). + +Currently, first-class function objects can be Numba ``cfunc`` compiled +functions, :term:`JIT` compiled functions, and objects that implement the +Wrapper Address Protocol (WAP, see below) with the following restrictions: + +======================== ============ ============== =========== +Context JIT compiled cfunc compiled WAP objects +======================== ============ ============== =========== +Can be used as arguments yes yes yes +Can be called yes yes yes +Can be used as items yes\* yes yes +Can be returned yes yes yes +Namespace scoping yes yes yes +Automatic overload yes no no +======================== ============ ============== =========== + +\* at least one of the items in a sequence of first-class function objects must +have a precise type. + + +Wrapper Address Protocol - WAP +++++++++++++++++++++++++++++++ + +Wrapper Address Protocol provides an API for making any Python object +a first-class function for Numba :term:`JIT` compiled functions. This assumes +that the Python object represents a compiled function that can be +called via its memory address (function pointer value) from Numba :term:`JIT` +compiled functions. The so-called WAP objects must define the +following two methods: + +.. method:: __wrapper_address__(self) -> int + + Return the memory address of a first-class function. This + method is used when a Numba :term:`JIT` compiled function tries to + call the given WAP instance. + +.. method:: signature(self) -> numba.typing.Signature + + Return the signature of the given first-class + function. This method is used when passing in the given + WAP instance to a Numba :term:`JIT` compiled function. + +In addition, the WAP object may implement the ``__call__`` +method. This is necessary when calling WAP objects from Numba +:term:`JIT` compiled functions in :term:`object mode`. 
+ +As an example, let us call the standard math library function ``cos`` +within a Numba :term:`JIT` compiled function. The memory address of ``cos`` can +be established after loading the math library and using the ``ctypes`` +package:: + + >>> import numba, ctypes, ctypes.util, math + >>> libm = ctypes.cdll.LoadLibrary(ctypes.util.find_library('m')) + >>> class LibMCos(numba.types.WrapperAddressProtocol): + ... def __wrapper_address__(self): + ... return ctypes.cast(libm.cos, ctypes.c_voidp).value + ... def signature(self): + ... return numba.float64(numba.float64) + ... + >>> @numba.njit + ... def foo(f, x): + ... return f(x) + ... + >>> foo(LibMCos(), 0.0) + 1.0 + >>> foo(LibMCos(), 0.5), math.cos(0.5) + (0.8775825618903728, 0.8775825618903728) + +Miscellaneous Types +------------------- + +There are some non-numerical types that do not fit into the other categories. + +=================== ================================================= +Type name(s) Comments +=================== ================================================= +pyobject generic Python object +voidptr raw pointer, no operations can be performed on it +=================== ================================================= + +Advanced types +============== + +For more advanced declarations, you have to explicitly call helper +functions or classes provided by Numba. + +.. warning:: + The APIs documented here are not guaranteed to be stable. Unless + necessary, it is recommended to let Numba infer argument types by using + the :ref:`signature-less variant of @jit `. + +.. A word of note: I only documented those types that can be genuinely + useful to users, i.e. types that can be passed as parameters to a JIT + function. Other types such as tuple are only usable in type inference. + + +Inference +--------- + +.. function:: numba.typeof(value) + + Create a Numba type accurately describing the given Python *value*. + ``ValueError`` is raised if the value isn't supported in + :term:`nopython mode`. + + :: + + >>> numba.typeof(np.empty(3)) + array(float64, 1d, C) + >>> numba.typeof((1, 2.0)) + (int64, float64) + >>> numba.typeof([0]) + reflected list(int64) + + +NumPy scalars +------------- + +Instead of using :func:`~numba.typeof`, non-trivial scalars such as +structured types can also be constructed programmatically. + +.. function:: numba.from_dtype(dtype) + + Create a Numba type corresponding to the given NumPy *dtype*:: + + >>> struct_dtype = np.dtype([('row', np.float64), ('col', np.float64)]) + >>> ty = numba.from_dtype(struct_dtype) + >>> ty + Record([('row', '<f8'), ('col', '<f8')]) + >>> ty[:, :] + unaligned array(Record([('row', '<f8'), ('col', '<f8')]), 2d, A) + + +Arrays +------ + +.. class:: numba.types.Array(dtype, ndim, layout) + + Create an array type. *dtype* should be a Numba type. *ndim* is the + number of dimensions of the array (a positive integer). *layout* + is a string giving the layout of the array: ``A`` means any layout, ``C`` + means C-contiguous and ``F`` means Fortran-contiguous. + + +Optional types +-------------- + +.. class:: numba.optional(typ) + + Create an optional type based on the underlying Numba type *typ*. + The optional type will allow any value of either *typ* or :const:`None`. + + :: + + >>> @jit((optional(intp),)) + ... def f(x): + ... return x is not None + ... + >>> f(0) + True + >>> f(None) + False + + +Type annotations +----------------- + +.. function:: numba.extending.as_numba_type(py_type) + + Create a Numba type corresponding to the given Python *type annotation*. 
+ ``TypingError`` is raised if the type annotation can't be mapped to a Numba + type. This function is meant to be used at statically compile time to + evaluate Python type annotations. For runtime checking of Python objects + see ``typeof`` above. + + For any numba type, ``as_numba_type(nb_type) == nb_type``. + + >>> numba.extending.as_numba_type(int) + int64 + >>> import typing # the Python library, not the Numba one + >>> numba.extending.as_numba_type(typing.List[float]) + ListType[float64] + >>> numba.extending.as_numba_type(numba.int32) + int32 + + ``as_numba_type`` is automatically updated to include any ``@jitclass``. + + >>> @jitclass + ... class Counter: + ... x: int + ... + ... def __init__(self): + ... self.x = 0 + ... + ... def inc(self): + ... old_val = self.x + ... self.x += 1 + ... return old_val + ... + >>> numba.extending.as_numba_type(Counter) + instance.jitclass.Counter#11bad4278 + + Currently ``as_numba_type`` is only used to infer fields for ``@jitclass``. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/utils.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/utils.rst new file mode 100644 index 000000000..3fc866e5c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/reference/utils.rst @@ -0,0 +1,34 @@ + +========= +Utilities +========= + +Dealing with pointers +===================== + +These functions can be called from pure Python as well as in +:term:`nopython mode`. + + +.. function:: numba.carray(ptr, shape, dtype=None) + + Return a Numpy array view over the data pointed to by *ptr* with the + given *shape*, in C order. If *dtype* is given, it is used as the array's + dtype, otherwise the array's dtype is inferred from *ptr*'s type. + As the returned array is a view, not a copy, writing to it will modify + the original data. + + *ptr* should be a ctypes pointer object (either a typed pointer + as created using :func:`~ctypes.POINTER`, or a :class:`~ctypes.c_void_p`). + + *shape* should be an integer or a tuple of integers. + + *dtype* should be a Numpy dtype or scalar class (i.e. both + ``np.dtype('int8')`` and ``np.int8`` are accepted). + + +.. function:: numba.farray(ptr, shape, dtype=None) + + Same as :func:`~numba.carray`, but the data is assumed to be laid out + in Fortran order, and the array view is constructed accordingly. + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/release-notes.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/release-notes.rst new file mode 100644 index 000000000..103366e15 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/release-notes.rst @@ -0,0 +1,5 @@ +====================== +Release Notes +====================== + +.. include:: ../../CHANGE_LOG diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/5minguide.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/5minguide.rst new file mode 100644 index 000000000..4ab382a6b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/5minguide.rst @@ -0,0 +1,223 @@ +.. _numba-5_mins: + +A ~5 minute guide to Numba +========================== + +Numba is a just-in-time compiler for Python that works best on code that uses +NumPy arrays and functions, and loops. The most common way to use Numba is +through its collection of decorators that can be applied to your functions to +instruct Numba to compile them. 
When a call is made to a Numba-decorated +function it is compiled to machine code "just-in-time" for execution and all or +part of your code can subsequently run at native machine code speed! + +Out of the box Numba works with the following: + +* OS: Windows (32 and 64 bit), OSX, Linux (32 and 64 bit). Unofficial support on + \*BSD. +* Architecture: x86, x86_64, ppc64le, armv7l, armv8l (aarch64). Unofficial + support on M1/Arm64. +* GPUs: Nvidia CUDA. +* CPython +* NumPy 1.18 - latest + +How do I get it? +---------------- +Numba is available as a `conda `_ package for the +`Anaconda Python distribution `_:: + + $ conda install numba + +Numba also has wheels available:: + + $ pip install numba + +Numba can also be +:ref:`compiled from source `, although we do +not recommend it for first-time Numba users. + +Numba is often used as a core package so its dependencies are kept to an +absolute minimum, however, extra packages can be installed as follows to provide +additional functionality: + +* ``scipy`` - enables support for compiling ``numpy.linalg`` functions. +* ``colorama`` - enables support for color highlighting in backtraces/error + messages. +* ``pyyaml`` - enables configuration of Numba via a YAML config file. +* ``icc_rt`` - allows the use of the Intel SVML (high performance short vector + math library, x86_64 only). Installation instructions are in the + :ref:`performance tips `. + +Will Numba work for my code? +---------------------------- +This depends on what your code looks like, if your code is numerically +orientated (does a lot of math), uses NumPy a lot and/or has a lot of loops, +then Numba is often a good choice. In these examples we'll apply the most +fundamental of Numba's JIT decorators, ``@jit``, to try and speed up some +functions to demonstrate what works well and what does not. + +Numba works well on code that looks like this:: + + from numba import jit + import numpy as np + + x = np.arange(100).reshape(10, 10) + + @jit(nopython=True) # Set "nopython" mode for best performance, equivalent to @njit + def go_fast(a): # Function is compiled to machine code when called the first time + trace = 0.0 + for i in range(a.shape[0]): # Numba likes loops + trace += np.tanh(a[i, i]) # Numba likes NumPy functions + return a + trace # Numba likes NumPy broadcasting + + print(go_fast(x)) + + +It won't work very well, if at all, on code that looks like this:: + + from numba import jit + import pandas as pd + + x = {'a': [1, 2, 3], 'b': [20, 30, 40]} + + @jit + def use_pandas(a): # Function will not benefit from Numba jit + df = pd.DataFrame.from_dict(a) # Numba doesn't know about pd.DataFrame + df += 1 # Numba doesn't understand what this is + return df.cov() # or this! + + print(use_pandas(x)) + +Note that Pandas is not understood by Numba and as a result Numba would simply +run this code via the interpreter but with the added cost of the Numba internal +overheads! + +What is ``nopython`` mode? +-------------------------- +The Numba ``@jit`` decorator fundamentally operates in two compilation modes, +``nopython`` mode and ``object`` mode. In the ``go_fast`` example above, +``nopython=True`` is set in the ``@jit`` decorator; this is instructing Numba to +operate in ``nopython`` mode. The behaviour of the ``nopython`` compilation mode +is to essentially compile the decorated function so that it will run entirely +without the involvement of the Python interpreter. This is the recommended and +best-practice way to use the Numba ``jit`` decorator as it leads to the best +performance. 
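+ +As a rough illustration (a sketch only; the exact exception text varies across +Numba versions), a function that uses an unsupported construct fails to compile +when ``nopython=True`` is set, instead of silently falling back to the +interpreter:: + + from numba import jit + + @jit(nopython=True) + def unsupported(): + # object() has no nopython-mode implementation + return object() + + try: + unsupported() + except Exception as exc: + # expect a numba TypingError describing the unsupported call + print(type(exc).__name__)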
+ +Should the compilation in ``nopython`` mode fail, Numba can compile using +``object mode``. This is a fall back mode for the ``@jit`` decorator if +``nopython=True`` is not set (as seen in the ``use_pandas`` example above). In +this mode Numba will identify loops that it can compile and compile those into +functions that run in machine code, and it will run the rest of the code in the +interpreter. For best performance avoid using this mode! + +How to measure the performance of Numba? +---------------------------------------- +First, recall that Numba has to compile your function for the argument types +given before it executes the machine code version of your function. This takes +time. However, once the compilation has taken place Numba caches the machine +code version of your function for the particular types of arguments presented. +If it is called again with the same types, it can reuse the cached version +instead of having to compile again. + +A really common mistake when measuring performance is to not account for the +above behaviour and to time code once with a simple timer that includes the +time taken to compile your function in the execution time. + +For example:: + + from numba import jit + import numpy as np + import time + + x = np.arange(100).reshape(10, 10) + + @jit(nopython=True) + def go_fast(a): # Function is compiled and runs in machine code + trace = 0.0 + for i in range(a.shape[0]): + trace += np.tanh(a[i, i]) + return a + trace + + # DO NOT REPORT THIS... COMPILATION TIME IS INCLUDED IN THE EXECUTION TIME! + start = time.perf_counter() + go_fast(x) + end = time.perf_counter() + print("Elapsed (with compilation) = {}s".format((end - start))) + + # NOW THE FUNCTION IS COMPILED, RE-TIME IT EXECUTING FROM CACHE + start = time.perf_counter() + go_fast(x) + end = time.perf_counter() + print("Elapsed (after compilation) = {}s".format((end - start))) + +This, for example prints:: + + Elapsed (with compilation) = 0.33030009269714355s + Elapsed (after compilation) = 6.67572021484375e-06s + +A good way to measure the impact Numba JIT has on your code is to time execution +using the `timeit `_ module +functions; these measure multiple iterations of execution and, as a result, +can be made to accommodate for the compilation time in the first execution. + +As a side note, if compilation time is an issue, Numba JIT supports +:ref:`on-disk caching ` of compiled functions and also has +an :ref:`Ahead-Of-Time ` compilation mode. + +How fast is it? +--------------- +Assuming Numba can operate in ``nopython`` mode, or at least compile some loops, +it will target compilation to your specific CPU. Speed up varies depending on +application but can be one to two orders of magnitude. Numba has a +:ref:`performance guide ` that covers common options for +gaining extra performance. + +How does Numba work? +-------------------- +Numba reads the Python bytecode for a decorated function and combines this with +information about the types of the input arguments to the function. It analyzes +and optimizes your code, and finally uses the LLVM compiler library to generate +a machine code version of your function, tailored to your CPU capabilities. This +compiled version is then used every time your function is called. + +Other things of interest: +------------------------- +Numba has quite a few decorators, we've seen ``@jit``, but there's +also: + +* ``@njit`` - this is an alias for ``@jit(nopython=True)`` as it is so commonly + used! 
+* ``@vectorize`` - produces NumPy ``ufunc`` s (with all the ``ufunc`` methods + supported). :ref:`Docs are here `. +* ``@guvectorize`` - produces NumPy generalized ``ufunc`` s. + :ref:`Docs are here `. +* ``@stencil`` - declare a function as a kernel for a stencil like operation. + :ref:`Docs are here `. +* ``@jitclass`` - for jit aware classes. :ref:`Docs are here `. +* ``@cfunc`` - declare a function for use as a native call back (to be called + from C/C++ etc). :ref:`Docs are here `. +* ``@overload`` - register your own implementation of a function for use in + nopython mode, e.g. ``@overload(scipy.special.j0)``. + :ref:`Docs are here `. + +Extra options available in some decorators: + +* ``parallel = True`` - :ref:`enable ` the + :ref:`automatic parallelization ` of the function. +* ``fastmath = True`` - enable :ref:`fast-math ` + behaviour for the function. + +ctypes/cffi/cython interoperability: + +* ``cffi`` - The calling of :ref:`CFFI ` functions is supported + in ``nopython`` mode. +* ``ctypes`` - The calling of :ref:`ctypes ` wrapped + functions is supported in ``nopython`` mode. +* Cython exported functions :ref:`are callable `. + +GPU targets: +~~~~~~~~~~~~ + +Numba can target `Nvidia CUDA `_ GPUs. +You can write a kernel in pure Python and have Numba handle the computation and +data movement (or do this explicitly). Click for Numba documentation on +:ref:`CUDA `. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cfunc.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cfunc.rst new file mode 100644 index 000000000..845dc9634 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cfunc.rst @@ -0,0 +1,218 @@ +.. _cfunc: + +==================================== +Creating C callbacks with ``@cfunc`` +==================================== + +Interfacing with some native libraries (for example written in C or C++) +can necessitate writing native callbacks to provide business logic to the +library. The :func:`numba.cfunc` decorator creates a compiled function +callable from foreign C code, using the signature of your choice. + + +Basic usage +=========== + +The ``@cfunc`` decorator has a similar usage to ``@jit``, but with an +important difference: passing a single signature is mandatory. +It determines the visible signature of the C callback:: + + from numba import cfunc + + @cfunc("float64(float64, float64)") + def add(x, y): + return x + y + + +The C function object exposes the address of the compiled C callback as +the :attr:`~CFunc.address` attribute, so that you can pass it to any +foreign C or C++ library. It also exposes a :mod:`ctypes` callback +object pointing to that callback; that object is also callable from +Python, making it easy to check the compiled code:: + + @cfunc("float64(float64, float64)") + def add(x, y): + return x + y + + print(add.ctypes(4.0, 5.0)) # prints "9.0" + + +Example +======= + +In this example, we are going to be using the ``scipy.integrate.quad`` +function. That function accepts either a regular Python callback or +a C callback wrapped in a :mod:`ctypes` callback object. 
+ +Let's define a pure Python integrand and compile it as a +C callback:: + + >>> import numpy as np + >>> from numba import cfunc + >>> def integrand(t): + return np.exp(-t) / t**2 + ...: + >>> nb_integrand = cfunc("float64(float64)")(integrand) + +We can pass the ``nb_integrand`` object's :mod:`ctypes` callback to +``scipy.integrate.quad`` and check that the results are the same as with +the pure Python function:: + + >>> import scipy.integrate as si + >>> def do_integrate(func): + """ + Integrate the given function from 1.0 to +inf. + """ + return si.quad(func, 1, np.inf) + ...: + >>> do_integrate(integrand) + (0.14849550677592208, 3.8736750296130505e-10) + >>> do_integrate(nb_integrand.ctypes) + (0.14849550677592208, 3.8736750296130505e-10) + + +Using the compiled callback, the integration function does not invoke the +Python interpreter each time it evaluates the integrand. In our case, the +integration is made 18 times faster:: + + >>> %timeit do_integrate(integrand) + 1000 loops, best of 3: 242 µs per loop + >>> %timeit do_integrate(nb_integrand.ctypes) + 100000 loops, best of 3: 13.5 µs per loop + + +Dealing with pointers and array memory +====================================== + +A less trivial use case of C callbacks involves doing operation on some +array of data passed by the caller. As C doesn't have a high-level +abstraction similar to Numpy arrays, the C callback's signature will pass +low-level pointer and size arguments. Nevertheless, the Python code for +the callback will expect to exploit the power and expressiveness of Numpy +arrays. + +In the following example, the C callback is expected to operate on 2-d arrays, +with the signature ``void(double *input, double *output, int m, int n)``. +You can implement such a callback thusly:: + + from numba import cfunc, types, carray + + c_sig = types.void(types.CPointer(types.double), + types.CPointer(types.double), + types.intc, types.intc) + + @cfunc(c_sig) + def my_callback(in_, out, m, n): + in_array = carray(in_, (m, n)) + out_array = carray(out, (m, n)) + for i in range(m): + for j in range(n): + out_array[i, j] = 2 * in_array[i, j] + + +The :func:`numba.carray` function takes as input a data pointer and a shape +and returns an array view of the given shape over that data. The data is +assumed to be laid out in C order. If the data is laid out in Fortran order, +:func:`numba.farray` should be used instead. + + +Handling C structures +===================== + + +With CFFI +--------- + +For applications that have a lot of state, it is useful to pass data in C +structures. To simplify the interoperability with C code, numba can convert +a ``cffi`` type into a numba ``Record`` type using +``numba.core.typing.cffi_utils.map_type``:: + + from numba.core.typing import cffi_utils + + nbtype = cffi_utils.map_type(cffi_type, use_record_dtype=True) + +.. note:: **use_record_dtype=True** is needed otherwise pointers to C + structures are returned as void pointers. + +.. 
note:: From v0.49 the ``numba.cffi_support`` module has been phased out + in favour of ``numba.core.typing.cffi_utils``. + + +For example:: + + from cffi import FFI + + src = """ + + /* Define the C struct */ + typedef struct my_struct { + int i1; + float f2; + double d3; + float af4[7]; // arrays are supported + } my_struct; + + /* Define a callback function */ + typedef double (*my_func)(my_struct*, size_t); + """ + + ffi = FFI() + ffi.cdef(src) + + # Get the function signature from *my_func* + sig = cffi_utils.map_type(ffi.typeof('my_func'), use_record_dtype=True) + + # Make the cfunc + from numba import cfunc, carray + + @cfunc(sig) + def foo(ptr, n): + base = carray(ptr, n) # view pointer as an array of my_struct + tmp = 0 + for i in range(n): + tmp += base[i].i1 * base[i].f2 / base[i].d3 + tmp += base[i].af4.sum() # nested arrays are like normal NumPy arrays + return tmp + + +With ``numba.types.Record.make_c_struct`` +----------------------------------------- + +The ``numba.types.Record`` type can be created manually to follow a +C-structure's layout. To do that, use ``Record.make_c_struct``, for example:: + + my_struct = types.Record.make_c_struct([ + # Provides a sequence of 2-tuples i.e. (name:str, type:Type) + ('i1', types.int32), + ('f2', types.float32), + ('d3', types.float64), + ('af4', types.NestedArray(dtype=types.float32, shape=(7,))), + ]) + +Due to ABI limitations, structures should be passed as pointers +using ``types.CPointer(my_struct)`` as the argument type. Inside the ``cfunc`` +body, the ``my_struct*`` can be accessed with ``carray``. + +Full example +------------ + +See the full example in ``examples/notebooks/Accessing C Struct Data.ipynb``. + + +Signature specification +======================= + +The explicit ``@cfunc`` signature can use any :ref:`Numba types `, +but only a subset of them makes sense for a C callback. You should +generally limit yourself to scalar types (such as ``int8`` or ``float64``), +pointers to them (for example ``types.CPointer(types.int8)``), or pointers +to ``Record`` types. + + +Compilation options +=================== + +A number of keyword-only arguments can be passed to the ``@cfunc`` +decorator: ``nopython`` and ``cache``. Their meanings are similar to those +in the ``@jit`` decorator. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cli.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cli.rst new file mode 100644 index 000000000..3764c687f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/cli.rst @@ -0,0 +1,165 @@ +.. _cli: + +Command line interface +====================== + +Numba is a Python package; usually you ``import numba`` from Python and use the +Python application programming interface (API). However, Numba also ships with a +command line interface (CLI), i.e. a tool ``numba`` that is installed when you +install Numba. + +Currently, the only purpose of the CLI is to allow you to quickly show some +information about your system and installation, or to quickly get some debugging +information for a Python script using Numba. + +.. _cli_usage: + +Usage +----- + +To use the Numba CLI from the terminal, use ``numba`` followed by the options +and arguments like ``--help`` or ``-s``, as explained below. + +Sometimes it can happen that you get a "command not found" error when you type +``numba``, because your ``PATH`` isn't configured properly. In that case you can +use the equivalent command ``python -m numba``. 
If that still gives "command +not found", try to ``import numba`` as suggested here: +:ref:`numba-source-install-check`. + +The two versions ``numba`` and ``python -m numba`` are the same. The first is +shorter to type, but if you get a "command not found" error because your +``PATH`` doesn't contain the location where ``numba`` is installed, having the +``python -m numba`` variant is useful. + +To use the Numba CLI from IPython or Jupyter, use ``!numba``, i.e. prefix the +command with an exclamation mark. This is a general IPython/Jupyter feature to +execute shell commands, it is not available in the regular ``python`` terminal. + +.. _cli_help: + +Help +---- + +To see all available options, use ``numba --help``:: + + $ numba --help + usage: numba [-h] [--annotate] [--dump-llvm] [--dump-optimized] + [--dump-assembly] [--annotate-html ANNOTATE_HTML] [-s] + [--sys-json SYS_JSON] + [filename] + + positional arguments: + filename Python source filename + + optional arguments: + -h, --help show this help message and exit + --annotate Annotate source + --dump-llvm Print generated llvm assembly + --dump-optimized Dump the optimized llvm assembly + --dump-assembly Dump the LLVM generated assembly + --annotate-html ANNOTATE_HTML + Output source annotation as html + -s, --sysinfo Output system information for bug reporting + --sys-json SYS_JSON Saves the system info dict as a json file + + +.. _cli_sysinfo: + +System information +------------------ + +The ``numba -s`` (or the equivalent ``numba --sysinfo``) command prints a lot of +information about your system and your Numba installation and relevant +dependencies. + +Remember: you can use ``!numba -s`` with an exclamation mark to see this +information from IPython or Jupyter. + +Example output:: + + $ numba -s + + System info: + -------------------------------------------------------------------------------- + __Time Stamp__ + 2019-05-07 14:15:39.733994 + + __Hardware Information__ + Machine : x86_64 + CPU Name : haswell + CPU count : 8 + CPU Features : + aes avx avx2 bmi bmi2 cmov cx16 f16c fma fsgsbase invpcid lzcnt mmx movbe pclmul + popcnt rdrnd sahf sse sse2 sse3 sse4.1 sse4.2 ssse3 xsave xsaveopt + + __OS Information__ + Platform : Darwin-18.5.0-x86_64-i386-64bit + Release : 18.5.0 + System Name : Darwin + Version : Darwin Kernel Version 18.5.0: Mon Mar 11 20:40:32 PDT 2019; root:xnu-4903.251.3~3/RELEASE_X86_64 + OS specific info : 10.14.4 x86_64 + + __Python Information__ + Python Compiler : Clang 4.0.1 (tags/RELEASE_401/final) + Python Implementation : CPython + Python Version : 3.7.3 + Python Locale : en_US UTF-8 + + __LLVM information__ + LLVM version : 7.0.0 + + __CUDA Information__ + CUDA driver library cannot be found or no CUDA enabled devices are present. + Error class: + + __SVML Information__ + SVML state, config.USING_SVML : False + SVML library found and loaded : False + llvmlite using SVML patched LLVM : True + SVML operational : False + + __Threading Layer Information__ + TBB Threading layer available : False + +--> Disabled due to : Unknown import problem. + OpenMP Threading layer available : False + +--> Disabled due to : Unknown import problem. + Workqueue Threading layer available : True + + __Numba Environment Variable Information__ + None set. + + __Conda Information__ + conda_build_version : 3.17.8 + conda_env_version : 4.6.14 + platform : osx-64 + python_version : 3.7.3.final.0 + root_writable : True + + __Current Conda Env__ + (output truncated due to length) + +.. 
.. _cli_debug: + +Debugging +--------- + +As shown in the help output above, the ``numba`` command includes options that +can help you to debug Numba compiled code. + +To try it out, create an example script called ``myscript.py``:: + + import numba + + @numba.jit + def f(x): + return 2 * x + + f(42) + +and then execute one of the following commands:: + + $ numba myscript.py --annotate + $ numba myscript.py --annotate-html myscript.html + $ numba myscript.py --dump-llvm + $ numba myscript.py --dump-optimized + $ numba myscript.py --dump-assembly diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/examples.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/examples.rst new file mode 100644 index 000000000..2651c12ef --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/examples.rst @@ -0,0 +1,52 @@ +======== +Examples +======== + + +Mandelbrot +---------- + +.. literalinclude:: ../../../numba/tests/doc_examples/test_examples.py + :language: python + :caption: from ``test_mandelbrot`` of ``numba/tests/doc_examples/test_examples.py`` + :start-after: magictoken.ex_mandelbrot.begin + :end-before: magictoken.ex_mandelbrot.end + :dedent: 12 + :linenos: + +.. _example-movemean: + +Moving average +-------------- + +.. literalinclude:: ../../../numba/tests/doc_examples/test_examples.py + :language: python + :caption: from ``test_moving_average`` of ``numba/tests/doc_examples/test_examples.py`` + :start-after: magictoken.ex_moving_average.begin + :end-before: magictoken.ex_moving_average.end + :dedent: 12 + :linenos: + +Multi-threading +--------------- + +The code below showcases the potential performance improvement when +using the :ref:`nogil <jit-nogil>` feature. For example, on a 4-core machine, +the following results were printed:: + + numpy (1 thread) 145 ms + numba (1 thread) 128 ms + numba (4 threads) 35 ms + +.. note:: + If preferred, it's possible to use the standard `concurrent.futures + <https://docs.python.org/3/library/concurrent.futures.html>`_ module + rather than spawn threads and dispatch tasks by hand. + +.. literalinclude:: ../../../numba/tests/doc_examples/test_examples.py + :language: python + :caption: from ``test_no_gil`` of ``numba/tests/doc_examples/test_examples.py`` + :start-after: magictoken.ex_no_gil.begin + :end-before: magictoken.ex_no_gil.end + :dedent: 12 + :linenos: diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/faq.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/faq.rst new file mode 100644 index 000000000..b8fc588b0 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/faq.rst @@ -0,0 +1,413 @@ + +========================== +Frequently Asked Questions +========================== + +Installation +============ + +Numba could not be imported +--------------------------- + +If you are seeing an exception on importing Numba with an error message +that starts with:: + + ImportError: Numba could not be imported. + +here are some common issues and things to try to fix it. + +#. Your installation has more than one version of Numba in a given environment. + + Common ways this occurs include: + + * Installing Numba with conda and then installing again with pip. + * Installing Numba with pip and then updating to a new version with pip (pip + re-installations don't seem to always clean up very well). + + To fix this the best approach is to create an entirely new environment and + install a single version of Numba in that environment using a package manager + of your choice. + +#. 
Your installation has Numba for Python version X but you are running with + Python version Y. + + This occurs due to a variety of Python environment mix-up/mismatch problems. + The most common mismatch comes from installing Numba into the + site-packages/environment of one version of Python by using a base or + system installation of Python that is a different version; this typically + happens through the use of the "wrong" ``pip`` binary. This will obviously + cause problems as the C-Extensions on which Numba relies are bound to + specific Python versions. A way to check if this is likely the problem is to + see if the path to the ``python`` binary at:: + + python -c 'import sys; print(sys.executable)' + + matches the path to your installation tool and/or matches the reported + installation location and if the Python versions match up across all of + these. Note that Python version ``X.Y.A`` is compatible with ``X.Y.B``. + + To fix this the best approach is to create an entirely new environment and + ensure that the installation tool used to install Numba is the one from that + environment and that the Python versions at install and run time match. + +#. Your core system libraries are too old. + + This is a somewhat rare occurrence, but there are occasions when a very old + (typically out of support) version of Linux is in use that doesn't have a + ``glibc`` library with sufficiently new versioned symbols for Numba's shared + libraries to resolve against. The fix for this is to update your OS system + libraries/update your OS. + +#. You are using an IDE e.g. Spyder. + + There are some unknown issues in relation to installing Numba via IDEs, but + it would appear that these are likely variations of 1. or 2. with the same + suggested fixes. Also, try installing from outside of the IDE with the + command line. + + +If you have an installation problem which is not one of the above problems, +please do ask on `numba.discourse.group <https://numba.discourse.group/>`_ and +if possible include the path where Numba is installed and also the output of:: + + python -c 'import sys; print(sys.executable)' + + +Programming +=========== + +Can I pass a function as an argument to a jitted function? +---------------------------------------------------------- + +As of Numba 0.39, you can, so long as the function argument has also been +JIT-compiled:: + + @jit(nopython=True) + def f(g, x): + return g(x) + g(-x) + + result = f(jitted_g_function, 1) + +However, dispatching with arguments that are functions has extra overhead. +If this matters for your application, you can also use a factory function to +capture the function argument in a closure:: + + def make_f(g): + # Note: a new f() is created each time make_f() is called! + @jit(nopython=True) + def f(x): + return g(x) + g(-x) + return f + + f = make_f(jitted_g_function) + result = f(1) + +Improving the dispatch performance of functions in Numba is an ongoing task. + +Numba doesn't seem to care when I modify a global variable +---------------------------------------------------------- + +Numba considers global variables as compile-time constants. If you want +your jitted function to update itself when you have modified a global +variable's value, one solution is to recompile it using the +:meth:`~Dispatcher.recompile` method. This is a relatively slow operation, +though, so you may instead decide to rearchitect your code and turn the +global variable into a function argument. + +Can I debug a jitted function?
+------------------------------ + +Calling into :mod:`pdb` or other such high-level facilities is currently not +supported from Numba-compiled code. However, you can temporarily disable +compilation by setting the :envvar:`NUMBA_DISABLE_JIT` environment +variable. + +How can I create a Fortran-ordered array? +----------------------------------------- + +Numba currently doesn't support the ``order`` argument to most Numpy +functions such as :func:`numpy.empty` (because of limitations in the +:term:`type inference` algorithm). You can work around this issue by +creating a C-ordered array and then transposing it. For example:: + + a = np.empty((3, 5), order='F') + b = np.zeros(some_shape, order='F') + +can be rewritten as:: + + a = np.empty((5, 3)).T + b = np.zeros(some_shape[::-1]).T + +How can I increase integer width? +--------------------------------- + +By default, Numba will generally use machine integer width for integer +variables. On a 32-bit machine, you may sometimes need the magnitude of +64-bit integers instead. You can simply initialize relevant variables as +``np.int64`` (for example ``np.int64(0)`` instead of ``0``). It will +propagate to all computations involving those variables. + +.. _parallel_faqs: + +How can I tell if ``parallel=True`` worked? +------------------------------------------- + +If the ``parallel=True`` transformations failed for a function +decorated as such, a warning will be displayed. See also +:ref:`numba-parallel-diagnostics` for information about parallel diagnostics. + +Performance +=========== + +Does Numba inline functions? +---------------------------- + +Numba gives LLVM enough information that sufficiently short functions +can be inlined. This only works in :term:`nopython mode`. + +Does Numba vectorize array computations (SIMD)? +----------------------------------------------- + +Numba doesn't implement such optimizations by itself, but it lets LLVM +apply them. + +Why has my loop not vectorized? +------------------------------- + +Numba enables the loop-vectorize optimization in LLVM by default. +While it is a powerful optimization, not every loop can be vectorized. +Sometimes, loop-vectorization may fail due to subtle details like memory access +patterns. To see additional diagnostic information from LLVM, +add the following lines: + +.. code-block:: python + + import llvmlite.binding as llvm + llvm.set_option('', '--debug-only=loop-vectorize') + +This tells LLVM to print debug information from the **loop-vectorize** +pass to stderr. Each function entry looks like: + + +.. note:: + Using ``--debug-only`` requires LLVM to be built with assertions enabled to + work. Use the build of llvmlite in the `Numba channel <https://anaconda.org/numba>`_ + which is linked against LLVM with assertions enabled. + +.. code-block:: text + + LV: Checking a loop in "" from + LV: Loop hints: force=? width=0 unroll=0 + ... + LV: Vectorization is possible but not beneficial. + LV: Interleaving is not beneficial. + +Each function entry is separated by an empty line. The reason for rejecting +the vectorization is usually at the end of the entry. In the example above, +LLVM rejected the vectorization because doing so will not speed up the loop. +In this case, it can be due to the memory access pattern. For instance, the +array being looped over may not be in contiguous layout. + +When the memory access pattern is non-trivial and LLVM cannot determine the +memory region being accessed, it may reject with the following message: + +
.. code-block:: text + + LV: Can't vectorize due to memory conflicts + +Another common reason is: + +.. code-block:: text + + LV: Not vectorizing: loop did not meet vectorization requirements. + +In this case, vectorization is rejected because the vectorized code may behave +differently. In such a case, try turning on ``fastmath=True`` to allow +fastmath instructions. + +Why are the ``typed`` containers slower when used from the interpreter? +----------------------------------------------------------------------- + +The Numba ``typed`` containers found in ``numba.typed`` e.g. +``numba.typed.List`` store their data in an efficient form for access from JIT +compiled code. When these containers are used from the CPython interpreter, the +data involved has to be converted from/to the container format. This process is +relatively costly and as a result impacts performance. In JIT compiled code no +such penalty exists and so operations on the containers are much quicker and +often faster than the pure Python equivalent. + +Does Numba automatically parallelize code? +------------------------------------------ + +It can, in some cases: + +* Ufuncs and gufuncs with the ``target="parallel"`` option will run on multiple threads. +* The ``parallel=True`` option to ``@jit`` will attempt to optimize array + operations and run them in parallel. It also adds support for ``prange()`` to + explicitly parallelize a loop. + +You can also manually run computations on multiple threads yourself and use +the ``nogil=True`` option (see :ref:`releasing the GIL <jit-nogil>`). Numba +can also target parallel execution on GPU architectures using its CUDA and HSA +backends. + + +Can Numba speed up short-running functions? +------------------------------------------- + +Not significantly. New users sometimes expect to JIT-compile such +functions:: + + def f(x, y): + return x + y + +and get a significant speedup over the Python interpreter. But there isn't +much Numba can improve here: most of the time is probably spent in CPython's +function call mechanism, rather than the function itself. As a rule of +thumb, if a function takes less than 10 µs to execute: leave it. + +The exception is that you *should* JIT-compile that function if it is called +from another jitted function. + +There is a delay when JIT-compiling a complicated function, how can I improve it? +--------------------------------------------------------------------------------- + +Try to pass ``cache=True`` to the ``@jit`` decorator. It will keep the +compiled version on disk for later use. + +A more radical alternative is :ref:`ahead-of-time compilation <pycc>`. + + +GPU Programming +=============== + +How do I work around the ``CUDA initialized before forking`` error? +------------------------------------------------------------------- + +On Linux, the ``multiprocessing`` module in the Python standard library +defaults to using the ``fork`` method for creating new processes. Because of +the way process forking duplicates state between the parent and child +processes, CUDA will not work correctly in the child process if the CUDA +runtime was initialized *prior* to the fork. Numba detects this and raises a +``CudaDriverError`` with the message ``CUDA initialized before forking``. + +One approach to avoid this error is to make all calls to ``numba.cuda`` +functions inside the child processes or after the process pool is created. +However, this is not always possible, as you might want to query the number of +available GPUs before starting the process pool. In Python 3, you can change +the process start method, as described in the `multiprocessing documentation +<https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods>`_. +Switching from ``fork`` to ``spawn`` or ``forkserver`` will avoid the CUDA +initialization issue, although the child processes will not inherit any global +variables from their parent.
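+
+A minimal sketch of this approach (``gpu_worker`` here is a hypothetical
+stand-in for code that uses ``numba.cuda``)::
+
+    import multiprocessing as mp
+
+    def gpu_worker(x):
+        # Import, and hence initialize, CUDA inside the child process only
+        from numba import cuda
+        # ... allocate device arrays and launch kernels here ...
+        return x
+
+    if __name__ == '__main__':
+        # 'spawn' starts a fresh interpreter for each worker, so CUDA is
+        # never initialized prior to the creation of the new processes
+        ctx = mp.get_context('spawn')
+        with ctx.Pool(2) as pool:
+            print(pool.map(gpu_worker, range(4)))
+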
Integration with other utilities +================================ + +Can I "freeze" an application which uses Numba? +----------------------------------------------- + +If you're using PyInstaller or a similar utility to freeze an application, +you may encounter issues with llvmlite. llvmlite needs a non-Python DLL +in order to work, but freezing utilities won't detect it automatically. +You have to inform the freezing utility of the DLL's location: it will +usually be named ``llvmlite/binding/libllvmlite.so`` or +``llvmlite/binding/llvmlite.dll``, depending on your system. + +I get errors when running a script twice under Spyder +----------------------------------------------------- + +When you run a script in a console under Spyder, Spyder first tries to +reload existing modules. This doesn't work well with Numba, and can +produce errors like ``TypeError: No matching definition for argument type(s)``. + +There is a fix in the Spyder preferences. Open the "Preferences" window, +select "Console", then "Advanced Settings", click the "Set UMR excluded +modules" button, and add ``numba`` inside the text box that pops up. + +To see the setting take effect, be sure to restart the IPython console or +kernel. + +.. _llvm-locale-bug: + +Why does Numba complain about the current locale? +------------------------------------------------- + +If you get an error message such as the following:: + + RuntimeError: Failed at nopython (nopython mode backend) + LLVM will produce incorrect floating-point code in the current locale + +it means you have hit an LLVM bug which causes incorrect handling of +floating-point constants. This is known to happen with certain third-party +libraries such as the Qt backend to matplotlib. + +To work around the bug, you need to force the locale back to its default +value, for example:: + + import locale + locale.setlocale(locale.LC_NUMERIC, 'C') + +How do I get Numba development builds? +-------------------------------------- + +Pre-release versions of Numba can be installed with conda:: + + $ conda install -c numba/label/dev numba + + +Miscellaneous +============= + +Where does the project name "Numba" come from? +---------------------------------------------- + +"Numba" is a combination of "NumPy" and "Mamba". Mambas are some of the fastest +snakes in the world, and Numba makes your Python code fast. + +How do I reference/cite/acknowledge Numba in other work? +-------------------------------------------------------- +For academic use, the best option is to cite our ACM Proceedings: `Numba: a +LLVM-based Python JIT compiler. +`_ You can also find +`the sources on github `_, including +`a pre-print pdf +`_, in case +you don't have access to the ACM site but would like to read the paper. + +Other related papers +~~~~~~~~~~~~~~~~~~~~ +A paper describing the ParallelAccelerator technology, which is activated when +the ``parallel=True`` jit option is used, can be found `here +`_. + +How do I write a minimal working reproducer for a problem with Numba? +--------------------------------------------------------------------- + +A minimal working reproducer for Numba should include: + +1. The source code of the function(s) that reproduce the problem. +2. 
Some example data and a demonstration of calling the reproducing code with + that data. As Numba compiles based on type information, unless your problem + is numerical, it's fine to just provide dummy data of the right type, e.g. + use ``numpy.ones`` of the correct ``dtype``/size/shape for arrays. +3. Ideally put 1. and 2. into a script with all the correct imports. Make sure + your script actually executes and reproduces the problem before submitting + it! The target is to make it so that the script can just be copied directly + from the `issue tracker `_ and run by + someone else such that they can see the same problem as you are having. + +Having made a reproducer, now remove every part of the code that does not +contribute directly to reproducing the problem to create a "minimal" reproducer. +This means removing imports that aren't used, removing variables that aren't +used or have no effect, removing lines of code which have no effect, reducing +the complexity of expressions, and shrinking input data to the minimal amount +required to trigger the problem. + +Doing the above really helps out the Numba issue triage process and will enable +a faster response to your problem! + +`Suggested further reading +`_ on +writing minimal working reproducers. diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/generated-jit.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/generated-jit.rst new file mode 100644 index 000000000..975df08c5 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/generated-jit.rst @@ -0,0 +1,71 @@ +.. _generated-jit: + +================================================ +Flexible specializations with ``@generated_jit`` +================================================ + + +While the :func:`~numba.jit` decorator is useful for many situations, +sometimes you want to write a function that has different implementations +depending on its input types. The :func:`~numba.generated_jit` decorator +allows the user to control the selection of a specialization at compile-time, +while fully retaining runtime execution speed of a JIT function. + + +Example +======= + +Suppose you want to write a function which returns whether a given value +is a "missing" value according to certain conventions. For the sake of +the example, let's adopt the following definition: + +- for floating-point arguments, a missing value is a ``NaN`` +- for Numpy datetime64 and timedelta64 arguments, a missing value is a ``NaT`` +- other types don't have the concept of a missing value. + +That compile-time logic is easily implemented using the +:func:`~numba.generated_jit` decorator:: + + import numpy as np + + from numba import generated_jit, types + + @generated_jit(nopython=True) + def is_missing(x): + """ + Return True if the value is missing, False otherwise. + """ + if isinstance(x, types.Float): + return lambda x: np.isnan(x) + elif isinstance(x, (types.NPDatetime, types.NPTimedelta)): + # The corresponding Not-a-Time value + missing = x('NaT') + return lambda x: x == missing + else: + return lambda x: False + + +There are several things to note here: + +* The decorated function is called with the :ref:`Numba types ` + of the arguments, not their values. + +* The decorated function doesn't actually compute a result, it returns + a callable implementing the actual definition of the function for the + given types. 
+ +* It is possible to pre-compute some data at compile-time (the ``missing`` + variable above) so that it is reused inside the compiled implementation. + +* The function definitions use the same names for arguments as in the + decorated function; this is required to ensure passing arguments by + name works as expected. + + +Compilation options +=================== + +The :func:`~numba.generated_jit` decorator supports the same keyword-only +arguments as the :func:`~numba.jit` decorator, for example the ``nopython`` +and ``cache`` options. + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/index.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/index.rst new file mode 100644 index 000000000..74d2575fb --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/index.rst @@ -0,0 +1,26 @@ + +User Manual +=========== + +.. toctree:: + + 5minguide.rst + overview.rst + installing.rst + jit.rst + generated-jit.rst + vectorize.rst + jitclass.rst + cfunc.rst + pycc.rst + parallel.rst + stencil.rst + withobjmode.rst + jit-module.rst + performance-tips.rst + threading-layer.rst + cli.rst + troubleshoot.rst + faq.rst + examples.rst + talks.rst diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/installing.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/installing.rst new file mode 100644 index 000000000..d06a91ee1 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/installing.rst @@ -0,0 +1,373 @@ + +Installation +============ + +Compatibility +------------- + +Numba is compatible with Python 3.7--3.10, and Numpy versions 1.18 up to 1.23. + +Our supported platforms are: + +* Linux x86 (32-bit and 64-bit) +* Linux ppc64le (POWER8, POWER9) +* Windows 7 and later (32-bit and 64-bit) +* OS X 10.9 and later (64-bit and unofficial support on M1/Arm64) +* \*BSD (unofficial support only) +* NVIDIA GPUs of compute capability 5.3 and later + + * Compute capabilities 3.5 - 5.2 are supported, but deprecated. +* ARMv7 (32-bit little-endian, such as Raspberry Pi 2 and 3) +* ARMv8 (64-bit little-endian, such as the NVIDIA Jetson) + +:ref:`numba-parallel` is only available on 64-bit platforms. + +Installing using conda on x86/x86_64/POWER Platforms +---------------------------------------------------- + +The easiest way to install Numba and get updates is by using ``conda``, +a cross-platform package manager and software distribution maintained +by Anaconda, Inc. You can either use `Anaconda +`_ to get the full stack in one download, +or `Miniconda `_ which will install +the minimum packages required for a conda environment. + +Once you have conda installed, just type:: + + $ conda install numba + +or:: + + $ conda update numba + +Note that Numba, like Anaconda, only supports PPC in 64-bit little-endian mode. + +To enable CUDA GPU support for Numba, install the latest `graphics drivers from +NVIDIA `_ for your platform. +(Note that the open source Nouveau drivers shipped by default with many Linux +distributions do not support CUDA.) Then install the ``cudatoolkit`` package:: + + $ conda install cudatoolkit + +You do not need to install the CUDA SDK from NVIDIA. + + +Installing using pip on x86/x86_64 Platforms +-------------------------------------------- + +Binary wheels for Windows, Mac, and Linux are also available from `PyPI +`_. You can install Numba using ``pip``:: + + $ pip install numba + +This will download all of the needed dependencies as well.
You do not need to +have LLVM installed to use Numba (in fact, Numba will ignore all LLVM +versions installed on the system) as the required components are bundled into +the llvmlite wheel. + +To use CUDA with Numba installed by `pip`, you need to install the `CUDA SDK +`_ from NVIDIA. Please refer to +:ref:`cudatoolkit-lookup` for details. Numba can also detect CUDA libraries +installed system-wide on Linux. + + +.. _numba-install-armv7: + +Installing on Linux ARMv7 Platforms +----------------------------------- + +`Berryconda `_ is a +conda-based Python distribution for the Raspberry Pi. We are now uploading +packages to the ``numba`` channel on Anaconda Cloud for 32-bit little-endian, +ARMv7-based boards, which currently includes the Raspberry Pi 2 and 3, +but not the Pi 1 or Zero. These can be installed using conda from the +``numba`` channel:: + + $ conda install -c numba numba + +Berryconda and Numba may work on other Linux-based ARMv7 systems, but this has +not been tested. + + +Installing on Linux ARMv8 (AArch64) Platforms +--------------------------------------------- + +We build and test conda packages on the `NVIDIA Jetson TX2 +`_, +but they are likely to work for other AArch64 platforms. (Note that while the +Raspberry Pi CPU is 64-bit, Raspbian runs it in 32-bit mode, so look at +:ref:`numba-install-armv7` instead.) + +Conda-forge support for AArch64 is still quite experimental and packages are limited, +but it does work enough for Numba to build and pass tests. To set up the environment: + +* Install `miniforge `_. + This will create a minimal conda environment. + +* Then you can install Numba from the ``numba`` channel:: + + $ conda install -c numba numba + +On CUDA-enabled systems, like the Jetson, the CUDA toolkit should be +automatically detected in the environment. + +.. _numba-source-install-instructions: + +Installing from source +---------------------- + +Installing Numba from source is fairly straightforward (similar to other +Python packages), but installing `llvmlite +`_ can be quite challenging due to the need +for a special LLVM build. If you are building from source for the purposes of +Numba development, see :ref:`buildenv` for details on how to create a Numba +development environment with conda. + +If you are building Numba from source for other reasons, first follow the +`llvmlite installation guide `_. +Once that is completed, you can download the latest Numba source code from +`Github `_:: + + $ git clone git://github.com/numba/numba.git + +Source archives of the latest release can also be found on +`PyPI `_. In addition to ``llvmlite``, you will also need: + +* A C compiler compatible with your Python installation. If you are using + Anaconda, you can use the following conda packages: + + * Linux ``x86``: ``gcc_linux-32`` and ``gxx_linux-32`` + * Linux ``x86_64``: ``gcc_linux-64`` and ``gxx_linux-64`` + * Linux ``POWER``: ``gcc_linux-ppc64le`` and ``gxx_linux-ppc64le`` + * Linux ``ARM``: no conda packages, use the system compiler + * Mac OSX: ``clang_osx-64`` and ``clangxx_osx-64`` or the system compiler at + ``/usr/bin/clang`` (Mojave onwards) + * Mac OSX (M1): ``clang_osx-arm64`` and ``clangxx_osx-arm64`` + * Windows: a version of Visual Studio appropriate for the Python version in + use + +* `NumPy `_ + +Then you can build and install Numba from the top level of the source tree:: + + $ python setup.py install + +.. 
_numba-source-install-env_vars: + +Build time environment variables and configuration of optional components +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The environment variables below alter how Numba builds by default, along with +information on the configuration of optional components. + +.. envvar:: NUMBA_DISABLE_OPENMP (default: not set) + + To disable compilation of the OpenMP threading backend set this environment + variable to a non-empty string when building. If not set (default): + + * For Linux and Windows it is necessary to provide OpenMP C headers and + runtime libraries compatible with the compiler tool chain mentioned above, + and for these to be accessible to the compiler via standard flags. + * For OSX the conda package ``llvm-openmp`` provides suitable C headers and + libraries. If the compilation requirements are not met, the OpenMP threading + backend will not be compiled. + +.. envvar:: NUMBA_DISABLE_TBB (default: not set) + + To disable the compilation of the TBB threading backend set this environment + variable to a non-empty string when building. If not set (default) the TBB C + headers and libraries must be available at compile time. If building with + ``conda build`` this requirement can be met by installing the ``tbb-devel`` + package. If not building with ``conda build`` the requirement can be met via a + system installation of TBB or through the use of the ``TBBROOT`` environment + variable to provide the location of the TBB installation. For more + information about setting ``TBBROOT`` see the `Intel documentation `_. + +.. _numba-source-install-check: + +Dependency List +--------------- + +Numba has numerous required and optional dependencies which additionally may +vary with target operating system and hardware. The following lists them all +(as of July 2020). + +* Required build time: + + * ``setuptools`` + * ``numpy`` + * ``llvmlite`` + * Compiler toolchain mentioned above + +* Required run time: + + * ``setuptools`` + * ``numpy`` + * ``llvmlite`` + +* Optional build time: + + See :ref:`numba-source-install-env_vars` for more details about additional + options for the configuration and specification of these optional components. + + * ``llvm-openmp`` (OSX) - provides headers for compiling OpenMP support into + Numba's threading backend + * ``tbb-devel`` - provides TBB headers/libraries for compiling TBB support + into Numba's threading backend (2021 <= version < 2021.6 required). + * ``importlib_metadata`` (for Python versions < 3.9) + +* Optional run time: + + * ``scipy`` - provides cython bindings used in Numba's ``np.linalg.*`` + support + * ``tbb`` - provides the TBB runtime libraries used by Numba's TBB threading + backend (version >= 2021 required). + * ``jinja2`` - for "pretty" type annotation output (HTML) via the ``numba`` + CLI + * ``cffi`` - permits use of CFFI bindings in Numba compiled functions + * ``llvm-openmp`` - (OSX) provides OpenMP library support for Numba's OpenMP + threading backend. + * ``intel-openmp`` - (OSX) provides an alternative OpenMP library for use with + Numba's OpenMP threading backend.
+ * ``ipython`` - if in use, caching will use IPython's cache + directories/caching still works + * ``pyyaml`` - permits the use of a ``.numba_config.yaml`` + file for storing per project configuration options + * ``colorama`` - makes error message highlighting work + * ``icc_rt`` - (numba channel) allows Numba to use Intel SVML for extra + performance + * ``pygments`` - for "pretty" type annotation + * ``gdb`` as an executable on the ``$PATH`` - if you would like to use the gdb + support + * Compiler toolchain mentioned above, if you would like to use ``pycc`` for + Ahead-of-Time (AOT) compilation + * ``r2pipe`` - required for assembly CFG inspection. + * ``radare2`` as an executable on the ``$PATH`` - required for assembly CFG + inspection. `See here `_ for + information on obtaining and installing. + * ``graphviz`` - for some CFG inspection functionality. + * ``pickle5`` - provides Python 3.8 pickling features for faster pickling in + Python 3.7. + * ``typeguard`` - used by ``runtests.py`` for + :ref:`runtime type-checking `. + * ``cuda-python`` - The NVIDIA CUDA Python bindings. See :ref:`cuda-bindings`. + Numba requires Version 11.6 or greater. + +* To build the documentation: + + * ``sphinx`` + * ``pygments`` + * ``sphinx_rtd_theme`` + * ``numpydoc`` + * ``make`` as an executable on the ``$PATH`` + +.. _numba_support_info: + +Version support information +--------------------------- + +This is the canonical reference for information concerning which versions of +Numba's dependencies were tested and known to work against a given version of +Numba. Other versions of the dependencies (especially NumPy) may work reasonably +well but were not tested. The use of ``x`` in a version number indicates all +patch levels supported. The use of ``?`` as a version is due to missing +information. 
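+
+To compare your own environment against the table below, it can help to print
+the versions actually installed; a minimal check::
+
+    import llvmlite
+    import numba
+    import numpy
+
+    print("numba    ", numba.__version__)
+    print("numpy    ", numpy.__version__)
+    print("llvmlite ", llvmlite.__version__)
+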
+ ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| Numba | Release date | Python | NumPy | llvmlite | LLVM | TBB | +===========+==============+===========================+============================+==============================+===================+=============================+ +| 0.57.x | TBC | 3.8.x <= version < 3.12 | 1.19 <= version < 1.24 | 0.40.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.56.4 | 2022-11-03 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.24 | 0.39.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.56.3 | 2022-10-13 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.24 | 0.39.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.56.2 | 2022-09-01 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.24 | 0.39.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.56.0 | 2022-07-25 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.23 | 0.39.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.55.2 | 2022-05-25 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.23 | 0.38.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.55.{0,1}| 2022-01-13 | 3.7.x <= version < 3.11 | 1.18 <= version < 1.22 | 0.38.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.54.x | 2021-08-19 | 3.6.x <= version < 3.10 | 1.17 <= version < 1.21 | 0.37.x | 11.x | 2021.x | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.53.x | 2021-03-11 | 3.6.x <= version < 3.10 | 1.15 <= version < 1.21 | 0.36.x | 11.x | 2019.5 <= version < 2021.4 | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.52.x | 2020-11-30 | 3.6.x <= version < 3.9 | 1.15 <= version < 1.20 | 0.35.x | 10.x | 2019.5 <= version < 2020.3 | +| | | | | | (9.x for aarch64) | | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.51.x | 2020-08-12 | 3.6.x <= version < 3.9 | 1.15 <= version < 1.19 | 0.34.x | 10.x | 2019.5 <= version < 2020.0 | +| | | | | | (9.x for aarch64) | | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.50.x | 2020-06-10 | 3.6.x <= version < 3.9 | 1.15 <= version 
< 1.19 | 0.33.x | 9.x | 2019.5 <= version < 2020.0 | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.49.x | 2020-04-16 | 3.6.x <= version < 3.9 | 1.15 <= version < 1.18 | 0.31.x <= version < 0.33.x | 9.x | 2019.5 <= version < 2020.0 | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.48.x | 2020-01-27 | 3.6.x <= version < 3.9 | 1.15 <= version < 1.18 | 0.31.x | 8.x | 2018.0.5 <= version < ? | +| | | | | | (7.x for ppc64le) | | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ +| 0.47.x | 2020-01-02 | 3.5.x <= version < 3.9; | 1.15 <= version < 1.18 | 0.30.x | 8.x | 2018.0.5 <= version < ? | +| | | version == 2.7.x | | | (7.x for ppc64le) | | ++-----------+--------------+---------------------------+----------------------------+------------------------------+-------------------+-----------------------------+ + +Checking your installation +-------------------------- + +You should be able to import Numba from the Python prompt:: + + $ python + Python 3.10.2 | packaged by conda-forge | (main, Jan 14 2022, 08:02:09) [GCC 9.4.0] on linux + Type "help", "copyright", "credits" or "license" for more information. + >>> import numba + >>> numba.__version__ + '0.55.1' + +You can also try executing the ``numba --sysinfo`` (or ``numba -s`` for short) +command to report information about your system capabilities. See :ref:`cli` for +further information. + +:: + + $ numba -s + System info: + -------------------------------------------------------------------------------- + __Time Stamp__ + Report started (local time) : 2022-01-18 10:35:08.981319 + + __Hardware Information__ + Machine : x86_64 + CPU Name : skylake-avx512 + CPU Count : 12 + CPU Features : + 64bit adx aes avx avx2 avx512bw avx512cd avx512dq avx512f avx512vl bmi bmi2 + clflushopt clwb cmov cx16 cx8 f16c fma fsgsbase fxsr invpcid lzcnt mmx + movbe pclmul pku popcnt prfchw rdrnd rdseed rtm sahf sse sse2 sse3 sse4.1 + sse4.2 ssse3 xsave xsavec xsaveopt xsaves + + __OS Information__ + Platform Name : Linux-5.4.0-94-generic-x86_64-with-glibc2.31 + Platform Release : 5.4.0-94-generic + OS Name : Linux + OS Version : #106-Ubuntu SMP Thu Jan 6 23:58:14 UTC 2022 + + __Python Information__ + Python Compiler : GCC 9.4.0 + Python Implementation : CPython + Python Version : 3.10.2 + Python Locale : en_GB.UTF-8 + + __LLVM information__ + LLVM Version : 11.1.0 + + __CUDA Information__ + Found 1 CUDA devices + id 0 b'Quadro RTX 8000' [SUPPORTED] + Compute Capability: 7.5 + PCI Device ID: 0 + PCI Bus ID: 21 + UUID: GPU-e6489c45-5b68-3b03-bab7-0e7c8e809643 + Watchdog: Enabled + FP32/FP64 Performance Ratio: 32 + +(output truncated due to length) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit-module.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit-module.rst new file mode 100644 index 000000000..cf2315b6c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit-module.rst @@ -0,0 +1,103 @@ +.. 
_jit-module: + +============================================ +Automatic module jitting with ``jit_module`` +============================================ + +A common usage pattern is to have an entire module containing user-defined +functions that all need to be jitted. One option to accomplish this is to +manually apply the ``@jit`` decorator to each function definition. This approach +works and is great in many cases. However, for large modules with many functions, +manually ``jit``-wrapping each function definition can be tedious. For these +situations, Numba provides another option, the ``jit_module`` function, to +automatically replace functions declared in a module with their ``jit``-wrapped +equivalents. + +It's important to note the conditions under which ``jit_module`` will *not* +impact a function: + +1. Functions which have already been wrapped with a Numba decorator (e.g. + ``jit``, ``vectorize``, ``cfunc``, etc.) are not impacted by ``jit_module``. + +2. Functions which are declared outside the module from which ``jit_module`` + is called are not automatically ``jit``-wrapped. + +3. Function declarations which occur logically after calling ``jit_module`` + are not impacted. + +All other functions in a module will have the ``@jit`` decorator automatically +applied to them. See the following section for an example use case. + +.. note:: This feature is for use by module authors. ``jit_module`` should not + be called outside the context of a module containing functions to be jitted. + + +Example usage +============= + +Let's assume we have a Python module we've created, ``mymodule.py`` (shown +below), which contains several functions. Some of these functions are defined +in ``mymodule.py`` while others are imported from other modules. We wish to have +all the functions which are defined in ``mymodule.py`` jitted using +``jit_module``. + +.. _jit-module-usage: + +.. code-block:: python + + # mymodule.py + + from numba import jit, jit_module + + def inc(x): + return x + 1 + + def add(x, y): + return x + y + + import numpy as np + # Use NumPy's mean function + mean = np.mean + + @jit(nogil=True) + def mul(a, b): + return a * b + + jit_module(nopython=True, error_model="numpy") + + def div(a, b): + return a / b + +There are several things to note in the above example: + +- Both the ``inc`` and ``add`` functions will be replaced with their + ``jit``-wrapped equivalents with :ref:`compilation options <jit-options>` + ``nopython=True`` and ``error_model="numpy"``. + +- The ``mean`` function, because it's defined *outside* of ``mymodule.py`` in + NumPy, will not be modified. + +- ``mul`` will not be modified because it has been manually decorated with + ``jit``. + +- ``div`` will not be automatically ``jit``-wrapped because it is declared + after ``jit_module`` is called. + +When the above module is imported, we have: + +.. code-block:: python + + >>> import mymodule + >>> mymodule.inc + CPUDispatcher(<function inc at 0x...>) + >>> mymodule.mean + <function mean at 0x...> + + +API +=== +.. warning:: This feature is experimental. The supported features may change + with or without notice. + +.. autofunction:: numba.jit_module + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit.rst new file mode 100644 index 000000000..36b4ba985 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jit.rst @@ -0,0 +1,201 @@ +.. 
_jit: + +=================================== +Compiling Python code with ``@jit`` +=================================== + +Numba provides several utilities for code generation, but its central +feature is the :func:`numba.jit` decorator. Using this decorator, you can mark +a function for optimization by Numba's JIT compiler. Various invocation +modes trigger differing compilation options and behaviours. + + +Basic usage +=========== + +.. _jit-lazy: + +Lazy compilation +---------------- + +The recommended way to use the ``@jit`` decorator is to let Numba decide +when and how to optimize:: + + from numba import jit + + @jit + def f(x, y): + # A somewhat trivial example + return x + y + +In this mode, compilation will be deferred until the first function +execution. Numba will infer the argument types at call time, and generate +optimized code based on this information. Numba will also be able to +compile separate specializations depending on the input types. For example, +calling the ``f()`` function above with integer or complex numbers will +generate different code paths:: + + >>> f(1, 2) + 3 + >>> f(1j, 2) + (2+1j) + +Eager compilation +----------------- + +You can also tell Numba the function signature you are expecting. The +function ``f()`` would now look like:: + + from numba import jit, int32 + + @jit(int32(int32, int32)) + def f(x, y): + # A somewhat trivial example + return x + y + +``int32(int32, int32)`` is the function's signature. In this case, the +corresponding specialization will be compiled by the ``@jit`` decorator, +and no other specialization will be allowed. This is useful if you want +fine-grained control over types chosen by the compiler (for example, +to use single-precision floats). + +If you omit the return type, e.g. by writing ``(int32, int32)`` instead of +``int32(int32, int32)``, Numba will try to infer it for you. Function +signatures can also be strings, and you can pass several of them as a list; +see the :func:`numba.jit` documentation for more details. + +Of course, the compiled function gives the expected results:: + + >>> f(1,2) + 3 + +and if we specified ``int32`` as return type, the higher-order bits get +discarded:: + + >>> f(2**31, 2**31 + 1) + 1 + + +Calling and inlining other functions +==================================== + +Numba-compiled functions can call other compiled functions. The function +calls may even be inlined in the native code, depending on optimizer +heuristics. For example:: + + @jit + def square(x): + return x ** 2 + + @jit + def hypot(x, y): + return math.sqrt(square(x) + square(y)) + +The ``@jit`` decorator *must* be added to any such library function, +otherwise Numba may generate much slower code. + + +Signature specifications +======================== + +Explicit ``@jit`` signatures can use a number of types. 
Here are some +common ones: + +* ``void`` is the return type of functions returning nothing (which + actually return :const:`None` when called from Python) +* ``intp`` and ``uintp`` are pointer-sized integers (signed and unsigned, + respectively) +* ``intc`` and ``uintc`` are equivalent to C ``int`` and ``unsigned int`` + integer types +* ``int8``, ``uint8``, ``int16``, ``uint16``, ``int32``, ``uint32``, + ``int64``, ``uint64`` are fixed-width integers of the corresponding bit + width (signed and unsigned) +* ``float32`` and ``float64`` are single- and double-precision floating-point + numbers, respectively +* ``complex64`` and ``complex128`` are single- and double-precision complex + numbers, respectively +* array types can be specified by indexing any numeric type, e.g. ``float32[:]`` + for a one-dimensional single-precision array or ``int8[:,:]`` for a + two-dimensional array of 8-bit integers. + + +.. _jit-options: + +Compilation options +=================== + +A number of keyword-only arguments can be passed to the ``@jit`` decorator. + +.. _jit-nopython: + +``nopython`` +------------ + +Numba has two compilation modes: :term:`nopython mode` and +:term:`object mode`. The former produces much faster code, but has +limitations that can force Numba to fall back to the latter. To prevent +Numba from falling back, and instead raise an error, pass ``nopython=True``. + +:: + + @jit(nopython=True) + def f(x, y): + return x + y + +.. seealso:: :ref:`numba-troubleshooting` + +.. _jit-nogil: + +``nogil`` +--------- + +Whenever Numba optimizes Python code to native code that only works on +native types and variables (rather than Python objects), it is no longer +necessary to hold Python's :py:term:`global interpreter lock` (GIL). +Numba will release the GIL when entering such a compiled function if you +passed ``nogil=True``. + +:: + + @jit(nogil=True) + def f(x, y): + return x + y + +Code running with the GIL released runs concurrently with other +threads executing Python or Numba code (either the same compiled function, +or another one), allowing you to take advantage of multi-core systems. +This will not be possible if the function is compiled in :term:`object mode`. + +When using ``nogil=True``, you'll have to be wary of the usual pitfalls +of multi-threaded programming (consistency, synchronization, race conditions, +etc.). + +.. _jit-cache: + +``cache`` +--------- + +To avoid repeating the compilation each time you invoke a Python program, +you can instruct Numba to write the result of function compilation into +a file-based cache. This is done by passing ``cache=True``:: + + @jit(cache=True) + def f(x, y): + return x + y + +.. _parallel_jit_option: + +``parallel`` +------------ + +Enables automatic parallelization (and related optimizations) for those +operations in the function known to have parallel semantics. For a list of +supported operations, see :ref:`numba-parallel`. This feature is enabled by +passing ``parallel=True`` and must be used in conjunction with +``nopython=True``:: + + @jit(nopython=True, parallel=True) + def f(x, y): + return x + y + +.. seealso:: :ref:`numba-parallel` diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jitclass.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jitclass.rst new file mode 100644 index 000000000..9000bf436 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/jitclass.rst @@ -0,0 +1,270 @@
+.. _jitclass: + +=========================================== +Compiling Python classes with ``@jitclass`` +=========================================== + +.. note:: + + This is an early version of jitclass support. Not all compilation features + are exposed or implemented yet. + + +Numba supports code generation for classes via the +:func:`numba.experimental.jitclass` decorator. A class can be marked for +optimization using this decorator along with a specification of the types of +each field. We call the resulting class object a *jitclass*. All methods of a +jitclass are compiled into nopython functions. The data of a jitclass instance +is allocated on the heap as a C-compatible structure so that any compiled +functions can have direct access to the underlying data, bypassing the +interpreter. + + +Basic usage +=========== + +Here's an example of a jitclass: + +.. literalinclude:: ../../../numba/tests/doc_examples/test_jitclass.py + :language: python + :start-after: magictoken.ex_jitclass.begin + :end-before: magictoken.ex_jitclass.end + :dedent: 8 + +In the above example, a ``spec`` is provided as a list of 2-tuples. The tuples +contain the name of the field and the Numba type of the field. Alternatively, +a user can use a dictionary (preferably an ``OrderedDict``, for stable field +ordering), which maps field names to types. + +The definition of the class requires at least a ``__init__`` method for +initializing each defined field. Uninitialized fields contain garbage data. +Methods and properties (getters and setters only) can be defined. They will be +automatically compiled. + + +Inferred class member types from type annotations with ``as_numba_type`` +======================================================================== + +Fields of a ``jitclass`` can also be inferred from Python type annotations. + +.. literalinclude:: ../../../numba/tests/doc_examples/test_jitclass.py + :language: python + :start-after: magictoken.ex_jitclass_type_hints.begin + :end-before: magictoken.ex_jitclass_type_hints.end + :dedent: 8 + +Any type annotations on the class will be used to extend the spec if that field +is not already present. The Numba type corresponding to the given Python type +is inferred using ``as_numba_type``. For example, if we have the class + +.. code-block:: python + + @jitclass([("w", int32), ("y", float64[:])]) + class Foo: + w: int + x: float + y: np.ndarray + z: SomeOtherType + + def __init__(self, w: int, x: float, y: np.ndarray, z: SomeOtherType): + ... + +then the full spec used for ``Foo`` will be: + +* ``"w": int32`` (specified in the ``spec``) +* ``"x": float64`` (added from type annotation) +* ``"y": array(float64, 1d, A)`` (specified in the ``spec``) +* ``"z": numba.as_numba_type(SomeOtherType)`` (added from type annotation) + +Here ``SomeOtherType`` could be any supported Python type (e.g. +``bool``, ``typing.Dict[int, typing.Tuple[float, float]]``, or another +``jitclass``). + +Note that only type annotations on the class will be used to infer spec +elements. Method type annotations (e.g. those of ``__init__`` above) are +ignored. + +Numba requires knowing the dtype and rank of NumPy arrays, which cannot +currently be expressed with type annotations. Because of this, NumPy arrays need +to be included in the ``spec`` explicitly, as shown in the sketch below.
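+
+This is a minimal sketch (the ``Accumulator`` class is hypothetical) combining
+an explicit array entry in the ``spec`` with a scalar field inferred from an
+annotation:
+
+.. code-block:: python
+
+    import numpy as np
+
+    from numba import float64
+    from numba.experimental import jitclass
+
+    @jitclass([("data", float64[:])])  # array dtype/rank must be explicit
+    class Accumulator:
+        count: int  # inferred via as_numba_type
+
+        def __init__(self, data):
+            self.data = data
+            self.count = 0
+
+        def add(self, value):
+            self.data[self.count] = value
+            self.count += 1
+
+    acc = Accumulator(np.zeros(4))
+    acc.add(1.5)
+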
Specifying ``numba.typed`` containers as class members explicitly +================================================================= + +The following patterns demonstrate how to specify a ``numba.typed.Dict`` or +``numba.typed.List`` explicitly as part of the ``spec`` passed to ``jitclass``. + +First, using explicit Numba types and explicit construction. + +.. code-block:: python + + from numba import types, typed + from numba.experimental import jitclass + + # key and value types + kv_ty = (types.int64, types.unicode_type) + + # A container class with: + # * member 'd' holding a typed dictionary of int64 -> unicode string (kv_ty) + # * member 'l' holding a typed list of float64 + @jitclass([('d', types.DictType(*kv_ty)), + ('l', types.ListType(types.float64))]) + class ContainerHolder(object): + def __init__(self): + # initialize the containers + self.d = typed.Dict.empty(*kv_ty) + self.l = typed.List.empty_list(types.float64) + + container = ContainerHolder() + container.d[1] = "apple" + container.d[2] = "orange" + container.l.append(123.) + container.l.append(456.) + print(container.d) # {1: apple, 2: orange} + print(container.l) # [123.0, 456.0] + +Another useful pattern is to use the ``numba.typed`` container attribute +``_numba_type_`` to find the type of a container; it can be accessed directly +from an instance of the container in the Python interpreter. The same +information can be obtained by calling :func:`numba.typeof` on the instance. For +example: + +.. code-block:: python + + from numba import typed, typeof + from numba.experimental import jitclass + + d = typed.Dict() + d[1] = "apple" + d[2] = "orange" + l = typed.List() + l.append(123.) + l.append(456.) + + + @jitclass([('d', typeof(d)), ('l', typeof(l))]) + class ContainerInstHolder(object): + def __init__(self, dict_inst, list_inst): + self.d = dict_inst + self.l = list_inst + + container = ContainerInstHolder(d, l) + print(container.d) # {1: apple, 2: orange} + print(container.l) # [123.0, 456.0] + +It is worth noting that the container instance in a ``jitclass`` must be +initialized before use. For example, the following will cause an invalid memory +access, as ``self.d`` is written to without ``d`` having been initialized as a +``typed.Dict`` instance of the specified type. + +.. code-block:: python + + from numba import types + from numba.experimental import jitclass + + dict_ty = types.DictType(types.int64, types.unicode_type) + + @jitclass([('d', dict_ty)]) + class NotInitialisingContainer(object): + def __init__(self): + self.d[10] = "apple" # this is invalid, `d` is not initialized + + NotInitialisingContainer() # segmentation fault/memory access violation + + +Supported operations +==================== + +The following operations of jitclasses work in both the interpreter and Numba +compiled functions: + +* calling the jitclass class object to construct a new instance + (e.g. ``mybag = Bag(123)``); +* read/write access to attributes and properties (e.g. ``mybag.value``); +* calling methods (e.g. ``mybag.increment(3)``); +* calling static methods as instance attributes (e.g. ``mybag.add(1, 1)``); +* calling static methods as class attributes (e.g. ``Bag.add(1, 2)``); +* using select dunder methods (e.g. ``__add__`` with ``mybag + otherbag``). + +Using jitclasses in Numba compiled functions is more efficient. +Short methods can be inlined (at the discretion of the LLVM inliner). +Attribute access is simply a read from a C structure. +Using jitclasses from the interpreter has the same overhead as calling any +Numba compiled function from the interpreter.
Arguments and return values +must be unboxed or boxed between Python objects and native representation. +Values encapsulated by a jitclass do not get boxed into Python objects when +the jitclass instance is handed to the interpreter. It is during attribute +access to the field values that they are boxed. +Calling static methods as class attributes is only supported outside of the +class definition (i.e. code cannot call ``Bag.add()`` from within another method +of ``Bag``). + + +Supported dunder methods +------------------------ + +The following dunder methods may be defined for jitclasses: + +* ``__abs__`` +* ``__bool__`` +* ``__complex__`` +* ``__contains__`` +* ``__float__`` +* ``__getitem__`` +* ``__hash__`` +* ``__index__`` +* ``__int__`` +* ``__len__`` +* ``__setitem__`` +* ``__str__`` +* ``__eq__`` +* ``__ne__`` +* ``__ge__`` +* ``__gt__`` +* ``__le__`` +* ``__lt__`` +* ``__add__`` +* ``__floordiv__`` +* ``__lshift__`` +* ``__mod__`` +* ``__mul__`` +* ``__neg__`` +* ``__pos__`` +* ``__pow__`` +* ``__rshift__`` +* ``__sub__`` +* ``__truediv__`` +* ``__and__`` +* ``__or__`` +* ``__xor__`` +* ``__iadd__`` +* ``__ifloordiv__`` +* ``__ilshift__`` +* ``__imod__`` +* ``__imul__`` +* ``__ipow__`` +* ``__irshift__`` +* ``__isub__`` +* ``__itruediv__`` +* ``__iand__`` +* ``__ior__`` +* ``__ixor__`` + +Refer to the `Python Data Model documentation +<https://docs.python.org/3/reference/datamodel.html>`_ for descriptions of +these methods. + + +Limitations +=========== + +* A jitclass class object is treated as a function (the constructor) inside + a Numba compiled function. +* ``isinstance()`` only works in the interpreter. +* Manipulating jitclass instances in the interpreter is not optimized, yet. +* Support for jitclasses is available on the CPU only. + (Note: Support for GPU devices is planned for a future release.) + + +The decorator: ``@jitclass`` +============================ + +.. autofunction:: numba.experimental.jitclass diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst new file mode 100644 index 000000000..9b11b5a49 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst @@ -0,0 +1,34 @@ + +Overview +======== + +Numba is a compiler for Python array and numerical functions that gives +you the power to speed up your applications with high performance +functions written directly in Python. + +Numba generates optimized machine code from pure Python code using +the `LLVM compiler infrastructure <https://llvm.org/>`_. With a few simple +annotations, array-oriented and math-heavy Python code can be +just-in-time optimized to performance similar to that of C, C++ and Fortran, +without having to switch languages or Python interpreters.
+
+
+Limitations
+===========
+
+* A jitclass class object is treated as a function (the constructor) inside
+  a Numba compiled function.
+* ``isinstance()`` only works in the interpreter.
+* Manipulating jitclass instances in the interpreter is not optimized, yet.
+* Support for jitclasses is available on CPU only.
+  (Note: Support for GPU devices is planned for a future release.)
+
+
+The decorator: ``@jitclass``
+============================
+
+.. autofunction:: numba.experimental.jitclass
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst
new file mode 100644
index 000000000..9b11b5a49
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/overview.rst
@@ -0,0 +1,34 @@
+
+Overview
+========
+
+Numba is a compiler for Python array and numerical functions that gives
+you the power to speed up your applications with high performance
+functions written directly in Python.
+
+Numba generates optimized machine code from pure Python code using
+the `LLVM compiler infrastructure `_. With a few simple
+annotations, array-oriented and math-heavy Python code can be
+just-in-time optimized to performance similar to that of C, C++ and Fortran,
+without having to switch languages or Python interpreters.
+
+Numba's main features are:
+
+* :ref:`on-the-fly code generation ` (at import time or runtime, at the
+  user's preference)
+* native code generation for the CPU (default) and
+  :doc:`GPU hardware <../cuda/index>`
+* integration with the Python scientific software stack (thanks to Numpy)
+
+Here is how a Numba-optimized function, taking a Numpy array as argument,
+might look::
+
+    import numba
+
+    @numba.jit
+    def sum2d(arr):
+        M, N = arr.shape
+        result = 0.0
+        for i in range(M):
+            for j in range(N):
+                result += arr[i, j]
+        return result
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/parallel.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/parallel.rst
new file mode 100644
index 000000000..3f09a0bc9
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/parallel.rst
@@ -0,0 +1,719 @@
+.. Copyright (c) 2017 Intel Corporation
+   SPDX-License-Identifier: BSD-2-Clause
+
+.. _numba-parallel:
+
+=======================================
+Automatic parallelization with ``@jit``
+=======================================
+
+Setting the :ref:`parallel_jit_option` option for :func:`~numba.jit` enables
+a Numba transformation pass that attempts to automatically parallelize and
+perform other optimizations on (part of) a function. At the moment, this
+feature only works on CPUs.
+
+Some operations inside a user defined function, e.g. adding a scalar value to
+an array, are known to have parallel semantics. A user program may contain
+many such operations and while each operation could be parallelized
+individually, such an approach often has lackluster performance due to poor
+cache behavior. Instead, with auto-parallelization, Numba attempts to
+identify such operations in a user program, and fuse adjacent ones together,
+to form one or more kernels that are automatically run in parallel.
+The process is fully automated without modifications to the user program,
+which is in contrast to Numba's :func:`~numba.vectorize` or
+:func:`~numba.guvectorize` mechanism, where manual effort is required
+to create parallel kernels.
+
+.. _numba-parallel-supported:
+
+Supported Operations
+====================
+
+In this section, we give a list of all the array operations that have
+parallel semantics and which we attempt to parallelize.
+
+#. All numba array operations that are supported by :ref:`case-study-array-expressions`,
+   which include common arithmetic functions between Numpy arrays, and between
+   arrays and scalars, as well as Numpy ufuncs. They are often called
+   `element-wise` or `point-wise` array operations:
+
+   * unary operators: ``+`` ``-`` ``~``
+   * binary operators: ``+`` ``-`` ``*`` ``/`` ``%`` ``|`` ``>>`` ``^`` ``<<`` ``&`` ``**`` ``//``
+   * comparison operators: ``==`` ``!=`` ``<`` ``<=`` ``>`` ``>=``
+   * :ref:`Numpy ufuncs ` that are supported in :term:`nopython mode`.
+   * User defined :class:`~numba.DUFunc` through :func:`~numba.vectorize`.
+
+#. Numpy reduction functions ``sum``, ``prod``, ``min``, ``max``, ``argmin``,
+   and ``argmax``. Also, array math functions ``mean``, ``var``, and ``std``.
+
+#. Numpy array creation functions ``zeros``, ``ones``, ``arange``, ``linspace``,
+   and several random functions (rand, randn, ranf, random_sample, sample,
+   random, standard_normal, chisquare, weibull, power, geometric, exponential,
+   poisson, rayleigh, normal, uniform, beta, binomial, f, gamma, lognormal,
+   laplace, randint, triangular).
+
+#. Numpy ``dot`` function between a matrix and a vector, or two vectors.
+   In all other cases, Numba's default implementation is used.
+
+#. Multi-dimensional arrays are also supported for the above operations
+   when operands have matching dimension and size. The full semantics of
+   Numpy broadcast between arrays with mixed dimensionality or size is
+   not supported, nor is the reduction across a selected dimension.
+
+#. Array assignment in which the target is an array selection using a slice
+   or a boolean array, and the value being assigned is either a scalar or
+   another selection where the slice range or bitarray are inferred to be
+   compatible.
+
+#. The ``reduce`` operator of ``functools`` is supported for specifying parallel
+   reductions on 1D Numpy arrays but the initial value argument is mandatory;
+   a short sketch follows this list.
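+
+A minimal sketch of such a ``functools.reduce`` based reduction (this example
+is an illustration added here, with an explicit initial value of ``0.``)::
+
+    from functools import reduce
+    from numba import njit
+    import numpy as np
+
+    @njit(parallel=True)
+    def reduce_sum(A):
+        # the initial value argument (0. here) is mandatory
+        return reduce(lambda x, y: x + y, A, 0.)
+
+    print(reduce_sum(np.arange(10.)))  # 45.0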
+
+.. _numba-prange:
+
+Explicit Parallel Loops
+========================
+
+Another feature of the code transformation pass (when ``parallel=True``) is
+support for explicit parallel loops. One can use Numba's ``prange`` instead of
+``range`` to specify that a loop can be parallelized. The user is required to
+make sure that the loop does not have cross iteration dependencies except for
+supported reductions.
+
+A reduction is inferred automatically if a variable is updated by a supported binary
+function/operator using its previous value in the loop body. The following
+functions/operators are supported: ``+=``, ``+``, ``-=``, ``-``, ``*=``,
+``*``, ``/=``, ``/``, ``max()``, ``min()``.
+The initial value of the reduction is inferred automatically for the
+supported operators (i.e., not the ``max`` and ``min`` functions).
+Note that the ``//=`` operator is not supported because
+in the general case the result depends on the order in which the divisors are
+applied. However, if all divisors are integers then the programmer may be
+able to rewrite the ``//=`` reduction as a ``*=`` reduction followed by
+a single floor division after the parallel region where the divisor is the
+accumulated product.
+For the ``max`` and ``min`` functions, the reduction variable should hold the identity
+value right before entering the ``prange`` loop. Reductions in this manner
+are supported for scalars and for arrays of arbitrary dimensions.
+
+The example below demonstrates a parallel loop with a
+reduction (``A`` is a one-dimensional Numpy array)::
+
+    from numba import njit, prange
+
+    @njit(parallel=True)
+    def prange_test(A):
+        s = 0
+        # Without "parallel=True" in the jit-decorator
+        # the prange statement is equivalent to range
+        for i in prange(A.shape[0]):
+            s += A[i]
+        return s
+
+The following example demonstrates a product reduction on a two-dimensional array::
+
+    from numba import njit, prange
+    import numpy as np
+
+    @njit(parallel=True)
+    def two_d_array_reduction_prod(n):
+        shp = (13, 17)
+        result1 = 2 * np.ones(shp, np.int_)
+        tmp = 2 * np.ones_like(result1)
+
+        for i in prange(n):
+            result1 *= tmp
+
+        return result1
+
+.. note:: When using Python's ``range`` to induce a loop, Numba types the
+          induction variable as a signed integer. This is also the case for
+          Numba's ``prange`` when ``parallel=False``. However, for
+          ``parallel=True``, if the range is identifiable as strictly positive,
+          the type of the induction variable will be ``uint64``. The impact of
+          a ``uint64`` induction variable is often most noticeable when
+          undertaking operations involving it and a signed integer. Under
+          Numba's type coercion rules, such a case will commonly result in the
+          operation producing a floating point result type.
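+
+A hedged sketch of the coercion described in this note (the function is an
+illustration added here; the exact typing can vary between Numba versions)::
+
+    from numba import njit, prange
+
+    @njit(parallel=True)
+    def induction_demo(n):
+        acc = 0
+        for i in prange(n):  # `i` may be typed as uint64 with parallel=True
+            acc += i - 1     # uint64 mixed with int64 commonly coerces to float64
+        return acc
+
+    print(induction_demo(4))  # typically 2.0 rather than 2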
+ + +Care should be taken, however, when reducing into slices or elements of an array +if the elements specified by the slice or index are written to simultaneously by +multiple parallel threads. The compiler may not detect such cases and then a race condition +would occur. + +The following example demonstrates such a case where a race condition in the execution of the +parallel for-loop results in an incorrect return value:: + + from numba import njit, prange + import numpy as np + + @njit(parallel=True) + def prange_wrong_result(x): + n = x.shape[0] + y = np.zeros(4) + for i in prange(n): + # accumulating into the same element of `y` from different + # parallel iterations of the loop results in a race condition + y[:] += x[i] + + return y + +as does the following example where the accumulating element is explicitly specified:: + + from numba import njit, prange + import numpy as np + + @njit(parallel=True) + def prange_wrong_result(x): + n = x.shape[0] + y = np.zeros(4) + for i in prange(n): + # accumulating into the same element of `y` from different + # parallel iterations of the loop results in a race condition + y[i % 4] += x[i] + + return y + +whereas performing a whole array reduction is fine:: + + from numba import njit, prange + import numpy as np + + @njit(parallel=True) + def prange_ok_result_whole_arr(x): + n = x.shape[0] + y = np.zeros(4) + for i in prange(n): + y += x[i] + return y + +as is creating a slice reference outside of the parallel reduction loop:: + + from numba import njit, prange + import numpy as np + + @njit(parallel=True) + def prange_ok_result_outer_slice(x): + n = x.shape[0] + y = np.zeros(4) + z = y[:] + for i in prange(n): + z += x[i] + return y + +Examples +======== + +In this section, we give an example of how this feature helps +parallelize Logistic Regression:: + + @numba.jit(nopython=True, parallel=True) + def logistic_regression(Y, X, w, iterations): + for i in range(iterations): + w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) + return w + +We will not discuss details of the algorithm, but instead focus on how +this program behaves with auto-parallelization: + +1. Input ``Y`` is a vector of size ``N``, ``X`` is an ``N x D`` matrix, + and ``w`` is a vector of size ``D``. + +2. The function body is an iterative loop that updates variable ``w``. + The loop body consists of a sequence of vector and matrix operations. + +3. The inner ``dot`` operation produces a vector of size ``N``, followed by a + sequence of arithmetic operations either between a scalar and vector of + size ``N``, or two vectors both of size ``N``. + +4. The outer ``dot`` produces a vector of size ``D``, followed by an inplace + array subtraction on variable ``w``. + +5. With auto-parallelization, all operations that produce array of size + ``N`` are fused together to become a single parallel kernel. This includes + the inner ``dot`` operation and all point-wise array operations following it. + +6. The outer ``dot`` operation produces a result array of different dimension, + and is not fused with the above kernel. + +Here, the only thing required to take advantage of parallel hardware is to set +the :ref:`parallel_jit_option` option for :func:`~numba.jit`, with no +modifications to the ``logistic_regression`` function itself. 
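+
+For reference, a minimal invocation might look as follows (the shapes and the
+random data are illustrative assumptions added here)::
+
+    import numpy as np
+
+    N, D = 10000, 10
+    Y = np.random.choice(np.array([-1.0, 1.0]), N)  # labels in {-1, +1}
+    X = np.random.random((N, D))
+    w = np.zeros(D)
+    w = logistic_regression(Y, X, w, iterations=100)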
+If we were to
+give an equivalent parallel implementation using :func:`~numba.guvectorize`,
+it would require a pervasive change that rewrites the code to extract kernel
+computation that can be parallelized, which is both tedious and challenging.
+
+Unsupported Operations
+======================
+
+This section contains a non-exhaustive list of commonly encountered but
+currently unsupported features:
+
+#. **Mutating a list is not threadsafe**
+
+   Concurrent write operations on container types (i.e. lists, sets and
+   dictionaries) in a ``prange`` parallel region are not threadsafe, e.g.::
+
+       @njit(parallel=True)
+       def invalid():
+           z = []
+           for i in prange(10000):
+               z.append(i)
+           return z
+
+   It is highly likely that the above will result in corruption or an access
+   violation, as containers require thread-safety under mutation but this
+   feature is not implemented.
+
+#. **Induction variables are not associated with thread ID**
+
+   The use of the induction variable induced by a ``prange`` based loop in
+   conjunction with ``get_num_threads`` as a method of ensuring safe writes into
+   a pre-sized container is not valid, e.g.::
+
+       @njit(parallel=True)
+       def invalid():
+           n = get_num_threads()
+           z = [0 for _ in range(n)]
+           for i in prange(100):
+               z[i % n] += i
+           return z
+
+   The above can on occasion appear to work, but it does so by luck. There's no
+   guarantee about which indexes are assigned to which executing threads or the
+   order in which the loop iterations execute. A safe alternative is sketched
+   after this list.
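+
+A pattern that is safe, by contrast, is to have each iteration write to a
+distinct element indexed directly by the ``prange`` induction variable. The
+following is a minimal sketch of this idea (an illustration added here, not an
+example from the Numba test suite)::
+
+    from numba import njit, prange
+    import numpy as np
+
+    @njit(parallel=True)
+    def valid(n):
+        z = np.zeros(n)
+        for i in prange(n):
+            z[i] = i  # each iteration writes to a distinct element
+        return z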
+
+.. _numba-parallel-diagnostics:
+
+Diagnostics
+===========
+
+.. note:: At present not all parallel transforms and functions can be tracked
+          through the code generation process. Occasionally diagnostics about
+          some loops or transforms may be missing.
+
+The :ref:`parallel_jit_option` option for :func:`~numba.jit` can produce
+diagnostic information about the transforms undertaken in automatically
+parallelizing the decorated code. This information can be accessed in two ways:
+the first is by setting the environment variable
+:envvar:`NUMBA_PARALLEL_DIAGNOSTICS`, the second is by calling
+:meth:`~Dispatcher.parallel_diagnostics`. Both methods give the same information
+and print to ``STDOUT``. The level of verbosity in the diagnostic information is
+controlled by an integer argument of value between 1 and 4 inclusive, 1 being
+the least verbose and 4 the most. For example::
+
+    @njit(parallel=True)
+    def test(x):
+        n = x.shape[0]
+        a = np.sin(x)
+        b = np.cos(a * a)
+        acc = 0
+        for i in prange(n - 2):
+            for j in prange(n - 1):
+                acc += b[i] + b[j + 1]
+        return acc
+
+    test(np.arange(10))
+
+    test.parallel_diagnostics(level=4)
+
+produces::
+
+    ================================================================================
+    ======= Parallel Accelerator Optimizing: Function test, example.py (4)  =======
+    ================================================================================
+
+
+    Parallel loop listing for Function test, example.py (4)
+    --------------------------------------|loop #ID
+    @njit(parallel=True)                  |
+    def test(x):                          |
+        n = x.shape[0]                    |
+        a = np.sin(x)---------------------| #0
+        b = np.cos(a * a)-----------------| #1
+        acc = 0                           |
+        for i in prange(n - 2):-----------| #3
+            for j in prange(n - 1):-------| #2
+                acc += b[i] + b[j + 1]    |
+        return acc                        |
+    --------------------------------- Fusing loops ---------------------------------
+    Attempting fusion of parallel loops (combines loops with similar properties)...
+    Trying to fuse loops #0 and #1:
+        - fusion succeeded: parallel for-loop #1 is fused into for-loop #0.
+    Trying to fuse loops #0 and #3:
+        - fusion failed: loop dimension mismatched in axis 0. slice(0, x_size0.1, 1)
+    != slice(0, $40.4, 1)
+    ----------------------------- Before Optimization ------------------------------
+    Parallel region 0:
+    +--0 (parallel)
+    +--1 (parallel)
+
+
+    Parallel region 1:
+    +--3 (parallel)
+    +--2 (parallel)
+
+
+    --------------------------------------------------------------------------------
+    ------------------------------ After Optimization ------------------------------
+    Parallel region 0:
+    +--0 (parallel, fused with loop(s): 1)
+
+
+    Parallel region 1:
+    +--3 (parallel)
+    +--2 (serial)
+
+
+
+    Parallel region 0 (loop #0) had 1 loop(s) fused.
+
+    Parallel region 1 (loop #3) had 0 loop(s) fused and 1 loop(s) serialized as part
+    of the larger parallel loop (#3).
+    --------------------------------------------------------------------------------
+    --------------------------------------------------------------------------------
+
+    ---------------------------Loop invariant code motion---------------------------
+
+    Instruction hoisting:
+    loop #0:
+    Failed to hoist the following:
+        dependency: $arg_out_var.10 = getitem(value=x, index=$parfor__index_5.99)
+        dependency: $0.6.11 = getattr(value=$0.5, attr=sin)
+        dependency: $expr_out_var.9 = call $0.6.11($arg_out_var.10, func=$0.6.11, args=[Var($arg_out_var.10, example.py (7))], kws=(), vararg=None)
+        dependency: $arg_out_var.17 = $expr_out_var.9 * $expr_out_var.9
+        dependency: $0.10.20 = getattr(value=$0.9, attr=cos)
+        dependency: $expr_out_var.16 = call $0.10.20($arg_out_var.17, func=$0.10.20, args=[Var($arg_out_var.17, example.py (8))], kws=(), vararg=None)
+    loop #3:
+    Has the following hoisted:
+        $const58.3 = const(int, 1)
+        $58.4 = _n_23 - $const58.3
+    --------------------------------------------------------------------------------
+
+
+
+To aid users unfamiliar with the transforms undertaken when the
+:ref:`parallel_jit_option` option is used, and to assist in the understanding of
+the subsequent sections, the following definitions are provided:
+
+* Loop fusion
+    `Loop fusion `_ is a
+    technique whereby loops with equivalent bounds may be combined under certain
+    conditions to produce a loop with a larger body (aiming to improve data
+    locality).
+
+* Loop serialization
+    Loop serialization occurs when any number of ``prange`` driven loops are
+    present inside another ``prange`` driven loop. In this case the outermost
+    of all the ``prange`` loops executes in parallel and any inner ``prange``
+    loops (nested or otherwise) are treated as standard ``range`` based loops.
+    Essentially, nested parallelism does not occur.
+
+* Loop invariant code motion
+    `Loop invariant code motion
+    `_ is an
+    optimization technique that analyses a loop to look for statements that can
+    be moved outside the loop body without changing the result of executing the
+    loop; these statements are then "hoisted" out of the loop to save repeated
+    computation.
+
+* Allocation hoisting
+    Allocation hoisting is a specialized case of loop invariant code motion that
+    is possible due to the design of some common NumPy allocation methods.
+    Explanation of this technique is best driven by an example:
+
+    .. code-block:: python
+
+        @njit(parallel=True)
+        def test(n):
+            for i in prange(n):
+                temp = np.zeros((50, 50))  # <--- Allocate a temporary array with np.zeros()
+                for j in range(50):
+                    temp[j, j] = i
+
+            # ...do something with temp
+
+    internally, this is transformed to approximately the following:
+
+    .. code-block:: python
+
+        @njit(parallel=True)
+        def test(n):
+            for i in prange(n):
+                temp = np.empty((50, 50))  # <--- np.zeros() is rewritten as np.empty()
+                temp[:] = 0                # <--- and then a zero initialisation
+                for j in range(50):
+                    temp[j, j] = i
+
+            # ...do something with temp
+
+    then after hoisting:
+
+    .. code-block:: python
+
+        @njit(parallel=True)
+        def test(n):
+            temp = np.empty((50, 50))  # <--- allocation is hoisted as a loop invariant as `np.empty` is considered pure
+            for i in prange(n):
+                temp[:] = 0            # <--- this remains as assignment is a side effect
+                for j in range(50):
+                    temp[j, j] = i
+
+            # ...do something with temp
+
+    it can be seen that the ``np.zeros`` allocation is split into an allocation
+    and an assignment, and then the allocation is hoisted out of the loop in
+    ``i``, thus producing more efficient code as the allocation only occurs
+    once.
+
+The parallel diagnostics report sections
+----------------------------------------
+
+The report is split into the following sections:
+
+#. Code annotation
+    This is the first section and contains the source code of the decorated
+    function with loops that have parallel semantics identified and enumerated.
+    The ``loop #ID`` column on the right of the source code lines up with
+    identified parallel loops. From the example, ``#0`` is ``np.sin``, ``#1``
+    is ``np.cos`` and ``#2`` and ``#3`` are ``prange()``:
+
+    .. code-block:: python
+
+        Parallel loop listing for Function test, example.py (4)
+        --------------------------------------|loop #ID
+        @njit(parallel=True)                  |
+        def test(x):                          |
+            n = x.shape[0]                    |
+            a = np.sin(x)---------------------| #0
+            b = np.cos(a * a)-----------------| #1
+            acc = 0                           |
+            for i in prange(n - 2):-----------| #3
+                for j in prange(n - 1):-------| #2
+                    acc += b[i] + b[j + 1]    |
+            return acc                        |
+
+    It is worth noting that the loop IDs are enumerated in the order they are
+    discovered which is not necessarily the same order as present in the source.
+    Further, it should also be noted that the parallel transforms use a static
+    counter for loop ID indexing. As a consequence it is possible for the loop
+    ID index to not start at 0 due to use of the same counter for internal
+    optimizations/transforms taking place that are invisible to the user.
+
+#. Fusing loops
+    This section describes the attempts made at fusing discovered
+    loops noting which succeeded and which failed. In the case of failure to
+    fuse a reason is given (e.g. dependency on other data). From the example:
+
+    .. code-block:: text
+
+        --------------------------------- Fusing loops ---------------------------------
+        Attempting fusion of parallel loops (combines loops with similar properties)...
+        Trying to fuse loops #0 and #1:
+            - fusion succeeded: parallel for-loop #1 is fused into for-loop #0.
+        Trying to fuse loops #0 and #3:
+            - fusion failed: loop dimension mismatched in axis 0. slice(0, x_size0.1, 1)
+        != slice(0, $40.4, 1)
+
+    It can be seen that fusion of loops ``#0`` and ``#1`` was attempted and this
+    succeeded (both are based on the same dimensions of ``x``). Following the
+    successful fusion of ``#0`` and ``#1``, fusion was attempted between ``#0``
+    (now including the fused ``#1`` loop) and ``#3``.
This fusion failed because + there is a loop dimension mismatch, ``#0`` is size ``x.shape`` whereas + ``#3`` is size ``x.shape[0] - 2``. + +#. Before Optimization + This section shows the structure of the parallel regions in the code before + any optimization has taken place, but with loops associated with their final + parallel region (this is to make before/after optimization output directly + comparable). Multiple parallel regions may exist if there are loops which + cannot be fused, in this case code within each region will execute in + parallel, but each parallel region will run sequentially. From the example: + + .. code-block:: text + + Parallel region 0: + +--0 (parallel) + +--1 (parallel) + + + Parallel region 1: + +--3 (parallel) + +--2 (parallel) + + As alluded to by the `Fusing loops` section, there are necessarily two + parallel regions in the code. The first contains loops ``#0`` and ``#1``, + the second contains ``#3`` and ``#2``, all loops are marked ``parallel`` as + no optimization has taken place yet. + +#. After Optimization + This section shows the structure of the parallel regions in the code after + optimization has taken place. Again, parallel regions are enumerated with + their corresponding loops but this time loops which are fused or serialized + are noted and a summary is presented. From the example: + + .. code-block:: text + + Parallel region 0: + +--0 (parallel, fused with loop(s): 1) + + + Parallel region 1: + +--3 (parallel) + +--2 (serial) + + Parallel region 0 (loop #0) had 1 loop(s) fused. + + Parallel region 1 (loop #3) had 0 loop(s) fused and 1 loop(s) serialized as part + of the larger parallel loop (#3). + + + It can be noted that parallel region 0 contains loop ``#0`` and, as seen in + the `fusing loops` section, loop ``#1`` is fused into loop ``#0``. It can + also be noted that parallel region 1 contains loop ``#3`` and that loop + ``#2`` (the inner ``prange()``) has been serialized for execution in the + body of loop ``#3``. + +#. Loop invariant code motion + This section shows for each loop, after optimization has occurred: + + * the instructions that failed to be hoisted and the reason for failure + (dependency/impure). + * the instructions that were hoisted. + * any allocation hoisting that may have occurred. + + From the example: + + .. code-block:: text + + Instruction hoisting: + loop #0: + Failed to hoist the following: + dependency: $arg_out_var.10 = getitem(value=x, index=$parfor__index_5.99) + dependency: $0.6.11 = getattr(value=$0.5, attr=sin) + dependency: $expr_out_var.9 = call $0.6.11($arg_out_var.10, func=$0.6.11, args=[Var($arg_out_var.10, example.py (7))], kws=(), vararg=None) + dependency: $arg_out_var.17 = $expr_out_var.9 * $expr_out_var.9 + dependency: $0.10.20 = getattr(value=$0.9, attr=cos) + dependency: $expr_out_var.16 = call $0.10.20($arg_out_var.17, func=$0.10.20, args=[Var($arg_out_var.17, example.py (8))], kws=(), vararg=None) + loop #3: + Has the following hoisted: + $const58.3 = const(int, 1) + $58.4 = _n_23 - $const58.3 + + The first thing to note is that this information is for advanced users as it + refers to the :term:`Numba IR` of the function being transformed. As an + example, the expression ``a * a`` in the example source partly translates to + the expression ``$arg_out_var.17 = $expr_out_var.9 * $expr_out_var.9`` in + the IR, this clearly cannot be hoisted out of ``loop #0`` because it is not + loop invariant! 
+Whereas in ``loop #3``, the expression
+``$const58.3 = const(int, 1)`` comes from the source ``b[j + 1]``; the
+number ``1`` is clearly a constant and so can be hoisted out of the loop.
+
+.. _numba-parallel-scheduling:
+
+Scheduling
+==========
+
+By default, Numba divides the iterations of a parallel region into approximately equal
+sized chunks and gives one such chunk to each configured thread.
+(See :ref:`setting_the_number_of_threads`).
+This scheduling approach is equivalent to OpenMP's static schedule with no specified
+chunk size and is appropriate when the work required for each iteration is nearly constant.
+Conversely, if the work required per iteration, as shown in the ``prange`` loop below,
+varies significantly then this static
+scheduling approach can lead to load imbalances and longer execution times.
+
+.. literalinclude:: ../../../numba/tests/doc_examples/test_parallel_chunksize.py
+   :language: python
+   :caption: from ``test_unbalanced_example`` of ``numba/tests/doc_examples/test_parallel_chunksize.py``
+   :start-after: magictoken.ex_unbalanced.begin
+   :end-before: magictoken.ex_unbalanced.end
+   :dedent: 12
+   :linenos:
+
+In such cases,
+Numba provides a mechanism to control how many iterations of a parallel region
+(i.e., the chunk size) go into each chunk.
+Numba then computes the number of required chunks, which is
+equal to the number of iterations divided by the chunk size, truncated to the nearest
+integer. All of these chunks are then approximately equally sized.
+Numba then gives one such chunk to each configured
+thread as above and when a thread finishes a chunk, Numba gives that thread the next
+available chunk.
+This scheduling approach is similar to OpenMP's dynamic scheduling
+option with the specified chunk size.
+(Note that Numba is only capable of supporting this dynamic scheduling
+of parallel regions if the underlying Numba threading backend,
+:ref:`numba-threading-layer`, is also capable of dynamic scheduling.
+At the moment, only the ``tbb`` backend is capable of dynamic
+scheduling and so is required if any performance
+benefit is to be achieved from this chunk size selection mechanism.)
+To minimize execution time, the programmer must
+pick a chunk size that strikes a balance between greater load balancing with smaller
+chunk sizes and less scheduling overhead with larger chunk sizes.
+See :ref:`chunk-details-label` for additional details on the internal implementation
+of chunk sizes.
+
+The number of iterations of a parallel region in a chunk is stored as a thread-local
+variable and can be set using
+:func:`numba.set_parallel_chunksize`. This function takes one integer parameter
+whose value must be greater than
+or equal to 0. A value of 0 is the default value and instructs Numba to use the
+static scheduling approach above. Values greater than 0 instruct Numba to use that value
+as the chunk size in the dynamic scheduling approach described above.
+:func:`numba.set_parallel_chunksize` returns the previous value of the chunk size.
+The current value of this thread local variable is used as the chunk size for all
+subsequent parallel regions invoked by this thread.
+However, upon entering a parallel region, Numba sets the chunk size thread local variable
+for each of the threads executing that parallel region back to the default of 0,
+since it is unlikely
+that any nested parallel regions would require the same chunk size. If the same thread
+is used to execute a sequential and parallel region then that thread's chunk size
+variable is set to 0 at the beginning of the parallel region and restored to
+its original value upon exiting the parallel region.
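+
+As a minimal sketch of the basic save/set/restore pattern (an illustration
+added here, not one of the documented test examples)::
+
+    from numba import njit, prange, set_parallel_chunksize, get_parallel_chunksize
+    import numpy as np
+
+    @njit(parallel=True)
+    def double(x):
+        r = np.empty_like(x)
+        for i in prange(len(x)):
+            r[i] = 2.0 * x[i]
+        return r
+
+    old = set_parallel_chunksize(8)  # returns the previous chunk size
+    double(np.arange(1000.))         # runs with chunks of 8 iterations
+    print(get_parallel_chunksize())  # 8, in this (interpreter) thread
+    set_parallel_chunksize(old)      # restore the previous value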
+This behavior is demonstrated in ``func1`` in the example below in that the
+reported chunk size inside the ``prange`` parallel region is 0 but is 4 outside
+the parallel region. Note that if the ``prange`` is not executed in parallel for
+any reason (e.g., setting ``parallel=False``) then the chunk size reported inside
+the non-parallel prange would be 4.
+This behavior may initially be counterintuitive to programmers as it differs from
+how thread local variables typically behave in other languages.
+A programmer may use
+the chunk size API described in this section within the threads executing a parallel
+region if they wish to specify a chunk size for any nested parallel regions
+that may be launched.
+The current value of the parallel chunk size can be obtained by calling
+:func:`numba.get_parallel_chunksize`.
+Both of these functions can be used from standard Python and from within Numba JIT compiled functions
+as shown below. Both invocations of ``func1`` would be executed with a chunk size of 4 whereas
+``func2`` would use a chunk size of 8.
+
+.. literalinclude:: ../../../numba/tests/doc_examples/test_parallel_chunksize.py
+   :language: python
+   :caption: from ``test_chunksize_manual`` of ``numba/tests/doc_examples/test_parallel_chunksize.py``
+   :start-after: magictoken.ex_chunksize_manual.begin
+   :end-before: magictoken.ex_chunksize_manual.end
+   :dedent: 12
+   :linenos:
+
+Since this idiom of saving and restoring is so common, Numba provides the
+:func:`parallel_chunksize` context manager to simplify the idiom.
+As shown below, this with clause can be invoked from both standard Python and
+within Numba JIT compiled functions. As with other Numba context-managers, be
+aware that the raising of exceptions is not supported from within a context managed
+block that is part of a Numba JIT compiled function.
+
+.. literalinclude:: ../../../numba/tests/doc_examples/test_parallel_chunksize.py
+   :language: python
+   :caption: from ``test_chunksize_with`` of ``numba/tests/doc_examples/test_parallel_chunksize.py``
+   :start-after: magictoken.ex_chunksize_with.begin
+   :end-before: magictoken.ex_chunksize_with.end
+   :dedent: 12
+   :linenos:
+
+Note that these functions to set the chunk size only have an effect on
+Numba automatic parallelization with the :ref:`parallel_jit_option` option.
+Chunk size specification has no effect on the :func:`~numba.vectorize` decorator
+or the :func:`~numba.guvectorize` decorator.
+
+.. seealso:: :ref:`parallel_jit_option`, :ref:`Parallel FAQs `
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/performance-tips.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/performance-tips.rst
new file mode 100644
index 000000000..688071dec
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/performance-tips.rst
@@ -0,0 +1,248 @@
+.. _performance-tips:
+
+Performance Tips
+================
+
+This is a short guide to features present in Numba that can help with obtaining
+the best performance from code. Two examples are used; both are entirely
+contrived and exist purely for pedagogical reasons to motivate discussion.
+The first is the computation of the trigonometric identity
+``cos(x)^2 + sin(x)^2``; the second is a simple element wise square root of a
+vector with reduction over summation. All performance numbers are indicative
+only and unless otherwise stated were taken from running on an Intel ``i7-4790``
+CPU (4 hardware threads) with an input of ``np.arange(1.e7)``.
+
+.. note::
+    A reasonably effective approach to achieving high performance code is to
+    profile the code running with real data and use that to guide performance
+    tuning. The information presented here is to demonstrate features, not to act
+    as canonical guidance!
+
+No Python mode vs Object mode
+-----------------------------
+
+A common pattern is to decorate functions with ``@jit`` as this is the most
+flexible decorator offered by Numba. ``@jit`` essentially encompasses two modes
+of compilation: first it will try to compile the decorated function in no
+Python mode; if this fails it will try again to compile the function using
+object mode. Whilst the use of looplifting in object mode can enable some
+performance increase, getting functions to compile under no Python mode is
+really the key to good performance. To make it such that only no Python mode is
+used, and an exception is raised if compilation fails, the decorators ``@njit``
+and ``@jit(nopython=True)`` can be used (the first is an alias of the
+second for convenience).
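+
+For illustration, a minimal sketch of the difference (the function names are
+hypothetical; ``object()`` is just one construct that no Python mode does not
+support)::
+
+    from numba import njit
+
+    @njit
+    def supported(x):
+        return x + 1  # compiles in no Python mode
+
+    print(supported(1))  # 2
+
+    @njit
+    def unsupported(x):
+        return object()  # object() is not supported in no Python mode
+
+    try:
+        unsupported(1)
+    except Exception as e:
+        # with @njit there is no fall back to object mode; a TypingError
+        # is raised at compile time instead
+        print(type(e).__name__)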
+
+Loops
+-----
+Whilst NumPy has developed a strong idiom around the use of vector operations,
+Numba is perfectly happy with loops too. For users familiar with C or Fortran,
+writing Python in this style will work fine in Numba (after all, LLVM gets a
+lot of use in compiling C lineage languages). For example::
+
+    @njit
+    def ident_np(x):
+        return np.cos(x) ** 2 + np.sin(x) ** 2
+
+    @njit
+    def ident_loops(x):
+        r = np.empty_like(x)
+        n = len(x)
+        for i in range(n):
+            r[i] = np.cos(x[i]) ** 2 + np.sin(x[i]) ** 2
+        return r
+
+The above run at almost identical speeds when decorated with ``@njit``; without
+the decorator the vectorized function is a couple of orders of magnitude faster.
+
++-----------------+-------+----------------+
+| Function Name   | @njit | Execution time |
++=================+=======+================+
+| ``ident_np``    | No    | 0.581s         |
++-----------------+-------+----------------+
+| ``ident_np``    | Yes   | 0.659s         |
++-----------------+-------+----------------+
+| ``ident_loops`` | No    | 25.2s          |
++-----------------+-------+----------------+
+| ``ident_loops`` | Yes   | 0.670s         |
++-----------------+-------+----------------+
+
+.. _fast-math:
+
+Fastmath
+--------
+In certain classes of applications strict IEEE 754 compliance is less
+important. As a result it is possible to relax some numerical rigour with a
+view to gaining additional performance. The way to achieve this behaviour in
+Numba is through the use of the ``fastmath`` keyword argument::
+
+    @njit(fastmath=False)
+    def do_sum(A):
+        acc = 0.
+        # without fastmath, this loop must accumulate in strict order
+        for x in A:
+            acc += np.sqrt(x)
+        return acc
+
+    @njit(fastmath=True)
+    def do_sum_fast(A):
+        acc = 0.
+        # with fastmath, the reduction can be vectorized as floating point
+        # reassociation is permitted.
+        for x in A:
+            acc += np.sqrt(x)
+        return acc
+
++-----------------+-----------------+
+| Function Name   | Execution time  |
++=================+=================+
+| ``do_sum``      | 35.2 ms         |
++-----------------+-----------------+
+| ``do_sum_fast`` | 17.8 ms         |
++-----------------+-----------------+
+
+In some cases you may wish to opt in to only a subset of possible fast-math
+optimizations. This can be done by supplying a set of `LLVM fast-math flags
+`_ to ``fastmath``::
+
+    def add_assoc(x, y):
+        return (x - y) + y
+
+    print(njit(fastmath=False)(add_assoc)(0, np.inf))               # nan
+    print(njit(fastmath=True)(add_assoc)(0, np.inf))                # 0.0
+    print(njit(fastmath={'reassoc', 'nsz'})(add_assoc)(0, np.inf))  # 0.0
+    print(njit(fastmath={'reassoc'})(add_assoc)(0, np.inf))         # nan
+    print(njit(fastmath={'nsz'})(add_assoc)(0, np.inf))             # nan
+
+
+Parallel=True
+-------------
+If code contains operations that are parallelisable (:ref:`and supported
+`) Numba can compile a version that will run in
+parallel on multiple native threads (no GIL!). This parallelisation is performed
+automatically and is enabled by simply adding the ``parallel`` keyword
+argument::
+
+    @njit(parallel=True)
+    def ident_parallel(x):
+        return np.cos(x) ** 2 + np.sin(x) ** 2
+
+
+Execution times are as follows:
+
++--------------------+-----------------+
+| Function Name      | Execution time  |
++====================+=================+
+| ``ident_parallel`` | 112 ms          |
++--------------------+-----------------+
+
+
+The execution speed of this function with ``parallel=True`` present is
+approximately 5x that of the NumPy equivalent and 6x that of standard
+``@njit``.
+
+
+Numba parallel execution also has support for explicit parallel loop
+declaration similar to that in OpenMP. To indicate that a loop should be
+executed in parallel the ``numba.prange`` function should be used; this function
+behaves like Python ``range`` and if ``parallel=True`` is not set it acts
+simply as an alias of ``range``. Loops induced with ``prange`` can be used for
+embarrassingly parallel computation and also reductions.
+
+Revisiting the reduce over sum example, assuming it is safe for the sum to be
+accumulated out of order, the loop in ``n`` can be parallelised through the use
+of ``prange``. Further, the ``fastmath=True`` keyword argument can be added
+without concern in this case as the assumption that out of order execution is
+valid has already been made through the use of ``parallel=True`` (as each thread
+computes a partial sum).
+::
+
+    @njit(parallel=True)
+    def do_sum_parallel(A):
+        # each thread can accumulate its own partial sum, and then a cross
+        # thread reduction is performed to obtain the result to return
+        n = len(A)
+        acc = 0.
+        for i in prange(n):
+            acc += np.sqrt(A[i])
+        return acc
+
+    @njit(parallel=True, fastmath=True)
+    def do_sum_parallel_fast(A):
+        n = len(A)
+        acc = 0.
+        for i in prange(n):
+            acc += np.sqrt(A[i])
+        return acc
+
+
+Execution times are as follows; ``fastmath`` again improves performance.
+
++--------------------------+-----------------+
+| Function Name            | Execution time  |
++==========================+=================+
+| ``do_sum_parallel``      | 9.81 ms         |
++--------------------------+-----------------+
+| ``do_sum_parallel_fast`` | 5.37 ms         |
++--------------------------+-----------------+
+
+.. _intel-svml:
+
+Intel SVML
+----------
+
+Intel provides a short vector math library (SVML) that contains a large number
+of optimised transcendental functions available for use as compiler
+intrinsics.
+If the ``icc_rt`` package is present in the environment (or the SVML
+libraries are simply locatable!) then Numba automatically configures the LLVM
+back end to use the SVML intrinsic functions wherever possible. SVML provides
+both high and low accuracy versions of each intrinsic and the version that is
+used is determined through the use of the ``fastmath`` keyword. The default is
+to use high accuracy which is accurate to within ``1 ULP``, however if
+``fastmath`` is set to ``True`` then the lower accuracy versions of the
+intrinsics are used (answers to within ``4 ULP``).
+
+
+First obtain SVML, using conda for example::
+
+    conda install -c numba icc_rt
+
+Rerunning the identity function example ``ident_np`` from above with various
+combinations of options to ``@njit`` and with/without SVML yields the following
+performance results (input size ``np.arange(1.e8)``). For reference, with just
+NumPy the function executed in ``5.84s``:
+
++-----------------------------------+--------+-------------------+
+| ``@njit`` kwargs                  | SVML   | Execution time    |
++===================================+========+===================+
+| ``None``                          | No     | 5.95s             |
++-----------------------------------+--------+-------------------+
+| ``None``                          | Yes    | 2.26s             |
++-----------------------------------+--------+-------------------+
+| ``fastmath=True``                 | No     | 5.97s             |
++-----------------------------------+--------+-------------------+
+| ``fastmath=True``                 | Yes    | 1.8s              |
++-----------------------------------+--------+-------------------+
+| ``parallel=True``                 | No     | 1.36s             |
++-----------------------------------+--------+-------------------+
+| ``parallel=True``                 | Yes    | 0.624s            |
++-----------------------------------+--------+-------------------+
+| ``parallel=True, fastmath=True``  | No     | 1.32s             |
++-----------------------------------+--------+-------------------+
+| ``parallel=True, fastmath=True``  | Yes    | 0.576s            |
++-----------------------------------+--------+-------------------+
+
+It is evident that SVML significantly increases the performance of this
+function. The impact of ``fastmath`` in the case of SVML not being present is
+zero; this is expected as there is nothing in the original function that would
+benefit from relaxing numerical strictness.
+
+Linear algebra
+--------------
+Numba supports most of ``numpy.linalg`` in no Python mode. The internal
+implementation relies on a LAPACK and BLAS library to do the numerical work
+and it obtains the bindings for the necessary functions from SciPy. Therefore,
+to achieve good performance in ``numpy.linalg`` functions with Numba it is
+necessary to use a SciPy built against a well optimised LAPACK/BLAS library.
+In the case of the Anaconda distribution SciPy is built against Intel's MKL
+which is highly optimised and as a result Numba makes use of this performance.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/pycc.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/pycc.rst
new file mode 100644
index 000000000..b0f1275a5
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/pycc.rst
@@ -0,0 +1,140 @@
+
+============================
+Compiling code ahead of time
+============================
+
+.. _pycc:
+
+While Numba's main use case is :term:`Just-in-Time compilation`, it also
+provides a facility for :term:`Ahead-of-Time compilation` (AOT).
+
+
+Overview
+========
+
+Benefits
+--------
+
+#.
AOT compilation produces a compiled extension module which does not depend + on Numba: you can distribute the module on machines which do not have + Numba installed (but Numpy is required). + +#. There is no compilation overhead at runtime (but see the + ``@jit`` :ref:`cache ` option), nor any overhead of importing + Numba. + +.. seealso:: + Compiled extension modules are discussed in the + `Python packaging user guide `_. + + +Limitations +----------- + +#. AOT compilation only allows for regular functions, not :term:`ufuncs `. + +#. You have to specify function signatures explicitly. + +#. Each exported function can have only one signature (but you can export + several different signatures under different names). + +#. Exported functions do not check the types of the arguments that are passed + to them; the caller is expected to provide arguments of the correct type. + +#. AOT compilation produces generic code for your CPU's architectural family + (for example "x86-64"), while JIT compilation produces code optimized + for your particular CPU model. + + +Usage +===== + +Standalone example +------------------ + +:: + + from numba.pycc import CC + + cc = CC('my_module') + # Uncomment the following line to print out the compilation steps + #cc.verbose = True + + @cc.export('multf', 'f8(f8, f8)') + @cc.export('multi', 'i4(i4, i4)') + def mult(a, b): + return a * b + + @cc.export('square', 'f8(f8)') + def square(a): + return a ** 2 + + if __name__ == "__main__": + cc.compile() + + +If you run this Python script, it will generate an extension module named +``my_module``. Depending on your platform, the actual filename may be +``my_module.so``, ``my_module.pyd``, ``my_module.cpython-34m.so``, etc. + +The generated module has three functions: ``multf``, ``multi`` and ``square``. +``multi`` operates on 32-bit integers (``i4``), while ``multf`` and ``square`` +operate on double-precision floats (``f8``):: + + >>> import my_module + >>> my_module.multi(3, 4) + 12 + >>> my_module.square(1.414) + 1.9993959999999997 + + +Distutils integration +--------------------- + +You can also integrate the compilation step for your extension modules +in your ``setup.py`` script, using distutils or setuptools:: + + from distutils.core import setup + + from source_module import cc + + setup(..., + ext_modules=[cc.distutils_extension()]) + + +The ``source_module`` above is the module defining the ``cc`` object. +Extensions compiled like this will be automatically included in the +build files for your Python project, so you can distribute them inside +binary packages such as wheels or Conda packages. Note that in the case of +using conda, the compilers used for AOT need to be those that are available +in the Anaconda distribution. + + +Signature syntax +---------------- + +The syntax for exported signatures is the same as in the ``@jit`` +decorator. You can read more about it in the :ref:`types ` +reference. + +Here is an example of exporting an implementation of the second-order +centered difference on a 1d array:: + + @cc.export('centdiff_1d', 'f8[:](f8[:], f8)') + def centdiff_1d(u, dx): + D = np.empty_like(u) + D[0] = 0 + D[-1] = 0 + for i in range(1, len(D) - 1): + D[i] = (u[i+1] - 2 * u[i] + u[i-1]) / dx**2 + return D + +.. (example from http://nbviewer.ipython.org/gist/ketch/ae87a94f4ef0793d5d52) + +You can also omit the return type, which will then be inferred by Numba:: + + @cc.export('centdiff_1d', '(f8[:], f8)') + def centdiff_1d(u, dx): + # Same code as above + ... 
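+
+A hedged usage sketch, assuming ``centdiff_1d`` was exported into the
+``my_module`` extension shown earlier and the module has been compiled::
+
+    >>> import numpy as np
+    >>> import my_module
+    >>> u = np.sin(np.linspace(0.0, np.pi, 100))
+    >>> d2u = my_module.centdiff_1d(u, np.pi / 99)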
+ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/stencil.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/stencil.rst new file mode 100644 index 000000000..6888a556f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/stencil.rst @@ -0,0 +1,255 @@ +.. Copyright (c) 2017 Intel Corporation + SPDX-License-Identifier: BSD-2-Clause + +.. _numba-stencil: + +================================ +Using the ``@stencil`` decorator +================================ + +Stencils are a common computational pattern in which array elements +are updated according to some fixed pattern called the stencil kernel. +Numba provides the ``@stencil`` decorator so that users may +easily specify a stencil kernel and Numba then generates the looping +code necessary to apply that kernel to some input array. Thus, the +stencil decorator allows clearer, more concise code and in conjunction +with :ref:`the parallel jit option ` enables higher +performance through parallelization of the stencil execution. + + +Basic usage +=========== + +An example use of the ``@stencil`` decorator:: + + from numba import stencil + + @stencil + def kernel1(a): + return 0.25 * (a[0, 1] + a[1, 0] + a[0, -1] + a[-1, 0]) + +The stencil kernel is specified by what looks like a standard Python +function definition but there are different semantics with +respect to array indexing. +Stencils produce an output array of the same size and shape as the +input array although depending on the kernel definition may have a +different type. +Conceptually, the stencil kernel is run once for each element in the +output array. The return value from the stencil kernel is the value +written into the output array for that particular element. + +The parameter ``a`` represents the input array over which the +kernel is applied. +Indexing into this array takes place with respect to the current element +of the output array being processed. For example, if element ``(x, y)`` +is being processed then ``a[0, 0]`` in the stencil kernel corresponds to +``a[x + 0, y + 0]`` in the input array. Similarly, ``a[-1, 1]`` in the stencil +kernel corresponds to ``a[x - 1, y + 1]`` in the input array. + +Depending on the specified kernel, the kernel may not be applicable to the +borders of the output array as this may cause the input array to be +accessed out-of-bounds. The way in which the stencil decorator handles +this situation is dependent upon which :ref:`stencil-mode` is selected. +The default mode is for the stencil decorator to set the border elements +of the output array to zero. + +To invoke a stencil on an input array, call the stencil as if it were +a regular function and pass the input array as the argument. 
+For example, using
+the kernel defined above::
+
+    >>> import numpy as np
+    >>> input_arr = np.arange(100).reshape((10, 10))
+    >>> input_arr
+    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
+           [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
+           [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
+           [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
+           [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
+           [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
+           [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
+           [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
+           [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
+           [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])
+    >>> output_arr = kernel1(input_arr)
+    >>> output_arr
+    array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
+           [ 0., 11., 12., 13., 14., 15., 16., 17., 18.,  0.],
+           [ 0., 21., 22., 23., 24., 25., 26., 27., 28.,  0.],
+           [ 0., 31., 32., 33., 34., 35., 36., 37., 38.,  0.],
+           [ 0., 41., 42., 43., 44., 45., 46., 47., 48.,  0.],
+           [ 0., 51., 52., 53., 54., 55., 56., 57., 58.,  0.],
+           [ 0., 61., 62., 63., 64., 65., 66., 67., 68.,  0.],
+           [ 0., 71., 72., 73., 74., 75., 76., 77., 78.,  0.],
+           [ 0., 81., 82., 83., 84., 85., 86., 87., 88.,  0.],
+           [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])
+    >>> input_arr.dtype
+    dtype('int64')
+    >>> output_arr.dtype
+    dtype('float64')
+
+Note that the stencil decorator has determined that the output type
+of the specified stencil kernel is ``float64`` and has thus created the
+output array as ``float64`` while the input array is of type ``int64``.
+
+Stencil Parameters
+==================
+
+Stencil kernel definitions may take any number of arguments with
+the following provisions. The first argument must be an array.
+The size and shape of the output array will be the same as that of the
+first argument. Additional arguments may either be scalars or
+arrays. For array arguments, those arrays must be at least as large
+as the first argument (array) in each dimension. Array indexing is relative for
+all such input array arguments.
+
+.. _stencil-kernel-shape-inference:
+
+Kernel shape inference and border handling
+==========================================
+
+In the above example and in most cases, the array indexing in the
+stencil kernel will exclusively use ``Integer`` literals.
+In such cases, the stencil decorator is able to analyze the stencil
+kernel to determine its size. In the above example, the stencil
+decorator determines that the kernel is ``3 x 3`` in shape since indices
+``-1`` to ``1`` are used for both the first and second dimensions. Note that
+the stencil decorator also correctly handles non-symmetric and
+non-square stencil kernels.
+
+Based on the size of the stencil kernel, the stencil decorator is
+able to compute the size of the border in the output array. If
+applying the kernel to some element of the input array would cause
+an index to be out-of-bounds then that element belongs to the border
+of the output array. In the above example, points ``-1`` and ``+1`` are
+accessed in each dimension and thus the output array has a border
+of size one in all dimensions.
+
+The parallel mode is able to infer kernel indices as constants from
+simple expressions if possible. For example::
+
+    @njit(parallel=True)
+    def stencil_test(A):
+        c = 2
+        B = stencil(
+            lambda a, c: 0.3 * (a[-c+1] + a[0] + a[c-1]))(A, c)
+        return B
+
+
+Stencil decorator options
+=========================
+
+.. note::
+    The stencil decorator may be augmented in the future to provide additional
+    mechanisms for border handling. At present, only one behaviour is
+    implemented, ``"constant"`` (see ``func_or_mode`` below for details).
+
+.. _stencil-neighborhood:
+
+``neighborhood``
+----------------
+
+Sometimes it may be inconvenient to write the stencil kernel
+exclusively with ``Integer`` literals. For example, let us say we
+would like to compute the trailing 30-day moving average of a
+time series of data. One could write
+``(a[-29] + a[-28] + ... + a[-1] + a[0]) / 30`` but the stencil
+decorator offers a more concise form using the ``neighborhood``
+option::
+
+    @stencil(neighborhood = ((-29, 0),))
+    def kernel2(a):
+        cumul = 0
+        for i in range(-29, 1):
+            cumul += a[i]
+        return cumul / 30
+
+The neighborhood option is a tuple of tuples. The outer tuple's
+length is equal to the number of dimensions of the input array.
+The inner tuple's lengths are always two because
+each element of the inner tuple corresponds to minimum and
+maximum index offsets used in the corresponding dimension.
+
+If a user specifies a neighborhood but the kernel accesses elements outside the
+specified neighborhood, **the behavior is undefined.**
+
+.. _stencil-mode:
+
+``func_or_mode``
+----------------
+
+The optional ``func_or_mode`` parameter controls how the border of the output array
+is handled. Currently, there is only one supported value, ``"constant"``.
+In ``constant`` mode, the stencil kernel is not applied in cases where
+the kernel would access elements outside the valid range of the input
+array. In such cases, those elements in the output array are assigned
+to a constant value, as specified by the ``cval`` parameter.
+
+``cval``
+--------
+
+The optional ``cval`` parameter defaults to zero but can be set to any
+desired value, which is then used for the border of the output array
+if the ``func_or_mode`` parameter is set to ``constant``. The ``cval`` parameter is
+ignored in all other modes. The type of the ``cval`` parameter must match
+the return type of the stencil kernel. If the user wishes the output
+array to be constructed from a particular type then they should ensure
+that the stencil kernel returns that type.
+
+``standard_indexing``
+---------------------
+
+By default, all array accesses in a stencil kernel are processed as
+relative indices as described above. However, sometimes it may be
+advantageous to pass an auxiliary array (e.g. an array of weights)
+to a stencil kernel and have that array use standard Python indexing
+rather than relative indexing. For this purpose, there is the
+stencil decorator option ``standard_indexing`` whose value is a
+collection of strings whose names match those parameters to the
+stencil function that are to be accessed with standard Python indexing
+rather than relative indexing::
+
+    @stencil(standard_indexing=("b",))
+    def kernel3(a, b):
+        return a[-1] * b[0] + a[0] + b[1]
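+
+A hedged usage sketch of ``kernel3`` (the array contents below are
+illustrative assumptions added here)::
+
+    import numpy as np
+
+    a = np.arange(10.)
+    b = np.array([2.0, 3.0])  # accessed with standard indexing: b[0], b[1]
+    result = kernel3(a, b)    # `a` is still indexed relative to each element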
+
+``StencilFunc``
+===============
+
+The stencil decorator returns a callable object of type ``StencilFunc``.
+``StencilFunc`` objects contain a number of attributes but the only one of
+potential interest to users is the ``neighborhood`` attribute.
+If the ``neighborhood`` option was passed to the stencil decorator then
+the provided neighborhood is stored in this attribute. Else, upon
+first execution or compilation, the system calculates the neighborhood
+as described above and then stores the computed neighborhood into this
+attribute. A user may then inspect the attribute if they wish to verify
+that the calculated neighborhood is correct.
+
+Stencil invocation options
+==========================
+
+Internally, the stencil decorator transforms the specified stencil
+kernel into a regular Python function. This function will have the
+same parameters as specified in the stencil kernel definition but will
+also include the following optional parameter.
+
+.. _stencil-function-out:
+
+``out``
+-------
+
+The optional ``out`` parameter is added to every stencil function
+generated by Numba. If specified, the ``out`` parameter tells
+Numba that the user is providing their own pre-allocated array
+to be used for the output of the stencil. In this case, the
+stencil function will not allocate its own output array.
+Users should ensure that the return type of the stencil kernel can
+be safely cast to the element-type of the user-specified output array
+following the `NumPy ufunc casting rules`_.
+
+.. _`NumPy ufunc casting rules`: http://docs.scipy.org/doc/numpy/reference/ufuncs.html#casting-rules
+
+An example usage is shown below::
+
+    >>> import numpy as np
+    >>> input_arr = np.arange(100).reshape((10, 10))
+    >>> output_arr = np.full(input_arr.shape, 0.0)
+    >>> kernel1(input_arr, out=output_arr)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/talks.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/talks.rst
new file mode 100644
index 000000000..ef35ae944
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/talks.rst
@@ -0,0 +1,36 @@
+
+Talks and Tutorials
+===================
+
+.. note:: This is a selection of talks and tutorials that have been given by members of
+          the Numba team as well as Numba users. If you know of a Numba-related talk
+          that should be included on this list, please `open an issue `_.
+
+Talks on Numba
+--------------
+
+* AnacondaCON 2018 - Accelerating Scientific Workloads with Numba - Siu Kwan Lam (`Video `__)
+* `DIANA-HEP Meeting, 23 April 2018 `__ - Overview of Numba - Stan Seibert
+
+Talks on Applications of Numba
+------------------------------
+
+* GPU Technology Conference 2016 - Accelerating a Spectral Algorithm for Plasma Physics with Python/Numba on GPU - Manuel Kirchen & Rémi Lehe (`Slides `__)
+* `DIANA-HEP Meeting, 23 April 2018 `_ - Use of Numba in XENONnT - Chris Tunnell
+* `DIANA-HEP Meeting, 23 April 2018 `_ - Extending Numba for HEP data types - Jim Pivarski
+* STAC Summit, Nov 1 2017 - Scaling High-Performance Python with Minimal Effort - Ehsan Totoni (`Video `__, `Slides `__)
+* SciPy 2018 - UMAP: Uniform Manifold Approximation and Projection for Dimensional Reduction - Leland McInnes (`Video `__, `Github `__)
Korn (`Video `__, `Blog `__)
+* FOSDEM 2019 - Extending Numba - Joris Geessels (`Video, Slides & Examples `__)
+* PyCon India 2019 - Real World Numba: Taking the Path of Least Resistance - Ankit Mahato (`Video `__)
+* SciPy 2019 - How to Accelerate an Existing Codebase with Numba - Siu Kwan Lam & Stanley Seibert (`Video `__)
+* SciPy 2019 - Real World Numba: Creating a Skeleton Analysis Library - Juan Nunez-Iglesias (`Video `__)
+* SciPy 2019 - Fast Gradient Boosting Decision Trees with PyGBM and Numba - Nicholas Hug (`Video `__)
+* PyCon Sweden 2020 - Accelerating Scientific Computing using Numba - Ankit Mahato (`Video `__)
+
+Tutorials
+---------
+
+* SciPy 2017 - Numba: Tell those C++ Bullies to Get Lost - Gil Forsyth & Lorena Barba (`Video `__, `Notebooks `__)
+* GPU Technology Conference 2018 - GPU Computing in Python with Numba - Stan Seibert (`Notebooks `__)
+* PyData Amsterdam 2019 - Create CUDA kernels from Python using Numba and CuPy - Valentin Haenel (`Video `__)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/threading-layer.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/threading-layer.rst
new file mode 100644
index 000000000..6aa5fef43
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/threading-layer.rst
@@ -0,0 +1,313 @@
+.. _numba-threading-layer:
+
+The Threading Layers
+====================
+
+This section is about the Numba threading layer: the library that is used
+internally to perform the parallel execution that occurs through the use of
+the ``parallel`` targets for CPUs, namely:
+
+* The use of the ``parallel=True`` kwarg in ``@jit`` and ``@njit``.
+* The use of the ``target='parallel'`` kwarg in ``@vectorize`` and
+  ``@guvectorize``.
+
+.. note::
+    If a code base does not use the ``threading`` or ``multiprocessing``
+    modules (or any other sort of parallelism) the defaults for the threading
+    layer that ship with Numba will work well, no further action is required!
+
+
+Which threading layers are available?
+-------------------------------------
+There are three threading layers available and they are named as follows:
+
+* ``tbb`` - A threading layer backed by Intel TBB.
+* ``omp`` - A threading layer backed by OpenMP.
+* ``workqueue`` - A simple built-in work-sharing task scheduler.
+
+In practice, the only threading layer guaranteed to be present is ``workqueue``.
+The ``omp`` layer requires the presence of a suitable OpenMP runtime library.
+The ``tbb`` layer requires the presence of Intel's TBB libraries; these can be
+obtained via the conda command::
+
+    $ conda install tbb
+
+If you installed Numba with ``pip``, TBB can be enabled by running::
+
+    $ pip install tbb
+
+Due to compatibility issues with manylinux1 and other portability concerns,
+the OpenMP threading layer is disabled in the Numba binary wheels on PyPI.
+
+.. note::
+    The default manner in which Numba searches for and loads a threading layer
+    is tolerant of missing libraries, incompatible runtimes etc.
+
+
+.. _numba-threading-layer-setting-mech:
+
+Setting the threading layer
+---------------------------
+
+
+The threading layer is set via the environment variable
+``NUMBA_THREADING_LAYER`` or through assignment to
+``numba.config.THREADING_LAYER``. If the programmatic approach to setting the
+threading layer is used it must occur logically before any Numba based
+compilation for a parallel target has occurred.
There are two approaches to +choosing a threading layer, the first is by selecting a threading layer that is +safe under various forms of parallel execution, the second is through explicit +selection via the threading layer name (e.g. ``tbb``). + +Setting the threading layer selection priority +---------------------------------------------- + +By default the threading layers are searched in the order of ``'tbb'``, +``'omp'``, then ``'workqueue'``. To change this search order whilst +maintaining the selection of a threading layer based on availability, the +environment variable :envvar:`NUMBA_THREADING_LAYER_PRIORITY` can be used. + +Note that it can also be set via +:py:data:`numba.config.THREADING_LAYER_PRIORITY`. +Similar to :py:data:`numba.config.THREADING_LAYER`, +it must occur logically before any Numba based +compilation for a parallel target has occurred. + +For example, to instruct Numba to choose ``omp`` first if available, +then ``tbb`` and so on, set the environment variable as +``NUMBA_THREADING_LAYER_PRIORITY="omp tbb workqueue"``. +Or programmatically, +``numba.config.THREADING_LAYER_PRIORITY = ["omp", "tbb", "workqueue"]``. + +Selecting a threading layer for safe parallel execution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Parallel execution is fundamentally derived from core Python libraries in four +forms (the first three also apply to code using parallel execution via other +means!): + +* ``threads`` from the ``threading`` module. +* ``spawn`` ing processes from the ``multiprocessing`` module via ``spawn`` + (default on Windows, only available in Python 3.4+ on Unix) +* ``fork`` ing processes from the ``multiprocessing`` module via ``fork`` + (default on Unix). +* ``fork`` ing processes from the ``multiprocessing`` module through the use of + a ``forkserver`` (only available in Python 3 on Unix). Essentially a new + process is spawned and then forks are made from this new process on request. + +Any library in use with these forms of parallelism must exhibit safe behaviour +under the given paradigm. As a result, the threading layer selection methods +are designed to provide a way to choose a threading layer library that is safe +for a given paradigm in an easy, cross platform and environment tolerant manner. +The options that can be supplied to the +:ref:`setting mechanisms ` are as +follows: + +* ``default`` provides no specific safety guarantee and is the default. +* ``safe`` is both fork and thread safe, this requires the ``tbb`` package + (Intel TBB libraries) to be installed. +* ``forksafe`` provides a fork safe library. +* ``threadsafe`` provides a thread safe library. + +To discover the threading layer that was selected, the function +``numba.threading_layer()`` may be called after parallel execution. For example, +on a Linux machine with no TBB installed:: + + from numba import config, njit, threading_layer + import numpy as np + + # set the threading layer before any parallel target compilation + config.THREADING_LAYER = 'threadsafe' + + @njit(parallel=True) + def foo(a, b): + return a + b + + x = np.arange(10.) + y = x.copy() + + # this will force the compilation of the function, select a threading layer + # and then execute in parallel + foo(x, y) + + # demonstrate the threading layer chosen + print("Threading layer chosen: %s" % threading_layer()) + +which produces:: + + Threading layer chosen: omp + +and this makes sense as GNU OpenMP, as present on Linux, is thread safe. 
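+
+As a further illustration, here is a minimal sketch of the ``forksafe``
+option in combination with ``fork`` based ``multiprocessing`` (the ``inc``
+and ``work`` names and the two-worker pool are illustrative assumptions, not
+from the documentation above, and a Unix system is assumed so that the
+``fork`` start method is available)::
+
+    from multiprocessing import get_context
+
+    import numpy as np
+    from numba import config, njit, threading_layer
+
+    # request a fork safe threading layer before any parallel compilation
+    config.THREADING_LAYER = 'forksafe'
+
+    @njit(parallel=True)
+    def inc(a):
+        return a + 1
+
+    def work(n):
+        return inc(np.arange(n)).sum()
+
+    if __name__ == '__main__':
+        # compile and select the threading layer in the parent process
+        work(4)
+        print("Threading layer chosen: %s" % threading_layer())
+        # the forked children can now safely run the parallel function
+        with get_context('fork').Pool(2) as pool:
+            print(pool.map(work, [10, 20]))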
+ +Selecting a named threading layer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Advanced users may wish to select a specific threading layer for their use case, +this is done by directly supplying the threading layer name to the +:ref:`setting mechanisms `. The options +and requirements are as follows: + ++----------------------+-----------+-------------------------------------------+ +| Threading Layer Name | Platform | Requirements | ++======================+===========+===========================================+ +| ``tbb`` | All | The ``tbb`` package (``$ conda install | +| | | tbb``) | ++----------------------+-----------+-------------------------------------------+ +| ``omp`` | Linux | GNU OpenMP libraries (very likely this | +| | | will already exist) | +| | | | +| | Windows | MS OpenMP libraries (very likely this will| +| | | already exist) | +| | | | +| | OSX | Either the ``intel-openmp`` package or the| +| | | ``llvm-openmp`` package | +| | | (``conda install`` the package as named). | ++----------------------+-----------+-------------------------------------------+ +| ``workqueue`` | All | None | ++----------------------+-----------+-------------------------------------------+ + +Should the threading layer not load correctly Numba will detect this and provide +a hint about how to resolve the problem. It should also be noted that the Numba +diagnostic command ``numba -s`` has a section +``__Threading Layer Information__`` that reports on the availability of +threading layers in the current environment. + + +Extra notes +----------- +The threading layers have fairly complex interactions with CPython internals and +system level libraries, some additional things to note: + +* The installation of Intel's TBB libraries vastly widens the options available + in the threading layer selection process. +* On Linux, the ``omp`` threading layer is not fork safe due to the GNU OpenMP + runtime library (``libgomp``) not being fork safe. If a fork occurs in a + program that is using the ``omp`` threading layer, a detection mechanism is + present that will try and gracefully terminate the forked child and print an + error message to ``STDERR``. +* On systems with the ``fork(2)`` system call available, if the TBB backed + threading layer is in use and a ``fork`` call is made from a thread other than + the thread that launched TBB (typically the main thread) then this results in + undefined behaviour and a warning will be displayed on ``STDERR``. As + ``spawn`` is essentially ``fork`` followed by ``exec`` it is safe to ``spawn`` + from a non-main thread, but as this cannot be differentiated from just a + ``fork`` call the warning message will still be displayed. +* On OSX, the ``intel-openmp`` package is required to enable the OpenMP based + threading layer. + +.. _setting_the_number_of_threads: + +Setting the Number of Threads +----------------------------- + +The number of threads used by numba is based on the number of CPU cores +available (see :obj:`numba.config.NUMBA_DEFAULT_NUM_THREADS`), but it can be +overridden with the :envvar:`NUMBA_NUM_THREADS` environment variable. + +The total number of threads that numba launches is in the variable +:obj:`numba.config.NUMBA_NUM_THREADS`. + +For some use cases, it may be desirable to set the number of threads to a +lower value, so that numba can be used with higher level parallelism. + +The number of threads can be set dynamically at runtime using +:func:`numba.set_num_threads`. 
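+
+For example, a minimal sketch of masking out all but two threads (``foo`` is
+an illustrative name, and the machine is assumed to have at least two
+configured threads)::
+
+    from numba import njit, prange, set_num_threads, get_num_threads
+
+    @njit(parallel=True)
+    def foo(n):
+        acc = 0
+        for i in prange(n):
+            acc += i
+        return acc
+
+    set_num_threads(2)  # limit parallel execution to 2 threads
+    foo(100)
+    print(get_num_threads())  # -> 2
+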
+Note that :func:`~.set_num_threads` only allows
+setting the number of threads to a smaller value than
+:obj:`~.NUMBA_NUM_THREADS`. Numba always launches
+:obj:`numba.config.NUMBA_NUM_THREADS` threads, but :func:`~.set_num_threads`
+causes it to mask out unused threads so they aren't used in computations.
+
+The current number of threads used by numba can be accessed with
+:func:`numba.get_num_threads`. Both functions work inside of a jitted
+function.
+
+.. _numba-threading-layer-thread-masking:
+
+Example of Limiting the Number of Threads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In this example, suppose the machine we are running on has 8 cores (so
+:obj:`numba.config.NUMBA_NUM_THREADS` would be ``8``). Suppose we want to run
+some code with ``@njit(parallel=True)``, but we also want to run our code
+concurrently in 4 different processes. With the default number of threads,
+each Python process would run 8 threads, for a total of 4*8 = 32 threads,
+which is oversubscription for our 8 cores. We should rather limit each process
+to 2 threads, so that the total will be 4*2 = 8, which matches our number of
+physical cores.
+
+There are two ways to do this. One is to set the :envvar:`NUMBA_NUM_THREADS`
+environment variable to ``2``.
+
+.. code:: bash
+
+    $ NUMBA_NUM_THREADS=2 python ourcode.py
+
+However, there are two downsides to this approach:
+
+1. :envvar:`NUMBA_NUM_THREADS` must be set before Numba is imported, and
+   ideally before Python is launched. As soon as Numba is imported the
+   environment variable is read and that number of threads is locked in as the
+   number of threads Numba launches.
+
+2. If we want to later increase the number of threads used by the process, we
+   cannot. :envvar:`NUMBA_NUM_THREADS` sets the *maximum* number of threads
+   that are launched for a process. Calling :func:`~.set_num_threads()` with a
+   value greater than :obj:`numba.config.NUMBA_NUM_THREADS` results in an
+   error.
+
+The advantage of this approach is that we can do it from outside of the
+process without changing the code.
+
+Another approach is to use the :func:`numba.set_num_threads` function in our
+code:
+
+.. code:: python
+
+    from numba import njit, set_num_threads
+
+    @njit(parallel=True)
+    def func():
+        ...
+
+    set_num_threads(2)
+    func()
+
+If we call ``set_num_threads(2)`` before executing our parallel code, it has
+the same effect as calling the process with ``NUMBA_NUM_THREADS=2``, in that
+the parallel code will only execute on 2 threads. However, we can later call
+``set_num_threads(8)`` to increase the number of threads back to the default
+size. And we do not have to worry about setting it before Numba gets imported.
+It only needs to be called before the parallel function is run.
+
+.. _numba-threading-layer-thread-id:
+
+Getting a Thread ID
+-------------------
+
+In some cases it may be beneficial to have access to a unique identifier for the
+current thread that is executing a parallel region in Numba. For that purpose,
+Numba provides the :func:`numba.get_thread_id` function. This function is the
+counterpart of OpenMP's function ``omp_get_thread_num`` and returns an integer
+between 0 (inclusive) and the number of configured threads as described above
+(exclusive).
+
+API Reference
+~~~~~~~~~~~~~
+
+.. py:data:: numba.config.NUMBA_NUM_THREADS
+
+   The total (maximum) number of threads launched by numba.
+
+   Defaults to :obj:`numba.config.NUMBA_DEFAULT_NUM_THREADS`, but can be
+   overridden with the :envvar:`NUMBA_NUM_THREADS` environment variable.
+
+.. py:data:: numba.config.NUMBA_DEFAULT_NUM_THREADS
+
+   The number of usable CPU cores on the system (as determined by
+   ``len(os.sched_getaffinity(0))``, if supported by the OS, or
+   ``multiprocessing.cpu_count()`` if not).
+   This is the default value for :obj:`numba.config.NUMBA_NUM_THREADS` unless
+   the :envvar:`NUMBA_NUM_THREADS` environment variable is set.
+
+.. autofunction:: numba.set_num_threads
+
+.. autofunction:: numba.get_num_threads
+
+.. autofunction:: numba.get_thread_id
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/troubleshoot.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/troubleshoot.rst
new file mode 100644
index 000000000..b92144346
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/troubleshoot.rst
@@ -0,0 +1,1179 @@
+
+.. _numba-troubleshooting:
+
+========================
+Troubleshooting and tips
+========================
+
+.. _what-to-compile:
+
+What to compile
+===============
+
+The general recommendation is that you should only try to compile the
+critical paths in your code. If you have a piece of performance-critical
+computational code amongst some higher-level code, you may factor out
+the performance-critical code in a separate function and compile the
+separate function with Numba. Letting Numba focus on that small piece
+of performance-critical code has several advantages:
+
+* it reduces the risk of hitting unsupported features;
+* it reduces the compilation times;
+* it allows you to evolve the higher-level code which is outside of the
+  compiled function much more easily.
+
+.. _code-doesnt-compile:
+
+My code doesn't compile
+=======================
+
+There can be various reasons why Numba cannot compile your code, and raises
+an error instead. One common reason is that your code relies on an
+unsupported Python feature, especially in :term:`nopython mode`.
+Please see the list of :ref:`pysupported`. If you find something that
+is listed there and still fails to compile, please
+:ref:`report a bug `.
+
+When Numba tries to compile your code it first tries to work out the types of
+all the variables in use; this is so it can generate a type specific
+implementation of your code that can be compiled down to machine code. A common
+reason for Numba failing to compile (especially in :term:`nopython mode`) is a
+type inference failure; essentially, Numba cannot work out what the type of all
+the variables in your code should be.
+ +For example, let's consider this trivial function:: + + @jit(nopython=True) + def f(x, y): + return x + y + +If you call it with two numbers, Numba is able to infer the types properly:: + + >>> f(1, 2) + 3 + +If however you call it with a tuple and a number, Numba is unable to say +what the result of adding a tuple and number is, and therefore compilation +errors out:: + + >>> f(1, (2,)) + Traceback (most recent call last): + File "", line 1, in + File "/numba/numba/dispatcher.py", line 339, in _compile_for_args + reraise(type(e), e, None) + File "/numba/numba/six.py", line 658, in reraise + raise value.with_traceback(tb) + numba.errors.TypingError: Failed at nopython (nopython frontend) + Invalid use of + with parameters (int64, tuple(int64 x 1)) + Known signatures: + * (int64, int64) -> int64 + * (int64, uint64) -> int64 + * (uint64, int64) -> int64 + * (uint64, uint64) -> uint64 + * (float32, float32) -> float32 + * (float64, float64) -> float64 + * (complex64, complex64) -> complex64 + * (complex128, complex128) -> complex128 + * (uint16,) -> uint64 + * (uint8,) -> uint64 + * (uint64,) -> uint64 + * (uint32,) -> uint64 + * (int16,) -> int64 + * (int64,) -> int64 + * (int8,) -> int64 + * (int32,) -> int64 + * (float32,) -> float32 + * (float64,) -> float64 + * (complex64,) -> complex64 + * (complex128,) -> complex128 + * parameterized + [1] During: typing of intrinsic-call at (3) + + File "", line 3: + +The error message helps you find out what went wrong: +"Invalid use of + with parameters (int64, tuple(int64 x 1))" is to be +interpreted as "Numba encountered an addition of variables typed as integer +and 1-tuple of integer, respectively, and doesn't know about any such +operation". + +Note that if you allow object mode:: + + @jit + def g(x, y): + return x + y + +compilation will succeed and the compiled function will raise at runtime as +Python would do:: + + >>> g(1, (2,)) + Traceback (most recent call last): + File "", line 1, in + TypeError: unsupported operand type(s) for +: 'int' and 'tuple' + + +My code has a type unification problem +====================================== + +Another common reason for Numba not being able to compile your code is that it +cannot statically determine the return type of a function. The most likely +cause of this is the return type depending on a value that is available only at +runtime. Again, this is most often problematic when using +:term:`nopython mode`. The concept of type unification is simply trying to find +a type in which two variables could safely be represented. For example a 64 bit +float and a 64 bit complex number could both be represented in a 128 bit complex +number. 
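+
+For instance, in the following sketch (an added illustration, not part of the
+original documentation) the two branches return an ``int`` and a ``float``;
+these unify to ``float64``, so compilation succeeds and both calls return
+``float64`` values::
+
+    from numba import jit
+
+    @jit(nopython=True)
+    def g(x):
+        if x > 10:
+            return 1.0  # float64
+        else:
+            return 1    # int64, unifies with float64
+
+    print(g(5), g(20))  # 1.0 1.0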
+
+As an example of type unification failure, this function has a return type that
+is determined at runtime based on the value of `x`::
+
+    In [1]: from numba import jit
+
+    In [2]: @jit(nopython=True)
+       ...: def f(x):
+       ...:     if x > 10:
+       ...:         return (1,)
+       ...:     else:
+       ...:         return 1
+       ...:
+
+    In [3]: f(10)
+
+Trying to execute this function errors out as follows::
+
+    TypingError: Failed at nopython (nopython frontend)
+    Can't unify return type from the following types: tuple(int64 x 1), int64
+    Return of: IR name '$8.2', type '(int64 x 1)', location:
+    File "", line 4:
+    def f(x):
+
+        if x > 10:
+            return (1,)
+            ^
+    Return of: IR name '$12.2', type 'int64', location:
+    File "", line 6:
+    def f(x):
+
+        else:
+            return 1
+
+The error message "Can't unify return type from the following types:
+tuple(int64 x 1), int64" should be read as "Numba cannot find a type that
+can safely represent a 1-tuple of integer and an integer".
+
+.. _code-has-untyped-list:
+
+My code has an untyped list problem
+===================================
+
+As :ref:`noted previously ` the first part of Numba
+compiling your code involves working out what the types of all the variables
+are. In the case of lists, a list must contain items that are all of the same
+type, or it can be empty if the type can be inferred from some later operation.
+What is not possible is to have a list which is defined as empty and has no
+inferable type (i.e. an untyped list).
+
+For example, this is using a list of a known type::
+
+    from numba import jit
+    @jit(nopython=True)
+    def f():
+        return [1, 2, 3] # this list is defined on construction with `int` type
+
+This is using an empty list, but the type can be inferred::
+
+    from numba import jit
+    @jit(nopython=True)
+    def f(x):
+        tmp = [] # defined empty
+        for i in range(x):
+            tmp.append(i) # list type can be inferred from the type of `i`
+        return tmp
+
+This is using an empty list and the type cannot be inferred::
+
+    from numba import jit
+    @jit(nopython=True)
+    def f(x):
+        tmp = [] # defined empty
+        return (tmp, x) # ERROR: the type of `tmp` is unknown
+
+Whilst slightly contrived, if you need an empty list and the type cannot be
+inferred but you know what type you want the list to be, this "trick" can be
+used to instruct the typing mechanism::
+
+    from numba import jit
+    import numpy as np
+    @jit(nopython=True)
+    def f(x):
+        # define empty list, but instruct that the type is np.complex64
+        tmp = [np.complex64(x) for x in range(0)]
+        return (tmp, x) # the type of `tmp` is known, but it is still empty
+
+The compiled code is too slow
+=============================
+
+The most common reason for slowness of a compiled JIT function is that
+compiling in :term:`nopython mode` has failed and the Numba compiler has
+fallen back to :term:`object mode`. :term:`object mode` currently provides
+little to no speedup compared to regular Python interpretation, and its
+main point is to allow an internal optimization known as
+:term:`loop-lifting`: this optimization allows Numba to compile inner
+loops in :term:`nopython mode` regardless of what code surrounds those
+inner loops.
+
+To find out if type inference succeeded on your function, you can use
+the :meth:`~Dispatcher.inspect_types` method on the compiled function.
+
+For example, let's take the following function::
+
+    @jit
+    def f(a, b):
+        s = a + float(b)
+        return s
+
+When called with numbers, this function should be fast as Numba is able
+to convert number types to floating-point numbers.
Let's see:: + + >>> f(1, 2) + 3.0 + >>> f.inspect_types() + f (int64, int64) + -------------------------------------------------------------------------------- + # --- LINE 7 --- + + @jit + + # --- LINE 8 --- + + def f(a, b): + + # --- LINE 9 --- + # label 0 + # a.1 = a :: int64 + # del a + # b.1 = b :: int64 + # del b + # $0.2 = global(float: ) :: Function() + # $0.4 = call $0.2(b.1, ) :: (int64,) -> float64 + # del b.1 + # del $0.2 + # $0.5 = a.1 + $0.4 :: float64 + # del a.1 + # del $0.4 + # s = $0.5 :: float64 + # del $0.5 + + s = a + float(b) + + # --- LINE 10 --- + # $0.7 = cast(value=s) :: float64 + # del s + # return $0.7 + + return s + +Without trying to understand too much of the Numba intermediate representation, +it is still visible that all variables and temporary values have had their +types inferred properly: for example *a* has the type ``int64``, *$0.5* has +the type ``float64``, etc. + +However, if *b* is passed as a string, compilation will fall back on object +mode as the float() constructor with a string is currently not supported +by Numba:: + + >>> f(1, "2") + 3.0 + >>> f.inspect_types() + [... snip annotations for other signatures, see above ...] + ================================================================================ + f (int64, str) + -------------------------------------------------------------------------------- + # --- LINE 7 --- + + @jit + + # --- LINE 8 --- + + def f(a, b): + + # --- LINE 9 --- + # label 0 + # a.1 = a :: pyobject + # del a + # b.1 = b :: pyobject + # del b + # $0.2 = global(float: ) :: pyobject + # $0.4 = call $0.2(b.1, ) :: pyobject + # del b.1 + # del $0.2 + # $0.5 = a.1 + $0.4 :: pyobject + # del a.1 + # del $0.4 + # s = $0.5 :: pyobject + # del $0.5 + + s = a + float(b) + + # --- LINE 10 --- + # $0.7 = cast(value=s) :: pyobject + # del s + # return $0.7 + + return s + +Here we see that all variables end up typed as ``pyobject``. This means +that the function was compiled in object mode and values are passed +around as generic Python objects, without Numba trying to look into them +to reason about their raw values. This is a situation you want to avoid +when caring about the speed of your code. + +If a function fails to compile in ``nopython`` mode warnings will be emitted +with explanation as to why compilation failed. For example with the ``f()`` +function above (slightly edited for documentation purposes):: + + >>> f(1, 2) + 3.0 + >>> f(1, "2") + example.py:7: NumbaWarning: + Compilation is falling back to object mode WITH looplifting enabled because Function "f" failed type inference due to: Invalid use of Function() with argument(s) of type(s): (unicode_type) + * parameterized + In definition 0: + TypeError: float() only support for numbers + raised from /numba/typing/builtins.py:880 + In definition 1: + TypeError: float() only support for numbers + raised from /numba/typing/builtins.py:880 + This error is usually caused by passing an argument of a type that is unsupported by the named function. + [1] During: resolving callee type: Function() + [2] During: typing of call at example.py (9) + + + File "example.py", line 9: + def f(a, b): + s = a + float(b) + ^ + + /numba/compiler.py:722: NumbaWarning: Function "f" was compiled in object mode without forceobj=True. 
+ + File "example.py", line 8: + @jit + def f(a, b): + ^ + + 3.0 + + +Disabling JIT compilation +========================= + +In order to debug code, it is possible to disable JIT compilation, which makes +the ``jit`` decorator (and the ``njit`` decorator) act as if +they perform no operation, and the invocation of decorated functions calls the +original Python function instead of a compiled version. This can be toggled by +setting the :envvar:`NUMBA_DISABLE_JIT` environment variable to ``1``. + +When this mode is enabled, the ``vectorize`` and ``guvectorize`` decorators will +still result in compilation of a ufunc, as there is no straightforward pure +Python implementation of these functions. + + +.. _debugging-jit-compiled-code: + +Debugging JIT compiled code with GDB +==================================== + +Setting the ``debug`` keyword argument in the ``jit`` decorator +(e.g. ``@jit(debug=True)``) enables the emission of debug info in the jitted +code. To debug, GDB version 7.0 or above is required. Currently, the following +debug info is available: + +* Function name will be shown in the backtrace along with type information and + values (if available). +* Source location (filename and line number) is available. For example, + users can set a break point by the absolute filename and line number; + e.g. ``break /path/to/myfile.py:6``. +* Arguments to the current function can be show with ``info args`` +* Local variables in the current function can be shown with ``info locals``. +* The type of variables can be shown with ``whatis myvar``. +* The value of variables can be shown with ``print myvar`` or ``display myvar``. + + * Simple numeric types, i.e. int, float and double, are shown in their + native representation. + * Other types are shown as a structure based on Numba's memory model + representation of the type. + +Further, the Numba ``gdb`` printing extension can be loaded into ``gdb`` (if the +``gdb`` has Python support) to permit the printing of variables as they would be +in native Python. The extension does this by reinterpreting Numba's memory model +representations as Python types. Information about the ``gdb`` installation that +Numba is using, including the path to load the ``gdb`` printing extension, can +be displayed by using the ``numba -g`` command. For best results ensure that the +Python that ``gdb`` is using has a NumPy module accessible. An example output +of the ``gdb`` information follows: + +.. code-block:: none + :emphasize-lines: 1 + + $ numba -g + GDB info: + -------------------------------------------------------------------------------- + Binary location : /gdb + Print extension location : /numba/misc/gdb_print_extension.py + Python version : 3.8 + NumPy version : 1.20.0 + Numba printing extension supported : True + + To load the Numba gdb printing extension, execute the following from the gdb prompt: + + source /numba/misc/gdb_print_extension.py + + -------------------------------------------------------------------------------- + +Known issues: + +* Stepping depends heavily on optimization level. At full optimization + (equivalent to O3), most of the variables are optimized out. 
It is often + beneficial to use the jit option ``_dbg_optnone=True`` + or the environment variable :envvar:`NUMBA_OPT` to adjust the + optimization level and the jit option ``_dbg_extend_lifetimes=True`` + (which is on by default if ``debug=True``) or + :envvar:`NUMBA_EXTEND_VARIABLE_LIFETIMES` to extend + the lifetime of variables to the end of their scope so as to get a debugging + experience closer to the semantics of Python execution. + +* Memory consumption increases significantly with debug info enabled. + The compiler emits extra information (`DWARF `_) + along with the instructions. The emitted object code can be 2x bigger with + debug info. + +Internal details: + +* Since Python semantics allow variables to bind to value of different types, + Numba internally creates multiple versions of the variable for each type. + So for code like:: + + x = 1 # type int + x = 2.3 # type float + x = (1, 2, 3) # type 3-tuple of int + + Each assignments will store to a different variable name. In the debugger, + the variables will be ``x``, ``x$1`` and ``x$2``. (In the Numba IR, they are + ``x``, ``x.1`` and ``x.2``.) + +* When debug is enabled, inlining of functions at LLVM IR level is disabled. + +JIT options for debug +--------------------- + +* ``debug`` (bool). Set to ``True`` to enable debug info. Defaults to ``False``. +* ``_dbg_optnone`` (bool). Set to ``True`` to disable all LLVM optimization passes + on the function. Defaults to ``False``. See :envvar:`NUMBA_OPT` for a global setting + to disable optimization. +* ``_dbg_extend_lifetimes`` (bool). Set to ``True`` to extend the lifetime of + objects such that they more closely follow the semantics of Python. + Automatically set to ``True`` when + ``debug=True``; otherwise, defaults to ``False``. Users can explicitly set this option + to ``False`` to retain the normal execution semantics of compiled code. + See :envvar:`NUMBA_EXTEND_VARIABLE_LIFETIMES` for a global option to extend object + lifetimes. + +Example debug usage +------------------- + +The python source: + +.. code-block:: python + :linenos: + + from numba import njit + + @njit(debug=True) + def foo(a): + b = a + 1 + c = a * 2.34 + d = (a, b, c) + print(a, b, c, d) + + r = foo(123) + print(r) + +In the terminal: + +.. code-block:: none + :emphasize-lines: 1, 3, 7, 12, 14, 16, 20, 22, 26, 28, 30, 32, 34, 36 + + $ NUMBA_OPT=0 NUMBA_EXTEND_VARIABLE_LIFETIMES=1 gdb -q python + Reading symbols from python... + (gdb) break test1.py:5 + No source file named test1.py. + Make breakpoint pending on future shared library load? (y or [n]) y + Breakpoint 1 (test1.py:5) pending. + (gdb) run test1.py + Starting program: /bin/python test1.py + ... 
+ Breakpoint 1, __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at test1.py:5 + 5 b = a + 1 + (gdb) info args + a = 123 + (gdb) n + 6 c = a * 2.34 + (gdb) info locals + b = 124 + c = 0 + d = {f0 = 0, f1 = 0, f2 = 0} + (gdb) n + 7 d = (a, b, c) + (gdb) info locals + b = 124 + c = 287.81999999999999 + d = {f0 = 0, f1 = 0, f2 = 0} + (gdb) whatis b + type = int64 + (gdb) whatis d + type = Tuple(int64, int64, float64) ({i64, i64, double}) + (gdb) n + 8 print(a, b, c, d) + (gdb) print b + $1 = 124 + (gdb) print d + $2 = {f0 = 123, f1 = 124, f2 = 287.81999999999999} + (gdb) bt + #0 __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at test1.py:8 + #1 0x00007ffff06439fa in cpython::__main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) () + + +Another example follows that makes use of the Numba ``gdb`` printing extension +mentioned above, note the change in the print format once the extension is +loaded with ``source`` : + +The Python source: + +.. code-block:: python + :linenos: + + from numba import njit + import numpy as np + + @njit(debug=True) + def foo(n): + x = np.arange(n) + y = (x[0], x[-1]) + return x, y + + foo(4) + +In the terminal: + +.. code-block:: none + :emphasize-lines: 1, 3, 4, 7, 12, 14, 16, 17, 20 + + $ NUMBA_OPT=0 NUMBA_EXTEND_VARIABLE_LIFETIMES=1 gdb -q python + Reading symbols from python... + (gdb) set breakpoint pending on + (gdb) break test2.py:8 + No source file named test2.py. + Breakpoint 1 (test2.py:8) pending. + (gdb) run test2.py + Starting program: /bin/python test2.py + ... + Breakpoint 1, __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (n=4) at test2.py:8 + 8 return x, y + (gdb) print x + $1 = {meminfo = 0x55555688f470 "\001", parent = 0x0, nitems = 4, itemsize = 8, data = 0x55555688f4a0, shape = {4}, strides = {8}} + (gdb) print y + $2 = {0, 3} + (gdb) source numba/misc/gdb_print_extension.py + (gdb) print x + $3 = + [0 1 2 3] + (gdb) print y + $4 = (0, 3) + + + +Globally override debug setting +------------------------------- + +It is possible to enable debug for the full application by setting environment +variable ``NUMBA_DEBUGINFO=1``. This sets the default value of the ``debug`` +option in ``jit``. Debug can be turned off on individual functions by setting +``debug=False``. + +Beware that enabling debug info significantly increases the memory consumption +for each compiled function. For large application, this may cause out-of-memory +error. + +Using Numba's direct ``gdb`` bindings in ``nopython`` mode +=========================================================== + +Numba (version 0.42.0 and later) has some additional functions relating to +``gdb`` support for CPUs that make it easier to debug programs. All the ``gdb`` +related functions described in the following work in the same manner +irrespective of whether they are called from the standard CPython interpreter or +code compiled in either :term:`nopython mode` or :term:`object mode`. + +.. note:: This feature is experimental! + +.. warning:: This feature does unexpected things if used from Jupyter or + alongside the ``pdb`` module. It's behaviour is harmless, just hard + to predict! + +Set up +------ +Numba's ``gdb`` related functions make use of a ``gdb`` binary, the location and +name of this binary can be configured via the :envvar:`NUMBA_GDB_BINARY` +environment variable if desired. + +.. 
note:: Numba's ``gdb`` support requires the ability for ``gdb`` to attach to + another process. On some systems (notably Ubuntu Linux) default + security restrictions placed on ``ptrace`` prevent this from being + possible. This restriction is enforced at the system level by the + Linux security module `Yama`. Documentation for this module and the + security implications of making changes to its behaviour can be found + in the `Linux Kernel documentation `_. + The `Ubuntu Linux security documentation `_ + discusses how to adjust the behaviour of `Yama` on with regards to + ``ptrace_scope`` so as to permit the required behaviour. + +Basic ``gdb`` support +--------------------- + +.. warning:: Calling :func:`numba.gdb` and/or :func:`numba.gdb_init` more than + once in the same program is not advisable, unexpected things may + happen. If multiple breakpoints are desired within a program, + launch ``gdb`` once via :func:`numba.gdb` or :func:`numba.gdb_init` + and then use :func:`numba.gdb_breakpoint` to register additional + breakpoint locations. + +The most simple function for adding ``gdb`` support is :func:`numba.gdb`, which, +at the call location, will: + +* launch ``gdb`` and attach it to the running process. +* create a breakpoint at the site of the :func:`numba.gdb()` function call, the + attached ``gdb`` will pause execution here awaiting user input. + +use of this functionality is best motivated by example, continuing with the +example used above: + +.. code-block:: python + :linenos: + + from numba import njit, gdb + + @njit(debug=True) + def foo(a): + b = a + 1 + gdb() # instruct Numba to attach gdb at this location and pause execution + c = a * 2.34 + d = (a, b, c) + print(a, b, c, d) + + r= foo(123) + print(r) + +In the terminal (``...`` on a line by itself indicates output that is not +presented for brevity): + +.. code-block:: none + :emphasize-lines: 1, 4, 8, 13, 24, 26, 28, 30, 32, 37 + + $ NUMBA_OPT=0 NUMBA_EXTEND_VARIABLE_LIFETIMES=1 python demo_gdb.py + ... + Breakpoint 1, 0x00007fb75238d830 in numba_gdb_breakpoint () from numba/_helperlib.cpython-39-x86_64-linux-gnu.so + (gdb) s + Single stepping until exit from function numba_gdb_breakpoint, + which has no line number information. + 0x00007fb75233e1cf in numba::misc::gdb_hook::hook_gdb::_3clocals_3e::impl_242[abi:c8tJTIeFCjyCbUFRqqOAK_2f6h0phxApMogijRBAA_3d](StarArgTuple) () + (gdb) s + Single stepping until exit from function _ZN5numba4misc8gdb_hook8hook_gdb12_3clocals_3e8impl_242B44c8tJTIeFCjyCbUFRqqOAK_2f6h0phxApMogijRBAA_3dE12StarArgTuple, + which has no line number information. + __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at demo_gdb.py:7 + 7 c = a * 2.34 + (gdb) l + 2 + 3 @njit(debug=True) + 4 def foo(a): + 5 b = a + 1 + 6 gdb() # instruct Numba to attach gdb at this location and pause execution + 7 c = a * 2.34 + 8 d = (a, b, c) + 9 print(a, b, c, d) + 10 + 11 r= foo(123) + (gdb) p a + $1 = 123 + (gdb) p b + $2 = 124 + (gdb) p c + $3 = 0 + (gdb) b 9 + Breakpoint 2 at 0x7fb73d1f7287: file demo_gdb.py, line 9. + (gdb) c + Continuing. 
+ + Breakpoint 2, __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at demo_gdb.py:9 + 9 print(a, b, c, d) + (gdb) info locals + b = 124 + c = 287.81999999999999 + d = {f0 = 123, f1 = 124, f2 = 287.81999999999999} + + +It can be seen in the above example that execution of the code is paused at the +location of the ``gdb()`` function call at end of the ``numba_gdb_breakpoint`` +function (this is the Numba internal symbol registered as breakpoint with +``gdb``). Issuing a ``step`` twice at this point moves to the stack frame of the +compiled Python source. From there, it can be seen that the variables ``a`` and +``b`` have been evaluated but ``c`` has not, as demonstrated by printing their +values, this is precisely as expected given the location of the ``gdb()`` call. +Issuing a ``break`` on line 9 and then continuing execution leads to the +evaluation of line ``7``. The variable ``c`` is assigned a value as a result of +the execution and this can be seen in output of ``info locals`` when the +breakpoint is hit. + +Running with ``gdb`` enabled +---------------------------- + +The functionality provided by :func:`numba.gdb` (launch and attach ``gdb`` to +the executing process and pause on a breakpoint) is also available as two +separate functions: + +* :func:`numba.gdb_init` this function injects code at the call site to launch + and attach ``gdb`` to the executing process but does not pause execution. +* :func:`numba.gdb_breakpoint` this function injects code at the call site that + will call the special ``numba_gdb_breakpoint`` function that is registered as + a breakpoint in Numba's ``gdb`` support. This is demonstrated in the next + section. + +This functionality enables more complex debugging capabilities. Again, motivated +by example, debugging a 'segfault' (memory access violation signalling +``SIGSEGV``): + +.. code-block:: python + :linenos: + + from numba import njit, gdb_init + import numpy as np + + # NOTE debug=True switches bounds-checking on, but for the purposes of this + # example it is explicitly turned off so that the out of bounds index is + # not caught! + @njit(debug=True, boundscheck=False) + def foo(a, index): + gdb_init() # instruct Numba to attach gdb at this location, but not to pause execution + b = a + 1 + c = a * 2.34 + d = c[index] # access an address that is a) invalid b) out of the page + print(a, b, c, d) + + bad_index = int(1e9) # this index is invalid + z = np.arange(10) + r = foo(z, bad_index) + print(r) + +In the terminal (``...`` on a line by itself indicates output that is not +presented for brevity): + +.. code-block:: none + :emphasize-lines: 1, 6, 8, 10, 12 + + $ NUMBA_OPT=0 python demo_gdb_segfault.py + ... + Program received signal SIGSEGV, Segmentation fault. + 0x00007f5a4ca655eb in __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](Array, long long) (a=..., index=1000000000) at demo_gdb_segfault.py:12 + 12 d = c[index] # access an address that is a) invalid b) out of the page + (gdb) p index + $1 = 1000000000 + (gdb) p c + $2 = {meminfo = 0x5586cfb95830 "\001", parent = 0x0, nitems = 10, itemsize = 8, data = 0x5586cfb95860, shape = {10}, strides = {8}} + (gdb) whatis c + type = array(float64, 1d, C) ({i8*, i8*, i64, i64, double*, [1 x i64], [1 x i64]}) + (gdb) p c.nitems + $3 = 10 + +In the ``gdb`` output it can be noted that a ``SIGSEGV`` signal was caught, and +the line in which the access violation occurred is printed. 
+
+Continuing the example as a debugging session demonstration, first ``index``
+can be printed, and it is evidently 1e9. Printing ``c`` shows that it is a
+structure, so the type needs looking up and it can be seen that it is an
+``array(float64, 1d, C)`` type. Given the segfault came from an invalid access
+it would be informative to check the number of items in the array and compare
+that to the index requested. Inspecting the ``nitems`` member of the structure
+``c`` shows 10 items. It's therefore clear that the segfault comes from an
+invalid access of index ``1000000000`` in an array containing ``10`` items.
+
+Adding breakpoints to code
+--------------------------
+
+The next example demonstrates using multiple breakpoints that are defined
+through the invocation of the :func:`numba.gdb_breakpoint` function:
+
+.. code-block:: python
+   :linenos:
+
+   from numba import njit, gdb_init, gdb_breakpoint
+
+   @njit(debug=True)
+   def foo(a):
+       gdb_init() # instruct Numba to attach gdb at this location
+       b = a + 1
+       gdb_breakpoint() # instruct gdb to break at this location
+       c = a * 2.34
+       d = (a, b, c)
+       gdb_breakpoint() # and to break again at this location
+       print(a, b, c, d)
+
+   r = foo(123)
+   print(r)
+
+In the terminal (``...`` on a line by itself indicates output that is not
+presented for brevity):
+
+.. code-block:: none
+   :emphasize-lines: 1, 4, 9, 20, 22, 24, 29, 31
+
+   $ NUMBA_OPT=0 python demo_gdb_breakpoints.py
+   ...
+   Breakpoint 1, 0x00007fb65bb4c830 in numba_gdb_breakpoint () from numba/_helperlib.cpython-39-x86_64-linux-gnu.so
+   (gdb) step
+   Single stepping until exit from function numba_gdb_breakpoint,
+   which has no line number information.
+   __main__::foo_241[abi:c8tJTC_2fWgEeGLSgydRTQUgiqKEZ6gEoDvQJmaQIA](long long) (a=123) at demo_gdb_breakpoints.py:8
+   8           c = a * 2.34
+   (gdb) l
+   3       @njit(debug=True)
+   4       def foo(a):
+   5           gdb_init() # instruct Numba to attach gdb at this location
+   6           b = a + 1
+   7           gdb_breakpoint() # instruct gdb to break at this location
+   8           c = a * 2.34
+   9           d = (a, b, c)
+   10          gdb_breakpoint() # and to break again at this location
+   11          print(a, b, c, d)
+   12
+   (gdb) p b
+   $1 = 124
+   (gdb) p c
+   $2 = 0
+   (gdb) c
+   Continuing.
+
+   Breakpoint 1, 0x00007fb65bb4c830 in numba_gdb_breakpoint ()
+   from numba/_helperlib.cpython-39-x86_64-linux-gnu.so
+   (gdb) step
+   11          print(a, b, c, d)
+   (gdb) p c
+   $3 = 287.81999999999999
+
+From the ``gdb`` output it can be seen that execution paused at line 8 as a
+breakpoint was hit, and after a ``continue`` was issued, it broke again at line
+11 where the next breakpoint was hit.
+
+Debugging in parallel regions
+-----------------------------
+
+The following example is quite involved: it executes with ``gdb``
+instrumentation from the outset as per the example above, but it also uses
+threads and makes use of the breakpoint functionality. Further, the last
+iteration of the parallel section calls the function ``work``, which is
+actually just a binding to ``glibc``'s ``free(3)`` in this case, but could
+equally be some involved function that is presenting a segfault for unknown
+reasons.
+
+..
code-block:: python + :linenos: + + from numba import njit, prange, gdb_init, gdb_breakpoint + import ctypes + + def get_free(): + lib = ctypes.cdll.LoadLibrary('libc.so.6') + free_binding = lib.free + free_binding.argtypes = [ctypes.c_void_p,] + free_binding.restype = None + return free_binding + + work = get_free() + + @njit(debug=True, parallel=True) + def foo(): + gdb_init() # instruct Numba to attach gdb at this location, but not to pause execution + counter = 0 + n = 9 + for i in prange(n): + if i > 3 and i < 8: # iterations 4, 5, 6, 7 will break here + gdb_breakpoint() + + if i == 8: # last iteration segfaults + work(0xBADADD) + + counter += 1 + return counter + + r = foo() + print(r) + +In the terminal (``...`` on a line by itself indicates output that is not +presented for brevity), note the setting of ``NUMBA_NUM_THREADS`` to 4 to ensure +that there are 4 threads running in the parallel section: + +.. code-block:: none + :emphasize-lines: 1, 19, 29, 44, 50, 56, 62, 69 + + $ NUMBA_NUM_THREADS=4 NUMBA_OPT=0 python demo_gdb_threads.py + Attaching to PID: 21462 + ... + Attaching to process 21462 + [New LWP 21467] + [New LWP 21468] + [New LWP 21469] + [New LWP 21470] + [Thread debugging using libthread_db enabled] + Using host libthread_db library "/lib64/libthread_db.so.1". + 0x00007f59ec31756d in nanosleep () at ../sysdeps/unix/syscall-template.S:81 + 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) + Breakpoint 1 at 0x7f59d631e8f0: file numba/_helperlib.c, line 1090. + Continuing. + [Switching to Thread 0x7f59d1fd1700 (LWP 21470)] + + Thread 5 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 + 1090 } + (gdb) info threads + Id Target Id Frame + 1 Thread 0x7f59eca2f740 (LWP 21462) "python" pthread_cond_wait@@GLIBC_2.3.2 () + at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 + 2 Thread 0x7f59d37d4700 (LWP 21467) "python" pthread_cond_wait@@GLIBC_2.3.2 () + at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 + 3 Thread 0x7f59d2fd3700 (LWP 21468) "python" pthread_cond_wait@@GLIBC_2.3.2 () + at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 + 4 Thread 0x7f59d27d2700 (LWP 21469) "python" numba_gdb_breakpoint () at numba/_helperlib.c:1090 + * 5 Thread 0x7f59d1fd1700 (LWP 21470) "python" numba_gdb_breakpoint () at numba/_helperlib.c:1090 + (gdb) thread apply 2-5 info locals + + Thread 2 (Thread 0x7f59d37d4700 (LWP 21467)): + No locals. + + Thread 3 (Thread 0x7f59d2fd3700 (LWP 21468)): + No locals. + + Thread 4 (Thread 0x7f59d27d2700 (LWP 21469)): + No locals. + + Thread 5 (Thread 0x7f59d1fd1700 (LWP 21470)): + sched$35 = '\000' + counter__arr = '\000' , "\001\000\000\000\000\000\000\000\b\000\000\000\000\000\000\000\370B]\"hU\000\000\001", '\000' + counter = 0 + (gdb) continue + Continuing. + [Switching to Thread 0x7f59d27d2700 (LWP 21469)] + + Thread 4 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 + 1090 } + (gdb) continue + Continuing. + [Switching to Thread 0x7f59d1fd1700 (LWP 21470)] + + Thread 5 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 + 1090 } + (gdb) continue + Continuing. + [Switching to Thread 0x7f59d27d2700 (LWP 21469)] + + Thread 4 "python" hit Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 + 1090 } + (gdb) continue + Continuing. + + Thread 5 "python" received signal SIGSEGV, Segmentation fault. 
+ [Switching to Thread 0x7f59d1fd1700 (LWP 21470)] + __GI___libc_free (mem=0xbadadd) at malloc.c:2935 + 2935 if (chunk_is_mmapped(p)) /* release mmapped memory. */ + (gdb) bt + #0 __GI___libc_free (mem=0xbadadd) at malloc.c:2935 + #1 0x00007f59d37ded84 in $3cdynamic$3e::__numba_parfor_gufunc__0x7ffff80a61ae3e31$244(Array, Array) () at :24 + #2 0x00007f59d17ce326 in __gufunc__._ZN13$3cdynamic$3e45__numba_parfor_gufunc__0x7ffff80a61ae3e31$244E5ArrayIyLi1E1C7mutable7alignedE5ArrayIxLi1E1C7mutable7alignedE () + #3 0x00007f59d37d7320 in thread_worker () + from /numba/numba/npyufunc/workqueue.cpython-37m-x86_64-linux-gnu.so + #4 0x00007f59ec626e25 in start_thread (arg=0x7f59d1fd1700) at pthread_create.c:308 + #5 0x00007f59ec350bad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 + +In the output it can be seen that there are 4 threads launched and that they all +break at the breakpoint, further that ``Thread 5`` receives a signal ``SIGSEGV`` +and that back tracing shows that it came from ``__GI___libc_free`` with the +invalid address in ``mem``, as expected. + +Using the ``gdb`` command language +---------------------------------- +Both the :func:`numba.gdb` and :func:`numba.gdb_init` functions accept unlimited +string arguments which will be passed directly to ``gdb`` as command line +arguments when it initializes, this makes it easy to set breakpoints on other +functions and perform repeated debugging tasks without having to manually type +them every time. For example, this code runs with ``gdb`` attached and sets a +breakpoint on ``_dgesdd`` (say for example the arguments passed to the LAPACK's +double precision divide and conqueror SVD function need debugging). + +.. code-block:: python + :linenos: + + from numba import njit, gdb + import numpy as np + + @njit(debug=True) + def foo(a): + # instruct Numba to attach gdb at this location and on launch, switch + # breakpoint pending on , and then set a breakpoint on the function + # _dgesdd, continue execution, and once the breakpoint is hit, backtrace + gdb('-ex', 'set breakpoint pending on', + '-ex', 'b dgesdd_', + '-ex','c', + '-ex','bt') + b = a + 10 + u, s, vh = np.linalg.svd(b) + return s # just return singular values + + z = np.arange(70.).reshape(10, 7) + r = foo(z) + print(r) + +In the terminal (``...`` on a line by itself indicates output that is not +presented for brevity), note that no interaction is required to break and +backtrace: + +.. code-block:: none + :emphasize-lines: 1 + + $ NUMBA_OPT=0 python demo_gdb_args.py + Attaching to PID: 22300 + GNU gdb (GDB) Red Hat Enterprise Linux 8.0.1-36.el7 + ... + Attaching to process 22300 + Reading symbols from /bin/python3.7...done. + 0x00007f652305a550 in __nanosleep_nocancel () at ../sysdeps/unix/syscall-template.S:81 + 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) + Breakpoint 1 at 0x7f650d0618f0: file numba/_helperlib.c, line 1090. + Continuing. + + Breakpoint 1, numba_gdb_breakpoint () at numba/_helperlib.c:1090 + 1090 } + Breakpoint 2 at 0x7f65102322e0 (2 locations) + Continuing. 
+ + Breakpoint 2, 0x00007f65182be5f0 in mkl_lapack.dgesdd_ () + from /lib/python3.7/site-packages/numpy/core/../../../../libmkl_rt.so + #0 0x00007f65182be5f0 in mkl_lapack.dgesdd_ () + from /lib/python3.7/site-packages/numpy/core/../../../../libmkl_rt.so + #1 0x00007f650d065b71 in numba_raw_rgesdd (kind=kind@entry=100 'd', jobz=, jobz@entry=65 'A', m=m@entry=10, + n=n@entry=7, a=a@entry=0x561c6fbb20c0, lda=lda@entry=10, s=0x561c6facf3a0, u=0x561c6fb680e0, ldu=10, vt=0x561c6fd375c0, + ldvt=7, work=0x7fff4c926c30, lwork=-1, iwork=0x7fff4c926c40, info=0x7fff4c926c20) at numba/_lapack.c:1277 + #2 0x00007f650d06768f in numba_ez_rgesdd (ldvt=7, vt=0x561c6fd375c0, ldu=10, u=0x561c6fb680e0, s=0x561c6facf3a0, lda=10, + a=0x561c6fbb20c0, n=7, m=10, jobz=65 'A', kind=) at numba/_lapack.c:1307 + #3 numba_ez_gesdd (kind=, jobz=, m=10, n=7, a=0x561c6fbb20c0, lda=10, s=0x561c6facf3a0, + u=0x561c6fb680e0, ldu=10, vt=0x561c6fd375c0, ldvt=7) at numba/_lapack.c:1477 + #4 0x00007f650a3147a3 in numba::targets::linalg::svd_impl::$3clocals$3e::svd_impl$243(Array, omitted$28default$3d1$29) () + #5 0x00007f650a1c0489 in __main__::foo$241(Array) () at demo_gdb_args.py:15 + #6 0x00007f650a1c2110 in cpython::__main__::foo$241(Array) () + #7 0x00007f650cd096a4 in call_cfunc () + from /numba/numba/_dispatcher.cpython-37m-x86_64-linux-gnu.so + ... + + +How does the ``gdb`` binding work? +---------------------------------- +For advanced users and debuggers of Numba applications it's important to know +some of the internal implementation details of the outlined ``gdb`` bindings. +The :func:`numba.gdb` and :func:`numba.gdb_init` functions work by injecting the +following into the function's LLVM IR: + +* At the call site of the function first inject a call to ``getpid(3)`` to get + the PID of the executing process and store this for use later, then inject a + ``fork(3)`` call: + + * In the parent: + + * Inject a call ``sleep(3)`` (hence the pause whilst ``gdb`` loads). + * Inject a call to the ``numba_gdb_breakpoint`` function (only + :func:`numba.gdb` does this). + + * In the child: + + * Inject a call to ``execl(3)`` with the arguments + ``numba.config.GDB_BINARY``, the ``attach`` command and the PID recorded + earlier. Numba has a special ``gdb`` command file that contains + instructions to break on the symbol ``numba_gdb_breakpoint`` and then + ``finish``, this is to make sure that the program stops on the + breakpoint but the frame it stops in is the compiled Python frame (or + one ``step`` away from, depending on optimisation). This command file is + also added to the arguments and finally and any user specified arguments + are added. + +At the call site of a :func:`numba.gdb_breakpoint` a call is injected to the +special ``numba_gdb_breakpoint`` symbol, which is already registered and +instrumented as a place to break and ``finish`` immediately. + +As a result of this, a e.g. :func:`numba.gdb` call will cause a fork in the +program, the parent will sleep whilst the child launches ``gdb`` and attaches it +to the parent and tells the parent to continue. The launched ``gdb`` has the +``numba_gdb_breakpoint`` symbol registered as a breakpoint and when the parent +continues and stops sleeping it will immediately call ``numba_gdb_breakpoint`` +on which the child will break. Additional :func:`numba.gdb_breakpoint` calls +create calls to the registered breakpoint hence the program will also break at +these locations. + +.. 
_debugging-cuda-python-code: + +Debugging CUDA Python code +========================== + +Using the simulator +------------------- + +CUDA Python code can be run in the Python interpreter using the CUDA Simulator, +allowing it to be debugged with the Python debugger or with print statements. To +enable the CUDA simulator, set the environment variable +:envvar:`NUMBA_ENABLE_CUDASIM` to 1. For more information on the CUDA Simulator, +see :ref:`the CUDA Simulator documentation `. + + +Debug Info +---------- + +By setting the ``debug`` argument to ``cuda.jit`` to ``True`` +(``@cuda.jit(debug=True)``), Numba will emit source location in the compiled +CUDA code. Unlike the CPU target, only filename and line information are +available, but no variable type information is emitted. The information +is sufficient to debug memory error with +`cuda-memcheck `_. + +For example, given the following cuda python code: + +.. code-block:: python + :linenos: + + import numpy as np + from numba import cuda + + @cuda.jit(debug=True) + def foo(arr): + arr[cuda.threadIdx.x] = 1 + + arr = np.arange(30) + foo[1, 32](arr) # more threads than array elements + +We can use ``cuda-memcheck`` to find the memory error: + +.. code-block:: none + + $ cuda-memcheck python chk_cuda_debug.py + ========= CUDA-MEMCHECK + ========= Invalid __global__ write of size 8 + ========= at 0x00000148 in /home/user/chk_cuda_debug.py:6:cudapy::__main__::foo$241(Array<__int64, int=1, C, mutable, aligned>) + ========= by thread (31,0,0) in block (0,0,0) + ========= Address 0x500a600f8 is out of bounds + ... + ========= + ========= Invalid __global__ write of size 8 + ========= at 0x00000148 in /home/user/chk_cuda_debug.py:6:cudapy::__main__::foo$241(Array<__int64, int=1, C, mutable, aligned>) + ========= by thread (30,0,0) in block (0,0,0) + ========= Address 0x500a600f0 is out of bounds + ... diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/vectorize.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/vectorize.rst new file mode 100644 index 000000000..dc15cda1b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/vectorize.rst @@ -0,0 +1,423 @@ +================================== +Creating NumPy universal functions +================================== + +There are two types of universal functions: + +* Those which operate on scalars, these are "universal functions" or *ufuncs* + (see ``@vectorize`` below). +* Those which operate on higher dimensional arrays and scalars, these are + "generalized universal functions" or *gufuncs* (``@guvectorize`` below). + +.. _vectorize: + +The ``@vectorize`` decorator +============================ + +Numba's vectorize allows Python functions taking scalar input arguments to +be used as NumPy `ufuncs`_. Creating a traditional NumPy ufunc is +not the most straightforward process and involves writing some C code. +Numba makes this easy. Using the :func:`~numba.vectorize` decorator, Numba +can compile a pure Python function into a ufunc that operates over NumPy +arrays as fast as traditional ufuncs written in C. + +.. _ufuncs: http://docs.scipy.org/doc/numpy/reference/ufuncs.html + +Using :func:`~numba.vectorize`, you write your function as operating over +input scalars, rather than arrays. Numba will generate the surrounding +loop (or *kernel*) allowing efficient iteration over the actual inputs. 
+
+The :func:`~numba.vectorize` decorator has two modes of operation:
+
+* Eager, or decoration-time, compilation: If you pass one or more type
+  signatures to the decorator, you will be building a NumPy universal
+  function (ufunc). The rest of this subsection describes building
+  ufuncs using decoration-time compilation.
+
+* Lazy, or call-time, compilation: When not given any signatures, the
+  decorator will give you a Numba dynamic universal function
+  (:class:`~numba.DUFunc`) that dynamically compiles a new kernel when
+  called with a previously unsupported input type. A later
+  subsection, ":ref:`dynamic-universal-functions`", describes this mode in
+  more depth.
+
+As described above, if you pass a list of signatures to the
+:func:`~numba.vectorize` decorator, your function will be compiled
+into a NumPy ufunc. In the basic case, only one signature will be
+passed::
+
+    from numba import vectorize, float64
+
+    @vectorize([float64(float64, float64)])
+    def f(x, y):
+        return x + y
+
+If you pass several signatures, beware that you have to pass the more
+specific signatures before the less specific ones (e.g., single-precision
+floats before double-precision floats), otherwise type-based dispatching
+will not work as expected::
+
+    @vectorize([int32(int32, int32),
+                int64(int64, int64),
+                float32(float32, float32),
+                float64(float64, float64)])
+    def f(x, y):
+        return x + y
+
+The function will work as expected over the specified array types::
+
+    >>> a = np.arange(6)
+    >>> f(a, a)
+    array([ 0,  2,  4,  6,  8, 10])
+    >>> a = np.linspace(0, 1, 6)
+    >>> f(a, a)
+    array([ 0. ,  0.4,  0.8,  1.2,  1.6,  2. ])
+
+but it will fail when applied to other types::
+
+    >>> a = np.linspace(0, 1+1j, 6)
+    >>> f(a, a)
+    Traceback (most recent call last):
+      File "<stdin>", line 1, in <module>
+    TypeError: ufunc 'ufunc' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
+
+You might ask yourself, "why would I go through this instead of compiling
+a simple iteration loop using the :ref:`@jit <jit>` decorator?". The
+answer is that NumPy ufuncs automatically get other features such as
+reduction, accumulation or broadcasting. Using the example above::
+
+    >>> a = np.arange(12).reshape(3, 4)
+    >>> a
+    array([[ 0,  1,  2,  3],
+           [ 4,  5,  6,  7],
+           [ 8,  9, 10, 11]])
+    >>> f.reduce(a, axis=0)
+    array([12, 15, 18, 21])
+    >>> f.reduce(a, axis=1)
+    array([ 6, 22, 38])
+    >>> f.accumulate(a)
+    array([[ 0,  1,  2,  3],
+           [ 4,  6,  8, 10],
+           [12, 15, 18, 21]])
+    >>> f.accumulate(a, axis=1)
+    array([[ 0,  1,  3,  6],
+           [ 4,  9, 15, 22],
+           [ 8, 17, 27, 38]])
+
+.. seealso::
+   `Standard features of ufuncs <http://docs.scipy.org/doc/numpy/reference/ufuncs.html>`_ (NumPy documentation).
+
+.. note::
+   Only the broadcasting features of ufuncs are supported in compiled code.
+
+The :func:`~numba.vectorize` decorator supports multiple ufunc targets:
+
+================= ===============================================================
+Target            Description
+================= ===============================================================
+cpu               Single-threaded CPU
+
+parallel          Multi-core CPU
+
+cuda              CUDA GPU
+
+                  .. NOTE:: This creates a *ufunc-like* object.
+                     See `documentation for CUDA ufunc <../cuda/ufunc.html>`_
+                     for detail.
+================= ===============================================================
+
+A general guideline is to choose different targets for different data sizes
+and algorithms. The "cpu" target works well for small data sizes
+(approximately less than 1 KB) and algorithms of low compute intensity; it
+has the least amount of overhead. The "parallel" target works well for
+medium data sizes (approximately less than 1 MB), where threading adds a
+small delay. The "cuda" target works well for big data sizes (approximately
+greater than 1 MB) and algorithms of high compute intensity, since
+transferring memory to and from the GPU adds significant overhead.
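+
+For example, switching targets is just a matter of the ``target`` keyword.
+A minimal sketch, reusing the signature from above (the function name is
+hypothetical):
+
+.. code-block:: python
+
+    import numpy as np
+    from numba import vectorize, float64
+
+    # Same scalar kernel as before, compiled for the multi-core CPU target.
+    @vectorize([float64(float64, float64)], target='parallel')
+    def f_parallel(x, y):
+        return x + y
+
+    a = np.linspace(0, 1, 1_000_000)   # large enough to benefit from threads
+    f_parallel(a, a)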
+
+.. _guvectorize:
+
+The ``@guvectorize`` decorator
+==============================
+
+While :func:`~numba.vectorize` allows you to write ufuncs that work on one
+element at a time, the :func:`~numba.guvectorize` decorator takes the concept
+one step further and allows you to write ufuncs that will work on an
+arbitrary number of elements of input arrays, and take and return arrays of
+differing dimensions. The typical example is a running median or a
+convolution filter.
+
+Contrary to :func:`~numba.vectorize` functions, :func:`~numba.guvectorize`
+functions don't return their result value: they take it as an array
+argument, which must be filled in by the function. This is because the
+array is actually allocated by NumPy's dispatch mechanism, which calls into
+the Numba-generated code.
+
+Like the :func:`~numba.vectorize` decorator, :func:`~numba.guvectorize`
+also has two modes of operation: eager (decoration-time) compilation and
+lazy (call-time) compilation.
+
+Here is a very simple example::
+
+    @guvectorize([(int64[:], int64, int64[:])], '(n),()->(n)')
+    def g(x, y, res):
+        for i in range(x.shape[0]):
+            res[i] = x[i] + y
+
+The underlying Python function simply adds a given scalar (``y``) to all
+elements of a one-dimensional array. What's more interesting is the
+declaration. There are two things there:
+
+* the declaration of input and output *layouts*, in symbolic form:
+  ``(n),()->(n)`` tells NumPy that the function takes an *n*-element
+  one-dimensional array, a scalar (symbolically denoted by the empty tuple
+  ``()``) and returns an *n*-element one-dimensional array;
+
+* the list of supported concrete *signatures* as per ``@vectorize``; here,
+  as in the above example, we demonstrate ``int64`` arrays.
+
+.. note::
+   A 1D array type can also receive scalar arguments (those with shape
+   ``()``). In the above example, the second argument could also be declared
+   as ``int64[:]``. In that case, the value must be read by ``y[0]``.
+
+We can now check what the compiled ufunc does, over a simple example::
+
+    >>> a = np.arange(5)
+    >>> a
+    array([0, 1, 2, 3, 4])
+    >>> g(a, 2)
+    array([2, 3, 4, 5, 6])
+
+The nice thing is that NumPy will automatically dispatch over more
+complicated inputs, depending on their shapes::
+
+    >>> a = np.arange(6).reshape(2, 3)
+    >>> a
+    array([[0, 1, 2],
+           [3, 4, 5]])
+    >>> g(a, 10)
+    array([[10, 11, 12],
+           [13, 14, 15]])
+    >>> g(a, np.array([10, 20]))
+    array([[10, 11, 12],
+           [23, 24, 25]])
+
+
+.. note::
+   Both :func:`~numba.vectorize` and :func:`~numba.guvectorize` support
+   passing ``nopython=True`` :ref:`as in the @jit decorator <jit-nopython>`.
+   Use it to ensure the generated code does not fall back to
+   :term:`object mode`.
+
+.. _overwriting-input-values:
+
+Overwriting input values
+------------------------
+
+In most cases, writing to inputs may also appear to work; however, this
+behaviour cannot be relied on.
+Consider the following example function::
+
+    @guvectorize([(float64[:], float64[:])], '()->()')
+    def init_values(invals, outvals):
+        invals[0] = 6.5
+        outvals[0] = 4.2
+
+Calling the `init_values` function with an array of `float64` type results in
+visible changes to the input::
+
+    >>> invals = np.zeros(shape=(3, 3), dtype=np.float64)
+    >>> outvals = init_values(invals)
+    >>> invals
+    array([[6.5, 6.5, 6.5],
+           [6.5, 6.5, 6.5],
+           [6.5, 6.5, 6.5]])
+    >>> outvals
+    array([[4.2, 4.2, 4.2],
+           [4.2, 4.2, 4.2],
+           [4.2, 4.2, 4.2]])
+
+This works because NumPy can pass the input data directly into the
+`init_values` function, as the data `dtype` matches that of the declared
+argument. However, it may also create and pass in a temporary array, in
+which case changes to the input are lost. For example, this can occur when
+casting is required. To demonstrate, we can use an array of `float32` with
+the `init_values` function::
+
+    >>> invals = np.zeros(shape=(3, 3), dtype=np.float32)
+    >>> outvals = init_values(invals)
+    >>> invals
+    array([[0., 0., 0.],
+           [0., 0., 0.],
+           [0., 0., 0.]], dtype=float32)
+
+In this case, there is no change to the `invals` array because the temporary
+array created for the cast was mutated instead.
+
+.. _dynamic-universal-functions:
+
+Dynamic universal functions
+===========================
+
+As described above, if you do not pass any signatures to the
+:func:`~numba.vectorize` decorator, your Python function will be used
+to build a dynamic universal function, or :class:`~numba.DUFunc`. For
+example::
+
+    from numba import vectorize
+
+    @vectorize
+    def f(x, y):
+        return x * y
+
+The resulting :func:`f` is a :class:`~numba.DUFunc` instance that
+starts with no supported input types. As you make calls to :func:`f`,
+Numba generates new kernels whenever you pass a previously unsupported
+input type. Given the example above, the following set of interpreter
+interactions illustrates how dynamic compilation works::
+
+    >>> f
+    <numba._DUFunc 'f'>
+    >>> f.ufunc
+    <ufunc 'f'>
+    >>> f.ufunc.types
+    []
+
+The example above shows that :class:`~numba.DUFunc` instances are not
+ufuncs. Rather than subclassing ufuncs, :class:`~numba.DUFunc`
+instances work by keeping a :attr:`~numba.DUFunc.ufunc` member, and
+then delegating ufunc property reads and method calls to this member
+(also known as type aggregation). When we look at the initial types
+supported by the ufunc, we can verify there are none.
+
+Let's try to make a call to :func:`f`::
+
+    >>> f(3,4)
+    12
+    >>> f.types   # shorthand for f.ufunc.types
+    ['ll->l']
+
+If this were a normal NumPy ufunc, we would have seen an exception
+complaining that the ufunc couldn't handle the input types. When we
+call :func:`f` with integer arguments, not only do we receive an
+answer, but we can verify that Numba created a loop supporting C
+:code:`long` integers.
+
+We can add additional loops by calling :func:`f` with different inputs::
+
+    >>> f(1.,2.)
+    2.0
+    >>> f.types
+    ['ll->l', 'dd->d']
+
+We can now verify that Numba added a second loop for dealing with
+floating-point inputs, :code:`"dd->d"`.
+
+If we mix input types to :func:`f`, we can verify that `NumPy ufunc
+casting rules`_ are still in effect::
+
+    >>> f(1,2.)
+    2.0
+    >>> f.types
+    ['ll->l', 'dd->d']
+
+.. _`NumPy ufunc casting rules`: http://docs.scipy.org/doc/numpy/reference/ufuncs.html#casting-rules
+
+This example demonstrates that calling :func:`f` with mixed types
+caused NumPy to select the floating-point loop, and cast the integer
+argument to a floating-point value. Thus, Numba did not create a
+special :code:`"dl->d"` kernel.
+This :class:`~numba.DUFunc` behavior leads us to a point similar to
+the warning given above in "`The @vectorize decorator`_" subsection,
+but instead of signature declaration order in the decorator, call
+order matters. If we had passed in floating-point arguments first,
+any calls with integer arguments would be cast to double-precision
+floating-point values. For example::
+
+    >>> @vectorize
+    ... def g(a, b): return a / b
+    ...
+    >>> g(2.,3.)
+    0.66666666666666663
+    >>> g(2,3)
+    0.66666666666666663
+    >>> g.types
+    ['dd->d']
+
+If you require precise support for various type signatures, you should
+specify them in the :func:`~numba.vectorize` decorator, and not rely
+on dynamic compilation.
+
+Dynamic generalized universal functions
+=======================================
+
+Similar to a dynamic universal function, if you do not specify any types to
+the :func:`~numba.guvectorize` decorator, your Python function will be used
+to build a dynamic generalized universal function, or :class:`~numba.GUFunc`.
+For example::
+
+    from numba import guvectorize
+
+    @guvectorize('(n),()->(n)')
+    def g(x, y, res):
+        for i in range(x.shape[0]):
+            res[i] = x[i] + y
+
+We can verify the resulting function :func:`g` is a :class:`~numba.GUFunc`
+instance that starts with no supported input types. For instance::
+
+    >>> g
+    <numba._GUFunc 'g'>
+    >>> g.ufunc
+    <ufunc 'g'>
+    >>> g.ufunc.types
+    []
+
+Similar to a :class:`~numba.DUFunc`, as one makes calls to :func:`g()`,
+Numba generates new kernels for previously unsupported input types. The
+following set of interpreter interactions will illustrate how dynamic
+compilation works for a :class:`~numba.GUFunc`::
+
+    >>> x = np.arange(5, dtype=np.int64)
+    >>> y = 10
+    >>> res = np.zeros_like(x)
+    >>> g(x, y, res)
+    >>> res
+    array([5, 6, 7, 8, 9])
+    >>> g.types
+    ['ll->l']
+
+If this were a normal :func:`guvectorize` function, we would have seen an
+exception complaining that the ufunc could not handle the given input types.
+When we call :func:`g()` with the input arguments, Numba creates a new loop
+for the input types.
+
+We can add additional loops by calling :func:`g` with new arguments::
+
+    >>> x = np.arange(5, dtype=np.double)
+    >>> y = 2.2
+    >>> res = np.zeros_like(x)
+    >>> g(x, y, res)
+
+We can now verify that Numba added a second loop for dealing with
+floating-point inputs, :code:`"dd->d"`::
+
+    >>> g.types   # shorthand for g.ufunc.types
+    ['ll->l', 'dd->d']
+
+One can also verify that NumPy ufunc casting rules are working as expected::
+
+    >>> x = np.arange(5, dtype=np.int64)
+    >>> y = 2.2
+    >>> res = np.zeros_like(x)
+    >>> g(x, y, res)
+    >>> res
+
+If you need precise support for various type signatures, you should not rely
+on dynamic compilation; instead, specify the types as the first argument in
+the :func:`~numba.guvectorize` decorator.
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/withobjmode.rst b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/withobjmode.rst
new file mode 100644
index 000000000..e94237e8b
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/docs/source/user/withobjmode.rst
@@ -0,0 +1,34 @@
+============================================================
+Callback into the Python Interpreter from within JIT'ed code
+============================================================
+
+There are rare but real cases when a nopython-mode function needs to call
+back into the Python interpreter to invoke code that cannot be compiled by
+Numba. Such cases include:
+
+- logging progress for long running JIT'ed functions;
+- using data structures that are not currently supported by Numba;
+- debugging inside JIT'ed code using the Python debugger.
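+
+As a quick illustration of the pattern before the details, here is a minimal
+sketch using the ``objmode`` context-manager documented below; the function
+name and the printed message are illustrative only::
+
+    import numpy as np
+    from numba import njit, objmode
+
+    @njit
+    def sorted_copy(x):
+        # Run arbitrary interpreter-level code; the type of every value
+        # leaving the block must be declared, here as 'float64[:]'.
+        with objmode(y='float64[:]'):
+            print("progress: sorting", len(x), "items")   # interpreter call
+            y = np.asarray(sorted(x), dtype=np.float64)
+        return y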
+
+When Numba calls back into the Python interpreter, the following has to
+happen:
+
+- acquire the GIL;
+- convert values in native representation back into Python objects;
+- call back into the Python interpreter;
+- convert returned values from the Python code into native representation;
+- release the GIL.
+
+These steps can be expensive. Users **should not** rely on the feature
+described here on performance-critical paths.
+
+
+.. _with_objmode:
+
+The ``objmode`` context-manager
+===============================
+
+.. warning:: This feature can be easily mis-used. Users should first consider
+             alternative approaches to achieve their intended goal before
+             using this feature.
+
+.. autofunction:: numba.objmode
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/install_numba.sh b/cv/3d_detection/pointrcnn-iou/pytorch/numba/install_numba.sh
new file mode 100644
index 000000000..207cbc9c3
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/install_numba.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+clang_version=`clang --version | grep "clang version 16."`
+if [[ "${clang_version}" != "" ]]; then
+    echo "LLVM 16 is not supported yet!"
+    exit 0
+fi
+
+TARGET_DIR=${TARGET_DIR:-}
+PYTHON_PATH=$(which python3)
+PYTHON_DIST_PATH=${TARGET_DIR}/lib/python3/dist-packages
+
+PKG_DIR="build_pip"
+PKG_NAME="numba"
+
+if [[ ! -d ${PKG_DIR} ]]; then
+    echo "ERROR: Package directory ${PKG_DIR} doesn't exist"
+    exit 1
+fi
+
+# Pick the most recently modified numba package in ${PKG_DIR}.
+latest_pkg="$(ls -t ${PKG_DIR} | grep ${PKG_NAME} | head -1)"
+if [[ "${latest_pkg}" == "" ]]; then
+    echo "ERROR: Cannot find latest ${PKG_NAME} package"
+    exit 1
+else
+    echo "INFO: Found latest package ${latest_pkg} in directory ${PKG_DIR}"
+fi
+
+if [[ "${TARGET_DIR}" != "" ]]; then
+    # Install into ${TARGET_DIR}, preserving any scripts already in bin/.
+    mkdir tmp
+    cp -R ${PYTHON_DIST_PATH}/bin ./tmp/
+    ${PYTHON_PATH} -m pip install --upgrade -t ${PYTHON_DIST_PATH} ${PKG_DIR}/${latest_pkg} || exit
+    cp -n ./tmp/bin/* ${PYTHON_DIST_PATH}/bin
+    rm -rf ./tmp
+    echo "${PKG_NAME} installed in ${PYTHON_DIST_PATH}; please add it to your PYTHONPATH."
+else
+    ${PYTHON_PATH} -m pip uninstall ${PKG_NAME} -y
+    ${PYTHON_PATH} -m pip install ${PKG_DIR}/${latest_pkg} || exit
+fi
+
+# finished installing successfully
+exit 0
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/mypy.ini b/cv/3d_detection/pointrcnn-iou/pytorch/numba/mypy.ini
new file mode 100644
index 000000000..0b790befd
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/mypy.ini
@@ -0,0 +1,52 @@
+# Global options:
+
+[mypy]
+warn_unused_configs = True
+follow_imports = silent
+show_error_context = True
+files = **/numba/core/types/*.py, **/numba/core/datamodel/*.py, **/numba/core/rewrites/*.py, **/numba/core/unsafe/*.py
+
+# Per-module options:
+# To classify a given module as Level 1, 2 or 3 it must be added both in
+# files (variable above) and in the lists below.
+# Level 1 - modules checked with the strictest settings.
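+# For example (hypothetical module name): to promote a module to Level 1,
+# add it to `files` above, then uncomment the template below with a header
+# such as `[mypy-numba.core.types.misc]`.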
+;[mypy-] +;warn_return_any = True +;disallow_any_expr = True +;disallow_any_explicit = True +;disallow_any_generics = True +;disallow_subclassing_any = True +;disallow_untyped_calls = True +;disallow_untyped_defs = True +;disallow_incomplete_defs = True +;check_untyped_defs = True +;disallow_untyped_decorators = True +;warn_unused_ignores = True +;follow_imports = normal +;warn_unreachable = True +;strict_equality = True + +# Level 2 - module that pass reasonably strict settings. +# No untyped functions allowed. Imports must be typed or explicitly ignored. +;[mypy-] +;warn_return_any = True +;disallow_untyped_defs = True +;disallow_incomplete_defs = True +;follow_imports = normal + +# Level 3 - modules that pass mypy default settings (only those in `files` global setting and not in previous levels) +# Function/variables are annotated to avoid mypy errors, but annotations are not complete. +[mypy-numba.core.*] +warn_return_any = True + +# Level 4 - modules that do not pass mypy check: they are excluded from "files" setting in global section + +# External packages that lack annotations +[mypy-llvmlite.*] +ignore_missing_imports = True + +[mypy-numpy.*] +ignore_missing_imports = True + +[mypy-winreg.*] +# this can be removed after Mypy 0.78 is out with the latest typeshed +ignore_missing_imports = True diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__init__.py new file mode 100644 index 000000000..0e437a600 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__init__.py @@ -0,0 +1,224 @@ +""" +Expose top-level symbols that are safe for import * +""" + +import platform +import re +import sys +import warnings + +from ._version import get_versions +from numba.misc.init_utils import generate_version_info + +__version__ = get_versions()['version'] +version_info = generate_version_info(__version__) +del get_versions +del generate_version_info + + +from numba.core import config +from numba.core import types, errors + +# Re-export typeof +from numba.misc.special import ( + typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init, + literally, literal_unroll, +) + +# Re-export error classes +from numba.core.errors import * + +# Re-export types itself +import numba.core.types as types + +# Re-export all type names +from numba.core.types import * + +# Re-export decorators +from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil, + jit_module) + +# Re-export vectorize decorators and the thread layer querying function +from numba.np.ufunc import (vectorize, guvectorize, threading_layer, + get_num_threads, set_num_threads, + set_parallel_chunksize, get_parallel_chunksize, + get_thread_id) + +# Re-export Numpy helpers +from numba.np.numpy_support import carray, farray, from_dtype + +# Re-export experimental +from numba import experimental + +# Initialize withcontexts +import numba.core.withcontexts +from numba.core.withcontexts import objmode_context as objmode +from numba.core.withcontexts import parallel_chunksize + +# Initialize target extensions +import numba.core.target_extension + +# Initialize typed containers +import numba.typed + +# Keep this for backward compatibility. +def test(argv, **kwds): + # To speed up the import time, avoid importing `unittest` and other test + # dependencies unless the user is actually trying to run tests. 
+ from numba.testing import _runtests as runtests + return runtests.main(argv, **kwds) + +__all__ = """ + cfunc + from_dtype + guvectorize + jit + experimental + njit + stencil + jit_module + typeof + prange + gdb + gdb_breakpoint + gdb_init + vectorize + objmode + literal_unroll + get_num_threads + set_num_threads + set_parallel_chunksize + get_parallel_chunksize + parallel_chunksize + """.split() + types.__all__ + errors.__all__ + + +_min_llvmlite_version = (0, 39, 0) +_min_llvm_version = (11, 0, 0) + +def _ensure_llvm(): + """ + Make sure llvmlite is operational. + """ + import warnings + import llvmlite + + # Only look at the the major, minor and bugfix version numbers. + # Ignore other stuffs + regex = re.compile(r'(\d+)\.(\d+).(\d+)') + m = regex.match(llvmlite.__version__) + if m: + ver = tuple(map(int, m.groups())) + if ver < _min_llvmlite_version: + msg = ("Numba requires at least version %d.%d.%d of llvmlite.\n" + "Installed version is %s.\n" + "Please update llvmlite." % + (_min_llvmlite_version + (llvmlite.__version__,))) + raise ImportError(msg) + else: + # Not matching? + warnings.warn("llvmlite version format not recognized!") + + from llvmlite.binding import llvm_version_info, check_jit_execution + + if llvm_version_info < _min_llvm_version: + msg = ("Numba requires at least version %d.%d.%d of LLVM.\n" + "Installed llvmlite is built against version %d.%d.%d.\n" + "Please update llvmlite." % + (_min_llvm_version + llvm_version_info)) + raise ImportError(msg) + + check_jit_execution() + +def _ensure_critical_deps(): + """ + Make sure Python, NumPy and SciPy have supported versions. + """ + from numba.np.numpy_support import numpy_version + from numba.core.utils import PYVERSION + + if PYVERSION < (3, 7): + raise ImportError("Numba needs Python 3.7 or greater") + + if numpy_version < (1, 18): + raise ImportError("Numba needs NumPy 1.18 or greater") + elif numpy_version > (1, 23): + raise ImportError("Numba needs NumPy 1.23 or less") + + try: + import scipy + except ImportError: + pass + else: + sp_version = tuple(map(int, scipy.__version__.split('.')[:2])) + if sp_version < (1, 0): + raise ImportError("Numba requires SciPy version 1.0 or greater") + + +def _try_enable_svml(): + """ + Tries to enable SVML if configuration permits use and the library is found. + """ + if not config.DISABLE_INTEL_SVML: + try: + if sys.platform.startswith('linux'): + llvmlite.binding.load_library_permanently("libsvml.so") + elif sys.platform.startswith('darwin'): + llvmlite.binding.load_library_permanently("libsvml.dylib") + elif sys.platform.startswith('win'): + llvmlite.binding.load_library_permanently("svml_dispmd") + else: + return False + # The SVML library is loaded, therefore SVML *could* be supported. + # Now see if LLVM has been compiled with the SVML support patch. + # If llvmlite has the checking function `has_svml` and it returns + # True, then LLVM was compiled with SVML support and the the setup + # for SVML can proceed. We err on the side of caution and if the + # checking function is missing, regardless of that being fine for + # most 0.23.{0,1} llvmlite instances (i.e. conda or pip installed), + # we assume that SVML was not compiled in. llvmlite 0.23.2 is a + # bugfix release with the checking function present that will always + # produce correct behaviour. For context see: #3006. 
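+        # Probe for llvmlite's has_svml() check; if the function is missing
+        # we conservatively treat it as "SVML not compiled in" (see the
+        # explanation above).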
+ try: + if not getattr(llvmlite.binding.targets, "has_svml")(): + # has detection function, but no svml compiled in, therefore + # disable SVML + return False + except AttributeError: + if platform.machine() == 'x86_64' and config.DEBUG: + msg = ("SVML was found but llvmlite >= 0.23.2 is " + "needed to support it.") + warnings.warn(msg) + # does not have detection function, cannot detect reliably, + # disable SVML. + return False + + # All is well, detection function present and reports SVML is + # compiled in, set the vector library to SVML. + llvmlite.binding.set_option('SVML', '-vector-library=SVML') + return True + except: + if platform.machine() == 'x86_64' and config.DEBUG: + warnings.warn("SVML was not found/could not be loaded.") + return False + +_ensure_llvm() +_ensure_critical_deps() + +# we know llvmlite is working as the above tests passed, import it now as SVML +# needs to mutate runtime options (sets the `-vector-library`). +import llvmlite + +""" +Is set to True if Intel SVML is in use. +""" +config.USING_SVML = _try_enable_svml() + + +# ---------------------- WARNING WARNING WARNING ---------------------------- +# The following imports occur below here (SVML init) because somewhere in their +# import sequence they have a `@njit` wrapped function. This triggers too early +# a bind to the underlying LLVM libraries which then irretrievably sets the LLVM +# SVML state to "no SVML". See https://github.com/numba/numba/issues/4689 for +# context. +# ---------------------- WARNING WARNING WARNING ---------------------------- diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__main__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__main__.py new file mode 100644 index 000000000..4e85bf372 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/__main__.py @@ -0,0 +1,6 @@ +"""Expose Numba command via ``python -m numba``.""" +import sys +from numba.misc.numba_entry import main + +if __name__ == '__main__': + sys.exit(main()) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_arraystruct.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_arraystruct.h new file mode 100644 index 000000000..dcb866e2b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_arraystruct.h @@ -0,0 +1,21 @@ +#ifndef NUMBA_ARYSTRUCT_H_ +#define NUMBA_ARYSTRUCT_H_ +/* + * Fill in the *arystruct* with information from the Numpy array *obj*. + * *arystruct*'s layout is defined in numba.targets.arrayobj (look + * for the ArrayTemplate class). + */ + +typedef struct { + void *meminfo; /* see _nrt_python.c and nrt.h in numba/core/runtime */ + PyObject *parent; + npy_intp nitems; + npy_intp itemsize; + void *data; + + npy_intp shape_and_strides[]; +} arystruct_t; + + +#endif /* NUMBA_ARYSTRUCT_H_ */ + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.cpp b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.cpp new file mode 100644 index 000000000..3d40bee8b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.cpp @@ -0,0 +1,142 @@ +/* This file contains the base class implementation for all device arrays. The + * base class is implemented in C so that computing typecodes for device arrays + * can be implemented efficiently. */ + +#include "_pymodule.h" + + +/* Include _devicearray., but make sure we don't get the definitions intended + * for consumers of the Device Array API. 
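+ * Consumers (for instance _dispatcher.cpp in this package) instead retrieve
+ * the API through the capsule, roughly:
+ *
+ *   DeviceArray_API = (void**)PyCapsule_Import(
+ *       "numba._devicearray._DEVICEARRAY_API", 0);
+ *
+ * after which the DeviceArrayType macro in _devicearray.h resolves through
+ * that pointer table.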
+ */ +#define NUMBA_IN_DEVICEARRAY_CPP_ +#include "_devicearray.h" + +/* DeviceArray PyObject implementation. Note that adding more members here is + * presently prohibited because mapped and managed arrays derive from both + * DeviceArray and NumPy's ndarray, which is also a C extension class - the + * layout of the object cannot be resolved if this class also has members beyond + * PyObject_HEAD. */ +class DeviceArray { + PyObject_HEAD +}; + +/* Trivial traversal - DeviceArray instances own nothing. */ +static int +DeviceArray_traverse(DeviceArray *self, visitproc visit, void *arg) +{ + return 0; +} + +/* Trivial clear of all references - DeviceArray instances own nothing. */ +static int +DeviceArray_clear(DeviceArray *self) +{ + return 0; +} + +/* The _devicearray.DeviceArray type */ +PyTypeObject DeviceArrayType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_devicearray.DeviceArray", /* tp_name */ + sizeof(DeviceArray), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call*/ + 0, /* tp_str*/ + 0, /* tp_getattro*/ + 0, /* tp_setattro*/ + 0, /* tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags*/ + "DeviceArray object", /* tp_doc */ + (traverseproc) DeviceArray_traverse, /* tp_traverse */ + (inquiry) DeviceArray_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ +#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 8 + 0, /* tp_vectorcall */ + 0, /* tp_print */ +#endif +}; + +/* CUDA device array C API */ +static void *_DeviceArray_API[1] = { + (void*)&DeviceArrayType +}; + +MOD_INIT(_devicearray) { + PyObject *m = nullptr; + PyObject *d = nullptr; + PyObject *c_api = nullptr; + int error = 0; + + MOD_DEF(m, "_devicearray", "No docs", NULL) + if (m == NULL) + goto error_occurred; + + c_api = PyCapsule_New((void *)_DeviceArray_API, "numba._devicearray._DEVICEARRAY_API", NULL); + if (c_api == NULL) + goto error_occurred; + + DeviceArrayType.tp_new = PyType_GenericNew; + if (PyType_Ready(&DeviceArrayType) < 0) + goto error_occurred; + + Py_INCREF(&DeviceArrayType); + error = PyModule_AddObject(m, "DeviceArray", (PyObject*)(&DeviceArrayType)); + if (error) + goto error_occurred; + + d = PyModule_GetDict(m); + if (d == NULL) + goto error_occurred; + + error = PyDict_SetItemString(d, "_DEVICEARRAY_API", c_api); + /* Decref and set c_api to NULL, Py_XDECREF in error_occurred will have no + * effect. 
*/ + Py_CLEAR(c_api); + + if (error) + goto error_occurred; + + return MOD_SUCCESS_VAL(m); + +error_occurred: + Py_XDECREF(m); + Py_XDECREF(c_api); + Py_XDECREF((PyObject*)&DeviceArrayType); + + return MOD_ERROR_VAL; +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.h new file mode 100644 index 000000000..5b276eacf --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_devicearray.h @@ -0,0 +1,25 @@ +#ifndef NUMBA_DEVICEARRAY_H_ +#define NUMBA_DEVICEARRAY_H_ + +#ifdef __cplusplus + extern "C" { +#endif + +/* These definitions should only be used by consumers of the Device Array API. + * Consumers access the API through the opaque pointer stored in + * _devicearray._DEVICEARRAY_API. We don't want these definitions in + * _devicearray.cpp itself because they would conflict with the actual + * implementations there. + */ +#ifndef NUMBA_IN_DEVICEARRAY_CPP_ + + extern void **DeviceArray_API; + #define DeviceArrayType (*(PyTypeObject*)DeviceArray_API[0]) + +#endif /* ndef NUMBA_IN_DEVICEARRAY_CPP */ + +#ifdef __cplusplus + } +#endif + +#endif /* NUMBA_DEVICEARRAY_H_ */ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dispatcher.cpp b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dispatcher.cpp new file mode 100644 index 000000000..5ffefb245 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dispatcher.cpp @@ -0,0 +1,1223 @@ +#include "_pymodule.h" + +#include +#include +#include +#include + +#include "_typeof.h" +#include "frameobject.h" +#include "core/typeconv/typeconv.hpp" +#include "_devicearray.h" + +/* + * Notes on the C_TRACE macro: + * + * The original C_TRACE macro (from ceval.c) would call + * PyTrace_C_CALL et al., for which the frame argument wouldn't + * be usable. Since we explicitly synthesize a frame using the + * original Python code object, we call PyTrace_CALL instead so + * the profiler can report the correct source location. + * + * Likewise, while ceval.c would call PyTrace_C_EXCEPTION in case + * of error, the profiler would simply expect a RETURN in case of + * a Python function, so we generate that here (making sure the + * exception state is preserved correctly). + * + */ + +/* + * NOTE: There is a version split for tracing code. Python 3.10 introduced a + * trace_info structure to help make tracing more robust. See: + * https://github.com/python/cpython/pull/24726 + */ +#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L36-L40 + */ +typedef struct { + PyCodeObject *code; // The code object for the bounds. May be NULL. + PyCodeAddressRange bounds; // Only valid if code != NULL. + CFrame cframe; +} PyTraceInfo; + + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1257-L1266 + * NOTE: The function is renamed. 
+ */ +static void +_nb_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) +{ + range->opaque.lo_next = linetable; + range->opaque.limit = range->opaque.lo_next + length; + range->ar_start = -1; + range->ar_end = 0; + range->opaque.computed_line = firstlineno; + range->ar_line = -1; +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1269-L1275 + * NOTE: The function is renamed. + */ +static int +_nb_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) +{ + const char *linetable = PyBytes_AS_STRING(co->co_linetable); + Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable); + _nb_PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); + return bounds->ar_line; +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5468-L5475 + * NOTE: The call to _PyCode_InitAddressRange is renamed. + */ +static void +initialize_trace_info(PyTraceInfo *trace_info, PyFrameObject *frame) +{ + if (trace_info->code != frame->f_code) { + trace_info->code = frame->f_code; + _nb_PyCode_InitAddressRange(frame->f_code, &trace_info->bounds); + } +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5477-L5501 + */ +static int +call_trace(Py_tracefunc func, PyObject *obj, + PyThreadState *tstate, PyFrameObject *frame, + PyTraceInfo *trace_info, + int what, PyObject *arg) +{ + int result; + if (tstate->tracing) + return 0; + tstate->tracing++; + tstate->cframe->use_tracing = 0; + if (frame->f_lasti < 0) { + frame->f_lineno = frame->f_code->co_firstlineno; + } + else { + initialize_trace_info(trace_info, frame); + frame->f_lineno = _PyCode_CheckLineNumber(frame->f_lasti*sizeof(_Py_CODEUNIT), &trace_info->bounds); + } + result = func(obj, frame, what, arg); + frame->f_lineno = 0; + tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL) + || (tstate->c_profilefunc != NULL)); + tstate->tracing--; + return result; +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5445-L5466 + */ +static int +call_trace_protected(Py_tracefunc func, PyObject *obj, + PyThreadState *tstate, PyFrameObject *frame, + PyTraceInfo *trace_info, + int what, PyObject *arg) +{ + PyObject *type, *value, *traceback; + int err; + PyErr_Fetch(&type, &value, &traceback); + err = call_trace(func, obj, tstate, frame, trace_info, what, arg); + if (err == 0) + { + PyErr_Restore(type, value, traceback); + return 0; + } + else + { + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + return -1; + } +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5810-L5839 + * NOTE: The state test https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5811 + * has been removed, it's dealt with in call_cfunc. 
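+ * In outline, C_TRACE(x, call) notifies a registered profiler around the
+ * jitted call:
+ *
+ *   call_trace(..., PyTrace_CALL, cfunc);    // "function entered"
+ *   x = call;                                // run the compiled function
+ *   call_trace(..., PyTrace_RETURN, cfunc);  // "function returned"
+ *
+ * with the error paths (x == NULL) routed through call_trace_protected so
+ * that a pending exception is preserved.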
+ */ +#define C_TRACE(x, call) \ +if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ + tstate, tstate->frame, &trace_info, PyTrace_CALL,\ + cfunc)) \ + x = NULL; \ +else \ +{ \ + x = call; \ + if (tstate->c_profilefunc != NULL) \ + { \ + if (x == NULL) \ + { \ + call_trace_protected(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate, tstate->frame, \ + &trace_info, \ + PyTrace_RETURN, cfunc); \ + /* XXX should pass (type, value, tb) */ \ + } \ + else \ + { \ + if (call_trace(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate, tstate->frame, \ + &trace_info, \ + PyTrace_RETURN, cfunc)) \ + { \ + Py_DECREF(x); \ + x = NULL; \ + } \ + } \ + } \ +} + +#else + +/* + * Code originally from: + * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4242-L4257 + */ +static int +call_trace(Py_tracefunc func, PyObject *obj, + PyThreadState *tstate, PyFrameObject *frame, + int what, PyObject *arg) +{ + int result; + if (tstate->tracing) + return 0; + tstate->tracing++; + tstate->use_tracing = 0; + result = func(obj, frame, what, arg); + tstate->use_tracing = ((tstate->c_tracefunc != NULL) + || (tstate->c_profilefunc != NULL)); + tstate->tracing--; + return result; +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240 + */ +static int +call_trace_protected(Py_tracefunc func, PyObject *obj, + PyThreadState *tstate, PyFrameObject *frame, + int what, PyObject *arg) +{ + PyObject *type, *value, *traceback; + int err; + PyErr_Fetch(&type, &value, &traceback); + err = call_trace(func, obj, tstate, frame, what, arg); + if (err == 0) + { + PyErr_Restore(type, value, traceback); + return 0; + } + else + { + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + return -1; + } +} + +/* + * Code originally from: + * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4520-L4549 + * NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521 + * has been removed, it's dealt with in call_cfunc. + */ +#define C_TRACE(x, call) \ +if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \ + tstate, tstate->frame, PyTrace_CALL, cfunc)) \ + x = NULL; \ +else \ +{ \ + x = call; \ + if (tstate->c_profilefunc != NULL) \ + { \ + if (x == NULL) \ + { \ + call_trace_protected(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate, tstate->frame, \ + PyTrace_RETURN, cfunc); \ + /* XXX should pass (type, value, tb) */ \ + } \ + else \ + { \ + if (call_trace(tstate->c_profilefunc, \ + tstate->c_profileobj, \ + tstate, tstate->frame, \ + PyTrace_RETURN, cfunc)) \ + { \ + Py_DECREF(x); \ + x = NULL; \ + } \ + } \ + } \ +} + + +#endif + +typedef std::vector TypeTable; +typedef std::vector Functions; + +/* The Dispatcher class is the base class of all dispatchers in the CPU and + CUDA targets. Its main responsibilities are: + + - Resolving the best overload to call for a given set of arguments, and + - Calling the resolved overload. + + This logic is implemented within this class for efficiency (lookup of the + appropriate overload needs to be fast) and ease of implementation (calling + directly into a compiled function using a function pointer is easier within + the C++ code where the overload has been resolved). 
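+
+   As an illustration of the storage layout (hypothetical typecodes): with
+   argct == 2 and two registered overloads f(int64, int64) and
+   f(float64, float64), `functions` holds the two function pointers while
+   `overloads` holds the flattened typecode table
+   [int64, int64, float64, float64]; resolve() walks this table argct
+   entries at a time to pick the best match.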
*/ +class Dispatcher { +public: + PyObject_HEAD + /* Whether compilation of new overloads is permitted */ + char can_compile; + /* Whether fallback to object mode is permitted */ + char can_fallback; + /* Whether types must match exactly when resolving overloads. + If not, conversions (e.g. float32 -> float64) are permitted when + searching for a match. */ + char exact_match_required; + /* Borrowed reference */ + PyObject *fallbackdef; + /* Whether to fold named arguments and default values + (false for lifted loops) */ + int fold_args; + /* Whether the last positional argument is a stararg */ + int has_stararg; + /* Tuple of argument names */ + PyObject *argnames; + /* Tuple of default values */ + PyObject *defargs; + /* Number of arguments to function */ + int argct; + /* Used for selecting overloaded function implementations */ + TypeManager *tm; + /* An array of overloads */ + Functions functions; + /* A flattened array of argument types to all overloads + * (invariant: sizeof(overloads) == argct * sizeof(functions)) */ + TypeTable overloads; + + /* Add a new overload. Parameters: + + - args: An array of Type objects, one for each parameter + - callable: The callable implementing this overload. */ + void addDefinition(Type args[], PyObject *callable) { + overloads.reserve(argct + overloads.size()); + for (int i=0; iselectOverload(sig, &overloads[0], selected, argct, + ovct, allow_unsafe, + exact_match_required); + } + if (matches == 1) { + return functions[selected]; + } + return NULL; + } + + /* Remove all overloads */ + void clear() { + functions.clear(); + overloads.clear(); + } + +}; + + +static int +Dispatcher_traverse(Dispatcher *self, visitproc visit, void *arg) +{ + Py_VISIT(self->defargs); + return 0; +} + +static void +Dispatcher_dealloc(Dispatcher *self) +{ + Py_XDECREF(self->argnames); + Py_XDECREF(self->defargs); + self->clear(); + Py_TYPE(self)->tp_free((PyObject*)self); +} + + +static int +Dispatcher_init(Dispatcher *self, PyObject *args, PyObject *kwds) +{ + PyObject *tmaddrobj; + void *tmaddr; + int argct; + int can_fallback; + int has_stararg = 0; + int exact_match_required = 0; + + if (!PyArg_ParseTuple(args, "OiiO!O!i|ii", &tmaddrobj, &argct, + &self->fold_args, + &PyTuple_Type, &self->argnames, + &PyTuple_Type, &self->defargs, + &can_fallback, + &has_stararg, + &exact_match_required + )) { + return -1; + } + Py_INCREF(self->argnames); + Py_INCREF(self->defargs); + tmaddr = PyLong_AsVoidPtr(tmaddrobj); + self->tm = static_cast(tmaddr); + self->argct = argct; + self->can_compile = 1; + self->can_fallback = can_fallback; + self->fallbackdef = NULL; + self->has_stararg = has_stararg; + self->exact_match_required = exact_match_required; + return 0; +} + +static PyObject * +Dispatcher_clear(Dispatcher *self, PyObject *args) +{ + self->clear(); + Py_RETURN_NONE; +} + +static +PyObject* +Dispatcher_Insert(Dispatcher *self, PyObject *args, PyObject *kwds) +{ + /* The cuda kwarg is a temporary addition until CUDA overloads are compiled + * functions. Once they are compiled functions, kwargs can be removed from + * this function. 
*/ + static char *keywords[] = { + (char*)"sig", + (char*)"func", + (char*)"objectmode", + (char*)"cuda", + NULL + }; + + PyObject *sigtup, *cfunc; + int i, sigsz; + int *sig; + int objectmode = 0; + int cuda = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|ip", keywords, &sigtup, + &cfunc, &objectmode, &cuda)) { + return NULL; + } + + if (!cuda && !PyObject_TypeCheck(cfunc, &PyCFunction_Type) ) { + PyErr_SetString(PyExc_TypeError, "must be builtin_function_or_method"); + return NULL; + } + + sigsz = PySequence_Fast_GET_SIZE(sigtup); + sig = new int[sigsz]; + + for (i = 0; i < sigsz; ++i) { + sig[i] = PyLong_AsLong(PySequence_Fast_GET_ITEM(sigtup, i)); + } + + /* The reference to cfunc is borrowed; this only works because the + derived Python class also stores an (owned) reference to cfunc. */ + self->addDefinition(sig, cfunc); + + /* Add pure python fallback */ + if (!self->fallbackdef && objectmode){ + self->fallbackdef = cfunc; + } + + delete[] sig; + + Py_RETURN_NONE; +} + +static +void explain_issue(PyObject *dispatcher, PyObject *args, PyObject *kws, + const char *method_name, const char *default_msg) +{ + PyObject *callback, *result; + callback = PyObject_GetAttrString(dispatcher, method_name); + if (!callback) { + PyErr_SetString(PyExc_TypeError, default_msg); + return; + } + result = PyObject_Call(callback, args, kws); + Py_DECREF(callback); + if (result != NULL) { + PyErr_Format(PyExc_RuntimeError, "%s must raise an exception", + method_name); + Py_DECREF(result); + } +} + +static +void explain_ambiguous(PyObject *dispatcher, PyObject *args, PyObject *kws) +{ + explain_issue(dispatcher, args, kws, "_explain_ambiguous", + "Ambiguous overloading"); +} + +static +void explain_matching_error(PyObject *dispatcher, PyObject *args, PyObject *kws) +{ + explain_issue(dispatcher, args, kws, "_explain_matching_error", + "No matching definition"); +} + +static +int search_new_conversions(PyObject *dispatcher, PyObject *args, PyObject *kws) +{ + PyObject *callback, *result; + int res; + + callback = PyObject_GetAttrString(dispatcher, + "_search_new_conversions"); + if (!callback) { + return -1; + } + result = PyObject_Call(callback, args, kws); + Py_DECREF(callback); + if (result == NULL) { + return -1; + } + if (!PyBool_Check(result)) { + Py_DECREF(result); + PyErr_SetString(PyExc_TypeError, + "_search_new_conversions() should return a boolean"); + return -1; + } + res = (result == Py_True) ? 1 : 0; + Py_DECREF(result); + return res; +} + + +/* A custom, fast, inlinable version of PyCFunction_Call() */ +static PyObject * +call_cfunc(Dispatcher *self, PyObject *cfunc, PyObject *args, PyObject *kws, PyObject *locals) +{ + PyCFunctionWithKeywords fn; + PyThreadState *tstate; + + assert(PyCFunction_Check(cfunc)); + assert(PyCFunction_GET_FLAGS(cfunc) == (METH_VARARGS | METH_KEYWORDS)); + fn = (PyCFunctionWithKeywords) PyCFunction_GET_FUNCTION(cfunc); + tstate = PyThreadState_GET(); + +#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) + /* + * On Python 3.10+ trace_info comes from somewhere up in PyFrameEval et al, + * Numba doesn't have access to that so creates an equivalent struct and + * wires it up against the cframes. This is passed into the tracing + * functions. 
+ * + * Code originally from: + * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L1611-L1622 + */ + PyTraceInfo trace_info; + trace_info.code = NULL; // not initialized + CFrame *prev_cframe = tstate->cframe; + trace_info.cframe.use_tracing = prev_cframe->use_tracing; + trace_info.cframe.previous = prev_cframe; + + if (trace_info.cframe.use_tracing && tstate->c_profilefunc) +#else + /* + * On Python prior to 3.10, tracing state is a member of the threadstate + */ + if (tstate->use_tracing && tstate->c_profilefunc) +#endif + { + /* + * The following code requires some explaining: + * + * We want the jit-compiled function to be visible to the profiler, so we + * need to synthesize a frame for it. + * The PyFrame_New() constructor doesn't do anything with the 'locals' value if the 'code's + * 'CO_NEWLOCALS' flag is set (which is always the case nowadays). + * So, to get local variables into the frame, we have to manually set the 'f_locals' + * member, then call `PyFrame_LocalsToFast`, where a subsequent call to the `frame.f_locals` + * property (by virtue of the `frame_getlocals` function in frameobject.c) will find them. + */ + PyCodeObject *code = (PyCodeObject*)PyObject_GetAttrString((PyObject*)self, "__code__"); + PyObject *globals = PyDict_New(); + PyObject *builtins = PyEval_GetBuiltins(); + PyFrameObject *frame = NULL; + PyObject *result = NULL; + + if (!code) { + PyErr_Format(PyExc_RuntimeError, "No __code__ attribute found."); + goto error; + } + /* Populate builtins, which is required by some JITted functions */ + if (PyDict_SetItemString(globals, "__builtins__", builtins)) { + goto error; + } + + /* unset the CO_OPTIMIZED flag, make the frame get a new locals dict */ + code->co_flags &= 0xFFFE; + + frame = PyFrame_New(tstate, code, globals, locals); + if (frame == NULL) { + goto error; + } + /* Populate the 'fast locals' in `frame` */ + PyFrame_LocalsToFast(frame, 0); + tstate->frame = frame; + C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws)); + /* write changes back to locals? */ + PyFrame_FastToLocals(frame); + tstate->frame = frame->f_back; + + error: + Py_XDECREF(frame); + Py_XDECREF(globals); + Py_XDECREF(code); + return result; + } + else + { + return fn(PyCFunction_GET_SELF(cfunc), args, kws); + } +} + +static +PyObject* +compile_and_invoke(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals) +{ + /* Compile a new one */ + PyObject *cfa, *cfunc, *retval; + cfa = PyObject_GetAttrString((PyObject*)self, "_compile_for_args"); + if (cfa == NULL) + return NULL; + + /* NOTE: we call the compiled function ourselves instead of + letting the Python derived class do it. This is for proper + behaviour of globals() in jitted functions (issue #476). */ + cfunc = PyObject_Call(cfa, args, kws); + Py_DECREF(cfa); + + if (cfunc == NULL) + return NULL; + + if (PyObject_TypeCheck(cfunc, &PyCFunction_Type)) { + retval = call_cfunc(self, cfunc, args, kws, locals); + } else { + /* Re-enter interpreter */ + retval = PyObject_Call(cfunc, args, kws); + } + Py_DECREF(cfunc); + + return retval; +} + +/* A copy of compile_and_invoke, that only compiles. This is needed for CUDA + * kernels, because its overloads are Python instances of the _Kernel class, + * rather than compiled functions. Once CUDA overloads are compiled functions, + * cuda_compile_only can be removed. 
*/ +static +PyObject* +cuda_compile_only(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals) +{ + /* Compile a new one */ + PyObject *cfa, *cfunc; + cfa = PyObject_GetAttrString((PyObject*)self, "_compile_for_args"); + if (cfa == NULL) + return NULL; + + cfunc = PyObject_Call(cfa, args, kws); + Py_DECREF(cfa); + + return cfunc; +} + +static int +find_named_args(Dispatcher *self, PyObject **pargs, PyObject **pkws) +{ + PyObject *oldargs = *pargs, *newargs; + PyObject *kws = *pkws; + Py_ssize_t pos_args = PyTuple_GET_SIZE(oldargs); + Py_ssize_t named_args, total_args, i; + Py_ssize_t func_args = PyTuple_GET_SIZE(self->argnames); + Py_ssize_t defaults = PyTuple_GET_SIZE(self->defargs); + /* Last parameter with a default value */ + Py_ssize_t last_def = (self->has_stararg) + ? func_args - 2 + : func_args - 1; + /* First parameter with a default value */ + Py_ssize_t first_def = last_def - defaults + 1; + /* Minimum number of required arguments */ + Py_ssize_t minargs = first_def; + + if (kws != NULL) + named_args = PyDict_Size(kws); + else + named_args = 0; + total_args = pos_args + named_args; + if (!self->has_stararg && total_args > func_args) { + PyErr_Format(PyExc_TypeError, + "too many arguments: expected %d, got %d", + (int) func_args, (int) total_args); + return -1; + } + else if (total_args < minargs) { + if (minargs == func_args) + PyErr_Format(PyExc_TypeError, + "not enough arguments: expected %d, got %d", + (int) minargs, (int) total_args); + else + PyErr_Format(PyExc_TypeError, + "not enough arguments: expected at least %d, got %d", + (int) minargs, (int) total_args); + return -1; + } + newargs = PyTuple_New(func_args); + if (!newargs) + return -1; + /* First pack the stararg */ + if (self->has_stararg) { + Py_ssize_t stararg_size = Py_MAX(0, pos_args - func_args + 1); + PyObject *stararg = PyTuple_New(stararg_size); + if (!stararg) { + Py_DECREF(newargs); + return -1; + } + for (i = 0; i < stararg_size; i++) { + PyObject *value = PyTuple_GET_ITEM(oldargs, func_args - 1 + i); + Py_INCREF(value); + PyTuple_SET_ITEM(stararg, i, value); + } + /* Put it in last position */ + PyTuple_SET_ITEM(newargs, func_args - 1, stararg); + + } + for (i = 0; i < pos_args; i++) { + PyObject *value = PyTuple_GET_ITEM(oldargs, i); + if (self->has_stararg && i >= func_args - 1) { + /* Skip stararg */ + break; + } + Py_INCREF(value); + PyTuple_SET_ITEM(newargs, i, value); + } + + /* Iterate over missing positional arguments, try to find them in + named arguments or default values. */ + for (i = pos_args; i < func_args; i++) { + PyObject *name = PyTuple_GET_ITEM(self->argnames, i); + if (self->has_stararg && i >= func_args - 1) { + /* Skip stararg */ + break; + } + if (kws != NULL) { + /* Named argument? */ + PyObject *value = PyDict_GetItem(kws, name); + if (value != NULL) { + Py_INCREF(value); + PyTuple_SET_ITEM(newargs, i, value); + named_args--; + continue; + } + } + if (i >= first_def && i <= last_def) { + /* Argument has a default value? 
*/ + PyObject *value = PyTuple_GET_ITEM(self->defargs, i - first_def); + Py_INCREF(value); + PyTuple_SET_ITEM(newargs, i, value); + continue; + } + else if (i < func_args - 1 || !self->has_stararg) { + PyErr_Format(PyExc_TypeError, + "missing argument '%s'", + PyString_AsString(name)); + Py_DECREF(newargs); + return -1; + } + } + if (named_args) { + PyErr_Format(PyExc_TypeError, + "some keyword arguments unexpected"); + Py_DECREF(newargs); + return -1; + } + *pargs = newargs; + *pkws = NULL; + return 0; +} + + +/* + * Management of thread-local + */ + +#ifdef _MSC_VER +#define THREAD_LOCAL(ty) __declspec(thread) ty +#else +/* Non-standard C99 extension that's understood by gcc and clang */ +#define THREAD_LOCAL(ty) __thread ty +#endif + +static THREAD_LOCAL(bool) use_tls_target_stack; + + +struct raii_use_tls_target_stack { + bool old_setting; + + raii_use_tls_target_stack(bool new_setting) + : old_setting(use_tls_target_stack) + { + use_tls_target_stack = new_setting; + } + + ~raii_use_tls_target_stack() { + use_tls_target_stack = old_setting; + } +}; + +static PyObject* +Dispatcher_call(Dispatcher *self, PyObject *args, PyObject *kws) +{ + PyObject *tmptype, *retval = NULL; + int *tys = NULL; + int argct; + int i; + int prealloc[24]; + int matches; + PyObject *cfunc; + PyThreadState *ts = PyThreadState_Get(); + PyObject *locals = NULL; + + // Check TLS target stack + if (use_tls_target_stack) { + raii_use_tls_target_stack turn_off(false); + PyObject * meth_call_tls_target; + meth_call_tls_target = PyObject_GetAttrString((PyObject*)self, + "_call_tls_target"); + if (!meth_call_tls_target) return NULL; + // Transfer control to self._call_tls_target + retval = PyObject_Call(meth_call_tls_target, args, kws); + Py_DECREF(meth_call_tls_target); + return retval; + } + + /* If compilation is enabled, ensure that an exact match is found and if + * not compile one */ + int exact_match_required = self->can_compile ? 1 : self->exact_match_required; + +#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) + if (ts->tracing && ts->c_profilefunc) { +#else + if (ts->use_tracing && ts->c_profilefunc) { +#endif + locals = PyEval_GetLocals(); + if (locals == NULL) { + goto CLEANUP; + } + } + if (self->fold_args) { + if (find_named_args(self, &args, &kws)) + return NULL; + } + else + Py_INCREF(args); + /* Now we own a reference to args */ + + argct = PySequence_Fast_GET_SIZE(args); + + if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int))) + tys = prealloc; + else + tys = new int[argct]; + + for (i = 0; i < argct; ++i) { + tmptype = PySequence_Fast_GET_ITEM(args, i); + tys[i] = typeof_typecode((PyObject *) self, tmptype); + if (tys[i] == -1) { + if (self->can_fallback){ + /* We will clear the exception if fallback is allowed. */ + PyErr_Clear(); + } else { + goto CLEANUP; + } + } + } + + /* We only allow unsafe conversions if compilation of new specializations + has been disabled. + + Note that the number of matches is returned in matches by resolve, which + accepts it as a reference. */ + cfunc = self->resolve(tys, matches, !self->can_compile, + exact_match_required); + + if (matches == 0 && !self->can_compile) { + /* + * If we can't compile a new specialization, look for + * matching signatures for which conversions haven't been + * registered on the C++ TypeManager. 
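+     * search_new_conversions() re-enters Python via the dispatcher's
+     * _search_new_conversions() method and reports (1/0) whether any new
+     * conversion was registered, in which case resolve() is retried below.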
+ */ + int res = search_new_conversions((PyObject *) self, args, kws); + if (res < 0) { + retval = NULL; + goto CLEANUP; + } + if (res > 0) { + /* Retry with the newly registered conversions */ + cfunc = self->resolve(tys, matches, !self->can_compile, + exact_match_required); + } + } + if (matches == 1) { + /* Definition is found */ + retval = call_cfunc(self, cfunc, args, kws, locals); + } else if (matches == 0) { + /* No matching definition */ + if (self->can_compile) { + retval = compile_and_invoke(self, args, kws, locals); + } else if (self->fallbackdef) { + /* Have object fallback */ + retval = call_cfunc(self, self->fallbackdef, args, kws, locals); + } else { + /* Raise TypeError */ + explain_matching_error((PyObject *) self, args, kws); + retval = NULL; + } + } else if (self->can_compile) { + /* Ambiguous, but are allowed to compile */ + retval = compile_and_invoke(self, args, kws, locals); + } else { + /* Ambiguous */ + explain_ambiguous((PyObject *) self, args, kws); + retval = NULL; + } + +CLEANUP: + if (tys != prealloc) + delete[] tys; + Py_DECREF(args); + + return retval; +} + +/* Based on Dispatcher_call above, with the following differences: + 1. It does not invoke the definition of the function. + 2. It returns the definition, instead of a value returned by the function. + + This is because CUDA functions are, at present, _Kernel objects rather than + compiled functions. */ +static PyObject* +Dispatcher_cuda_call(Dispatcher *self, PyObject *args, PyObject *kws) +{ + PyObject *tmptype, *retval = NULL; + int *tys = NULL; + int argct; + int i; + int prealloc[24]; + int matches; + PyObject *cfunc; + PyThreadState *ts = PyThreadState_Get(); + PyObject *locals = NULL; + + /* If compilation is enabled, ensure that an exact match is found and if + * not compile one */ + int exact_match_required = self->can_compile ? 1 : self->exact_match_required; + +#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10) + if (ts->tracing && ts->c_profilefunc) { +#else + if (ts->use_tracing && ts->c_profilefunc) { +#endif + locals = PyEval_GetLocals(); + if (locals == NULL) { + goto CLEANUP; + } + } + if (self->fold_args) { + if (find_named_args(self, &args, &kws)) + return NULL; + } + else + Py_INCREF(args); + /* Now we own a reference to args */ + + argct = PySequence_Fast_GET_SIZE(args); + + if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int))) + tys = prealloc; + else + tys = new int[argct]; + + for (i = 0; i < argct; ++i) { + tmptype = PySequence_Fast_GET_ITEM(args, i); + tys[i] = typeof_typecode((PyObject *) self, tmptype); + if (tys[i] == -1) { + if (self->can_fallback){ + /* We will clear the exception if fallback is allowed. */ + PyErr_Clear(); + } else { + goto CLEANUP; + } + } + } + + /* We only allow unsafe conversions if compilation of new specializations + has been disabled. */ + cfunc = self->resolve(tys, matches, !self->can_compile, + exact_match_required); + + if (matches == 0 && !self->can_compile) { + /* + * If we can't compile a new specialization, look for + * matching signatures for which conversions haven't been + * registered on the C++ TypeManager. 
+ */ + int res = search_new_conversions((PyObject *) self, args, kws); + if (res < 0) { + retval = NULL; + goto CLEANUP; + } + if (res > 0) { + /* Retry with the newly registered conversions */ + cfunc = self->resolve(tys, matches, !self->can_compile, + exact_match_required); + } + } + + if (matches == 1) { + /* Definition is found */ + retval = cfunc; + Py_INCREF(retval); + } else if (matches == 0) { + /* No matching definition */ + if (self->can_compile) { + retval = cuda_compile_only(self, args, kws, locals); + } else if (self->fallbackdef) { + /* Have object fallback */ + retval = call_cfunc(self, self->fallbackdef, args, kws, locals); + } else { + /* Raise TypeError */ + explain_matching_error((PyObject *) self, args, kws); + retval = NULL; + } + } else if (self->can_compile) { + /* Ambiguous, but are allowed to compile */ + retval = cuda_compile_only(self, args, kws, locals); + } else { + /* Ambiguous */ + explain_ambiguous((PyObject *) self, args, kws); + retval = NULL; + } + +CLEANUP: + if (tys != prealloc) + delete[] tys; + Py_DECREF(args); + + return retval; +} + +static int +import_devicearray(void) +{ + PyObject *devicearray = PyImport_ImportModule("numba._devicearray"); + if (devicearray == NULL) { + return -1; + } + Py_DECREF(devicearray); + + DeviceArray_API = (void**)PyCapsule_Import("numba._devicearray._DEVICEARRAY_API", 0); + if (DeviceArray_API == NULL) { + return -1; + } + + return 0; +} + +static PyMethodDef Dispatcher_methods[] = { + { "_clear", (PyCFunction)Dispatcher_clear, METH_NOARGS, NULL }, + { "_insert", (PyCFunction)Dispatcher_Insert, METH_VARARGS | METH_KEYWORDS, + "insert new definition"}, + { "_cuda_call", (PyCFunction)Dispatcher_cuda_call, + METH_VARARGS | METH_KEYWORDS, "CUDA call resolution" }, + { NULL }, +}; + +static PyMemberDef Dispatcher_members[] = { + {(char*)"_can_compile", T_BOOL, offsetof(Dispatcher, can_compile), 0, NULL }, + {NULL} /* Sentinel */ +}; + + +static PyTypeObject DispatcherType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_dispatcher.Dispatcher", /* tp_name */ + sizeof(Dispatcher), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)Dispatcher_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + (PyCFunctionWithKeywords)Dispatcher_call, /* tp_call*/ + 0, /* tp_str*/ + 0, /* tp_getattro*/ + 0, /* tp_setattro*/ + 0, /* tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/ + "Dispatcher object", /* tp_doc */ + (traverseproc) Dispatcher_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Dispatcher_methods, /* tp_methods */ + Dispatcher_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Dispatcher_init, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ +#if PY_MAJOR_VERSION == 3 +/* Python 3.8 has two slots, 3.9 has one. 
*/
+#if PY_MINOR_VERSION > 7
+    0, /* tp_vectorcall */
+#if PY_MINOR_VERSION == 8
+    0, /* tp_print */
+#endif
+#endif
+#endif
+};
+
+
+static PyObject *compute_fingerprint(PyObject *self, PyObject *args)
+{
+    PyObject *val;
+    if (!PyArg_ParseTuple(args, "O:compute_fingerprint", &val))
+        return NULL;
+    return typeof_compute_fingerprint(val);
+}
+
+static PyObject *set_use_tls_target_stack(PyObject *self, PyObject *args)
+{
+    int val;
+    if (!PyArg_ParseTuple(args, "p", &val))
+        return NULL;
+    bool old = use_tls_target_stack;
+    use_tls_target_stack = val;
+    // return the old value
+    if (old) {
+        Py_RETURN_TRUE;
+    } else {
+        Py_RETURN_FALSE;
+    }
+}
+
+static PyMethodDef ext_methods[] = {
+#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
+    declmethod(typeof_init),
+    declmethod(compute_fingerprint),
+    declmethod(set_use_tls_target_stack),
+    { NULL },
+#undef declmethod
+};
+
+
+MOD_INIT(_dispatcher) {
+    if (import_devicearray() < 0) {
+        PyErr_Print();
+        PyErr_SetString(PyExc_ImportError, "numba._devicearray failed to import");
+        return MOD_ERROR_VAL;
+    }
+
+    PyObject *m;
+    MOD_DEF(m, "_dispatcher", "No docs", ext_methods)
+    if (m == NULL)
+        return MOD_ERROR_VAL;
+
+    DispatcherType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&DispatcherType) < 0) {
+        return MOD_ERROR_VAL;
+    }
+    Py_INCREF(&DispatcherType);
+    PyModule_AddObject(m, "Dispatcher", (PyObject*)(&DispatcherType));
+
+    return MOD_SUCCESS_VAL(m);
+}
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfunc.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfunc.c
new file mode 100644
index 000000000..7c228a830
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfunc.c
@@ -0,0 +1,507 @@
+/*
+ * Definition of Environment and Closure objects.
+ * This module is included by _dynfuncmod.c and by pycc-compiled modules.
+ */
+
+#include "_pymodule.h"
+
+#include <string.h>
+
+/* NOTE: EnvironmentObject and ClosureObject must be kept in sync with
+ * the definitions in numba/targets/base.py (EnvBody and ClosureBody).
+ */
+
+/*
+ * EnvironmentObject hosts data needed for execution of compiled functions.
+ */
+typedef struct {
+    PyObject_HEAD
+    PyObject *globals;
+    /* Assorted "constants" that are needed at runtime to execute
+       the compiled function. This can include frozen closure variables,
+       lifted loops, etc.
*/ + PyObject *consts; +} EnvironmentObject; + + +static PyMemberDef env_members[] = { + {"globals", T_OBJECT, offsetof(EnvironmentObject, globals), READONLY, NULL}, + {"consts", T_OBJECT, offsetof(EnvironmentObject, consts), READONLY, NULL}, + {NULL} /* Sentinel */ +}; + +static int +env_traverse(EnvironmentObject *env, visitproc visit, void *arg) +{ + Py_VISIT(env->globals); + Py_VISIT(env->consts); + return 0; +} + +static int +env_clear(EnvironmentObject *env) +{ + Py_CLEAR(env->globals); + Py_CLEAR(env->consts); + return 0; +} + +static void +env_dealloc(EnvironmentObject *env) +{ + PyObject_GC_UnTrack((PyObject *) env); + env_clear(env); + Py_TYPE(env)->tp_free((PyObject *) env); +} + +static EnvironmentObject * +env_new_empty(PyTypeObject* type) +{ + return (EnvironmentObject *) PyType_GenericNew(type, NULL, NULL); +} + +static PyObject * +env_new(PyTypeObject* type, PyObject* args, PyObject* kwds) +{ + PyObject *globals; + EnvironmentObject *env; + static char *kwlist[] = {"globals", 0}; + + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O!:function", kwlist, + &PyDict_Type, &globals)) + return NULL; + + env = env_new_empty(type); + if (env == NULL) + return NULL; + Py_INCREF(globals); + env->globals = globals; + env->consts = PyList_New(0); + if (!env->consts) { + Py_DECREF(env); + return NULL; + } + return (PyObject *) env; +} + + +static PyTypeObject EnvironmentType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_dynfunc.Environment", /*tp_name*/ + sizeof(EnvironmentObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor) env_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /* tp_doc */ + (traverseproc) env_traverse, /* tp_traverse */ + (inquiry) env_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + env_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + env_new, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ +#if PY_MAJOR_VERSION == 3 +/* Python 3.8 has two slots, 3.9 has one. */ +#if PY_MINOR_VERSION > 7 + 0, /* tp_vectorcall */ +#if PY_MINOR_VERSION == 8 + 0, /* tp_print */ +#endif +#endif +#endif +}; + +/* A closure object is created for each call to make_function(), and stored + as the resulting PyCFunction object's "self" pointer. It points to an + EnvironmentObject which is constructed during compilation. This allows + for two things: + - lifetime management of dependent data (e.g. lifted loop dispatchers) + - access to the execution environment by the compiled function + (for example the globals module) + */ + +/* Closure is a variable-sized object for binary compatibility with + Generator (see below). */ +#define CLOSURE_HEAD \ + PyObject_VAR_HEAD \ + EnvironmentObject *env; + +typedef struct { + CLOSURE_HEAD + /* The dynamically-filled method definition for the PyCFunction object + using this closure. 
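env_traverse and env_clear above are the standard CPython pair that every GC-tracked container type needs. A self-contained sketch of the pattern, assuming the CPython headers; DemoObject and its payload member are hypothetical:

#include <Python.h>

typedef struct {
    PyObject_HEAD
    PyObject *payload;               /* an owned reference */
} DemoObject;

/* Report every owned PyObject* to the cycle collector. */
static int demo_traverse(DemoObject *self, visitproc visit, void *arg) {
    Py_VISIT(self->payload);
    return 0;
}

/* Drop owned references so reference cycles can be broken. */
static int demo_clear(DemoObject *self) {
    Py_CLEAR(self->payload);
    return 0;
}

/* Dealloc untracks first, then reuses clear, as env_dealloc does. */
static void demo_dealloc(DemoObject *self) {
    PyObject_GC_UnTrack((PyObject *) self);
    demo_clear(self);
    Py_TYPE(self)->tp_free((PyObject *) self);
}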
*/ + PyMethodDef def; + /* Arbitrary object to keep alive during the closure's lifetime. + (put a tuple to put several objects alive). + In practice, this helps keep the LLVM module and its generated + code alive. */ + PyObject *keepalive; + PyObject *weakreflist; +} ClosureObject; + + +static int +closure_traverse(ClosureObject *clo, visitproc visit, void *arg) +{ + Py_VISIT(clo->env); + Py_VISIT(clo->keepalive); + return 0; +} + +static void +closure_dealloc(ClosureObject *clo) +{ + PyObject_GC_UnTrack((PyObject *) clo); + if (clo->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) clo); + PyObject_Free((void *) clo->def.ml_name); + PyObject_Free((void *) clo->def.ml_doc); + Py_XDECREF(clo->env); + Py_XDECREF(clo->keepalive); + Py_TYPE(clo)->tp_free((PyObject *) clo); +} + +static PyTypeObject ClosureType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_dynfunc._Closure", /*tp_name*/ + sizeof(ClosureObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor) closure_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + 0, /* tp_doc */ + (traverseproc) closure_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(ClosureObject, weakreflist), /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ +#if PY_MAJOR_VERSION == 3 +/* Python 3.8 has two slots, 3.9 has one. */ +#if PY_MINOR_VERSION > 7 + 0, /* tp_vectorcall */ +#if PY_MINOR_VERSION == 8 + 0, /* tp_print */ +#endif +#endif +#endif +}; + + +/* Return an owned piece of character data duplicating a Python string + object's value. */ +static char * +dup_string(PyObject *strobj) +{ + const char *tmp = NULL; + char *str; + tmp = PyString_AsString(strobj); + if (tmp == NULL) + return NULL; + /* Using PyObject_Malloc allows this memory to be tracked for + leaks. */ + str = PyObject_Malloc(strlen(tmp) + 1); + if (str == NULL) { + PyErr_NoMemory(); + return NULL; + } + strcpy(str, tmp); + return str; +} + +/* Create and initialize a new Closure object */ +static ClosureObject * +closure_new(PyObject *name, PyObject *doc, PyCFunction fnaddr, + EnvironmentObject *env, PyObject *keepalive) +{ + ClosureObject *clo = (ClosureObject *) PyType_GenericAlloc(&ClosureType, 0); + if (clo == NULL) + return NULL; + + clo->def.ml_name = dup_string(name); + if (!clo->def.ml_name) { + Py_DECREF(clo); + return NULL; + } + clo->def.ml_meth = fnaddr; + clo->def.ml_flags = METH_VARARGS | METH_KEYWORDS; + clo->def.ml_doc = dup_string(doc); + if (!clo->def.ml_doc) { + Py_DECREF(clo); + return NULL; + } + Py_INCREF(env); + clo->env = env; + Py_XINCREF(keepalive); + clo->keepalive = keepalive; + return clo; +} + +/* Create a new PyCFunction object wrapping a closure defined by + the given arguments. 
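The closure machinery above (and pycfunction_new below) hinges on PyCFunction_NewEx binding an arbitrary object as the function's "self", which keeps that object alive for the callable's lifetime. A sketch under the same assumption of CPython headers; report_state and make_reporter are hypothetical names:

#include <Python.h>

/* "self" is whatever object was bound at creation time, not a module. */
static PyObject *report_state(PyObject *self, PyObject *args) {
    return PyObject_Repr(self);
}

static PyMethodDef report_def = {
    "report_state", report_state, METH_VARARGS, NULL
};

/* Returns a callable that owns `state` for as long as it lives. */
static PyObject *make_reporter(PyObject *state) {
    return PyCFunction_NewEx(&report_def, state, NULL);
}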
*/ +static PyObject * +pycfunction_new(PyObject *module, PyObject *name, PyObject *doc, + PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive) +{ + PyObject *funcobj; + PyObject *modname = NULL; + ClosureObject *closure = NULL; + + closure = closure_new(name, doc, fnaddr, env, keepalive); + if (closure == NULL) goto FAIL; + + modname = PyObject_GetAttrString(module, "__name__"); + if (modname == NULL) goto FAIL; + + funcobj = PyCFunction_NewEx(&closure->def, (PyObject *) closure, modname); + Py_DECREF(closure); + Py_DECREF(modname); + + return funcobj; + +FAIL: + Py_XDECREF(closure); + Py_XDECREF(modname); + return NULL; +} + +/* + * Python-facing wrapper for Numba-compiled generator. + * Note the Environment's offset inside the struct is the same as in the + * Closure object. This is required to simplify generation of Python wrappers. + */ + +typedef void (*gen_finalizer_t)(void *); + +typedef struct { + CLOSURE_HEAD + PyCFunctionWithKeywords nextfunc; + gen_finalizer_t finalizer; + PyObject *weakreflist; + union { + double dummy; /* Force alignment */ + char state[0]; + }; +} GeneratorObject; + +static int +generator_traverse(GeneratorObject *gen, visitproc visit, void *arg) +{ + /* XXX this doesn't traverse the state, which can own references to + PyObjects */ + Py_VISIT(gen->env); + return 0; +} + +static int +generator_clear(GeneratorObject *gen) +{ + if (gen->finalizer != NULL) { + gen->finalizer(gen->state); + gen->finalizer = NULL; + } + Py_CLEAR(gen->env); + gen->nextfunc = NULL; + return 0; +} + +static void +generator_dealloc(GeneratorObject *gen) +{ + PyObject_GC_UnTrack((PyObject *) gen); + if (gen->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) gen); + /* XXX The finalizer may be called after the LLVM module has been + destroyed (typically at interpreter shutdown) */ +#if PY_MAJOR_VERSION >= 3 +#if PY_MINOR_VERSION >= 7 + if (!_Py_IsFinalizing()) +#else + if (!_Py_Finalizing) +#endif +#endif + if (gen->finalizer != NULL) + gen->finalizer(gen->state); + Py_XDECREF(gen->env); + Py_TYPE(gen)->tp_free((PyObject *) gen); +} + +static PyObject * +generator_iternext(GeneratorObject *gen) +{ + PyObject *res, *args; + if (gen->nextfunc == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "cannot call next() on finalized generator"); + return NULL; + } + args = PyTuple_Pack(1, (PyObject *) gen); + if (args == NULL) + return NULL; + res = (*gen->nextfunc)((PyObject *) gen, args, NULL); + Py_DECREF(args); + return res; +} + +static PyTypeObject GeneratorType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_dynfunc._Generator", /* tp_name*/ + offsetof(GeneratorObject, state), /* tp_basicsize*/ + 1, /* tp_itemsize*/ + (destructor) generator_dealloc, /* tp_dealloc*/ + 0, /* tp_print*/ + 0, /* tp_getattr*/ + 0, /* tp_setattr*/ + 0, /* tp_compare*/ + 0, /* tp_repr*/ + 0, /* tp_as_number*/ + 0, /* tp_as_sequence*/ + 0, /* tp_as_mapping*/ + 0, /* tp_hash */ + 0, /* tp_call*/ + 0, /* tp_str*/ + 0, /* tp_getattro*/ + 0, /* tp_setattro*/ + 0, /* tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC + | Py_TPFLAGS_BASETYPE, /* tp_flags*/ + 0, /* tp_doc */ + (traverseproc) generator_traverse, /* tp_traverse */ + (inquiry) generator_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(GeneratorObject, weakreflist), /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc) generator_iternext, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 
0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ + 0, /* tp_is_gc */ + 0, /* tp_bases */ + 0, /* tp_mro */ + 0, /* tp_cache */ + 0, /* tp_subclasses */ + 0, /* tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ + 0, /* tp_finalize */ +#if PY_MAJOR_VERSION == 3 +/* Python 3.8 has two slots, 3.9 has one. */ +#if PY_MINOR_VERSION > 7 + 0, /* tp_vectorcall */ +#if PY_MINOR_VERSION == 8 + 0, /* tp_print */ +#endif +#endif +#endif +}; + +/* Dynamically create a new generator object */ +static PyObject * +Numba_make_generator(Py_ssize_t gen_state_size, + void *initial_state, + PyCFunctionWithKeywords nextfunc, + gen_finalizer_t finalizer, + EnvironmentObject *env) +{ + GeneratorObject *gen; + gen = (GeneratorObject *) PyType_GenericAlloc(&GeneratorType, gen_state_size); + if (gen == NULL) + return NULL; + memcpy(gen->state, initial_state, gen_state_size); + gen->nextfunc = nextfunc; + Py_XINCREF(env); + gen->env = env; + gen->finalizer = finalizer; + return (PyObject *) gen; +} + +/* Initialization subroutine for use by modules including this */ +static int +init_dynfunc_module(PyObject *module) +{ + if (PyType_Ready(&ClosureType)) + return -1; + if (PyType_Ready(&EnvironmentType)) + return -1; + if (PyType_Ready(&GeneratorType)) + return -1; + return 0; +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfuncmod.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfuncmod.c new file mode 100644 index 000000000..5d80529c0 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_dynfuncmod.c @@ -0,0 +1,93 @@ +#include "_dynfunc.c" + +/* Python-facing function to dynamically create a new C function object */ +static PyObject* +make_function(PyObject *self, PyObject *args) +{ + PyObject *module, *fname, *fdoc, *fnaddrobj; + void *fnaddr; + EnvironmentObject *env; + PyObject *keepalive; + + if (!PyArg_ParseTuple(args, "OOOOO!|O", + &module, &fname, &fdoc, &fnaddrobj, &EnvironmentType, &env, + &keepalive)) { + return NULL; + } + + fnaddr = PyLong_AsVoidPtr(fnaddrobj); + if (fnaddr == NULL && PyErr_Occurred()) + return NULL; + + return pycfunction_new(module, fname, fdoc, fnaddr, env, keepalive); +} + +static PyMethodDef ext_methods[] = { +#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } + declmethod(make_function), + { NULL }, +#undef declmethod +}; + + +static PyObject * +build_c_helpers_dict(void) +{ + PyObject *dct = PyDict_New(); + if (dct == NULL) + goto error; + +#define _declpointer(name, value) do { \ + PyObject *o = PyLong_FromVoidPtr(value); \ + if (o == NULL) goto error; \ + if (PyDict_SetItemString(dct, name, o)) { \ + Py_DECREF(o); \ + goto error; \ + } \ + Py_DECREF(o); \ +} while (0) + +#define declmethod(func) _declpointer(#func, &Numba_##func) + +#define declpointer(ptr) _declpointer(#ptr, &ptr) + + declmethod(make_generator); + +#undef declmethod + return dct; +error: + Py_XDECREF(dct); + return NULL; +} + +MOD_INIT(_dynfunc) { + PyObject *m, *impl_info; + + MOD_DEF(m, "_dynfunc", "No docs", ext_methods) + if (m == NULL) + return MOD_ERROR_VAL; + + if (init_dynfunc_module(m)) + return MOD_ERROR_VAL; + + impl_info = Py_BuildValue( + "{snsnsn}", + "offsetof_closure_body", offsetof(ClosureObject, env), + "offsetof_env_body", offsetof(EnvironmentObject, globals), + "offsetof_generator_state", offsetof(GeneratorObject, state) + ); + if (impl_info == NULL) + return MOD_ERROR_VAL; + PyModule_AddObject(m, "_impl_info", impl_info); + + Py_INCREF(&ClosureType); + 
PyModule_AddObject(m, "_Closure", (PyObject *) (&ClosureType)); + Py_INCREF(&EnvironmentType); + PyModule_AddObject(m, "Environment", (PyObject *) (&EnvironmentType)); + Py_INCREF(&GeneratorType); + PyModule_AddObject(m, "_Generator", (PyObject *) (&GeneratorType)); + + PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); + + return MOD_SUCCESS_VAL(m); +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.c new file mode 100644 index 000000000..76392f79e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.c @@ -0,0 +1,530 @@ +/* + * This file and _hashtable.h are from CPython 3.5. The symbols have been + * renamed from _Py_hashxxx to _Numba_hashxxx to avoid name clashes with + * the CPython definitions (including at runtime through dynamic linking). + * Those CPython APIs are private and can change in incompatible ways at + * any time. + * + * Command line used for renaming: + * $ sed -i -r 's/\b_Py_(has[h]table)/_Numba_\1/ig' numba/_hashtable.h numba/_hashtable.c + */ + +/* The implementation of the hash table (_Numba_hashtable_t) is based on the cfuhash + project: + http://sourceforge.net/projects/libcfu/ + + Copyright of cfuhash: + ---------------------------------- + Creation date: 2005-06-24 21:22:40 + Authors: Don + Change log: + + Copyright (c) 2005 Don Owens + All rights reserved. + + This code is released under the BSD license: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ---------------------------------- +*/ + +#include "_pymodule.h" +#include "_hashtable.h" + +#define HASHTABLE_MIN_SIZE 16 +#define HASHTABLE_HIGH 0.50 +#define HASHTABLE_LOW 0.10 +#define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH) + +#define BUCKETS_HEAD(SLIST) \ + ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST))) +#define TABLE_HEAD(HT, BUCKET) \ + ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET])) +#define ENTRY_NEXT(ENTRY) \ + ((_Numba_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) +#define HASHTABLE_ITEM_SIZE(HT) \ + (sizeof(_Numba_hashtable_entry_t) + (HT)->data_size) + +/* Forward declaration */ +static void hashtable_rehash(_Numba_hashtable_t *ht); + +static void +_Py_slist_init(_Py_slist_t *list) +{ + list->head = NULL; +} + +static void +_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) +{ + item->next = list->head; + list->head = item; +} + +static void +_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, + _Py_slist_item_t *item) +{ + if (previous != NULL) + previous->next = item->next; + else + list->head = item->next; +} + +Py_uhash_t +_Numba_hashtable_hash_int(const void *key) +{ + return (Py_uhash_t)key; +} + +Py_uhash_t +_Numba_hashtable_hash_ptr(const void *key) +{ + return (Py_uhash_t)_Py_HashPointer((void *)key); +} + +int +_Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry) +{ + return entry->key == key; +} + +/* makes sure the real size of the buckets array is a power of 2 */ +static size_t +round_size(size_t s) +{ + size_t i; + if (s < HASHTABLE_MIN_SIZE) + return HASHTABLE_MIN_SIZE; + i = 1; + while (i < s) + i <<= 1; + return i; +} + +_Numba_hashtable_t * +_Numba_hashtable_new_full(size_t data_size, size_t init_size, + _Numba_hashtable_hash_func hash_func, + _Numba_hashtable_compare_func compare_func, + _Numba_hashtable_copy_data_func copy_data_func, + _Numba_hashtable_free_data_func free_data_func, + _Numba_hashtable_get_data_size_func get_data_size_func, + _Numba_hashtable_allocator_t *allocator) +{ + _Numba_hashtable_t *ht; + size_t buckets_size; + _Numba_hashtable_allocator_t alloc; + + if (allocator == NULL) { + alloc.malloc = PyMem_RawMalloc; + alloc.free = PyMem_RawFree; + } + else + alloc = *allocator; + + ht = (_Numba_hashtable_t *)alloc.malloc(sizeof(_Numba_hashtable_t)); + if (ht == NULL) + return ht; + + ht->num_buckets = round_size(init_size); + ht->entries = 0; + ht->data_size = data_size; + + buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); + ht->buckets = alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + alloc.free(ht); + return NULL; + } + memset(ht->buckets, 0, buckets_size); + + ht->hash_func = hash_func; + ht->compare_func = compare_func; + ht->copy_data_func = copy_data_func; + ht->free_data_func = free_data_func; + ht->get_data_size_func = get_data_size_func; + ht->alloc = alloc; + return ht; +} + +_Numba_hashtable_t * +_Numba_hashtable_new(size_t data_size, + _Numba_hashtable_hash_func hash_func, + _Numba_hashtable_compare_func compare_func) +{ + return _Numba_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + hash_func, compare_func, + NULL, NULL, NULL, NULL); +} + +size_t +_Numba_hashtable_size(_Numba_hashtable_t *ht) +{ + size_t size; + size_t hv; + + size = sizeof(_Numba_hashtable_t); + + /* buckets */ + size += ht->num_buckets * sizeof(_Numba_hashtable_entry_t *); + + /* entries */ + size += ht->entries * HASHTABLE_ITEM_SIZE(ht); + + /* data linked from entries */ + if (ht->get_data_size_func) { + for (hv = 0; hv < 
ht->num_buckets; hv++) { + _Numba_hashtable_entry_t *entry; + + for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + void *data; + + data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + size += ht->get_data_size_func(data); + } + } + } + return size; +} + +#ifdef Py_DEBUG +void +_Numba_hashtable_print_stats(_Numba_hashtable_t *ht) +{ + size_t size; + size_t chain_len, max_chain_len, total_chain_len, nchains; + _Numba_hashtable_entry_t *entry; + size_t hv; + double load; + + size = _Numba_hashtable_size(ht); + + load = (double)ht->entries / ht->num_buckets; + + max_chain_len = 0; + total_chain_len = 0; + nchains = 0; + for (hv = 0; hv < ht->num_buckets; hv++) { + entry = TABLE_HEAD(ht, hv); + if (entry != NULL) { + chain_len = 0; + for (; entry; entry = ENTRY_NEXT(entry)) { + chain_len++; + } + if (chain_len > max_chain_len) + max_chain_len = chain_len; + total_chain_len += chain_len; + nchains++; + } + } + printf("hash table %p: entries=%" + PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ", + ht, ht->entries, ht->num_buckets, load * 100.0); + if (nchains) + printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); + printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n", + max_chain_len, size / 1024); +} +#endif + +/* Get an entry. Return NULL if the key does not exist. */ +_Numba_hashtable_entry_t * +_Numba_hashtable_get_entry(_Numba_hashtable_t *ht, const void *key) +{ + Py_uhash_t key_hash; + size_t index; + _Numba_hashtable_entry_t *entry; + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { + if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + break; + } + + return entry; +} + +static int +_hashtable_pop_entry(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + Py_uhash_t key_hash; + size_t index; + _Numba_hashtable_entry_t *entry, *previous; + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + previous = NULL; + for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { + if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + break; + previous = entry; + } + + if (entry == NULL) + return 0; + + _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, + (_Py_slist_item_t *)entry); + ht->entries--; + + if (data != NULL) + _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + ht->alloc.free(entry); + + if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) + hashtable_rehash(ht); + return 1; +} + +/* Add a new entry to the hash. The key must not be present in the hash table. + Return 0 on success, -1 on memory error. */ +int +_Numba_hashtable_set(_Numba_hashtable_t *ht, const void *key, + void *data, size_t data_size) +{ + Py_uhash_t key_hash; + size_t index; + _Numba_hashtable_entry_t *entry; + + assert(data != NULL || data_size == 0); +#ifndef NDEBUG + /* Don't write the assertion on a single line because it is interesting + to know the duplicated entry if the assertion failed. The entry can + be read using a debugger. 
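Lookup above computes the bucket as key_hash & (num_buckets - 1); because round_size() keeps the bucket count a power of two, the mask is exactly a modulo, just cheaper. A standalone check:

#include <stddef.h>
#include <stdio.h>

int main(void) {
    size_t num_buckets = 16;                    /* always a power of two */
    unsigned long long key_hash = 2654435761u;  /* arbitrary example hash */
    size_t index = (size_t) (key_hash & (num_buckets - 1));
    printf("hash %llu -> bucket %zu of %zu\n", key_hash, index, num_buckets);
    return 0;
}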
*/ + entry = _Numba_hashtable_get_entry(ht, key); + assert(entry == NULL); +#endif + + key_hash = ht->hash_func(key); + index = key_hash & (ht->num_buckets - 1); + + entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); + if (entry == NULL) { + /* memory allocation failed */ + return -1; + } + + entry->key = (void *)key; + entry->key_hash = key_hash; + + assert(data_size == ht->data_size); + memcpy(_Numba_HASHTABLE_ENTRY_DATA(entry), data, data_size); + + _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); + ht->entries++; + + if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) + hashtable_rehash(ht); + return 0; +} + +/* Get data from an entry. Copy entry data into data and return 1 if the entry + exists, return 0 if the entry does not exist. */ +int +_Numba_hashtable_get(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + _Numba_hashtable_entry_t *entry; + + assert(data != NULL); + + entry = _Numba_hashtable_get_entry(ht, key); + if (entry == NULL) + return 0; + _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + return 1; +} + +int +_Numba_hashtable_pop(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size) +{ + assert(data != NULL); + assert(ht->free_data_func == NULL); + return _hashtable_pop_entry(ht, key, data, data_size); +} + +/* Delete an entry. The entry must exist. */ +void +_Numba_hashtable_delete(_Numba_hashtable_t *ht, const void *key) +{ +#ifndef NDEBUG + int found = _hashtable_pop_entry(ht, key, NULL, 0); + assert(found); +#else + (void)_hashtable_pop_entry(ht, key, NULL, 0); +#endif +} + +/* Prototype for a pointer to a function to be called foreach + key/value pair in the hash by hashtable_foreach(). Iteration + stops if a non-zero value is returned. */ +int +_Numba_hashtable_foreach(_Numba_hashtable_t *ht, + int (*func) (_Numba_hashtable_entry_t *entry, void *arg), + void *arg) +{ + _Numba_hashtable_entry_t *entry; + size_t hv; + + for (hv = 0; hv < ht->num_buckets; hv++) { + for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + int res = func(entry, arg); + if (res) + return res; + } + } + return 0; +} + +static void +hashtable_rehash(_Numba_hashtable_t *ht) +{ + size_t buckets_size, new_size, bucket; + _Py_slist_t *old_buckets = NULL; + size_t old_num_buckets; + + new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); + if (new_size == ht->num_buckets) + return; + + old_num_buckets = ht->num_buckets; + + buckets_size = new_size * sizeof(ht->buckets[0]); + old_buckets = ht->buckets; + ht->buckets = ht->alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + /* cancel rehash on memory allocation failure */ + ht->buckets = old_buckets ; + /* memory allocation failed */ + return; + } + memset(ht->buckets, 0, buckets_size); + + ht->num_buckets = new_size; + + for (bucket = 0; bucket < old_num_buckets; bucket++) { + _Numba_hashtable_entry_t *entry, *next; + for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { + size_t entry_index; + + assert(ht->hash_func(entry->key) == entry->key_hash); + next = ENTRY_NEXT(entry); + entry_index = entry->key_hash & (new_size - 1); + + _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); + } + } + + ht->alloc.free(old_buckets); +} + +void +_Numba_hashtable_clear(_Numba_hashtable_t *ht) +{ + _Numba_hashtable_entry_t *entry, *next; + size_t i; + + for (i=0; i < ht->num_buckets; i++) { + for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { + next = ENTRY_NEXT(entry); + if 
(ht->free_data_func) + ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->alloc.free(entry); + } + _Py_slist_init(&ht->buckets[i]); + } + ht->entries = 0; + hashtable_rehash(ht); +} + +void +_Numba_hashtable_destroy(_Numba_hashtable_t *ht) +{ + size_t i; + + for (i = 0; i < ht->num_buckets; i++) { + _Py_slist_item_t *entry = ht->buckets[i].head; + while (entry) { + _Py_slist_item_t *entry_next = entry->next; + if (ht->free_data_func) + ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->alloc.free(entry); + entry = entry_next; + } + } + + ht->alloc.free(ht->buckets); + ht->alloc.free(ht); +} + +/* Return a copy of the hash table */ +_Numba_hashtable_t * +_Numba_hashtable_copy(_Numba_hashtable_t *src) +{ + _Numba_hashtable_t *dst; + _Numba_hashtable_entry_t *entry; + size_t bucket; + int err; + void *data, *new_data; + + dst = _Numba_hashtable_new_full(src->data_size, src->num_buckets, + src->hash_func, src->compare_func, + src->copy_data_func, src->free_data_func, + src->get_data_size_func, &src->alloc); + if (dst == NULL) + return NULL; + + for (bucket=0; bucket < src->num_buckets; bucket++) { + entry = TABLE_HEAD(src, bucket); + for (; entry; entry = ENTRY_NEXT(entry)) { + if (src->copy_data_func) { + data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + new_data = src->copy_data_func(data); + if (new_data != NULL) + err = _Numba_hashtable_set(dst, entry->key, + &new_data, src->data_size); + else + err = 1; + } + else { + data = _Numba_HASHTABLE_ENTRY_DATA(entry); + err = _Numba_hashtable_set(dst, entry->key, data, src->data_size); + } + if (err) { + _Numba_hashtable_destroy(dst); + return NULL; + } + } + } + return dst; +} + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.h new file mode 100644 index 000000000..37430429d --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_hashtable.h @@ -0,0 +1,132 @@ +/* + * See _hashtable.c for more information about this file. 
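Throughout this table, HASHTABLE_ITEM_SIZE allocates each entry as a single block: the entry header immediately followed by data_size payload bytes, which is what the _Numba_HASHTABLE_ENTRY_DATA macros in the header below address. A plain-C sketch of that inline-payload layout; entry_t here is a simplified stand-in:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct entry {
    struct entry *next;
    unsigned long key_hash;
    /* data_size payload bytes follow the header in the same allocation */
} entry_t;

int main(void) {
    size_t data_size = sizeof(double);
    entry_t *e = malloc(sizeof(entry_t) + data_size);
    if (e == NULL)
        return 1;
    double value = 3.5, out;
    memcpy((char *) e + sizeof(entry_t), &value, data_size);  /* write payload */
    memcpy(&out, (char *) e + sizeof(entry_t), data_size);    /* read it back */
    printf("payload: %g\n", out);
    free(e);
    return 0;
}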
+ */ + +#ifndef Py_HASHTABLE_H +#define Py_HASHTABLE_H + +/* The whole API is private */ +#ifndef Py_LIMITED_API + +typedef struct _Py_slist_item_s { + struct _Py_slist_item_s *next; +} _Py_slist_item_t; + +typedef struct { + _Py_slist_item_t *head; +} _Py_slist_t; + +#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next) + +#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head) + +typedef struct { + /* used by _Numba_hashtable_t.buckets to link entries */ + _Py_slist_item_t _Py_slist_item; + + const void *key; + Py_uhash_t key_hash; + + /* data follows */ +} _Numba_hashtable_entry_t; + +#define _Numba_HASHTABLE_ENTRY_DATA(ENTRY) \ + ((char *)(ENTRY) + sizeof(_Numba_hashtable_entry_t)) + +#define _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ + (*(void **)_Numba_HASHTABLE_ENTRY_DATA(ENTRY)) + +#define _Numba_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ + do { \ + assert((DATA_SIZE) == (TABLE)->data_size); \ + memcpy(DATA, _Numba_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ + } while (0) + +typedef Py_uhash_t (*_Numba_hashtable_hash_func) (const void *key); +typedef int (*_Numba_hashtable_compare_func) (const void *key, const _Numba_hashtable_entry_t *he); +typedef void* (*_Numba_hashtable_copy_data_func)(void *data); +typedef void (*_Numba_hashtable_free_data_func)(void *data); +typedef size_t (*_Numba_hashtable_get_data_size_func)(void *data); + +typedef struct { + /* allocate a memory block */ + void* (*malloc) (size_t size); + + /* release a memory block */ + void (*free) (void *ptr); +} _Numba_hashtable_allocator_t; + +typedef struct { + size_t num_buckets; + size_t entries; /* Total number of entries in the table. */ + _Py_slist_t *buckets; + size_t data_size; + + _Numba_hashtable_hash_func hash_func; + _Numba_hashtable_compare_func compare_func; + _Numba_hashtable_copy_data_func copy_data_func; + _Numba_hashtable_free_data_func free_data_func; + _Numba_hashtable_get_data_size_func get_data_size_func; + _Numba_hashtable_allocator_t alloc; +} _Numba_hashtable_t; + +/* hash and compare functions for integers and pointers */ +PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_ptr(const void *key); +PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_int(const void *key); +PyAPI_FUNC(int) _Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry); + +PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new( + size_t data_size, + _Numba_hashtable_hash_func hash_func, + _Numba_hashtable_compare_func compare_func); +PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new_full( + size_t data_size, + size_t init_size, + _Numba_hashtable_hash_func hash_func, + _Numba_hashtable_compare_func compare_func, + _Numba_hashtable_copy_data_func copy_data_func, + _Numba_hashtable_free_data_func free_data_func, + _Numba_hashtable_get_data_size_func get_data_size_func, + _Numba_hashtable_allocator_t *allocator); +PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_copy(_Numba_hashtable_t *src); +PyAPI_FUNC(void) _Numba_hashtable_clear(_Numba_hashtable_t *ht); +PyAPI_FUNC(void) _Numba_hashtable_destroy(_Numba_hashtable_t *ht); + +typedef int (*_Numba_hashtable_foreach_func) (_Numba_hashtable_entry_t *entry, void *arg); + +PyAPI_FUNC(int) _Numba_hashtable_foreach( + _Numba_hashtable_t *ht, + _Numba_hashtable_foreach_func func, void *arg); +PyAPI_FUNC(size_t) _Numba_hashtable_size(_Numba_hashtable_t *ht); + +PyAPI_FUNC(_Numba_hashtable_entry_t*) _Numba_hashtable_get_entry( + _Numba_hashtable_t *ht, + const void *key); +PyAPI_FUNC(int) _Numba_hashtable_set( + 
    _Numba_hashtable_t *ht,
+    const void *key,
+    void *data,
+    size_t data_size);
+PyAPI_FUNC(int) _Numba_hashtable_get(
+    _Numba_hashtable_t *ht,
+    const void *key,
+    void *data,
+    size_t data_size);
+PyAPI_FUNC(int) _Numba_hashtable_pop(
+    _Numba_hashtable_t *ht,
+    const void *key,
+    void *data,
+    size_t data_size);
+PyAPI_FUNC(void) _Numba_hashtable_delete(
+    _Numba_hashtable_t *ht,
+    const void *key);
+
+#define _Numba_HASHTABLE_SET(TABLE, KEY, DATA) \
+    _Numba_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
+
+#define _Numba_HASHTABLE_GET(TABLE, KEY, DATA) \
+    _Numba_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
+
+#endif /* Py_LIMITED_API */
+
+#endif
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helperlib.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helperlib.c
new file mode 100644
index 000000000..c1da22477
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helperlib.c
@@ -0,0 +1,1186 @@
+/*
+ * Helper functions used by Numba at runtime.
+ * This C file is meant to be included after defining the
+ * NUMBA_EXPORT_FUNC() and NUMBA_EXPORT_DATA() macros.
+ */
+
+#include "_pymodule.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <math.h>
+#ifdef _MSC_VER
+    #define int64_t signed __int64
+    #define uint64_t unsigned __int64
+    #define uint32_t unsigned __int32
+#else
+    #include <stdint.h>
+#endif
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/arrayobject.h>
+#include <numpy/npy_math.h>
+#include <numpy/arrayscalars.h>
+
+#include "_arraystruct.h"
+
+/*
+ * Other helpers.
+ */
+
+
+/* Fix fmod() and fmodf() for windows x64 VC 9.0 (VS 2008)
+https://support.microsoft.com/en-us/kb/982107
+*/
+static void (*fnclex)(void) = NULL;
+
+NUMBA_EXPORT_FUNC(double)
+numba_fixed_fmod(double x, double y){
+    fnclex(); /* no inline asm in x64 =( */
+    return fmod(x, y);
+}
+
+NUMBA_EXPORT_FUNC(float)
+numba_fixed_fmodf(float x, float y) {
+    fnclex(); /* no inline asm in x64 =( */
+    return fmodf(x, y);
+}
+
+NUMBA_EXPORT_FUNC(void)
+numba_set_fnclex(void *fn){
+    fnclex = fn;
+}
+
+/* provide 64-bit division function to 32-bit platforms */
+NUMBA_EXPORT_FUNC(int64_t)
+numba_sdiv(int64_t a, int64_t b) {
+    return a / b;
+}
+
+NUMBA_EXPORT_FUNC(uint64_t)
+numba_udiv(uint64_t a, uint64_t b) {
+    return a / b;
+}
+
+/* provide 64-bit remainder function to 32-bit platforms */
+NUMBA_EXPORT_FUNC(int64_t)
+numba_srem(int64_t a, int64_t b) {
+    return a % b;
+}
+
+NUMBA_EXPORT_FUNC(uint64_t)
+numba_urem(uint64_t a, uint64_t b) {
+    return a % b;
+}
+
+/* provide frexp and ldexp; these wrappers deal with special cases
+ * (zero, nan, infinity) directly, to sidestep platform differences.
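The frexp/ldexp wrappers below only special-case zero, NaN and infinity (forcing exp = 0 or passing x through) and defer to libm otherwise. The underlying contract, x == mantissa * 2^exp with the mantissa in [0.5, 1), as a standalone check:

#include <math.h>
#include <stdio.h>

int main(void) {
    int exp;
    double mantissa = frexp(48.0, &exp);
    printf("48 = %g * 2^%d\n", mantissa, exp);   /* prints: 48 = 0.75 * 2^6 */
    return 0;
}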
+ */ +NUMBA_EXPORT_FUNC(double) +numba_frexp(double x, int *exp) +{ + if (!Py_IS_FINITE(x) || !x) + *exp = 0; + else + x = frexp(x, exp); + return x; +} + +NUMBA_EXPORT_FUNC(float) +numba_frexpf(float x, int *exp) +{ + if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) + *exp = 0; + else + x = frexpf(x, exp); + return x; +} + +NUMBA_EXPORT_FUNC(double) +numba_ldexp(double x, int exp) +{ + if (Py_IS_FINITE(x) && x && exp) + x = ldexp(x, exp); + return x; +} + +NUMBA_EXPORT_FUNC(float) +numba_ldexpf(float x, int exp) +{ + if (Py_IS_FINITE(x) && x && exp) + x = ldexpf(x, exp); + return x; +} + +/* provide complex power */ +NUMBA_EXPORT_FUNC(void) +numba_cpow(Py_complex *a, Py_complex *b, Py_complex *out) { + errno = 0; + *out = _Py_c_pow(*a, *b); + if (errno == EDOM) { + /* _Py_c_pow() doesn't bother returning the right value + in this case, as Python raises ZeroDivisionError */ + out->real = out->imag = Py_NAN; + } +} + +NUMBA_EXPORT_FUNC(void) +numba_cpowf(npy_cfloat *a, npy_cfloat *b, npy_cfloat *out) { + Py_complex _a, _b, _out; + _a.real = npy_crealf(*a); + _a.imag = npy_cimagf(*a); + _b.real = npy_crealf(*b); + _b.imag = npy_cimagf(*b); + numba_cpow(&_a, &_b, &_out); + *out = npy_cpackf((float) _out.real, (float) _out.imag); +} + +/* C99 math functions: redirect to system implementations */ + +NUMBA_EXPORT_FUNC(double) +numba_gamma(double x) +{ + return tgamma(x); +} + +NUMBA_EXPORT_FUNC(float) +numba_gammaf(float x) +{ + return tgammaf(x); +} + +NUMBA_EXPORT_FUNC(double) +numba_lgamma(double x) +{ + return lgamma(x); +} + +NUMBA_EXPORT_FUNC(float) +numba_lgammaf(float x) +{ + return lgammaf(x); +} + +NUMBA_EXPORT_FUNC(double) +numba_erf(double x) +{ + return erf(x); +} + +NUMBA_EXPORT_FUNC(float) +numba_erff(float x) +{ + return erff(x); +} + +NUMBA_EXPORT_FUNC(double) +numba_erfc(double x) +{ + return erfc(x); +} + +NUMBA_EXPORT_FUNC(float) +numba_erfcf(float x) +{ + return erfcf(x); +} + +/* Note npy_signbit() is actually a polymorphic macro */ +NUMBA_EXPORT_FUNC(int) +numba_signbitf(float a) +{ + return npy_signbit(a); +} + +NUMBA_EXPORT_FUNC(int) +numba_signbit(npy_double a) +{ + return npy_signbit(a); +} + +/* Unpack any Python complex-like object into a Py_complex structure */ +NUMBA_EXPORT_FUNC(int) +numba_complex_adaptor(PyObject* obj, Py_complex *out) { + PyObject* fobj; + PyArray_Descr *dtype; + double val[2]; + + // Convert from python complex or numpy complex128 + if (PyComplex_Check(obj)) { + out->real = PyComplex_RealAsDouble(obj); + out->imag = PyComplex_ImagAsDouble(obj); + } + // Convert from numpy complex64 + else if (PyArray_IsScalar(obj, ComplexFloating)) { + dtype = PyArray_DescrFromScalar(obj); + if (dtype == NULL) { + return 0; + } + if (PyArray_CastScalarDirect(obj, dtype, &val[0], NPY_CDOUBLE) < 0) { + Py_DECREF(dtype); + return 0; + } + out->real = val[0]; + out->imag = val[1]; + Py_DECREF(dtype); + } else { + fobj = PyNumber_Float(obj); + if (!fobj) return 0; + out->real = PyFloat_AsDouble(fobj); + out->imag = 0.; + Py_DECREF(fobj); + } + return 1; +} + +/* Minimum PyBufferObject structure to hack inside it */ +typedef struct { + PyObject_HEAD + PyObject *b_base; + void *b_ptr; + Py_ssize_t b_size; + Py_ssize_t b_offset; +} PyBufferObject_Hack; + +/* +Get data address of record data buffer +*/ +NUMBA_EXPORT_FUNC(void *) +numba_extract_record_data(PyObject *recordobj, Py_buffer *pbuf) { + PyObject *attrdata; + void *ptr; + + attrdata = PyObject_GetAttrString(recordobj, "data"); + if (!attrdata) return NULL; + + if (-1 == PyObject_GetBuffer(attrdata, pbuf, 0)){ + 
Py_DECREF(attrdata); + return NULL; + } else { + ptr = pbuf->buf; + } + Py_DECREF(attrdata); + return ptr; +} + +/* + * Return a record instance with dtype as the record type, and backed + * by a copy of the memory area pointed to by (pdata, size). + */ +NUMBA_EXPORT_FUNC(PyObject *) +numba_recreate_record(void *pdata, int size, PyObject *dtype) { + PyObject *numpy = NULL; + PyObject *numpy_record = NULL; + PyObject *aryobj = NULL; + PyObject *dtypearg = NULL; + PyObject *record = NULL; + PyArray_Descr *descr = NULL; + + if (dtype == NULL) { + PyErr_Format(PyExc_RuntimeError, + "In 'numba_recreate_record', 'dtype' is NULL"); + return NULL; + } + + numpy = PyImport_ImportModuleNoBlock("numpy"); + if (!numpy) goto CLEANUP; + + numpy_record = PyObject_GetAttrString(numpy, "record"); + if (!numpy_record) goto CLEANUP; + + dtypearg = PyTuple_Pack(2, numpy_record, dtype); + if (!dtypearg || !PyArray_DescrConverter(dtypearg, &descr)) + goto CLEANUP; + + /* This steals a reference to descr, so we don't have to DECREF it */ + aryobj = PyArray_FromString(pdata, size, descr, 1, NULL); + if (!aryobj) goto CLEANUP; + + record = PySequence_GetItem(aryobj, 0); + +CLEANUP: + Py_XDECREF(numpy); + Py_XDECREF(numpy_record); + Py_XDECREF(aryobj); + Py_XDECREF(dtypearg); + + return record; +} + +NUMBA_EXPORT_FUNC(int) +numba_adapt_ndarray(PyObject *obj, arystruct_t* arystruct) { + PyArrayObject *ndary; + int i, ndim; + npy_intp *p; + + if (!PyArray_Check(obj)) { + return -1; + } + + ndary = (PyArrayObject*)obj; + ndim = PyArray_NDIM(ndary); + + arystruct->data = PyArray_DATA(ndary); + arystruct->nitems = PyArray_SIZE(ndary); + arystruct->itemsize = PyArray_ITEMSIZE(ndary); + arystruct->parent = obj; + p = arystruct->shape_and_strides; + for (i = 0; i < ndim; i++, p++) { + *p = PyArray_DIM(ndary, i); + } + for (i = 0; i < ndim; i++, p++) { + *p = PyArray_STRIDE(ndary, i); + } + arystruct->meminfo = NULL; + return 0; +} + +NUMBA_EXPORT_FUNC(int) +numba_get_buffer(PyObject *obj, Py_buffer *buf) +{ + /* Ask for shape and strides, but no suboffsets */ + return PyObject_GetBuffer(obj, buf, PyBUF_RECORDS_RO); +} + +NUMBA_EXPORT_FUNC(void) +numba_adapt_buffer(Py_buffer *buf, arystruct_t *arystruct) +{ + int i; + npy_intp *p; + + arystruct->data = buf->buf; + arystruct->itemsize = buf->itemsize; + arystruct->parent = buf->obj; + arystruct->nitems = 1; + p = arystruct->shape_and_strides; + for (i = 0; i < buf->ndim; i++, p++) { + *p = buf->shape[i]; + arystruct->nitems *= buf->shape[i]; + } + for (i = 0; i < buf->ndim; i++, p++) { + *p = buf->strides[i]; + } + arystruct->meminfo = NULL; +} + +NUMBA_EXPORT_FUNC(void) +numba_release_buffer(Py_buffer *buf) +{ + PyBuffer_Release(buf); +} + +NUMBA_EXPORT_FUNC(PyObject *) +numba_ndarray_new(int nd, + npy_intp *dims, /* shape */ + npy_intp *strides, + void* data, + int type_num, + int itemsize) +{ + PyObject *ndary; + int flags = NPY_ARRAY_BEHAVED; + ndary = PyArray_New((PyTypeObject*)&PyArray_Type, nd, dims, type_num, + strides, data, 0, flags, NULL); + return ndary; +} + + +/* + * Handle reshaping of zero-sized array. + * See numba_attempt_nocopy_reshape() below. + */ +static int +nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides, + npy_intp newnd, const npy_intp *newdims, + npy_intp *newstrides, npy_intp itemsize, + int is_f_order) +{ + int i; + /* Just make the strides vaguely reasonable + * (they can have any value in theory). 
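numba_attempt_nocopy_reshape below validates merged axes against the C-order contiguity rule stride[k] == dim[k+1] * stride[k+1] (and its mirror image for Fortran order). A sketch of how C-order strides are derived for a hypothetical (2, 3, 4) float64 array:

#include <stdio.h>

int main(void) {
    long dims[3] = {2, 3, 4};
    long strides[3];
    long itemsize = 8;                 /* e.g. float64 */
    strides[2] = itemsize;             /* innermost axis is contiguous */
    for (int k = 1; k >= 0; k--)
        strides[k] = dims[k + 1] * strides[k + 1];
    printf("%ld %ld %ld\n", strides[0], strides[1], strides[2]);
    /* prints: 96 32 8 */
    return 0;
}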
+ */ + for (i = 0; i < newnd; i++) + newstrides[i] = itemsize; + return 1; /* reshape successful */ +} + +/* + * Straight from Numpy's _attempt_nocopy_reshape() + * (np/core/src/multiarray/shape.c). + * Attempt to reshape an array without copying data + * + * This function should correctly handle all reshapes, including + * axes of length 1. Zero strides should work but are untested. + * + * If a copy is needed, returns 0 + * If no copy is needed, returns 1 and fills `npy_intp *newstrides` + * with appropriate strides + */ + +NUMBA_EXPORT_FUNC(int) +numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides, + npy_intp newnd, const npy_intp *newdims, + npy_intp *newstrides, npy_intp itemsize, + int is_f_order) +{ + int oldnd; + npy_intp olddims[NPY_MAXDIMS]; + npy_intp oldstrides[NPY_MAXDIMS]; + npy_intp np, op, last_stride; + int oi, oj, ok, ni, nj, nk; + + oldnd = 0; + /* + * Remove axes with dimension 1 from the old array. They have no effect + * but would need special cases since their strides do not matter. + */ + for (oi = 0; oi < nd; oi++) { + if (dims[oi]!= 1) { + olddims[oldnd] = dims[oi]; + oldstrides[oldnd] = strides[oi]; + oldnd++; + } + } + + np = 1; + for (ni = 0; ni < newnd; ni++) { + np *= newdims[ni]; + } + op = 1; + for (oi = 0; oi < oldnd; oi++) { + op *= olddims[oi]; + } + if (np != op) { + /* different total sizes; no hope */ + return 0; + } + + if (np == 0) { + /* the Numpy code does not handle 0-sized arrays */ + return nocopy_empty_reshape(nd, dims, strides, + newnd, newdims, newstrides, + itemsize, is_f_order); + } + + /* oi to oj and ni to nj give the axis ranges currently worked with */ + oi = 0; + oj = 1; + ni = 0; + nj = 1; + while (ni < newnd && oi < oldnd) { + np = newdims[ni]; + op = olddims[oi]; + + while (np != op) { + if (np < op) { + /* Misses trailing 1s, these are handled later */ + np *= newdims[nj++]; + } else { + op *= olddims[oj++]; + } + } + + /* Check whether the original axes can be combined */ + for (ok = oi; ok < oj - 1; ok++) { + if (is_f_order) { + if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) { + /* not contiguous enough */ + return 0; + } + } + else { + /* C order */ + if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) { + /* not contiguous enough */ + return 0; + } + } + } + + /* Calculate new strides for all axes currently worked with */ + if (is_f_order) { + newstrides[ni] = oldstrides[oi]; + for (nk = ni + 1; nk < nj; nk++) { + newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1]; + } + } + else { + /* C order */ + newstrides[nj - 1] = oldstrides[oj - 1]; + for (nk = nj - 1; nk > ni; nk--) { + newstrides[nk - 1] = newstrides[nk]*newdims[nk]; + } + } + ni = nj++; + oi = oj++; + } + + /* + * Set strides corresponding to trailing 1s of the new shape. + */ + if (ni >= 1) { + last_stride = newstrides[ni - 1]; + } + else { + last_stride = itemsize; + } + if (is_f_order) { + last_stride *= newdims[ni - 1]; + } + for (nk = ni; nk < newnd; nk++) { + newstrides[nk] = last_stride; + } + + return 1; +} + +/* + * Cython utilities. 
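import_cython_function below reads the target module's __pyx_capi__ dict, in which Cython publishes each C function as a capsule. A sketch of the unwrapping step, assuming the CPython headers:

#include <Python.h>

/* Returns the raw pointer held by a capsule, or NULL with an error set. */
static void *pointer_from_capsule(PyObject *cobj) {
    const char *name = PyCapsule_GetName(cobj);   /* NULL if not a capsule */
    if (name == NULL)
        return NULL;
    return PyCapsule_GetPointer(cobj, name);
}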
+ */ + +/* Fetch the address of the given function, as exposed by + a cython module */ +static void * +import_cython_function(const char *module_name, const char *function_name) +{ + PyObject *module, *capi, *cobj; + void *res = NULL; + const char *capsule_name; + + module = PyImport_ImportModule(module_name); + if (module == NULL) + return NULL; + capi = PyObject_GetAttrString(module, "__pyx_capi__"); + Py_DECREF(module); + if (capi == NULL) + return NULL; + cobj = PyMapping_GetItemString(capi, (char *)function_name); + Py_DECREF(capi); + if (cobj == NULL) { + PyErr_Clear(); + PyErr_Format(PyExc_ValueError, + "No function '%s' found in __pyx_capi__ of '%s'", + function_name, module_name); + return NULL; + } + /* 2.7+ => Cython exports a PyCapsule */ + capsule_name = PyCapsule_GetName(cobj); + if (capsule_name != NULL) { + res = PyCapsule_GetPointer(cobj, capsule_name); + } + Py_DECREF(cobj); + return res; +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_import_cython_function(PyObject *self, PyObject *args) +{ + const char *module_name; + const char *function_name; + void *p = NULL; + PyObject *res; + + if (!PyArg_ParseTuple(args, "ss", &module_name, &function_name)) { + return NULL; + } + p = import_cython_function(module_name, function_name); + if (p == NULL) { + return NULL; + } + res = PyLong_FromVoidPtr(p); + if (res == NULL) { + PyErr_SetString(PyExc_RuntimeError, + "Could not convert function address to int"); + return NULL; + } + return res; +} + +/* We use separate functions for datetime64 and timedelta64, to ensure + * proper type checking. + */ +NUMBA_EXPORT_FUNC(npy_int64) +numba_extract_np_datetime(PyObject *td) +{ + if (!PyArray_IsScalar(td, Datetime)) { + PyErr_SetString(PyExc_TypeError, + "expected a numpy.datetime64 object"); + return -1; + } + return PyArrayScalar_VAL(td, Timedelta); +} + +NUMBA_EXPORT_FUNC(npy_int64) +numba_extract_np_timedelta(PyObject *td) +{ + if (!PyArray_IsScalar(td, Timedelta)) { + PyErr_SetString(PyExc_TypeError, + "expected a numpy.timedelta64 object"); + return -1; + } + return PyArrayScalar_VAL(td, Timedelta); +} + +NUMBA_EXPORT_FUNC(PyObject *) +numba_create_np_datetime(npy_int64 value, int unit_code) +{ + PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *) + PyArrayScalar_New(Datetime); + if (obj != NULL) { + obj->obval = value; + obj->obmeta.base = unit_code; + obj->obmeta.num = 1; + } + return (PyObject *) obj; +} + +NUMBA_EXPORT_FUNC(PyObject *) +numba_create_np_timedelta(npy_int64 value, int unit_code) +{ + PyTimedeltaScalarObject *obj = (PyTimedeltaScalarObject *) + PyArrayScalar_New(Timedelta); + if (obj != NULL) { + obj->obval = value; + obj->obmeta.base = unit_code; + obj->obmeta.num = 1; + } + return (PyObject *) obj; +} + +NUMBA_EXPORT_FUNC(uint64_t) +numba_fptoui(double x) { + /* First cast to signed int of the full width to make sure sign extension + happens (this can make a difference on some platforms...). */ + return (uint64_t) (int64_t) x; +} + +NUMBA_EXPORT_FUNC(uint64_t) +numba_fptouif(float x) { + return (uint64_t) (int64_t) x; +} + +NUMBA_EXPORT_FUNC(void) +numba_gil_ensure(PyGILState_STATE *state) { + *state = PyGILState_Ensure(); +} + +NUMBA_EXPORT_FUNC(void) +numba_gil_release(PyGILState_STATE *state) { + PyGILState_Release(*state); +} + +NUMBA_EXPORT_FUNC(PyObject *) +numba_py_type(PyObject *obj) { + return (PyObject *) Py_TYPE(obj); +} + + +/* + * Functions for tagging an arbitrary Python object with an arbitrary pointer. + * These functions make strong lifetime assumptions, see below. 
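numba_fptoui above converts through a signed 64-bit integer before reinterpreting as unsigned; converting a negative double straight to uint64_t is undefined behavior in C, while the two-step route is well defined. A standalone illustration:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    double x = -1.0;
    /* double -> int64_t gives -1; int64_t -> uint64_t wraps to 2^64 - 1 */
    uint64_t u = (uint64_t) (int64_t) x;
    printf("%llu\n", (unsigned long long) u);   /* 18446744073709551615 */
    return 0;
}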
+ */ + +static PyObject *private_data_dict = NULL; + +static PyObject * +_get_private_data_dict(void) +{ + if (private_data_dict == NULL) + private_data_dict = PyDict_New(); + return private_data_dict; +} + +NUMBA_EXPORT_FUNC(void) +numba_set_pyobject_private_data(PyObject *obj, void *ptr) +{ + PyObject *dct = _get_private_data_dict(); + /* This assumes the reference to setobj is kept alive until the + call to numba_reset_set_private_data()! */ + PyObject *key = PyLong_FromVoidPtr((void *) obj); + PyObject *value = PyLong_FromVoidPtr(ptr); + + if (!dct || !value || !key) + goto error; + if (PyDict_SetItem(dct, key, value)) + goto error; + Py_DECREF(key); + Py_DECREF(value); + return; + +error: + Py_FatalError("unable to set private data"); +} + +NUMBA_EXPORT_FUNC(void *) +numba_get_pyobject_private_data(PyObject *obj) +{ + PyObject *dct = _get_private_data_dict(); + PyObject *value, *key = PyLong_FromVoidPtr((void *) obj); + void *ptr; + if (!dct || !key) + goto error; + + value = PyDict_GetItem(dct, key); + Py_DECREF(key); + if (!value) + return NULL; + else { + ptr = PyLong_AsVoidPtr(value); + if (ptr == NULL && PyErr_Occurred()) + goto error; + return ptr; + } + +error: + Py_FatalError("unable to get private data"); + return NULL; +} + +NUMBA_EXPORT_FUNC(void) +numba_reset_pyobject_private_data(PyObject *obj) +{ + PyObject *dct = _get_private_data_dict(); + PyObject *key = PyLong_FromVoidPtr((void *) obj); + + if (!key) + goto error; + if (PyDict_DelItem(dct, key)) + PyErr_Clear(); + Py_DECREF(key); + return; + +error: + Py_FatalError("unable to reset private data"); +} + +NUMBA_EXPORT_FUNC(int) +numba_unpack_slice(PyObject *obj, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) +{ + PySliceObject *slice = (PySliceObject *) obj; + if (!PySlice_Check(obj)) { + PyErr_Format(PyExc_TypeError, + "Expected a slice object, got '%s'", + Py_TYPE(slice)->tp_name); + return -1; + } +#define FETCH_MEMBER(NAME, DEFAULT) \ + if (slice->NAME != Py_None) { \ + Py_ssize_t v = PyNumber_AsSsize_t(slice->NAME, \ + PyExc_OverflowError); \ + if (v == -1 && PyErr_Occurred()) \ + return -1; \ + *NAME = v; \ + } \ + else { \ + *NAME = DEFAULT; \ + } + FETCH_MEMBER(step, 1) + FETCH_MEMBER(stop, (*step > 0) ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN) + FETCH_MEMBER(start, (*step > 0) ? 0 : PY_SSIZE_T_MAX) + return 0; + +#undef FETCH_MEMBER +} + +NUMBA_EXPORT_FUNC(int) +numba_fatal_error(void) +{ + PyGILState_Ensure(); + Py_FatalError("in Numba-compiled function"); + return 0; /* unreachable */ +} + +/* Insert a frame into the traceback for (funcname, filename, lineno). */ +/* This function is CPython's _PyTraceback_Add, renamed, see: + * https://github.com/python/cpython/blob/d545869d084e70d4838310e79b52a25a72a1ca56/Python/traceback.c#L246 + * and modified for Python 2.x based on + * https://github.com/python/cpython/blob/2e1a34025cde19bddf12a2eac8fedb6afcca8339/Modules/_ctypes/callbacks.c#L151-L174 + */ +static void traceback_add(const char *funcname, const char *filename, int lineno) +{ + PyObject *globals = NULL; + PyCodeObject *code = NULL; + PyFrameObject *frame = NULL; + PyObject *exc, *val, *tb; + + /* Save and clear the current exception. Python functions must not be + called with an exception set. Calling Python functions happens when + the codec of the filesystem encoding is implemented in pure Python. 
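traceback_add below brackets its work with PyErr_Fetch/PyErr_Restore so that no helper runs while an exception is set. A sketch of that bracket, assuming the CPython headers and the pre-3.12 error API this file targets; call_with_clean_error_state is a hypothetical wrapper:

#include <Python.h>

static void call_with_clean_error_state(void (*body)(void)) {
    PyObject *exc, *val, *tb;
    PyErr_Fetch(&exc, &val, &tb);    /* saves and clears any live exception */
    body();                          /* Python API may now be called safely */
    PyErr_Restore(exc, val, tb);     /* reinstates it; references are consumed */
}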
*/ + PyErr_Fetch(&exc, &val, &tb); + + globals = PyDict_New(); + if (!globals) + goto error; + code = PyCode_NewEmpty(filename, funcname, lineno); + if (!code) { + goto error; + } + frame = PyFrame_New(PyThreadState_Get(), code, globals, NULL); + Py_DECREF(globals); + Py_DECREF(code); + if (!frame) + goto error; + frame->f_lineno = lineno; + + PyErr_Restore(exc, val, tb); + PyTraceBack_Here(frame); + Py_DECREF(frame); + return; + +error: + _PyErr_ChainExceptions(exc, val, tb); +} + + +/* + * Add traceback information to *loc* to the active exception. + * loc can be NULL, which causes this function to become a no-op. + */ +static +void traceback_add_loc(PyObject *loc) { + const char *function_name_str = NULL, *filename_str = NULL; + PyObject *function_name = NULL, *filename = NULL, *lineno = NULL; + Py_ssize_t pos; + + /* instance is instantiated/internal exception is raised, if loc is present + * add a frame for it into the traceback */ + if(loc && loc != Py_None && PyTuple_Check(loc)) + { + pos = 0; + function_name = PyTuple_GET_ITEM(loc, pos); + function_name_str = PyString_AsString(function_name); + pos = 1; + filename = PyTuple_GET_ITEM(loc, pos); + filename_str = PyString_AsString(filename); + pos = 2; + lineno = PyTuple_GET_ITEM(loc, pos); + traceback_add(function_name_str, filename_str, \ + (int)PyLong_AsLong(lineno)); + } +} + +/** + * Re-raise the current active exception. + * Called internal by process_raise() when *exc* is None. + */ +static +int reraise_exc_is_none(void) { + /* Reraise */ + PyThreadState *tstate = PyThreadState_GET(); + PyObject *tb, *type, *value; +#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 7) + _PyErr_StackItem *tstate_exc = tstate->exc_info; +#else + PyThreadState *tstate_exc = tstate; +#endif + type = tstate_exc->exc_type; + value = tstate_exc->exc_value; + tb = tstate_exc->exc_traceback; + if (type == Py_None) { + PyErr_SetString(PyExc_RuntimeError, + "No active exception to reraise"); + return 0; + } + /* incref needed because PyErr_Restore DOES NOT */ + Py_XINCREF(type); + Py_XINCREF(value); + Py_XINCREF(tb); + PyErr_Restore(type, value, tb); + return 1; +} + +/* + * Set exception given the Exception type and the constructor argument. + * Equivalent to ``raise exc(value)``. + * PyExceptionClass_Check(exc) must be True. + * value can be NULL. + */ +static +int process_exception_class(PyObject *exc, PyObject *value) { + PyObject *type; + /* It is a class, type used here just as a tmp var */ + type = PyObject_CallObject(exc, value); + if (type == NULL){ + return 0; + } + if (!PyExceptionInstance_Check(type)) { + PyErr_SetString(PyExc_TypeError, + "exceptions must derive from BaseException"); + Py_DECREF(type); + return 0; + } + /* all ok, set type to the exc */ + Py_DECREF(type); + type = exc; + PyErr_SetObject(type, value); + return 1; +} + +/* + * Internal routine to process exceptions. + * exc cannot be NULL. It can be a None, Exception type, or Exception instance. + * value can be NULL for absent, or any PyObject valid for the exception. + */ +static +int process_raise(PyObject *exc, PyObject *value) { + /* exc is None */ + if (exc == Py_None) { + return reraise_exc_is_none(); + } + /* exc should be an exception class */ + else if (PyExceptionClass_Check(exc)) { + return process_exception_class(exc, value); + } + /* exc is an instance of an Exception */ + else if (PyExceptionInstance_Check(exc)) { + PyObject *type = PyExceptionInstance_Class(exc); + PyErr_SetObject(type, exc); + return 0; + } + else { + /* Not something you can raise. 
       You get an exception
       anyway, just not what you specified :-) */
+        PyErr_SetString(PyExc_TypeError,
+                        "exceptions must derive from BaseException");
+        return 0;
+    }
+}
+
+/* Logic for raising an arbitrary object.  Adapted from CPython's ceval.c.
+   This *consumes* a reference count to its argument. */
+NUMBA_EXPORT_FUNC(int)
+numba_do_raise(PyObject *exc_packed)
+{
+    int status;
+    PyObject *exc = NULL, *value = NULL, *loc = NULL;
+
+    /* We support the following forms of raise:
+       raise
+       raise <instance>
+       raise <type> */
+
+    /* could be a tuple from npm (some exc like thing, args, location) */
+    if (PyTuple_CheckExact(exc_packed)) {
+        /* Unpack a (class/inst/tuple, arguments, location) tuple. */
+        if (!PyArg_ParseTuple(exc_packed, "OOO", &exc, &value, &loc)) {
+            traceback_add_loc(loc);
+            return 0;
+        }
+    } else {
+        /* could be a reraise or an exception from objmode */
+        exc = exc_packed;
+        /* branch exit with value = NULL and loc = NULL */
+    }
+    /* value is either NULL or borrowed */
+    status = process_raise(exc, value);
+    traceback_add_loc(loc);
+    Py_DECREF(exc_packed);
+    return status;
+}
+
+#ifdef PYCC_COMPILING
+/* AOT: avoid the use of `numba.core.serialize` */
+NUMBA_EXPORT_FUNC(PyObject *)
+numba_unpickle(const char *data, int n, const char *hashed)
+{
+    PyObject *buf, *obj;
+    static PyObject *loads;
+
+    /* Caching the pickle.loads function shaves a couple µs here. */
+    if (loads == NULL) {
+        PyObject *picklemod;
+        picklemod = PyImport_ImportModule("pickle");
+        if (picklemod == NULL)
+            return NULL;
+        loads = PyObject_GetAttrString(picklemod, "loads");
+        Py_DECREF(picklemod);
+        if (loads == NULL)
+            return NULL;
+    }
+
+    buf = PyBytes_FromStringAndSize(data, n);
+    if (buf == NULL)
+        return NULL;
+    obj = PyObject_CallFunctionObjArgs(loads, buf, NULL);
+    Py_DECREF(buf);
+    return obj;
+}
+
+#else
+
+NUMBA_EXPORT_FUNC(PyObject *)
+numba_unpickle(const char *data, int n, const char *hashed)
+{
+    PyObject *buf=NULL, *obj=NULL, *addr=NULL, *hashedbuf=NULL;
+    static PyObject *loads=NULL;
+
+    /* Caching the pickle.loads function shaves a couple µs here.
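+
+       The cache-on-first-use idiom below is generic; a minimal standalone
+       sketch of the same pattern (using the stdlib pickle module, as the
+       AOT branch above does):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static PyObject *get_cached_loads(void)
+{
+    static PyObject *cached = NULL;   /* intentionally immortal */
+    if (cached == NULL) {
+        PyObject *mod = PyImport_ImportModule("pickle");
+        if (mod == NULL)
+            return NULL;
+        cached = PyObject_GetAttrString(mod, "loads");
+        Py_DECREF(mod);
+    }
+    return cached;   /* cached reference: caller must not decref */
+}
+#endif
+    /* Cache numba.core.serialize._numba_unpickle on first use: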
*/ + if (loads == NULL) { + PyObject *picklemod; + picklemod = PyImport_ImportModule("numba.core.serialize"); + if (picklemod == NULL) + return NULL; + loads = PyObject_GetAttrString(picklemod, "_numba_unpickle"); + Py_DECREF(picklemod); + if (loads == NULL) + return NULL; + } + + buf = PyBytes_FromStringAndSize(data, n); + if (buf == NULL) + return NULL; + /* SHA1 produces 160 bit or 20 bytes */ + hashedbuf = PyBytes_FromStringAndSize(hashed, 20); + if (hashedbuf == NULL) + goto error; + addr = PyLong_FromVoidPtr((void*)data); + if (addr == NULL) + goto error; + obj = PyObject_CallFunctionObjArgs(loads, addr, buf, hashedbuf, NULL); +error: + Py_XDECREF(addr); + Py_XDECREF(hashedbuf); + Py_DECREF(buf); + return obj; +} +#endif + +/* + * Unicode helpers + */ + +/* Developer note: + * + * The hash value of unicode objects is obtained via: + * ((PyASCIIObject *)(obj))->hash; + * The use comes from this definition: + * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L119-L120 + * and it's used extensively throughout the `cpython/Object/unicodeobject.c` + * source, not least in `unicode_hash` itself: + * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L11662-L11679 + * + * The Unicode string struct layouts are described here: + * https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Include/cpython/unicodeobject.h#L82-L161 + * essentially, all the unicode string layouts start with a `PyASCIIObject` at + * offset 0 (as of commit 6d43f6f081023b680d9db4542d19b9e382149f0a, somewhere + * in the 3.8 development cycle). + * + * For safety against future CPython internal changes, the code checks that the + * _base members of the unicode structs are what is expected in 3.7, and that + * their offset is 0. It then walks the struct to the hash location to make sure + * the offset is indeed the same as PyASCIIObject->hash. + * Note: The large condition in the if should evaluate to a compile time + * constant. + */ + +#define MEMBER_SIZE(structure, member) sizeof(((structure *)0)->member) + +NUMBA_EXPORT_FUNC(void *) +numba_extract_unicode(PyObject *obj, Py_ssize_t *length, int *kind, + unsigned int *ascii, Py_ssize_t *hash) { + if (!PyUnicode_READY(obj)) { + *length = PyUnicode_GET_LENGTH(obj); + *kind = PyUnicode_KIND(obj); + /* could also use PyUnicode_IS_ASCII but it is not publicly advertised in https://docs.python.org/3/c-api/unicode.html */ + *ascii = (unsigned int)(PyUnicode_MAX_CHAR_VALUE(obj) == (0x7f)); + /* this is here as a crude check for safe casting of all unicode string + * structs to a PyASCIIObject */ + if (MEMBER_SIZE(PyCompactUnicodeObject, _base) == sizeof(PyASCIIObject) && + MEMBER_SIZE(PyUnicodeObject, _base) == sizeof(PyCompactUnicodeObject) && + offsetof(PyCompactUnicodeObject, _base) == 0 && + offsetof(PyUnicodeObject, _base) == 0 && + offsetof(PyCompactUnicodeObject, _base.hash) == offsetof(PyASCIIObject, hash) && + offsetof(PyUnicodeObject, _base._base.hash) == offsetof(PyASCIIObject, hash) + ) { + /* Grab the hash from the type object cache, do not compute it. */ + *hash = ((PyASCIIObject *)(obj))->hash; + } + else { + /* cast is not safe, fail */ + return NULL; + } + return PyUnicode_DATA(obj); + } else { + return NULL; + } +} + +/* this is late included as it #defines e.g. 
SHIFT that should not impact
+ * the above */
+#include "_unicodetype_db.h"
+
+/* This function is a modified copy of the private function gettyperecord from
+ * CPython's Objects/unicodectype.c
+ *
+ * See: https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L45-L59
+ */
+NUMBA_EXPORT_FUNC(void)
+numba_gettyperecord(Py_UCS4 code, int *upper, int *lower, int *title,
+                    unsigned char *decimal, unsigned char *digit,
+                    unsigned short *flags)
+{
+    int index;
+    const numba_PyUnicode_TypeRecord *rec;
+
+    if (code >= 0x110000)
+        index = 0;
+    else
+    {
+        index = index1[(code>>SHIFT)];
+        index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
+    }
+
+    rec = &numba_PyUnicode_TypeRecords[index];
+    *upper = rec->upper;
+    *lower = rec->lower;
+    *title = rec->title;
+    *decimal = rec->decimal;
+    *digit = rec->digit;
+    *flags = rec->flags;
+}
+
+/* This function provides a consistent access point for the
+ * _PyUnicode_ExtendedCase array defined in CPython's Objects/unicodectype.c
+ * and now also as numba_PyUnicode_ExtendedCase in Numba's _unicodetype_db.h
+ */
+NUMBA_EXPORT_FUNC(Py_UCS4)
+numba_get_PyUnicode_ExtendedCase(int code)
+{
+    return numba_PyUnicode_ExtendedCase[code];
+}
+
+/* from _unicodetype_db.h */
+#undef SHIFT
+
+/*
+ * defined break point for gdb
+ */
+NUMBA_EXPORT_FUNC(void)
+numba_gdb_breakpoint(void) {
+    /* does nothing */
+}
+
+/*
+ * Define bridge for all math functions
+ */
+
+#define MATH_UNARY(F, R, A) \
+    NUMBA_EXPORT_FUNC(R) numba_##F(A a) { return F(a); }
+#define MATH_BINARY(F, R, A, B) \
+    NUMBA_EXPORT_FUNC(R) numba_##F(A a, B b) { return F(a, b); }
+
+#include "mathnames.h"
+
+#undef MATH_UNARY
+#undef MATH_BINARY
+
+/*
+ * BLAS and LAPACK wrappers
+ */
+
+#include "_lapack.c"
+
+/*
+ * PRNG support
+ */
+
+#include "_random.c"
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helpermod.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helpermod.c
new file mode 100644
index 000000000..bbbac20bd
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_helpermod.c
@@ -0,0 +1,307 @@
+/*
+Expose all functions as pointers in a dedicated C extension.
+*/
+#include "cext/cext.h"
+/* Import _pymodule.h first, for a recent _POSIX_C_SOURCE */
+#include "_pymodule.h"
+
+#include <math.h>
+#ifdef _MSC_VER
+    #define false 0
+    #define true 1
+    #define bool int
+#else
+    #include <stdbool.h>
+#endif
+
+/*
+Include C-extension here
+*/
+#include "cext/cext.h"
+
+/* Numba C helpers */
+#include "_helperlib.c"
+
+/* Numpy C math function exports */
+#include "_npymath_exports.c"
+
+static PyObject *
+build_c_helpers_dict(void)
+{
+    PyObject *dct = PyDict_New();
+    if (dct == NULL)
+        goto error;
+
+#define _declpointer(name, value) do {            \
+    PyObject *o = PyLong_FromVoidPtr(value);      \
+    if (o == NULL) goto error;                    \
+    if (PyDict_SetItemString(dct, name, o)) {     \
+        Py_DECREF(o);                             \
+        goto error;                               \
+    }                                             \
+    Py_DECREF(o);                                 \
+} while (0)
+
+#define declmethod(func) _declpointer(#func, &numba_##func)
+
+#define declpointer(ptr) _declpointer(#ptr, &numba_##ptr)
+
+    declmethod(fixed_fmod);
+    declmethod(fixed_fmodf);
+    declmethod(set_fnclex);
+
+    declmethod(sdiv);
+    declmethod(srem);
+    declmethod(udiv);
+    declmethod(urem);
+    declmethod(frexp);
+    declmethod(frexpf);
+    declmethod(ldexp);
+    declmethod(ldexpf);
+    declmethod(cpow);
+    declmethod(cpowf);
+    declmethod(erf);
+    declmethod(erff);
+    declmethod(erfc);
+    declmethod(erfcf);
+    declmethod(gamma);
+    declmethod(gammaf);
+    declmethod(lgamma);
+    declmethod(lgammaf);
+    declmethod(signbit);
+    declmethod(signbitf);
+    declmethod(complex_adaptor);
+    declmethod(adapt_ndarray);
+    declmethod(ndarray_new);
+    declmethod(extract_record_data);
+    declmethod(get_buffer);
+    declmethod(adapt_buffer);
+    declmethod(release_buffer);
+    declmethod(extract_np_datetime);
+    declmethod(create_np_datetime);
+    declmethod(extract_np_timedelta);
+    declmethod(create_np_timedelta);
+    declmethod(recreate_record);
+    declmethod(fptoui);
+    declmethod(fptouif);
+    declmethod(gil_ensure);
+    declmethod(gil_release);
+    declmethod(fatal_error);
+    declmethod(py_type);
+    declmethod(unpack_slice);
+    declmethod(do_raise);
+    declmethod(unpickle);
+    declmethod(attempt_nocopy_reshape);
+    declmethod(get_pyobject_private_data);
+    declmethod(set_pyobject_private_data);
+    declmethod(reset_pyobject_private_data);
+
+    /* BLAS / LAPACK */
+    declmethod(xxgemm);
+    declmethod(xxgemv);
+    declmethod(xxdot);
+    declmethod(xxgetrf);
+    declmethod(ez_xxgetri);
+    declmethod(xxpotrf);
+    declmethod(ez_rgeev);
+    declmethod(ez_cgeev);
+    declmethod(ez_xxxevd);
+    declmethod(ez_gesdd);
+    declmethod(ez_geqrf);
+    declmethod(ez_xxgqr);
+    declmethod(ez_gelsd);
+    declmethod(xgesv);
+    declmethod(xxnrm2);
+
+    /* PRNG support */
+    declmethod(get_py_random_state);
+    declmethod(get_np_random_state);
+    declmethod(get_internal_random_state);
+    declmethod(rnd_shuffle);
+    declmethod(rnd_init);
+    declmethod(poisson_ptrs);
+
+    /* Unicode string support */
+    declmethod(extract_unicode);
+    declmethod(gettyperecord);
+    declmethod(get_PyUnicode_ExtendedCase);
+
+    /* for gdb breakpoint */
+    declmethod(gdb_breakpoint);
+
+    /* for dictionary support */
+    declmethod(test_dict);
+    declmethod(dict_new_minsize);
+    declmethod(dict_set_method_table);
+    declmethod(dict_free);
+    declmethod(dict_length);
+    declmethod(dict_lookup);
+    declmethod(dict_insert);
+    declmethod(dict_insert_ez);
+    declmethod(dict_delitem);
+    declmethod(dict_popitem);
+    declmethod(dict_iter_sizeof);
+    declmethod(dict_iter);
+    declmethod(dict_iter_next);
+    declmethod(dict_dump);
+
+    /* for list support */
+    declmethod(test_list);
+    declmethod(list_new);
+    declmethod(list_set_method_table);
+    declmethod(list_free);
+    declmethod(list_base_ptr);
declmethod(list_size_address); + declmethod(list_length); + declmethod(list_allocated); + declmethod(list_is_mutable); + declmethod(list_set_is_mutable); + declmethod(list_setitem); + declmethod(list_getitem); + declmethod(list_append); + declmethod(list_delitem); + declmethod(list_delete_slice); + declmethod(list_iter_sizeof); + declmethod(list_iter); + declmethod(list_iter_next); + +#define MATH_UNARY(F, R, A) declmethod(F); +#define MATH_BINARY(F, R, A, B) declmethod(F); + #include "mathnames.h" +#undef MATH_UNARY +#undef MATH_BINARY + +#undef declmethod + return dct; +error: + Py_XDECREF(dct); + return NULL; +} + +static int +register_npymath_exports(PyObject *dct) +{ + size_t count = sizeof(npymath_exports) / sizeof(npymath_exports[0]); + size_t i; + + for (i = 0; i < count; ++i) { + PyObject *ptr = PyLong_FromVoidPtr(npymath_exports[i].func); + if (ptr == NULL) + return -1; + if (PyDict_SetItemString(dct, npymath_exports[i].name, ptr) < 0) { + Py_DECREF(ptr); + return -1; + } + Py_DECREF(ptr); + } + + return 0; +} + +static PyObject * +build_npymath_exports_dict(void) +{ + PyObject *dct = PyDict_New(); + if (dct != NULL) { + if (register_npymath_exports(dct) < 0) + Py_CLEAR(dct); + } + return dct; +} + + +/* + * Helper to deal with flushing stdout + */ +PyAPI_FUNC(void) _numba_flush_stdout(void) ; + +void +_numba_flush_stdout(void) { + fflush(stdout); +} + + +static PyMethodDef ext_methods[] = { + { "rnd_get_state", (PyCFunction) _numba_rnd_get_state, METH_O, NULL }, + { "rnd_get_py_state_ptr", (PyCFunction) _numba_rnd_get_py_state_ptr, METH_NOARGS, NULL }, + { "rnd_get_np_state_ptr", (PyCFunction) _numba_rnd_get_np_state_ptr, METH_NOARGS, NULL }, + { "rnd_seed", (PyCFunction) _numba_rnd_seed, METH_VARARGS, NULL }, + { "rnd_set_state", (PyCFunction) _numba_rnd_set_state, METH_VARARGS, NULL }, + { "rnd_shuffle", (PyCFunction) _numba_rnd_shuffle, METH_O, NULL }, + { "_import_cython_function", (PyCFunction) _numba_import_cython_function, METH_VARARGS, NULL }, + { NULL }, +}; + +/* + * These functions are exported by the module's DLL, to exercise ctypes / cffi + * without relying on libc availability (see https://bugs.python.org/issue23606) + */ + +PyAPI_FUNC(double) _numba_test_sin(double x); +PyAPI_FUNC(double) _numba_test_cos(double x); +PyAPI_FUNC(double) _numba_test_exp(double x); +PyAPI_FUNC(void) _numba_test_vsquare(int n, double *x, double *out); +PyAPI_FUNC(double) _numba_test_funcptr(double (*func)(double)); +PyAPI_FUNC(bool) _numba_test_boolean(void); + +double _numba_test_sin(double x) +{ + return sin(x); +} + +double _numba_test_cos(double x) +{ + return cos(x); +} + +double _numba_test_exp(double x) +{ + return exp(x); +} + +void _numba_test_vsquare(int n, double *x, double *out) +{ + int i; + for (i = 0; i < n; i++) + out[i] = pow(x[i], 2.0); +} + +void _numba_test_vcube(int n, double *x, double *out) +{ + int i; + for (i = 0; i < n; i++) + out[i] = pow(x[i], 3.0); +} + +double _numba_test_funcptr(double (*func)(double)) +{ + return func(1.5); +} + +bool _numba_test_boolean() +{ + return true; +} + +MOD_INIT(_helperlib) { + PyObject *m; + MOD_DEF(m, "_helperlib", "No docs", ext_methods) + if (m == NULL) + return MOD_ERROR_VAL; + + import_array(); + + PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); + PyModule_AddObject(m, "npymath_exports", build_npymath_exports_dict()); + PyModule_AddIntConstant(m, "long_min", LONG_MIN); + PyModule_AddIntConstant(m, "long_max", LONG_MAX); + PyModule_AddIntConstant(m, "py_buffer_size", sizeof(Py_buffer)); + 
PyModule_AddIntConstant(m, "py_gil_state_size", sizeof(PyGILState_STATE)); + PyModule_AddIntConstant(m, "py_unicode_1byte_kind", PyUnicode_1BYTE_KIND); + PyModule_AddIntConstant(m, "py_unicode_2byte_kind", PyUnicode_2BYTE_KIND); + PyModule_AddIntConstant(m, "py_unicode_4byte_kind", PyUnicode_4BYTE_KIND); + PyModule_AddIntConstant(m, "py_unicode_wchar_kind", PyUnicode_WCHAR_KIND); + numba_rnd_ensure_global_init(); + + return MOD_SUCCESS_VAL(m); +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_lapack.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_lapack.c new file mode 100644 index 000000000..6d5d183ec --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_lapack.c @@ -0,0 +1,1946 @@ +/* + * This file contains wrappers of BLAS and LAPACK functions + */ +/* + * BLAS calling helpers. The helpers can be called without the GIL held. + * The caller is responsible for checking arguments (especially dimensions). + */ + +/* Fast getters caching the value of a function's address after + the first call to import_cblas_function(). */ + +#define EMIT_GET_CBLAS_FUNC(name) \ + static void *cblas_ ## name = NULL; \ + static void *get_cblas_ ## name(void) { \ + if (cblas_ ## name == NULL) { \ + PyGILState_STATE st = PyGILState_Ensure(); \ + const char *mod = "scipy.linalg.cython_blas"; \ + cblas_ ## name = import_cython_function(mod, # name); \ + PyGILState_Release(st); \ + } \ + return cblas_ ## name; \ + } + +EMIT_GET_CBLAS_FUNC(dgemm) +EMIT_GET_CBLAS_FUNC(sgemm) +EMIT_GET_CBLAS_FUNC(cgemm) +EMIT_GET_CBLAS_FUNC(zgemm) +EMIT_GET_CBLAS_FUNC(dgemv) +EMIT_GET_CBLAS_FUNC(sgemv) +EMIT_GET_CBLAS_FUNC(cgemv) +EMIT_GET_CBLAS_FUNC(zgemv) +EMIT_GET_CBLAS_FUNC(ddot) +EMIT_GET_CBLAS_FUNC(sdot) +EMIT_GET_CBLAS_FUNC(cdotu) +EMIT_GET_CBLAS_FUNC(zdotu) +EMIT_GET_CBLAS_FUNC(cdotc) +EMIT_GET_CBLAS_FUNC(zdotc) +EMIT_GET_CBLAS_FUNC(snrm2) +EMIT_GET_CBLAS_FUNC(dnrm2) +EMIT_GET_CBLAS_FUNC(scnrm2) +EMIT_GET_CBLAS_FUNC(dznrm2) + + +#undef EMIT_GET_CBLAS_FUNC + +/* + * NOTE: On return value convention. + * For LAPACK wrapper development the following conventions are followed: + * Publicly exposed wrapper functions must return:- + * STATUS_ERROR : For an unrecoverable error e.g. caught by xerbla, this is so + * a Py_FatalError can be raised. + * STATUS_SUCCESS: For successful execution + * +n : Where n is an integer for a routine specific error + * (typically derived from an `info` argument). + * + * The caller is responsible for checking and handling the error status. + */ + +/* return STATUS_SUCCESS if everything went ok */ +#define STATUS_SUCCESS (0) + +/* return STATUS_ERROR if an unrecoverable error is encountered */ +#define STATUS_ERROR (-1) + +/* + * A union of all the types accepted by BLAS/LAPACK for use in cases where + * stack based allocation is needed (typically for work space query args length + * 1). + */ +typedef union all_dtypes_ +{ + float s; + double d; + npy_complex64 c; + npy_complex128 z; +} all_dtypes; + +/* + * A checked PyMem_RawMalloc, ensures that the var is either NULL + * and an exception is raised, or that the allocation was successful. + * Returns zero on success for status checking. 
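+ *
+ * Expected call shape at the LAPACK call sites below (a sketch; the size
+ * and cleanup are illustrative):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static int alloc_example(size_t n)
+{
+    void *work = NULL;
+    if (checked_PyMem_RawMalloc(&work, n * sizeof(double)))
+        return STATUS_ERROR;   /* MemoryError already set, GIL handled inside */
+    /* ... use work ... */
+    PyMem_RawFree(work);
+    return STATUS_SUCCESS;
+}
+#endif
+/* The checked allocator itself: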
+ */ +static int checked_PyMem_RawMalloc(void** var, size_t bytes) +{ + *var = NULL; + *var = PyMem_RawMalloc(bytes); + if (!(*var)) + { + { + PyGILState_STATE st = PyGILState_Ensure(); + + PyErr_SetString(PyExc_MemoryError, + "Insufficient memory for buffer allocation\ + required by LAPACK."); + PyGILState_Release(st); + } + return 1; + } + return 0; +} + +/* + * Checks that the char kind is valid (one of [s,d,c,z]) for use in blas/lapack. + * Returns zero on success for status checking. + */ +static int check_kind(char kind) +{ + switch (kind) + { + case 's': + case 'd': + case 'c': + case 'z': + break; + default: + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError, + "invalid data type (kind) found"); + PyGILState_Release(st); + } + return 1; + } + return 0; +} + +/* + * Guard macro for ensuring a valid data "kind" is being used. + * Place at the top of all routines with switches on "kind" that accept + * one of [s,d,c,z]. + */ +#define ENSURE_VALID_KIND(__KIND) \ +if (check_kind( __KIND )) \ +{ \ + return STATUS_ERROR; \ +} \ + +/* + * Checks that the char kind is valid for the real domain (one of [s,d]) + * for use in blas/lapack. + * Returns zero on success for status checking. + */ +static int check_real_kind(char kind) +{ + switch (kind) + { + case 's': + case 'd': + break; + default: + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError, + "invalid data type (kind) found"); + PyGILState_Release(st); + } + return 1; + } + return 0; +} + +/* + * Guard macro for ensuring a valid data "kind" is being used for the + * real domain routines. + * Place at the top of all routines with switches on "kind" that accept + * one of [s,d]. + */ +#define ENSURE_VALID_REAL_KIND(__KIND) \ +if (check_real_kind( __KIND )) \ +{ \ + return STATUS_ERROR; \ +} \ + + +/* + * Checks that the char kind is valid for the complex domain (one of [c,z]) + * for use in blas/lapack. + * Returns zero on success for status checking. + */ +static int check_complex_kind(char kind) +{ + switch (kind) + { + case 'c': + case 'z': + break; + default: + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError, + "invalid data type (kind) found"); + PyGILState_Release(st); + } + return 1; + } + return 0; +} + +/* + * Guard macro for ensuring a valid data "kind" is being used for the + * real domain routines. + * Place at the top of all routines with switches on "kind" that accept + * one of [c,z]. + */ +#define ENSURE_VALID_COMPLEX_KIND(__KIND) \ +if (check_complex_kind( __KIND )) \ +{ \ + return STATUS_ERROR; \ +} \ + + +/* + * Checks that a function is found (i.e. not null) + * Returns zero on success for status checking. + */ +static int check_func(void *func) +{ + if (func == NULL) + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_RuntimeError, + "Specified LAPACK function could not be found."); + PyGILState_Release(st); + return STATUS_ERROR; + } + return STATUS_SUCCESS; +} + + +/* + * Guard macro for ensuring a valid function is found. + */ +#define ENSURE_VALID_FUNC(__FUNC) \ +if (check_func(__FUNC)) \ +{ \ + return STATUS_ERROR; \ +} \ + + +/* + * Define what a Fortran "int" is, some LAPACKs have 64 bit integer support + * numba presently opts for a 32 bit C int. + * This definition allows scope for later configuration time magic to adjust + * the size of int at all the call sites. 
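+ *
+ * For instance, a hypothetical ILP64 build would only need this definition
+ * switched (nothing of the sort is wired up here; <stdint.h> assumed):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+#ifdef NUMBA_ILP64_LAPACK        /* hypothetical configuration macro */
+#define F_INT int64_t
+#else
+#define F_INT int
+#endif
+#endif
+/* Current default: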
+ */ +#define F_INT int + + +typedef float (*sdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy); +typedef double (*ddot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT + *incy); +typedef npy_complex64 (*cdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, + F_INT *incy); +typedef npy_complex128 (*zdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, + F_INT *incy); + +typedef void (*xxgemv_t)(char *trans, F_INT *m, F_INT *n, + void *alpha, void *a, F_INT *lda, + void *x, F_INT *incx, void *beta, + void *y, F_INT *incy); + +typedef void (*xxgemm_t)(char *transa, char *transb, + F_INT *m, F_INT *n, F_INT *k, + void *alpha, void *a, F_INT *lda, + void *b, F_INT *ldb, void *beta, + void *c, F_INT *ldc); + +typedef float (*sxnrm2_t) (F_INT *n, void *x, F_INT *incx); +typedef double (*dxnrm2_t) (F_INT *n, void *x, F_INT *incx); + +/* Vector * vector: result = dx * dy */ +NUMBA_EXPORT_FUNC(int) +numba_xxdot(char kind, char conjugate, Py_ssize_t n, void *dx, void *dy, + void *result) +{ + void *raw_func = NULL; + F_INT _n; + F_INT inc = 1; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_cblas_sdot(); + break; + case 'd': + raw_func = get_cblas_ddot(); + break; + case 'c': + raw_func = conjugate ? get_cblas_cdotc() : get_cblas_cdotu(); + break; + case 'z': + raw_func = conjugate ? get_cblas_zdotc() : get_cblas_zdotu(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _n = (F_INT) n; + + switch (kind) + { + case 's': + *(float *) result = (*(sdot_t) raw_func)(&_n, dx, &inc, dy, &inc);; + break; + case 'd': + *(double *) result = (*(ddot_t) raw_func)(&_n, dx, &inc, dy, &inc);; + break; + case 'c': + *(npy_complex64 *) result = (*(cdot_t) raw_func)(&_n, dx, &inc, dy,\ + &inc);; + break; + case 'z': + *(npy_complex128 *) result = (*(zdot_t) raw_func)(&_n, dx, &inc,\ + dy, &inc);; + break; + } + + return 0; +} + +/* Matrix * vector: y = alpha * a * x + beta * y */ +NUMBA_EXPORT_FUNC(int) +numba_xxgemv(char kind, char trans, Py_ssize_t m, Py_ssize_t n, + void *alpha, void *a, Py_ssize_t lda, + void *x, void *beta, void *y) +{ + void *raw_func = NULL; + F_INT _m, _n; + F_INT _lda; + F_INT inc = 1; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_cblas_sgemv(); + break; + case 'd': + raw_func = get_cblas_dgemv(); + break; + case 'c': + raw_func = get_cblas_cgemv(); + break; + case 'z': + raw_func = get_cblas_zgemv(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _lda = (F_INT) lda; + + (*(xxgemv_t) raw_func)(&trans, &_m, &_n, alpha, a, &_lda, + x, &inc, beta, y, &inc); + return 0; +} + +/* Matrix * matrix: c = alpha * a * b + beta * c */ +NUMBA_EXPORT_FUNC(int) +numba_xxgemm(char kind, char transa, char transb, + Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, + void *alpha, void *a, Py_ssize_t lda, + void *b, Py_ssize_t ldb, void *beta, + void *c, Py_ssize_t ldc) +{ + void *raw_func = NULL; + F_INT _m, _n, _k; + F_INT _lda, _ldb, _ldc; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_cblas_sgemm(); + break; + case 'd': + raw_func = get_cblas_dgemm(); + break; + case 'c': + raw_func = get_cblas_cgemm(); + break; + case 'z': + raw_func = get_cblas_zgemm(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _k = (F_INT) k; + _lda = (F_INT) lda; + _ldb = (F_INT) ldb; + _ldc = (F_INT) ldc; + + (*(xxgemm_t) raw_func)(&transa, &transb, &_m, &_n, &_k, alpha, a, &_lda, + b, &_ldb, beta, c, &_ldc); + return 0; +} + + +/* L2-norms */ 
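+
+/* A hedged usage sketch for the norm wrapper defined just below: for kind
+ * 'd' the result slot is a double; for the complex kinds 'c'/'z' it is the
+ * underlying real type (float/double), as the switch in the wrapper shows.
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static double nrm2_example(void)
+{
+    double x[3] = {3.0, 4.0, 12.0};
+    double nrm = 0.0;
+    numba_xxnrm2('d', 3, x, 1, &nrm);   /* sqrt(9 + 16 + 144) == 13.0 */
+    return nrm;
+}
+#endif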
+NUMBA_EXPORT_FUNC(F_INT) +numba_xxnrm2(char kind, Py_ssize_t n, void * x, Py_ssize_t incx, void * result) +{ + void *raw_func = NULL; + F_INT _incx; + F_INT _n; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_cblas_snrm2(); + break; + case 'd': + raw_func = get_cblas_dnrm2(); + break; + case 'c': + raw_func = get_cblas_scnrm2(); + break; + case 'z': + raw_func = get_cblas_dznrm2(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _n = (F_INT) n; + _incx = (F_INT) incx; + + switch (kind) + { + case 's': + *(float *) result = (*(sxnrm2_t) raw_func)(&_n, x, &_incx);; + break; + case 'd': + *(double *) result = (*(dxnrm2_t) raw_func)(&_n, x, &_incx);; + break; + case 'c': + *(float *) result = (*(sxnrm2_t) raw_func)(&_n, x, &_incx);; + break; + case 'z': + *(double *) result = (*(dxnrm2_t) raw_func)(&_n, x, &_incx);; + break; + } + + return 0; +} + + +/* + * LAPACK calling helpers. The helpers can be called without the GIL held. + * The caller is responsible for checking arguments (especially dimensions). + */ + +/* Fast getters caching the value of a function's address after + the first call to import_clapack_function(). */ + +#define EMIT_GET_CLAPACK_FUNC(name) \ + static void *clapack_ ## name = NULL; \ + static void *get_clapack_ ## name(void) { \ + if (clapack_ ## name == NULL) { \ + PyGILState_STATE st = PyGILState_Ensure(); \ + const char *mod = "scipy.linalg.cython_lapack"; \ + clapack_ ## name = import_cython_function(mod, # name); \ + PyGILState_Release(st); \ + } \ + return clapack_ ## name; \ + } + +/* Computes an LU factorization of a general M-by-N matrix A + * using partial pivoting with row interchanges. + */ +EMIT_GET_CLAPACK_FUNC(sgetrf) +EMIT_GET_CLAPACK_FUNC(dgetrf) +EMIT_GET_CLAPACK_FUNC(cgetrf) +EMIT_GET_CLAPACK_FUNC(zgetrf) + +/* Computes the inverse of a matrix using the LU factorization + * computed by xGETRF. + */ +EMIT_GET_CLAPACK_FUNC(sgetri) +EMIT_GET_CLAPACK_FUNC(dgetri) +EMIT_GET_CLAPACK_FUNC(cgetri) +EMIT_GET_CLAPACK_FUNC(zgetri) + +/* Compute Cholesky factorizations */ +EMIT_GET_CLAPACK_FUNC(spotrf) +EMIT_GET_CLAPACK_FUNC(dpotrf) +EMIT_GET_CLAPACK_FUNC(cpotrf) +EMIT_GET_CLAPACK_FUNC(zpotrf) + +/* Computes for an N-by-N real nonsymmetric matrix A, the + * eigenvalues and, optionally, the left and/or right eigenvectors. + */ +EMIT_GET_CLAPACK_FUNC(sgeev) +EMIT_GET_CLAPACK_FUNC(dgeev) +EMIT_GET_CLAPACK_FUNC(cgeev) +EMIT_GET_CLAPACK_FUNC(zgeev) + +/* Computes for an N-by-N Hermitian matrix A, the + * eigenvalues and, optionally, the left and/or right eigenvectors. + */ +EMIT_GET_CLAPACK_FUNC(ssyevd) +EMIT_GET_CLAPACK_FUNC(dsyevd) +EMIT_GET_CLAPACK_FUNC(cheevd) +EMIT_GET_CLAPACK_FUNC(zheevd) + +/* Computes generalised SVD */ +EMIT_GET_CLAPACK_FUNC(sgesdd) +EMIT_GET_CLAPACK_FUNC(dgesdd) +EMIT_GET_CLAPACK_FUNC(cgesdd) +EMIT_GET_CLAPACK_FUNC(zgesdd) + +/* Computes QR decompositions */ +EMIT_GET_CLAPACK_FUNC(sgeqrf) +EMIT_GET_CLAPACK_FUNC(dgeqrf) +EMIT_GET_CLAPACK_FUNC(cgeqrf) +EMIT_GET_CLAPACK_FUNC(zgeqrf) + +/* Computes columns of Q from elementary reflectors produced by xgeqrf() (QR). 
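+ *
+ * Each EMIT_GET_CLAPACK_FUNC(name) below expands to a lazy, GIL-guarded
+ * getter; written out by hand for dorgqr it reads:
+ */
+#if 0   /* hand-expanded macro, excluded from compilation */
+static void *clapack_dorgqr = NULL;
+static void *get_clapack_dorgqr(void) {
+    if (clapack_dorgqr == NULL) {
+        PyGILState_STATE st = PyGILState_Ensure();
+        const char *mod = "scipy.linalg.cython_lapack";
+        clapack_dorgqr = import_cython_function(mod, "dorgqr");
+        PyGILState_Release(st);
+    }
+    return clapack_dorgqr;
+}
+#endif
+/* Getters for the Q-forming routines: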
+ */ +EMIT_GET_CLAPACK_FUNC(sorgqr) +EMIT_GET_CLAPACK_FUNC(dorgqr) +EMIT_GET_CLAPACK_FUNC(cungqr) +EMIT_GET_CLAPACK_FUNC(zungqr) + +/* Computes the minimum norm solution to linear least squares problems */ +EMIT_GET_CLAPACK_FUNC(sgelsd) +EMIT_GET_CLAPACK_FUNC(dgelsd) +EMIT_GET_CLAPACK_FUNC(cgelsd) +EMIT_GET_CLAPACK_FUNC(zgelsd) + +// Computes the solution to a system of linear equations +EMIT_GET_CLAPACK_FUNC(sgesv) +EMIT_GET_CLAPACK_FUNC(dgesv) +EMIT_GET_CLAPACK_FUNC(cgesv) +EMIT_GET_CLAPACK_FUNC(zgesv) + + +#undef EMIT_GET_CLAPACK_FUNC + +typedef void (*xxgetrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, F_INT *ipiv, + F_INT *info); + +typedef void (*xxgetri_t)(F_INT *n, void *a, F_INT *lda, F_INT *ipiv, void + *work, F_INT *lwork, F_INT *info); + +typedef void (*xxpotrf_t)(char *uplo, F_INT *n, void *a, F_INT *lda, F_INT + *info); + +typedef void (*rgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT *lda, + void *wr, void *wi, void *vl, F_INT *ldvl, void *vr, + F_INT *ldvr, void *work, F_INT *lwork, F_INT *info); + +typedef void (*cgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT + *lda, void *w, void *vl, F_INT *ldvl, void *vr, + F_INT *ldvr, void *work, F_INT *lwork, void *rwork, + F_INT *info); + +typedef void (*rgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda, + void *s, void *u, F_INT *ldu, void *vt, F_INT *ldvt, + void *work, F_INT *lwork, F_INT *iwork, F_INT *info); + +typedef void (*cgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda, + void *s, void * u, F_INT *ldu, void * vt, F_INT *ldvt, + void *work, F_INT *lwork, void *rwork, F_INT *iwork, + F_INT *info); + +typedef void (*xsyevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda, + void *w, void *work, F_INT *lwork, F_INT *iwork, + F_INT *liwork, F_INT *info); + +typedef void (*xheevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda, + void *w, void *work, F_INT *lwork, void *rwork, + F_INT *lrwork, F_INT *iwork, F_INT *liwork, + F_INT *info); + +typedef void (*xgeqrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, void *tau, + void *work, F_INT *lwork, F_INT *info); + +typedef void (*xxxgqr_t)(F_INT *m, F_INT *n, F_INT *k, void *a, F_INT *lda, + void *tau, void *work, F_INT *lwork, F_INT *info); + +typedef void (*rgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda, + void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank, + void *work, F_INT *lwork, F_INT *iwork, F_INT *info); + +typedef void (*cgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda, + void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank, + void *work, F_INT *lwork, void *rwork, F_INT *iwork, + F_INT *info); + +typedef void (*xgesv_t)(F_INT *n, F_INT *nrhs, void *a, F_INT *lda, F_INT *ipiv, + void *b, F_INT *ldb, F_INT *info); + + + +/* + * kind_size() + * gets the data size appropriate for a specified kind. + * + * Input: + * kind - the kind, one of: + * (s, d, c, z) = (float, double, complex, double complex). + * + * Returns: + * data_size - the appropriate data size. + * + */ +static size_t kind_size(char kind) +{ + size_t data_size = 0; + switch (kind) + { + case 's': + data_size = sizeof(float); + break; + case 'd': + data_size = sizeof(double); + break; + case 'c': + data_size = sizeof(npy_complex64); + break; + case 'z': + data_size = sizeof(npy_complex128); + break; + } + return data_size; + +} + +/* + * underlying_float_kind() + * gets the underlying float kind for a given kind. 
+ * + * Input: + * kind - the kind, one of: + * (s, d, c, z) = (float, double, complex, double complex). + * + * Returns: + * underlying_float_kind - the underlying float kind, one of: + * (s, d) = (float, double). + * + * This function essentially provides a map between the char kind + * of a type and the char kind of the underlying float used in the + * type. Essentially: + * --------------- + * Input -> Output + * --------------- + * s -> s + * d -> d + * c -> s + * z -> d + * --------------- + * + */ +static char underlying_float_kind(char kind) +{ + switch(kind) + { + case 's': + case 'c': + return 's'; + case 'd': + case 'z': + return 'd'; + default: + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError, + "invalid kind in underlying_float_kind()"); + PyGILState_Release(st); + } + } + return -1; +} + +/* + * cast_from_X() + * cast from a kind (s, d, c, z) = (float, double, complex, double complex) + * to a Fortran integer. + * + * Parameters: + * kind the kind of val + * val a pointer to the value to cast + * + * Returns: + * A Fortran int from a cast of val (in complex case, takes the real part). + * + * Struct access via non c99 (python only) cmplx types, used for compatibility. + */ +static F_INT +cast_from_X(char kind, void *val) +{ + switch(kind) + { + case 's': + return (F_INT)(*((float *) val)); + case 'd': + return (F_INT)(*((double *) val)); + case 'c': + return (F_INT)(*((npy_complex64 *)val)).real; + case 'z': + return (F_INT)(*((npy_complex128 *)val)).real; + default: + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError, + "invalid kind in cast"); + PyGILState_Release(st); + } + } + return -1; +} + + +#define CATCH_LAPACK_INVALID_ARG(__routine, info) \ + do { \ + if (info < 0) { \ + PyGILState_STATE st = PyGILState_Ensure(); \ + PyErr_Format(PyExc_RuntimeError, \ + "LAPACK Error: Routine " #__routine ". On input %d\n",\ + -(int) info); \ + PyGILState_Release(st); \ + return STATUS_ERROR; \ + } \ + } while(0) + +/* Compute LU decomposition of A + * NOTE: ipiv is an array of Fortran integers allocated by the caller, + * which is therefore expected to use the right dtype. + */ +NUMBA_EXPORT_FUNC(int) +numba_xxgetrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda, + F_INT *ipiv) +{ + void *raw_func = NULL; + F_INT _m, _n, _lda, info; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_sgetrf(); + break; + case 'd': + raw_func = get_clapack_dgetrf(); + break; + case 'c': + raw_func = get_clapack_cgetrf(); + break; + case 'z': + raw_func = get_clapack_zgetrf(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _lda = (F_INT) lda; + + (*(xxgetrf_t) raw_func)(&_m, &_n, a, &_lda, ipiv, &info); + CATCH_LAPACK_INVALID_ARG("xxgetrf", info); + + return (int)info; +} + +/* Compute the inverse of a matrix given its LU decomposition + * Args are as per LAPACK. 
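+ *
+ * Like the other ez_* drivers in this file, the inverse driver below uses
+ * LAPACK's two-call workspace convention. Its shape, condensed into one
+ * hedged sketch (error checks on info elided):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static int xxgetri_shape(char kind, Py_ssize_t n, void *a, Py_ssize_t lda,
+                         F_INT *ipiv)
+{
+    F_INT lwork = -1, info = 0;
+    all_dtypes query;                /* 1-element slot for the size query */
+    void *work = &query;
+
+    /* pass 1: lwork == -1 asks LAPACK for the optimal workspace size */
+    numba_raw_xxgetri(kind, (F_INT)n, a, (F_INT)lda, ipiv, work, &lwork, &info);
+    lwork = cast_from_X(kind, work); /* the size comes back in work[0] */
+
+    /* pass 2: allocate that much and do the real computation */
+    if (checked_PyMem_RawMalloc(&work, kind_size(kind) * lwork))
+        return STATUS_ERROR;
+    numba_raw_xxgetri(kind, (F_INT)n, a, (F_INT)lda, ipiv, work, &lwork, &info);
+    PyMem_RawFree(work);
+    return (int)info;
+}
+#endif
+/* First, the raw dispatcher: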
+ */ +static int +numba_raw_xxgetri(char kind, F_INT n, void *a, F_INT lda, + F_INT *ipiv, void *work, F_INT *lwork, F_INT *info) +{ + void *raw_func = NULL; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_sgetri(); + break; + case 'd': + raw_func = get_clapack_dgetri(); + break; + case 'c': + raw_func = get_clapack_cgetri(); + break; + case 'z': + raw_func = get_clapack_zgetri(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + (*(xxgetri_t) raw_func)(&n, a, &lda, ipiv, work, lwork, info); + + return 0; +} + +/* Compute the inverse of a matrix from the factorization provided by + * xxgetrf. (see numba_xxgetrf() about ipiv) + * Args are as per LAPACK. + */ +NUMBA_EXPORT_FUNC(int) +numba_ez_xxgetri(char kind, Py_ssize_t n, void *a, Py_ssize_t lda, + F_INT *ipiv) +{ + F_INT _n, _lda; + F_INT lwork = -1; + F_INT info = 0; + size_t base_size = -1; + void * work = NULL; + all_dtypes stack_slot; + + ENSURE_VALID_KIND(kind) + + _n = (F_INT)n; + _lda = (F_INT)lda; + + base_size = kind_size(kind); + + work = &stack_slot; + + numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info); + CATCH_LAPACK_INVALID_ARG("xxgetri", info); + + lwork = cast_from_X(kind, work); + + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + { + return STATUS_ERROR; + } + + numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info); + PyMem_RawFree(work); + CATCH_LAPACK_INVALID_ARG("xxgetri", info); + + return (int)info; +} + +/* Compute the Cholesky factorization of a matrix. */ +NUMBA_EXPORT_FUNC(int) +numba_xxpotrf(char kind, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda) +{ + void *raw_func = NULL; + F_INT _n, _lda, info; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_spotrf(); + break; + case 'd': + raw_func = get_clapack_dpotrf(); + break; + case 'c': + raw_func = get_clapack_cpotrf(); + break; + case 'z': + raw_func = get_clapack_zpotrf(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _n = (F_INT) n; + _lda = (F_INT) lda; + + (*(xxpotrf_t) raw_func)(&uplo, &_n, a, &_lda, &info); + CATCH_LAPACK_INVALID_ARG("xxpotrf", info); + return (int)info; +} + + +/* real space eigen systems info from dgeev/sgeev */ +static int +numba_raw_rgeev(char kind, char jobvl, char jobvr, + Py_ssize_t n, void *a, Py_ssize_t lda, void *wr, void *wi, + void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, + void *work, Py_ssize_t lwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _n, _lda, _ldvl, _ldvr, _lwork; + + ENSURE_VALID_REAL_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_sgeev(); + break; + case 'd': + raw_func = get_clapack_dgeev(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _n = (F_INT) n; + _lda = (F_INT) lda; + _ldvl = (F_INT) ldvl; + _ldvr = (F_INT) ldvr; + _lwork = (F_INT) lwork; + + (*(rgeev_t) raw_func)(&jobvl, &jobvr, &_n, a, &_lda, wr, wi, vl, &_ldvl, vr, + &_ldvr, work, &_lwork, info); + return 0; +} + +/* Real space eigen systems info from dgeev/sgeev + * as numba_raw_rgeev but the allocation and error handling is done for the user. + * Args are as per LAPACK. 
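+ *
+ * Output convention on the real path: eigenvalues are returned split into
+ * real parts (wr) and imaginary parts (wi), conjugate pairs adjacent. A
+ * hedged sketch ('N': no eigenvectors requested):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static int geev_example(void)
+{
+    double a[4] = {0.0, 1.0,     /* column-major [[0, -1],          */
+                   -1.0, 0.0};   /*               [1,  0]]           */
+    double wr[2], wi[2];         /* expect wr = {0, 0}, wi = {1, -1} */
+    return numba_ez_rgeev('d', 'N', 'N', 2, a, 2, wr, wi, NULL, 1, NULL, 1);
+}
+#endif
+/* The managed wrapper: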
+ */ +NUMBA_EXPORT_FUNC(int) +numba_ez_rgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, + Py_ssize_t lda, void *wr, void *wi, void *vl, Py_ssize_t ldvl, + void *vr, Py_ssize_t ldvr) +{ + F_INT info = 0; + F_INT lwork = -1; + F_INT _n, _lda, _ldvl, _ldvr; + size_t base_size = -1; + void * work = NULL; + all_dtypes stack_slot; + + ENSURE_VALID_REAL_KIND(kind) + + _n = (F_INT) n; + _lda = (F_INT) lda; + _ldvl = (F_INT) ldvl; + _ldvr = (F_INT) ldvr; + + base_size = kind_size(kind); + + work = &stack_slot; + numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl, + vr, _ldvr, work, lwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info); + + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + { + return STATUS_ERROR; + } + numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl, + vr, _ldvr, work, lwork, &info); + PyMem_RawFree(work); + + CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info); + + return (int)info; +} + +/* Complex space eigen systems info from cgeev/zgeev + * Args are as per LAPACK. + */ +static int +numba_raw_cgeev(char kind, char jobvl, char jobvr, + Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *vl, + Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, void *work, + Py_ssize_t lwork, void *rwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _n, _lda, _ldvl, _ldvr, _lwork; + + ENSURE_VALID_COMPLEX_KIND(kind) + + _n = (F_INT) n; + _lda = (F_INT) lda; + _ldvl = (F_INT) ldvl; + _ldvr = (F_INT) ldvr; + _lwork = (F_INT) lwork; + + switch (kind) + { + case 'c': + raw_func = get_clapack_cgeev(); + break; + case 'z': + raw_func = get_clapack_zgeev(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + (*(cgeev_t) raw_func)(&jobvl, &jobvr, &_n, a, &_lda, w, vl, &_ldvl, vr, + &_ldvr, work, &_lwork, rwork, info); + return 0; +} + + +/* Complex space eigen systems info from cgeev/zgeev + * as numba_raw_cgeev but the allocation and error handling is done for the user. + * Args are as per LAPACK. 
+ */ +NUMBA_EXPORT_FUNC(int) +numba_ez_cgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a, + Py_ssize_t lda, void *w, void *vl, Py_ssize_t ldvl, void *vr, + Py_ssize_t ldvr) +{ + F_INT info = 0; + F_INT lwork = -1; + F_INT _n, _lda, _ldvl, _ldvr; + size_t base_size = -1; + all_dtypes stack_slot, wk; + void * work = NULL; + void * rwork = (void *)&wk; + + ENSURE_VALID_COMPLEX_KIND(kind) + + _n = (F_INT) n; + _lda = (F_INT) lda; + _ldvl = (F_INT) ldvl; + _ldvr = (F_INT) ldvr; + + base_size = kind_size(kind); + + work = &stack_slot; + numba_raw_cgeev(kind, jobvl, jobvr, n, a, lda, w, vl, ldvl, + vr, ldvr, work, lwork, rwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info); + + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc((void**)&rwork, 2*n*base_size)) + { + return STATUS_ERROR; + } + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + { + PyMem_RawFree(rwork); + return STATUS_ERROR; + } + numba_raw_cgeev(kind, jobvl, jobvr, _n, a, _lda, w, vl, _ldvl, + vr, _ldvr, work, lwork, rwork, &info); + PyMem_RawFree(work); + PyMem_RawFree(rwork); + CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info); + + return (int)info; +} + +/* real space symmetric eigen systems info from ssyevd/dsyevd */ +static int +numba_raw_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, + Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork, + F_INT *iwork, Py_ssize_t liwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _n, _lda, _lwork, _liwork; + + ENSURE_VALID_REAL_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_ssyevd(); + break; + case 'd': + raw_func = get_clapack_dsyevd(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _n = (F_INT) n; + _lda = (F_INT) lda; + _lwork = (F_INT) lwork; + _liwork = (F_INT) liwork; + + (*(xsyevd_t) raw_func)(&jobz, &uplo, &_n, a, &_lda, w, work, &_lwork, iwork, &_liwork, info); + return 0; +} + +/* Real space eigen systems info from dsyevd/ssyevd + * as numba_raw_rsyevd but the allocation and error handling is done for the user. + * Args are as per LAPACK. 
+ */ +static int +numba_ez_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) +{ + F_INT info = 0; + F_INT lwork = -1, liwork=-1; + F_INT _n, _lda; + size_t base_size = -1; + void *work = NULL; + F_INT *iwork = NULL; + all_dtypes stack_slot; + int stack_int = -1; + + ENSURE_VALID_REAL_KIND(kind) + + _n = (F_INT) n; + _lda = (F_INT) lda; + + base_size = kind_size(kind); + + work = &stack_slot; + iwork = &stack_int; + numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info); + + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + { + return STATUS_ERROR; + } + liwork = *iwork; + if (checked_PyMem_RawMalloc((void**)&iwork, base_size * liwork)) + { + PyMem_RawFree(work); + return STATUS_ERROR; + } + numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info); + PyMem_RawFree(work); + PyMem_RawFree(iwork); + + CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info); + + return (int)info; +} + + +/* complex space symmetric eigen systems info from cheevd/zheevd*/ +static int +numba_raw_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, + Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork, + void *rwork, Py_ssize_t lrwork, F_INT *iwork, + Py_ssize_t liwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _n, _lda, _lwork, _lrwork, _liwork; + + ENSURE_VALID_COMPLEX_KIND(kind) + + switch (kind) + { + case 'c': + raw_func = get_clapack_cheevd(); + break; + case 'z': + raw_func = get_clapack_zheevd(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _n = (F_INT) n; + _lda = (F_INT) lda; + _lwork = (F_INT) lwork; + _lrwork = (F_INT) lrwork; + _liwork = (F_INT) liwork; + + (*(xheevd_t) raw_func)(&jobz, &uplo, &_n, a, &_lda, w, work, &_lwork, rwork, &_lrwork, iwork, &_liwork, info); + return 0; +} + +/* complex space eigen systems info from cheevd/zheevd + * as numba_raw_cheevd but the allocation and error handling is done for the user. + * Args are as per LAPACK. 
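+ *
+ * Callers normally go through numba_ez_xxxevd() (defined just below), which
+ * picks the real or complex path from `kind`. A hedged sketch, eigenvalues
+ * only, of a symmetric 2x2 double matrix (syevd returns them ascending):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static int eigh_example(void)
+{
+    double a[4] = {2.0, 1.0,
+                   1.0, 2.0};    /* symmetric, column-major   */
+    double w[2];                 /* eigenvalues out: 1.0, 3.0 */
+    return numba_ez_xxxevd('d', 'N', 'L', 2, a, 2, w);
+}
+#endif
+/* The complex-path worker: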
+ */ +static int +numba_ez_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) +{ + F_INT info = 0; + F_INT lwork = -1, lrwork = -1, liwork=-1; + F_INT _n, _lda; + size_t base_size = -1, underlying_float_size = -1; + void *work = NULL, *rwork = NULL; + F_INT *iwork = NULL; + all_dtypes stack_slot1, stack_slot2; + char uf_kind; + int stack_int = -1; + + ENSURE_VALID_COMPLEX_KIND(kind) + + _n = (F_INT) n; + _lda = (F_INT) lda; + + base_size = kind_size(kind); + uf_kind = underlying_float_kind(kind); + underlying_float_size = kind_size(uf_kind); + + work = &stack_slot1; + rwork = &stack_slot2; + iwork = &stack_int; + numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info); + + lwork = cast_from_X(uf_kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + { + return STATUS_ERROR; + } + + lrwork = cast_from_X(uf_kind, rwork); + if (checked_PyMem_RawMalloc(&rwork, underlying_float_size * lrwork)) + { + PyMem_RawFree(work); + return STATUS_ERROR; + } + + liwork = *iwork; + if (checked_PyMem_RawMalloc((void**)&iwork, base_size * liwork)) + { + PyMem_RawFree(work); + PyMem_RawFree(rwork); + return STATUS_ERROR; + } + numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info); + PyMem_RawFree(work); + PyMem_RawFree(rwork); + PyMem_RawFree(iwork); + + CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info); + + return (int)info; +} + +/* Hermitian eigenvalue systems info from *syevd and *heevd. + * This routine hides the type and general complexity involved with making the + * calls. The work space computation and error handling etc is hidden. + * Args are as per LAPACK. + */ +NUMBA_EXPORT_FUNC(int) +numba_ez_xxxevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w) +{ + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + case 'd': + return numba_ez_rsyevd(kind, jobz, uplo, n, a, lda, w); + case 'c': + case 'z': + return numba_ez_cheevd(kind, jobz, uplo, n, a, lda, w); + } + return STATUS_ERROR; /* unreachable */ +} + +/* Real space svd systems info from dgesdd/sgesdd + * Args are as per LAPACK. + */ +static int +numba_raw_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, + Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, + Py_ssize_t ldvt, void *work, Py_ssize_t lwork, + F_INT *iwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _m, _n, _lda, _ldu, _ldvt, _lwork; + + ENSURE_VALID_REAL_KIND(kind) + + _m = (F_INT) m; + _n = (F_INT) n; + _lda = (F_INT) lda; + _ldu = (F_INT) ldu; + _ldvt = (F_INT) ldvt; + _lwork = (F_INT) lwork; + + switch (kind) + { + case 's': + raw_func = get_clapack_sgesdd(); + break; + case 'd': + raw_func = get_clapack_dgesdd(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + (*(rgesdd_t) raw_func)(&jobz, &_m, &_n, a, &_lda, s, u, &_ldu, vt, &_ldvt, + work, &_lwork, iwork, info); + return 0; +} + +/* Real space svd info from dgesdd/sgesdd. + * As numba_raw_rgesdd but the allocation and error handling is done for the + * user. + * Args are as per LAPACK. 
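+ *
+ * End users reach this via numba_ez_gesdd() (further below). A hedged
+ * sketch taking the full SVD of a diagonal 2x2 double matrix (gesdd
+ * returns singular values in descending order):
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static int svd_example(void)
+{
+    double a[4] = {3.0, 0.0, 0.0, 2.0};   /* diag(3, 2), column-major  */
+    double s[2];                          /* singular values: 3.0, 2.0 */
+    double u[4], vt[4];
+    return numba_ez_gesdd('d', 'A', 2, 2, a, 2, s, u, 2, vt, 2);
+}
+#endif
+/* The real-path worker: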
+ */ +static int +numba_ez_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, + Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, + Py_ssize_t ldvt) +{ + F_INT info = 0; + Py_ssize_t minmn = -1; + Py_ssize_t lwork = -1; + all_dtypes stack_slot, wk; + size_t base_size = -1; + F_INT *iwork = (F_INT *)&wk; + void *work = NULL; + + ENSURE_VALID_REAL_KIND(kind) + + base_size = kind_size(kind); + + work = &stack_slot; + + /* Compute optimal work size (lwork) */ + numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work, + lwork, iwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info); + + /* Allocate work array */ + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + return -1; + minmn = m > n ? n : m; + if (checked_PyMem_RawMalloc((void**) &iwork, 8 * minmn * sizeof(F_INT))) + { + PyMem_RawFree(work); + return STATUS_ERROR; + } + numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, + iwork, &info); + PyMem_RawFree(work); + PyMem_RawFree(iwork); + CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info); + + return (int)info; +} + +/* Complex space svd systems info from cgesdd/zgesdd + * Args are as per LAPACK. + */ +static int +numba_raw_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, + Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, + Py_ssize_t ldvt, void *work, Py_ssize_t lwork, void *rwork, + F_INT *iwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _m, _n, _lda, _ldu, _ldvt, _lwork; + + ENSURE_VALID_COMPLEX_KIND(kind) + + _m = (F_INT) m; + _n = (F_INT) n; + _lda = (F_INT) lda; + _ldu = (F_INT) ldu; + _ldvt = (F_INT) ldvt; + _lwork = (F_INT) lwork; + + switch (kind) + { + case 'c': + raw_func = get_clapack_cgesdd(); + break; + case 'z': + raw_func = get_clapack_zgesdd(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + (*(cgesdd_t) raw_func)(&jobz, &_m, &_n, a, &_lda, s, u, &_ldu, vt, &_ldvt, + work, &_lwork, rwork, iwork, info); + return 0; +} + +/* complex space svd info from cgesdd/zgesdd. + * As numba_raw_cgesdd but the allocation and error handling is done for the + * user. + * Args are as per LAPACK. + */ +static int +numba_ez_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, + Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, + Py_ssize_t ldvt) +{ + F_INT info = 0; + Py_ssize_t lwork = -1; + Py_ssize_t lrwork = -1; + Py_ssize_t minmn = -1; + Py_ssize_t tmp1, tmp2; + Py_ssize_t maxmn = -1; + size_t real_base_size = -1; + size_t complex_base_size = -1; + all_dtypes stack_slot, wk1, wk2; + void *work = NULL; + void *rwork = (void *)&wk1; + F_INT *iwork = (F_INT *)&wk2; + + ENSURE_VALID_COMPLEX_KIND(kind) + + switch (kind) + { + case 'c': + real_base_size = sizeof(float); + complex_base_size = sizeof(npy_complex64); + break; + case 'z': + real_base_size = sizeof(double); + complex_base_size = sizeof(npy_complex128); + break; + default: + { + PyGILState_STATE st = PyGILState_Ensure(); + PyErr_SetString(PyExc_ValueError,\ + "Invalid kind in numba_ez_rgesdd"); + PyGILState_Release(st); + } + return STATUS_ERROR; + } + + work = &stack_slot; + + /* Compute optimal work size (lwork) */ + numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, + rwork, iwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info); + + /* Allocate work array */ + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, complex_base_size * lwork)) + return STATUS_ERROR; + + minmn = m > n ? 
n : m; + if (jobz == 'n') + { + lrwork = 7 * minmn; + } + else + { + maxmn = m > n ? m : n; + tmp1 = 5 * minmn + 7; + tmp2 = 2 * maxmn + 2 * minmn + 1; + lrwork = minmn * (tmp1 > tmp2 ? tmp1: tmp2); + } + + if (checked_PyMem_RawMalloc(&rwork, + real_base_size * (lrwork > 1 ? lrwork : 1))) + { + PyMem_RawFree(work); + return STATUS_ERROR; + } + if (checked_PyMem_RawMalloc((void **) &iwork, + 8 * minmn * sizeof(F_INT))) + { + PyMem_RawFree(work); + PyMem_RawFree(rwork); + return STATUS_ERROR; + } + numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork, + rwork, iwork, &info); + PyMem_RawFree(work); + PyMem_RawFree(rwork); + PyMem_RawFree(iwork); + CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info); + + return (int)info; +} + + +/* SVD systems info from *gesdd. + * This routine hides the type and general complexity involved with making the + * calls to *gesdd. The work space computation and error handling etc is hidden. + * Args are as per LAPACK. + */ +NUMBA_EXPORT_FUNC(int) +numba_ez_gesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a, + Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt, + Py_ssize_t ldvt) +{ + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + case 'd': + return numba_ez_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, + ldvt); + case 'c': + case 'z': + return numba_ez_cgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, + ldvt); + } + return STATUS_ERROR; /* unreachable */ +} + + +/* + * Compute the QR factorization of a matrix. + * Return -1 on internal error, 0 on success, > 0 on failure. + */ +static int +numba_raw_xgeqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t + lda, void *tau, void *work, Py_ssize_t lwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _m, _n, _lda, _lwork; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_sgeqrf(); + break; + case 'd': + raw_func = get_clapack_dgeqrf(); + break; + case 'c': + raw_func = get_clapack_cgeqrf(); + break; + case 'z': + raw_func = get_clapack_zgeqrf(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _lda = (F_INT) lda; + _lwork = (F_INT) lwork; + + (*(xgeqrf_t) raw_func)(&_m, &_n, a, &_lda, tau, work, &_lwork, info); + return 0; +} + +/* + * Compute the QR factorization of a matrix. + * This routine hides the type and general complexity involved with making the + * xgeqrf calls. The work space computation and error handling etc is hidden. + * Args are as per LAPACK. + */ +NUMBA_EXPORT_FUNC(int) +numba_ez_geqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t + lda, void *tau) +{ + F_INT info = 0; + Py_ssize_t lwork = -1; + size_t base_size = -1; + all_dtypes stack_slot; + void *work = NULL; + + base_size = kind_size(kind); + + work = &stack_slot; + + /* Compute optimal work size (lwork) */ + numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info); + + /* Allocate work array */ + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + return STATUS_ERROR; + + numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info); + PyMem_RawFree(work); + CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info); + + return 0; /* info cannot be >0 */ + +} + + +/* + * Compute the orthogonal Q matrix (in QR) from elementary relectors. 
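+ * (i.e. the reflectors written into `a` and `tau` by xgeqrf). The usual
+ * pipeline pairs the two ez wrappers; a hedged sketch for a 3x2 double
+ * matrix:
+ */
+#if 0   /* illustrative sketch only, excluded from compilation */
+static int qr_example(void)
+{
+    double a[6] = {1.0, 1.0, 0.0,    /* 3x2, column-major */
+                   1.0, 0.0, 1.0};
+    double tau[2];
+    if (numba_ez_geqrf('d', 3, 2, a, 3, tau) != STATUS_SUCCESS)
+        return STATUS_ERROR;
+    /* overwrite a with the explicit 3x2 Q built from the 2 reflectors */
+    return numba_ez_xxgqr('d', 3, 2, 2, a, 3, tau);
+}
+#endif
+/* The raw dispatcher: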
+ */ +static int +numba_raw_xxxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a, + Py_ssize_t lda, void *tau, void * work, Py_ssize_t lwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _m, _n, _k, _lda, _lwork; + + ENSURE_VALID_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_sorgqr(); + break; + case 'd': + raw_func = get_clapack_dorgqr(); + break; + case 'c': + raw_func = get_clapack_cungqr(); + break; + case 'z': + raw_func = get_clapack_zungqr(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _k = (F_INT) k; + _lda = (F_INT) lda; + _lwork = (F_INT) lwork; + + (*(xxxgqr_t) raw_func)(&_m, &_n, &_k, a, &_lda, tau, work, &_lwork, info); + return 0; +} + + +/* + * Compute the orthogonal Q matrix (in QR) from elementary reflectors. + * This routine hides the type and general complexity involved with making the + * x{or,un}qrf calls. The work space computation and error handling etc is + * hidden. Args are as per LAPACK. + */ +NUMBA_EXPORT_FUNC(int) +numba_ez_xxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a, + Py_ssize_t lda, void *tau) +{ + F_INT info = 0; + Py_ssize_t lwork = -1; + size_t base_size = -1; + all_dtypes stack_slot; + void *work = NULL; + + work = &stack_slot; + + /* Compute optimal work size (lwork) */ + numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info); + + base_size = kind_size(kind); + + /* Allocate work array */ + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + return STATUS_ERROR; + + numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info); + PyMem_RawFree(work); + CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info); + + return 0; /* info cannot be >0 */ + +} + + +/* + * Compute the minimum-norm solution to a real linear least squares problem. + */ +static int +numba_raw_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, + void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, + void * rcond, Py_ssize_t * rank, void * work, + Py_ssize_t lwork, F_INT *iwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _m, _n, _nrhs, _lda, _ldb, _rank, _lwork; + + ENSURE_VALID_REAL_KIND(kind) + + switch (kind) + { + case 's': + raw_func = get_clapack_sgelsd(); + break; + case 'd': + raw_func = get_clapack_dgelsd(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _nrhs = (F_INT) nrhs; + _lda = (F_INT) lda; + _ldb = (F_INT) ldb; + _lwork = (F_INT) lwork; + + (*(rgelsd_t) raw_func)(&_m, &_n, &_nrhs, a, &_lda, b, &_ldb, S, rcond, + &_rank, work, &_lwork, iwork, info); + *rank = (Py_ssize_t) _rank; + return 0; +} + +/* + * Compute the minimum-norm solution to a real linear least squares problem. + * This routine hides the type and general complexity involved with making the + * {s,d}gelsd calls. The work space computation and error handling etc is + * hidden. Args are as per LAPACK. 
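+ *
+ * For illustration, the hidden handling follows the usual LAPACK two-phase
+ * work-space pattern (a sketch of the calls made below, not an extra API):
+ *
+ *   lwork = -1;                               /* phase 1: size query */
+ *   numba_raw_rgelsd(..., work, lwork, ..., &info);
+ *   lwork = cast_from_X(kind, work);          /* optimal size is in work */
+ *   checked_PyMem_RawMalloc(&work, base_size * lwork);
+ *   numba_raw_rgelsd(..., work, lwork, ..., &info);  /* phase 2: solve */
+ *   PyMem_RawFree(work);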
+ */ +static int +numba_ez_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, + void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, + double rcond, Py_ssize_t * rank) +{ + F_INT info = 0; + Py_ssize_t lwork = -1; + size_t base_size = -1; + all_dtypes stack_slot; + void *work = NULL, *rcond_cast = NULL; + F_INT *iwork = NULL; + F_INT iwork_tmp; + float tmpf; + + ENSURE_VALID_REAL_KIND(kind) + + base_size = kind_size(kind); + + work = &stack_slot; + rcond_cast = work; /* stop checks on null ptr complaining */ + + /* Compute optimal work size (lwork) */ + numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, + work, lwork, &iwork_tmp, &info); + CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info); + + /* Allocate work array */ + lwork = cast_from_X(kind, work); + if (checked_PyMem_RawMalloc(&work, base_size * lwork)) + return STATUS_ERROR; + + /* Allocate iwork array */ + if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp)) + { + PyMem_RawFree(work); + return STATUS_ERROR; + } + + /* cast rcond to the right type */ + switch (kind) + { + case 's': + tmpf = (float)rcond; + rcond_cast = (void * )&tmpf; + break; + case 'd': + rcond_cast = (void * )&rcond; + break; + } + + numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank, + work, lwork, iwork, &info); + PyMem_RawFree(work); + PyMem_RawFree(iwork); + CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info); + + return (int)info; +} + + +/* + * Compute the minimum-norm solution to a complex linear least squares problem. + */ +static int +numba_raw_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs, + void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S, + void *rcond, Py_ssize_t * rank, void * work, + Py_ssize_t lwork, void * rwork, F_INT *iwork, F_INT *info) +{ + void *raw_func = NULL; + F_INT _m, _n, _nrhs, _lda, _ldb, _rank, _lwork; + + ENSURE_VALID_COMPLEX_KIND(kind) + + switch (kind) + { + case 'c': + raw_func = get_clapack_cgelsd(); + break; + case 'z': + raw_func = get_clapack_zgelsd(); + break; + } + ENSURE_VALID_FUNC(raw_func) + + _m = (F_INT) m; + _n = (F_INT) n; + _nrhs = (F_INT) nrhs; + _lda = (F_INT) lda; + _ldb = (F_INT) ldb; + _lwork = (F_INT) lwork; + + (*(cgelsd_t) raw_func)(&_m, &_n, &_nrhs, a, &_lda, b, &_ldb, S, rcond, + &_rank, work, &_lwork, rwork, iwork, info); + *rank = (Py_ssize_t) _rank; + return 0; +} + + +/* + * Compute the minimum-norm solution to a complex linear least squares problem. + * This routine hides the type and general complexity involved with making the + * {c,z}gelsd calls. The work space computation and error handling etc is + * hidden. Args are as per LAPACK. 
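+ *
+ * Note the double rcond received here is narrowed to a float for kind 'c'
+ * before being handed to LAPACK (see the switch on kind below).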
+ */
+static int
+numba_ez_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
+                void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
+                double rcond, Py_ssize_t * rank)
+{
+    F_INT info = 0;
+    Py_ssize_t lwork = -1;
+    size_t base_size = -1;
+    all_dtypes stack_slot1, stack_slot2;
+    size_t real_base_size = 0;
+    void *work = NULL, *rwork = NULL, *rcond_cast = NULL;
+    Py_ssize_t lrwork;
+    F_INT *iwork = NULL;
+    F_INT iwork_tmp;
+    char real_kind = '-';
+    float tmpf;
+
+    ENSURE_VALID_COMPLEX_KIND(kind)
+
+    base_size = kind_size(kind);
+
+    work = &stack_slot1;
+    rwork = &stack_slot2;
+    rcond_cast = work; /* stop checks on null ptr complaining */
+
+    /* Compute optimal work size */
+    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
+                     work, lwork, rwork, &iwork_tmp, &info);
+    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
+
+    /* Allocate work array */
+    lwork = cast_from_X(kind, work);
+    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
+        return STATUS_ERROR;
+
+    /* Allocate iwork array */
+    if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp))
+    {
+        PyMem_RawFree(work);
+        return STATUS_ERROR;
+    }
+
+    /* cast rcond to the right type */
+    switch (kind)
+    {
+        case 'c':
+            real_kind = 's';
+            tmpf = (float)rcond;
+            rcond_cast = (void *)&tmpf;
+            break;
+        case 'z':
+            real_kind = 'd';
+            rcond_cast = (void *)&rcond;
+            break;
+    }
+
+    real_base_size = kind_size(real_kind);
+
+    lrwork = cast_from_X(real_kind, rwork);
+    if (checked_PyMem_RawMalloc((void **)&rwork, real_base_size * lrwork))
+    {
+        PyMem_RawFree(work);
+        PyMem_RawFree(iwork);
+        return STATUS_ERROR;
+    }
+
+    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
+                     work, lwork, rwork, iwork, &info);
+    PyMem_RawFree(work);
+    PyMem_RawFree(rwork);
+    PyMem_RawFree(iwork);
+    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
+
+    return (int)info;
+}
+
+
+/*
+ * Compute the minimum-norm solution to a linear least squares problem.
+ * This routine hides the type and general complexity involved with making the
+ * calls to *gelsd. The work space computation and error handling etc is hidden.
+ * Args are as per LAPACK.
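+ *
+ * For illustration, a minimal double-precision call (hypothetical data) for
+ * a 3x2 system with one right-hand side could look like:
+ *
+ *   double a[6];          /* 3x2 input matrix, destroyed on exit */
+ *   double b[3];          /* rhs, overwritten with the solution  */
+ *   double s[2];          /* singular values, min(m, n) entries  */
+ *   Py_ssize_t rank;
+ *   int r = numba_ez_gelsd('d', 3, 2, 1, a, 3, b, 3, s, -1.0, &rank);
+ *   /* r == 0 on success; rcond < 0 requests machine precision */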
+ */
+NUMBA_EXPORT_FUNC(int)
+numba_ez_gelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
+               void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
+               double rcond, Py_ssize_t * rank)
+{
+    ENSURE_VALID_KIND(kind)
+
+    switch (kind)
+    {
+        case 's':
+        case 'd':
+            return numba_ez_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond,
+                                   rank);
+        case 'c':
+        case 'z':
+            return numba_ez_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond,
+                                   rank);
+    }
+    return STATUS_ERROR; /* unreachable */
+}
+
+
+/*
+ * Compute the solution to a system of linear equations
+ */
+NUMBA_EXPORT_FUNC(int)
+numba_xgesv(char kind, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda,
+            F_INT *ipiv, void *b, Py_ssize_t ldb)
+{
+    void *raw_func = NULL;
+    F_INT _n, _nrhs, _lda, _ldb, info;
+
+    ENSURE_VALID_KIND(kind)
+
+    switch (kind)
+    {
+        case 's':
+            raw_func = get_clapack_sgesv();
+            break;
+        case 'd':
+            raw_func = get_clapack_dgesv();
+            break;
+        case 'c':
+            raw_func = get_clapack_cgesv();
+            break;
+        case 'z':
+            raw_func = get_clapack_zgesv();
+            break;
+    }
+
+    ENSURE_VALID_FUNC(raw_func)
+
+    _n = (F_INT) n;
+    _nrhs = (F_INT) nrhs;
+    _lda = (F_INT) lda;
+    _ldb = (F_INT) ldb;
+
+    (*(xgesv_t) raw_func)(&_n, &_nrhs, a, &_lda, ipiv, b, &_ldb, &info);
+    CATCH_LAPACK_INVALID_ARG("xgesv", info);
+
+    return (int)info;
+}
+
+/* undef defines and macros */
+#undef STATUS_SUCCESS
+#undef STATUS_ERROR
+#undef ENSURE_VALID_KIND
+#undef ENSURE_VALID_REAL_KIND
+#undef ENSURE_VALID_COMPLEX_KIND
+#undef ENSURE_VALID_FUNC
+#undef F_INT
+#undef EMIT_GET_CLAPACK_FUNC
+#undef CATCH_LAPACK_INVALID_ARG
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_npymath_exports.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_npymath_exports.c
new file mode 100644
index 000000000..881b56c91
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_npymath_exports.c
@@ -0,0 +1,46 @@
+/*
+ * This file contains exports of Numpy math functions needed by numba.
+ */
+
+#include "_pymodule.h"
+#include <numpy/npy_math.h>
+#include <math.h>
+
+
+/*
+ * Map Numpy C function symbols to their addresses.
+ */
+
+struct npymath_entry {
+    const char *name;
+    void *func;
+};
+
+#define NPYMATH_SYMBOL(name) \
+    { "npy_" #name, (void*) npy_##name }
+
+static struct npymath_entry npymath_exports[] = {
+    /* double functions */
+    NPYMATH_SYMBOL(exp2),
+    NPYMATH_SYMBOL(log2),
+
+    NPYMATH_SYMBOL(logaddexp),
+    NPYMATH_SYMBOL(logaddexp2),
+    NPYMATH_SYMBOL(nextafter),
+    NPYMATH_SYMBOL(spacing),
+
+    NPYMATH_SYMBOL(modf),
+
+    /* float functions */
+    NPYMATH_SYMBOL(exp2f),
+    NPYMATH_SYMBOL(log2f),
+
+    NPYMATH_SYMBOL(logaddexpf),
+    NPYMATH_SYMBOL(logaddexp2f),
+    NPYMATH_SYMBOL(nextafterf),
+    NPYMATH_SYMBOL(spacingf),
+
+    NPYMATH_SYMBOL(modff),
+};
+
+#undef NPYMATH_SYMBOL
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_numba_common.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_numba_common.h
new file mode 100644
index 000000000..c5e67d9c6
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_numba_common.h
@@ -0,0 +1,39 @@
+#ifndef NUMBA_COMMON_H_
+#define NUMBA_COMMON_H_
+
+/* __has_attribute() is a clang / gcc-5 macro */
+#ifndef __has_attribute
+# define __has_attribute(x) 0
+#endif
+
+/* This attribute marks symbols that can be shared across C objects
+ * but are not exposed outside of a shared library or executable.
+ * Note this is default behaviour for global symbols under Windows.
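+ *
+ * For illustration, intended usage is simply (helper name hypothetical):
+ *
+ *   VISIBILITY_HIDDEN int some_internal_helper(void);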
+ */
+#if (__has_attribute(visibility) || \
+     (defined(__GNUC__) && __GNUC__ >= 4))
+#define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden")))
+#else
+#define VISIBILITY_HIDDEN
+#endif
+
+/*
+ * Numba's version of the PyArray_DescrCheck macro from NumPy, use it as a
+ * direct replacement of NumPy's PyArray_DescrCheck to ensure binary
+ * compatibility.
+ *
+ * Details of why this is needed:
+ * NumPy 1.18 changed the definition of the PyArray_DescrCheck macro here:
+ * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698
+ * the result of this being that building against NumPy <1.18 would prevent
+ * Numba running against NumPy >= 1.20 as noted here:
+ * https://github.com/numba/numba/issues/6041#issuecomment-665132199
+ *
+ * This macro definition is copied from:
+ * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698#diff-ad2213da23136c5fc5883d9eb2d88666R26
+ *
+ * NOTE: This is the NumPy 1.18 and above version of the macro.
+ */
+#define NUMBA_PyArray_DescrCheck(op) PyObject_TypeCheck(op, &PyArrayDescr_Type)
+
+#endif /* NUMBA_COMMON_H_ */
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_pymodule.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_pymodule.h
new file mode 100644
index 000000000..8622598a7
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_pymodule.h
@@ -0,0 +1,32 @@
+#ifndef NUMBA_PY_MODULE_H_
+#define NUMBA_PY_MODULE_H_
+
+#define PY_SSIZE_T_CLEAN
+
+#include <Python.h>
+#include <structmember.h>
+#include <frameobject.h>
+
+#define MOD_ERROR_VAL NULL
+#define MOD_SUCCESS_VAL(val) val
+#define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)
+#define MOD_DEF(ob, name, doc, methods) { \
+        static struct PyModuleDef moduledef = { \
+            PyModuleDef_HEAD_INIT, name, doc, -1, methods, NULL, NULL, NULL, NULL }; \
+        ob = PyModule_Create(&moduledef); }
+#define MOD_INIT_EXEC(name) PyInit_##name();
+
+#define PyString_AsString PyUnicode_AsUTF8
+#define PyString_Check PyUnicode_Check
+#define PyString_FromFormat PyUnicode_FromFormat
+#define PyString_FromString PyUnicode_FromString
+#define PyString_InternFromString PyUnicode_InternFromString
+#define PyInt_Type PyLong_Type
+#define PyInt_Check PyLong_Check
+#define PyInt_CheckExact PyLong_CheckExact
+#define SetAttrStringFromVoidPointer(m, name) do { \
+    PyObject *tmp = PyLong_FromVoidPtr((void *) &name); \
+    PyObject_SetAttrString(m, #name, tmp); \
+    Py_DECREF(tmp); } while (0)
+
+#endif /* NUMBA_PY_MODULE_H_ */
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_random.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_random.c
new file mode 100644
index 000000000..bf95a3639
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_random.c
@@ -0,0 +1,492 @@
+/*
+ * PRNG support.
+ */
+
+#ifdef _MSC_VER
+#define HAVE_PTHREAD_ATFORK 0
+#else
+#define HAVE_PTHREAD_ATFORK 1
+#include <pthread.h>
+#endif
+
+
+/* Magic Mersenne Twister constants */
+#define MT_N 624
+#define MT_M 397
+#define MT_MATRIX_A 0x9908b0dfU
+#define MT_UPPER_MASK 0x80000000U
+#define MT_LOWER_MASK 0x7fffffffU
+
+/*
+ * Note this structure is accessed in numba.targets.randomimpl,
+ * any changes here should be reflected there too.
+ */
+typedef struct {
+    int index;
+    /* unsigned int is sufficient on modern machines as we only need 32 bits */
+    unsigned int mt[MT_N];
+    int has_gauss;
+    double gauss;
+    int is_initialized;
+} rnd_state_t;
+
+/* Some code portions below from CPython's _randommodule.c, some others
+   from Numpy's and Jean-Sebastien Roy's randomkit.c. 
*/ + +NUMBA_EXPORT_FUNC(void) +numba_rnd_shuffle(rnd_state_t *state) +{ + int i; + unsigned int y; + + for (i = 0; i < MT_N - MT_M; i++) { + y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK); + state->mt[i] = state->mt[i+MT_M] ^ (y >> 1) ^ + (-(int) (y & 1) & MT_MATRIX_A); + } + for (; i < MT_N - 1; i++) { + y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK); + state->mt[i] = state->mt[i+(MT_M-MT_N)] ^ (y >> 1) ^ + (-(int) (y & 1) & MT_MATRIX_A); + } + y = (state->mt[MT_N - 1] & MT_UPPER_MASK) | (state->mt[0] & MT_LOWER_MASK); + state->mt[MT_N - 1] = state->mt[MT_M - 1] ^ (y >> 1) ^ + (-(int) (y & 1) & MT_MATRIX_A); +} + +/* Initialize mt[] with an integer seed */ +NUMBA_EXPORT_FUNC(void) +numba_rnd_init(rnd_state_t *state, unsigned int seed) +{ + unsigned int pos; + seed &= 0xffffffffU; + + /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ + for (pos = 0; pos < MT_N; pos++) { + state->mt[pos] = seed; + seed = (1812433253U * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffU; + } + state->index = MT_N; + state->has_gauss = 0; + state->gauss = 0.0; + state->is_initialized = 1; +} + +/* Perturb mt[] with a key array */ +static void +rnd_init_by_array(rnd_state_t *state, unsigned int init_key[], size_t key_length) +{ + size_t i, j, k; + unsigned int *mt = state->mt; + + numba_rnd_init(state, 19650218U); + i = 1; j = 0; + k = (MT_N > key_length ? MT_N : key_length); + for (; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525U)) + + init_key[j] + (unsigned int) j; /* non linear */ + mt[i] &= 0xffffffffU; + i++; j++; + if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i = 1; } + if (j >= key_length) j = 0; + } + for (k = MT_N - 1; k; k--) { + mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941U)) + - (unsigned int) i; /* non linear */ + mt[i] &= 0xffffffffU; + i++; + if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i=1; } + } + + mt[0] = 0x80000000U; /* MSB is 1; ensuring non-zero initial array */ + state->index = MT_N; + state->has_gauss = 0; + state->gauss = 0.0; + state->is_initialized = 1; +} + +/* + * Management of thread-local random state. + */ + +static int rnd_globally_initialized; + +#ifdef _MSC_VER +#define THREAD_LOCAL(ty) __declspec(thread) ty +#else +/* Non-standard C99 extension that's understood by gcc and clang */ +#define THREAD_LOCAL(ty) __thread ty +#endif + +static THREAD_LOCAL(rnd_state_t) numba_py_random_state; +static THREAD_LOCAL(rnd_state_t) numba_np_random_state; +static THREAD_LOCAL(rnd_state_t) numba_internal_random_state; + +/* Seed the state with random bytes */ +static int +rnd_seed_with_bytes(rnd_state_t *state, Py_buffer *buf) +{ + unsigned int *keys; + unsigned char *bytes; + size_t i, nkeys; + + nkeys = buf->len / sizeof(unsigned int); + keys = (unsigned int *) PyMem_Malloc(nkeys * sizeof(unsigned int)); + if (keys == NULL) { + PyBuffer_Release(buf); + return -1; + } + bytes = (unsigned char *) buf->buf; + /* Convert input bytes to int32 keys, without violating alignment + * constraints. + */ + for (i = 0; i < nkeys; i++, bytes += 4) { + keys[i] = + ((unsigned int)bytes[3] << 24) + + ((unsigned int)bytes[2] << 16) + + ((unsigned int)bytes[1] << 8) + + ((unsigned int)bytes[0] << 0); + } + PyBuffer_Release(buf); + rnd_init_by_array(state, keys, nkeys); + PyMem_Free(keys); + return 0; +} + +#if HAVE_PTHREAD_ATFORK +/* After a fork(), the child should reseed its random states. 
+ * Since only the main thread survives in the child, it's enough to mark + * the current thread-local states as uninitialized. + */ +static void +rnd_atfork_child(void) +{ + numba_py_random_state.is_initialized = 0; + numba_np_random_state.is_initialized = 0; + numba_internal_random_state.is_initialized = 0; +} +#endif + +/* Global initialization routine. It must be called as early as possible. + */ +NUMBA_EXPORT_FUNC(void) +numba_rnd_ensure_global_init(void) +{ + if (!rnd_globally_initialized) { +#if HAVE_PTHREAD_ATFORK + pthread_atfork(NULL, NULL, rnd_atfork_child); +#endif + numba_py_random_state.is_initialized = 0; + numba_np_random_state.is_initialized = 0; + numba_internal_random_state.is_initialized = 0; + rnd_globally_initialized = 1; + } +} + +/* First-time init a random state */ +static void +rnd_implicit_init(rnd_state_t *state) +{ + /* Initialize with random bytes. The easiest way to get good-quality + * cross-platform random bytes is still to call os.urandom() + * using the Python interpreter... + */ + PyObject *module, *bufobj; + Py_buffer buf; + PyGILState_STATE gilstate = PyGILState_Ensure(); + + module = PyImport_ImportModuleNoBlock("os"); + if (module == NULL) + goto error; + /* Read as many bytes as necessary to get the full entropy + * exploitable by the MT generator. + */ + bufobj = PyObject_CallMethod(module, "urandom", "i", + (int) (MT_N * sizeof(unsigned int))); + Py_DECREF(module); + if (bufobj == NULL) + goto error; + if (PyObject_GetBuffer(bufobj, &buf, PyBUF_SIMPLE)) + goto error; + Py_DECREF(bufobj); + if (rnd_seed_with_bytes(state, &buf)) + goto error; + /* state->is_initialized is set now */ + + PyGILState_Release(gilstate); + return; + +error: + /* In normal conditions, os.urandom() and PyMem_Malloc() shouldn't fail, + * and we don't want the caller to deal with errors, so just bail out. + */ + if (PyErr_Occurred()) + PyErr_Print(); + Py_FatalError(NULL); +} + +/* Functions returning the thread-local random state pointer. + * The LLVM JIT doesn't support thread-local variables so we rely + * on the C compiler instead. + */ + +NUMBA_EXPORT_FUNC(rnd_state_t *) +numba_get_py_random_state(void) +{ + rnd_state_t *state = &numba_py_random_state; + if (!state->is_initialized) + rnd_implicit_init(state); + return state; +} + +NUMBA_EXPORT_FUNC(rnd_state_t *) +numba_get_np_random_state(void) +{ + rnd_state_t *state = &numba_np_random_state; + if (!state->is_initialized) + rnd_implicit_init(state); + return state; +} + +NUMBA_EXPORT_FUNC(rnd_state_t *) +numba_get_internal_random_state(void) +{ + rnd_state_t *state = &numba_internal_random_state; + if (!state->is_initialized) + rnd_implicit_init(state); + return state; +} + +/* + * Python-exposed helpers for state management and testing. 
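+ *
+ * For reference, the state exchanged with Python by the getters/setters
+ * below is a 2-tuple (index, mt_list) where mt_list is a list holding the
+ * MT_N (624) Mersenne Twister words.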
+ */ +static int +rnd_state_converter(PyObject *obj, rnd_state_t **state) +{ + *state = (rnd_state_t *) PyLong_AsVoidPtr(obj); + return (*state != NULL || !PyErr_Occurred()); +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_rnd_get_py_state_ptr(PyObject *self) +{ + return PyLong_FromVoidPtr(numba_get_py_random_state()); +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_rnd_get_np_state_ptr(PyObject *self) +{ + return PyLong_FromVoidPtr(numba_get_np_random_state()); +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_rnd_shuffle(PyObject *self, PyObject *arg) +{ + rnd_state_t *state; + if (!rnd_state_converter(arg, &state)) + return NULL; + numba_rnd_shuffle(state); + Py_RETURN_NONE; +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_rnd_set_state(PyObject *self, PyObject *args) +{ + int i, index; + rnd_state_t *state; + PyObject *tuplearg, *intlist; + + if (!PyArg_ParseTuple(args, "O&O!:rnd_set_state", + rnd_state_converter, &state, + &PyTuple_Type, &tuplearg)) + return NULL; + if (!PyArg_ParseTuple(tuplearg, "iO!", &index, &PyList_Type, &intlist)) + return NULL; + if (PyList_GET_SIZE(intlist) != MT_N) { + PyErr_SetString(PyExc_ValueError, "list object has wrong size"); + return NULL; + } + state->index = index; + for (i = 0; i < MT_N; i++) { + PyObject *v = PyList_GET_ITEM(intlist, i); + unsigned long x = PyLong_AsUnsignedLong(v); + if (x == (unsigned long) -1 && PyErr_Occurred()) + return NULL; + state->mt[i] = (unsigned int) x; + } + state->has_gauss = 0; + state->gauss = 0.0; + state->is_initialized = 1; + Py_RETURN_NONE; +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_rnd_get_state(PyObject *self, PyObject *arg) +{ + PyObject *intlist; + int i; + rnd_state_t *state; + if (!rnd_state_converter(arg, &state)) + return NULL; + + intlist = PyList_New(MT_N); + if (intlist == NULL) + return NULL; + for (i = 0; i < MT_N; i++) { + PyObject *v = PyLong_FromUnsignedLong(state->mt[i]); + if (v == NULL) { + Py_DECREF(intlist); + return NULL; + } + PyList_SET_ITEM(intlist, i, v); + } + return Py_BuildValue("iN", state->index, intlist); +} + +NUMBA_EXPORT_FUNC(PyObject *) +_numba_rnd_seed(PyObject *self, PyObject *args) +{ + unsigned int seed; + rnd_state_t *state; + + if (!PyArg_ParseTuple(args, "O&I:rnd_seed", + rnd_state_converter, &state, &seed)) { + /* rnd_seed_*(bytes-like object) */ + Py_buffer buf; + + PyErr_Clear(); + if (!PyArg_ParseTuple(args, "O&s*:rnd_seed", + rnd_state_converter, &state, &buf)) + return NULL; + + if (rnd_seed_with_bytes(state, &buf)) + return NULL; + else + Py_RETURN_NONE; + } + else { + /* rnd_seed_*(int32) */ + numba_rnd_init(state, seed); + Py_RETURN_NONE; + } +} + +/* + * Random distribution helpers. + * Most code straight from Numpy's distributions.c. 
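+ *
+ * For reference, get_next_double() below builds a uniform double in [0, 1)
+ * from a 27-bit draw a and a 26-bit draw b as
+ *
+ *   (a * 2^26 + b) / 2^53
+ *
+ * i.e. (a * 67108864.0 + b) / 9007199254740992.0, giving 53 random bits.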
+ */
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846264338328
+#endif
+
+NUMBA_EXPORT_FUNC(unsigned int)
+get_next_int32(rnd_state_t *state)
+{
+    unsigned int y;
+
+    if (state->index == MT_N) {
+        numba_rnd_shuffle(state);
+        state->index = 0;
+    }
+    y = state->mt[state->index++];
+    /* Tempering */
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680U;
+    y ^= (y << 15) & 0xefc60000U;
+    y ^= (y >> 18);
+    return y;
+}
+
+NUMBA_EXPORT_FUNC(double)
+get_next_double(rnd_state_t *state)
+{
+    double a = get_next_int32(state) >> 5;
+    double b = get_next_int32(state) >> 6;
+    return (a * 67108864.0 + b) / 9007199254740992.0;
+}
+
+NUMBA_EXPORT_FUNC(double)
+loggam(double x)
+{
+    double x0, x2, xp, gl, gl0;
+    long k, n;
+
+    static double a[10] = {8.333333333333333e-02,-2.777777777777778e-03,
+                           7.936507936507937e-04,-5.952380952380952e-04,
+                           8.417508417508418e-04,-1.917526917526918e-03,
+                           6.410256410256410e-03,-2.955065359477124e-02,
+                           1.796443723688307e-01,-1.39243221690590e+00};
+    x0 = x;
+    n = 0;
+    if ((x == 1.0) || (x == 2.0))
+    {
+        return 0.0;
+    }
+    else if (x <= 7.0)
+    {
+        n = (long)(7 - x);
+        x0 = x + n;
+    }
+    x2 = 1.0/(x0*x0);
+    xp = 2*M_PI;
+    gl0 = a[9];
+    for (k=8; k>=0; k--)
+    {
+        gl0 *= x2;
+        gl0 += a[k];
+    }
+    gl = gl0/x0 + 0.5*log(xp) + (x0-0.5)*log(x0) - x0;
+    if (x <= 7.0)
+    {
+        for (k=1; k<=n; k++)
+        {
+            gl -= log(x0-1.0);
+            x0 -= 1.0;
+        }
+    }
+    return gl;
+}
+
+
+NUMBA_EXPORT_FUNC(int64_t)
+numba_poisson_ptrs(rnd_state_t *state, double lam)
+{
+    /* This method is invoked only if the parameter lambda of this
+     * distribution is big enough ( >= 10 ). The algorithm used is
+     * described in "Hörmann, W. 1992. 'The Transformed Rejection
+     * Method for Generating Poisson Random Variables'.
+     * The implementation comes straight from Numpy.
+     */
+    int64_t k;
+    double U, V, slam, loglam, a, b, invalpha, vr, us;
+
+    slam = sqrt(lam);
+    loglam = log(lam);
+    b = 0.931 + 2.53*slam;
+    a = -0.059 + 0.02483*b;
+    invalpha = 1.1239 + 1.1328/(b-3.4);
+    vr = 0.9277 - 3.6224/(b-2);
+
+    while (1)
+    {
+        U = get_next_double(state) - 0.5;
+        V = get_next_double(state);
+        us = 0.5 - fabs(U);
+        k = (int64_t) floor((2*a/us + b)*U + lam + 0.43);
+        if ((us >= 0.07) && (V <= vr))
+        {
+            return k;
+        }
+        if ((k < 0) ||
+            ((us < 0.013) && (V > us)))
+        {
+            continue;
+        }
+        if ((log(V) + log(invalpha) - log(a/(us*us)+b)) <=
+            (-lam + (double) k*loglam - loggam((double) k+1)))
+        {
+            return k;
+        }
+    }
+}
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.c
new file mode 100644
index 000000000..334ff2b99
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.c
@@ -0,0 +1,1133 @@
+#include "_pymodule.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "_numba_common.h"
+#include "_typeof.h"
+#include "_hashtable.h"
+#include "_devicearray.h"
+#include "pyerrors.h"
+
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#include <numpy/arrayobject.h>
+
+
+/* Cached typecodes for basic scalar types */
+static int tc_int8;
+static int tc_int16;
+static int tc_int32;
+static int tc_int64;
+static int tc_uint8;
+static int tc_uint16;
+static int tc_uint32;
+static int tc_uint64;
+static int tc_float32;
+static int tc_float64;
+static int tc_complex64;
+static int tc_complex128;
+static int BASIC_TYPECODES[12];
+
+static int tc_intp;
+
+/* The type object for the numba.dispatcher.OmittedArg class
+ * that wraps omitted arguments.
+ */ +static PyObject *omittedarg_type; + +static PyObject *typecache; +static PyObject *ndarray_typecache; +static PyObject *structured_dtypes; + +static PyObject *str_typeof_pyval = NULL; +static PyObject *str_value = NULL; +static PyObject *str_numba_type = NULL; + +/* CUDA device array API */ +void **DeviceArray_API; + +/* + * Type fingerprint computation. + */ + +typedef struct { + /* A buffer the fingerprint will be written to */ + char *buf; + size_t n; + size_t allocated; + /* A preallocated buffer, sufficient to fit the fingerprint for most types */ + char static_buf[40]; +} string_writer_t; + +static void +string_writer_init(string_writer_t *w) +{ + w->buf = w->static_buf; + w->n = 0; + w->allocated = sizeof(w->static_buf) / sizeof(unsigned char); +} + +static void +string_writer_clear(string_writer_t *w) +{ + if (w->buf != w->static_buf) + free(w->buf); +} + +static void +string_writer_move(string_writer_t *dest, const string_writer_t *src) +{ + dest->n = src->n; + dest->allocated = src->allocated; + if (src->buf == src->static_buf) { + dest->buf = dest->static_buf; + memcpy(dest->buf, src->buf, src->n); + } + else { + dest->buf = src->buf; + } +} + +/* Ensure at least *bytes* can be appended to the string writer's buffer. */ +static int +string_writer_ensure(string_writer_t *w, size_t bytes) +{ + size_t newsize; + bytes += w->n; + if (bytes <= w->allocated) + return 0; + newsize = (w->allocated << 2) + 1; + if (newsize < bytes) + newsize = bytes; + if (w->buf == w->static_buf) + w->buf = malloc(newsize); + else + w->buf = realloc(w->buf, newsize); + if (w->buf) { + w->allocated = newsize; + return 0; + } + else { + PyErr_NoMemory(); + return -1; + } +} + +static int +string_writer_put_char(string_writer_t *w, unsigned char c) +{ + if (string_writer_ensure(w, 1)) + return -1; + w->buf[w->n++] = c; + return 0; +} + +static int +string_writer_put_int32(string_writer_t *w, unsigned int v) +{ + if (string_writer_ensure(w, 4)) + return -1; + w->buf[w->n] = v & 0xff; + w->buf[w->n + 1] = (v >> 8) & 0xff; + w->buf[w->n + 2] = (v >> 16) & 0xff; + w->buf[w->n + 3] = (v >> 24) & 0xff; + w->n += 4; + return 0; +} + +static int +string_writer_put_intp(string_writer_t *w, npy_intp v) +{ + if (string_writer_ensure(w, NPY_SIZEOF_PY_INTPTR_T)) + return -1; + w->buf[w->n] = v & 0xff; + w->buf[w->n + 1] = (v >> 8) & 0xff; + w->buf[w->n + 2] = (v >> 16) & 0xff; + w->buf[w->n + 3] = (v >> 24) & 0xff; +#if NPY_SIZEOF_PY_INTPTR_T == 8 + w->buf[w->n + 4] = (v >> 32) & 0xff; + w->buf[w->n + 5] = (v >> 40) & 0xff; + w->buf[w->n + 6] = (v >> 48) & 0xff; + w->buf[w->n + 7] = (v >> 56) & 0xff; +#endif + w->n += NPY_SIZEOF_PY_INTPTR_T; + return 0; +} + +static int +string_writer_put_string(string_writer_t *w, const char *s) +{ + if (s == NULL) { + return string_writer_put_char(w, 0); + } + else { + size_t N = strlen(s) + 1; + if (string_writer_ensure(w, N)) + return -1; + memcpy(w->buf + w->n, s, N); + w->n += N; + return 0; + } +} + +enum opcode { + OP_START_TUPLE = '(', + OP_END_TUPLE = ')', + OP_INT = 'i', + OP_FLOAT = 'f', + OP_COMPLEX = 'c', + OP_BOOL = '?', + OP_OMITTED = '!', + + OP_BYTEARRAY = 'a', + OP_BYTES = 'b', + OP_NONE = 'n', + OP_LIST = '[', + OP_SET = '{', + + OP_BUFFER = 'B', + OP_NP_SCALAR = 'S', + OP_NP_ARRAY = 'A', + OP_NP_DTYPE = 'D' +}; + +#define TRY(func, w, arg) \ + do { \ + if (func(w, arg)) return -1; \ + } while (0) + + +static int +fingerprint_unrecognized(void) +{ + PyErr_SetString(PyExc_NotImplementedError, + "cannot compute type fingerprint for value"); + return -1; +} + 
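+/* For illustration, with the opcodes above, compute_fingerprint() below
+ * encodes e.g. the Python value (1, 2.5, None) as the byte sequence
+ * '(', 'i', 'f', 'n', ')': OP_START_TUPLE, OP_INT, OP_FLOAT, OP_NONE,
+ * OP_END_TUPLE.
+ */
+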
+static int +compute_dtype_fingerprint(string_writer_t *w, PyArray_Descr *descr) +{ + int typenum = descr->type_num; + if (typenum < NPY_OBJECT) + return string_writer_put_char(w, (char) typenum); + if (typenum == NPY_VOID) { + /* Structured dtype: serialize the dtype pointer. Unfortunately, + * some structured dtypes can be ephemeral, so we have to + * intern them to avoid pointer reuse and fingerprint collisions. + * (e.g. np.recarray(dtype=some_dtype) creates a new dtype + * equal to some_dtype) + */ + PyObject *interned = PyDict_GetItem(structured_dtypes, + (PyObject *) descr); + if (interned == NULL) { + interned = (PyObject *) descr; + if (PyDict_SetItem(structured_dtypes, interned, interned)) + return -1; + } + TRY(string_writer_put_char, w, (char) typenum); + return string_writer_put_intp(w, (npy_intp) interned); + } +#if NPY_API_VERSION >= 0x00000007 + if (PyTypeNum_ISDATETIME(typenum)) { + PyArray_DatetimeMetaData *md; + md = &(((PyArray_DatetimeDTypeMetaData *)descr->c_metadata)->meta); + TRY(string_writer_put_char, w, (char) typenum); + TRY(string_writer_put_char, w, (char) md->base); + return string_writer_put_int32(w, (char) md->num); + } +#endif + + return fingerprint_unrecognized(); +} + +static int +compute_fingerprint(string_writer_t *w, PyObject *val) +{ + /* + * Implementation note: for performance, we start with common + * types that can be tested with fast checks. + */ + if (val == Py_None) + return string_writer_put_char(w, OP_NONE); + if (PyBool_Check(val)) + return string_writer_put_char(w, OP_BOOL); + /* Note we avoid matching int subclasses such as IntEnum */ + if (PyInt_CheckExact(val) || PyLong_CheckExact(val)) + return string_writer_put_char(w, OP_INT); + if (PyFloat_Check(val)) + return string_writer_put_char(w, OP_FLOAT); + if (PyComplex_CheckExact(val)) + return string_writer_put_char(w, OP_COMPLEX); + if (PyTuple_Check(val)) { + if(PyTuple_CheckExact(val)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(val); + TRY(string_writer_put_char, w, OP_START_TUPLE); + for (i = 0; i < n; i++) + TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i)); + TRY(string_writer_put_char, w, OP_END_TUPLE); + return 0; + } + /* as per typeof.py, check "_asdict" for namedtuple. */ + else if(PyObject_HasAttrString(val, "_asdict")) + { + /* + * This encodes the class name and field names of a namedtuple into + * the fingerprint on the condition that the number of fields is + * small (<10) and that the class name and field names are encodable + * as ASCII. 
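+ *
+ * For illustration, a hypothetical namedtuple Point(x=1, y=2.5) would
+ * contribute the class name "Point", then OP_START_TUPLE, then each field
+ * name followed by the fingerprint of its value ("x" + OP_INT,
+ * "y" + OP_FLOAT), and finally OP_END_TUPLE.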
+ */ + PyObject * clazz = NULL; + PyObject * name = NULL; + PyObject * _fields = PyObject_GetAttrString(val, "_fields"); + PyObject * field = NULL; + PyObject * ascii_str = NULL; + Py_ssize_t i, n, j, flen; + char * buf = NULL; + int ret; + + clazz = PyObject_GetAttrString(val, "__class__"); + if (clazz == NULL) + return -1; + + name = PyObject_GetAttrString(clazz, "__name__"); + Py_DECREF(clazz); + if (name == NULL) + return -1; + + ascii_str = PyUnicode_AsEncodedString(name, "ascii", "ignore"); + Py_DECREF(name); + if (ascii_str == NULL) + return -1; + ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen); + + if (ret == -1) + return -1; + for(j = 0; j < flen; j++) { + TRY(string_writer_put_char, w, buf[j]); + } + Py_DECREF(ascii_str); + + if (_fields == NULL) + return -1; + + n = PyTuple_GET_SIZE(val); + + TRY(string_writer_put_char, w, OP_START_TUPLE); + for (i = 0; i < n; i++) { + field = PyTuple_GET_ITEM(_fields, i); + if (field == NULL) + return -1; + ascii_str = PyUnicode_AsEncodedString(field, "ascii", "ignore"); + if (ascii_str == NULL) + return -1; + ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen); + if (ret == -1) + return -1; + for(j = 0; j < flen; j++) { + TRY(string_writer_put_char, w, buf[j]); + } + Py_DECREF(ascii_str); + TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i)); + } + TRY(string_writer_put_char, w, OP_END_TUPLE); + Py_DECREF(_fields); + return 0; + } + } + if (PyBytes_Check(val)) + return string_writer_put_char(w, OP_BYTES); + if (PyByteArray_Check(val)) + return string_writer_put_char(w, OP_BYTEARRAY); + if ((PyObject *) Py_TYPE(val) == omittedarg_type) { + PyObject *default_val = PyObject_GetAttr(val, str_value); + if (default_val == NULL) + return -1; + TRY(string_writer_put_char, w, OP_OMITTED); + TRY(compute_fingerprint, w, default_val); + Py_DECREF(default_val); + return 0; + } + if (PyArray_IsScalar(val, Generic)) { + /* Note: PyArray_DescrFromScalar() may be a bit slow on + non-trivial types. 
*/ + PyArray_Descr *descr = PyArray_DescrFromScalar(val); + if (descr == NULL) + return -1; + TRY(string_writer_put_char, w, OP_NP_SCALAR); + TRY(compute_dtype_fingerprint, w, descr); + Py_DECREF(descr); + return 0; + } + if (PyArray_Check(val)) { + PyArrayObject *ary = (PyArrayObject *) val; + int ndim = PyArray_NDIM(ary); + + TRY(string_writer_put_char, w, OP_NP_ARRAY); + TRY(string_writer_put_int32, w, ndim); + if (PyArray_IS_C_CONTIGUOUS(ary)) + TRY(string_writer_put_char, w, 'C'); + else if (PyArray_IS_F_CONTIGUOUS(ary)) + TRY(string_writer_put_char, w, 'F'); + else + TRY(string_writer_put_char, w, 'A'); + if (PyArray_ISWRITEABLE(ary)) + TRY(string_writer_put_char, w, 'W'); + else + TRY(string_writer_put_char, w, 'R'); + return compute_dtype_fingerprint(w, PyArray_DESCR(ary)); + } + if (PyList_Check(val)) { + Py_ssize_t n = PyList_GET_SIZE(val); + if (n == 0) { + PyErr_SetString(PyExc_ValueError, + "cannot compute fingerprint of empty list"); + return -1; + } + /* Only the first item is considered, as in typeof.py */ + TRY(string_writer_put_char, w, OP_LIST); + TRY(compute_fingerprint, w, PyList_GET_ITEM(val, 0)); + return 0; + } + /* Note we only accept sets, not frozensets */ + if (Py_TYPE(val) == &PySet_Type) { + Py_hash_t h; + PyObject *item; + Py_ssize_t pos = 0; + /* Only one item is considered, as in typeof.py */ + if (!_PySet_NextEntry(val, &pos, &item, &h)) { + /* Empty set */ + PyErr_SetString(PyExc_ValueError, + "cannot compute fingerprint of empty set"); + return -1; + } + TRY(string_writer_put_char, w, OP_SET); + TRY(compute_fingerprint, w, item); + return 0; + } + if (PyObject_CheckBuffer(val)) { + Py_buffer buf; + int flags = PyBUF_ND | PyBUF_STRIDES | PyBUF_FORMAT; + char contig; + int ndim; + char readonly; + + /* Attempt to get a writable buffer, then fallback on read-only */ + if (PyObject_GetBuffer(val, &buf, flags | PyBUF_WRITABLE)) { + PyErr_Clear(); + if (PyObject_GetBuffer(val, &buf, flags)) + goto _unrecognized; + } + if (PyBuffer_IsContiguous(&buf, 'C')) + contig = 'C'; + else if (PyBuffer_IsContiguous(&buf, 'F')) + contig = 'F'; + else + contig = 'A'; + ndim = buf.ndim; + readonly = buf.readonly ? 'R' : 'W'; + if (string_writer_put_char(w, OP_BUFFER) || + string_writer_put_int32(w, ndim) || + string_writer_put_char(w, contig) || + string_writer_put_char(w, readonly) || + string_writer_put_string(w, buf.format) || + /* We serialize the object's Python type as well, to + distinguish between types which have Numba specializations + (e.g. array.array() vs. memoryview) + */ + string_writer_put_intp(w, (npy_intp) Py_TYPE(val))) { + PyBuffer_Release(&buf); + return -1; + } + PyBuffer_Release(&buf); + return 0; + } + if (NUMBA_PyArray_DescrCheck(val)) { + TRY(string_writer_put_char, w, OP_NP_DTYPE); + return compute_dtype_fingerprint(w, (PyArray_Descr *) val); + } + +_unrecognized: + /* Type not recognized */ + return fingerprint_unrecognized(); +} + +PyObject * +typeof_compute_fingerprint(PyObject *val) +{ + PyObject *res; + string_writer_t w; + + string_writer_init(&w); + + if (compute_fingerprint(&w, val)) + goto error; + res = PyBytes_FromStringAndSize(w.buf, w.n); + + string_writer_clear(&w); + return res; + +error: + string_writer_clear(&w); + return NULL; +} + +/* + * Getting the typecode from a Type object. 
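+ *
+ * For illustration: given a Numba type instance ty, the helper below is
+ * effectively int(ty._code), returning -1 on failure.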
+ */ +static int +_typecode_from_type_object(PyObject *tyobj) { + int typecode; + PyObject *tmpcode = PyObject_GetAttrString(tyobj, "_code"); + if (tmpcode == NULL) { + return -1; + } + typecode = PyLong_AsLong(tmpcode); + Py_DECREF(tmpcode); + return typecode; +} + +/* When we want to cache the type's typecode for later lookup, we need to + keep a reference to the returned type object so that it cannot be + deleted. This is because of the following events occurring when first + using a @jit function for a given set of types: + + 1. typecode_fallback requests a new typecode for an arbitrary Python value; + this implies creating a Numba type object (on the first dispatcher call); + the typecode cache is then populated. + 2. matching of the typecode list in _dispatcherimpl.cpp fails, since the + typecode is new. + 3. we have to compile: compile_and_invoke() is called, it will invoke + Dispatcher_Insert to register the new signature. + + The reference to the Numba type object returned in step 1 is deleted as + soon as we call Py_DECREF() on it, since we are holding the only + reference. If this happens and we use the typecode we got to populate the + cache, then the cache won't ever return the correct typecode, and the + dispatcher will never successfully match the typecodes with those of + some already-compiled instance. So we need to make sure that we don't + call Py_DECREF() on objects whose typecode will be used to populate the + cache. This is ensured by calling _typecode_fallback with + retain_reference == 0. + + Note that technically we are leaking the reference, since we do not continue + to hold a pointer to the type object that we get back from typeof_pyval. + However, we don't need to refer to it again, we just need to make sure that + it is never deleted. +*/ +static int +_typecode_fallback(PyObject *dispatcher, PyObject *val, + int retain_reference) { + PyObject *numba_type; + int typecode; + + /* + * For values that define "_numba_type_", which holds a numba Type + * instance that should be used as the type of the value. + * Note this is done here, not in typeof_typecode(), so that + * some values can still benefit from fingerprint caching. + */ + if (PyObject_HasAttr(val, str_numba_type)) { + numba_type = PyObject_GetAttrString(val, "_numba_type_"); + if (!numba_type) + return -1; + } + else { + // Go back to the interpreter + numba_type = PyObject_CallMethodObjArgs((PyObject *) dispatcher, + str_typeof_pyval, val, NULL); + } + if (!numba_type) + return -1; + typecode = _typecode_from_type_object(numba_type); + if (!retain_reference) + Py_DECREF(numba_type); + return typecode; +} + +/* Variations on _typecode_fallback for convenience */ + +static +int typecode_fallback(PyObject *dispatcher, PyObject *val) { + return _typecode_fallback(dispatcher, val, 0); +} + +static +int typecode_fallback_keep_ref(PyObject *dispatcher, PyObject *val) { + return _typecode_fallback(dispatcher, val, 1); +} + + +/* A cache mapping fingerprints (string_writer_t *) to typecodes (int). 
*/ +static _Numba_hashtable_t *fingerprint_hashtable = NULL; + +static Py_uhash_t +hash_writer(const void *key) +{ + string_writer_t *writer = (string_writer_t *) key; + Py_uhash_t x = 0; + + /* The old FNV algorithm used by Python 2 */ + if (writer->n > 0) { + unsigned char *p = (unsigned char *) writer->buf; + Py_ssize_t len = writer->n; + x ^= *p << 7; + while (--len >= 0) + x = (1000003*x) ^ *p++; + x ^= writer->n; + if (x == (Py_uhash_t) -1) + x = -2; + } + return x; +} + +static int +compare_writer(const void *key, const _Numba_hashtable_entry_t *entry) +{ + string_writer_t *v = (string_writer_t *) key; + string_writer_t *w = (string_writer_t *) entry->key; + if (v->n != w->n) + return 0; + return memcmp(v->buf, w->buf, v->n) == 0; +} + +/* Try to compute *val*'s typecode using its fingerprint and the + * fingerprint->typecode cache. + */ +static int +typecode_using_fingerprint(PyObject *dispatcher, PyObject *val) +{ + int typecode; + string_writer_t w; + + string_writer_init(&w); + + if (compute_fingerprint(&w, val)) { + string_writer_clear(&w); + if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) { + /* Can't compute a type fingerprint for the given value, + fall back on typeof() without caching. */ + PyErr_Clear(); + return typecode_fallback(dispatcher, val); + } + return -1; + } + if (_Numba_HASHTABLE_GET(fingerprint_hashtable, &w, typecode) > 0) { + /* Cache hit */ + string_writer_clear(&w); + return typecode; + } + + /* Not found in cache: invoke pure Python typeof() and cache result. + * Note we have to keep the type alive forever as explained + * above in _typecode_fallback(). + */ + typecode = typecode_fallback_keep_ref(dispatcher, val); + if (typecode >= 0) { + string_writer_t *key = (string_writer_t *) malloc(sizeof(string_writer_t)); + if (key == NULL) { + string_writer_clear(&w); + PyErr_NoMemory(); + return -1; + } + /* Ownership of the string writer's buffer will be transferred + * to the hash table. + */ + string_writer_move(key, &w); + if (_Numba_HASHTABLE_SET(fingerprint_hashtable, key, typecode)) { + string_writer_clear(&w); + PyErr_NoMemory(); + return -1; + } + } + return typecode; +} + + +/* + * Direct lookup table for extra-fast typecode resolution of simple array types. + */ + +#define N_DTYPES 12 +#define N_NDIM 5 /* Fast path for up to 5D array */ +#define N_LAYOUT 3 +static int cached_arycode[N_NDIM][N_LAYOUT][N_DTYPES]; + +/* Convert a Numpy dtype number to an internal index into cached_arycode. + The returned value must also be a valid index into BASIC_TYPECODES. 
*/ +static int dtype_num_to_typecode(int type_num) { + int dtype; + switch(type_num) { + case NPY_INT8: + dtype = 0; + break; + case NPY_INT16: + dtype = 1; + break; + case NPY_INT32: + dtype = 2; + break; + case NPY_INT64: + dtype = 3; + break; + case NPY_UINT8: + dtype = 4; + break; + case NPY_UINT16: + dtype = 5; + break; + case NPY_UINT32: + dtype = 6; + break; + case NPY_UINT64: + dtype = 7; + break; + case NPY_FLOAT32: + dtype = 8; + break; + case NPY_FLOAT64: + dtype = 9; + break; + case NPY_COMPLEX64: + dtype = 10; + break; + case NPY_COMPLEX128: + dtype = 11; + break; + default: + /* Type not included in the global lookup table */ + dtype = -1; + } + return dtype; +} + +static +int get_cached_typecode(PyArray_Descr* descr) { + PyObject* tmpobject = PyDict_GetItem(typecache, (PyObject*)descr); + if (tmpobject == NULL) + return -1; + + return PyLong_AsLong(tmpobject); +} + +static +void cache_typecode(PyArray_Descr* descr, int typecode) { + PyObject* value = PyLong_FromLong(typecode); + PyDict_SetItem(typecache, (PyObject*)descr, value); + Py_DECREF(value); +} + +static +PyObject* ndarray_key(int ndim, int layout, PyArray_Descr* descr) { + PyObject* tmpndim = PyLong_FromLong(ndim); + PyObject* tmplayout = PyLong_FromLong(layout); + PyObject* key = PyTuple_Pack(3, tmpndim, tmplayout, descr); + Py_DECREF(tmpndim); + Py_DECREF(tmplayout); + return key; +} + +static +int get_cached_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr) { + PyObject* key = ndarray_key(ndim, layout, descr); + PyObject *tmpobject = PyDict_GetItem(ndarray_typecache, key); + if (tmpobject == NULL) + return -1; + + Py_DECREF(key); + return PyLong_AsLong(tmpobject); +} + +static +void cache_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr, + int typecode) { + PyObject* key = ndarray_key(ndim, layout, descr); + PyObject* value = PyLong_FromLong(typecode); + PyDict_SetItem(ndarray_typecache, key, value); + Py_DECREF(key); + Py_DECREF(value); +} + +static +int typecode_ndarray(PyObject *dispatcher, PyArrayObject *ary) { + int typecode; + int dtype; + int ndim = PyArray_NDIM(ary); + int layout = 0; + + /* The order in which we check for the right contiguous-ness is important. + The order must match the order by numba.numpy_support.map_layout. + Further, only *contiguous-ness* is checked, not alignment, byte order or + write permissions. 
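+
+       For reference, the resulting encoding used to index cached_arycode
+       is: layout 0 = strided/'A', 1 = C-contiguous, 2 = Fortran-contiguous.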
+ */ + if (PyArray_IS_C_CONTIGUOUS(ary)){ + layout = 1; + } else if (PyArray_IS_F_CONTIGUOUS(ary)) { + layout = 2; + } + + /* the typecode cache by convention is for "behaved" arrays (aligned and + * writeable), all others must be forced to the fall back */ + if (!PyArray_ISBEHAVED(ary)) goto FALLBACK; + + if (ndim <= 0 || ndim > N_NDIM) goto FALLBACK; + + dtype = dtype_num_to_typecode(PyArray_TYPE(ary)); + if (dtype == -1) goto FALLBACK; + + /* Fast path, using direct table lookup */ + assert(layout < N_LAYOUT); + assert(ndim <= N_NDIM); + assert(dtype < N_DTYPES); + + typecode = cached_arycode[ndim - 1][layout][dtype]; + if (typecode == -1) { + /* First use of this table entry, so it requires populating */ + typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); + cached_arycode[ndim - 1][layout][dtype] = typecode; + } + return typecode; + +FALLBACK: + /* Slower path, for non-trivial array types */ + + /* If this isn't a structured array then we can't use the cache */ + if (PyArray_TYPE(ary) != NPY_VOID) + return typecode_using_fingerprint(dispatcher, (PyObject *) ary); + + /* Check type cache */ + typecode = get_cached_ndarray_typecode(ndim, layout, PyArray_DESCR(ary)); + if (typecode == -1) { + /* First use of this type, use fallback and populate the cache */ + typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); + cache_ndarray_typecode(ndim, layout, PyArray_DESCR(ary), typecode); + } + return typecode; +} + +static +int typecode_arrayscalar(PyObject *dispatcher, PyObject* aryscalar) { + int typecode; + PyArray_Descr *descr; + descr = PyArray_DescrFromScalar(aryscalar); + if (!descr) + return typecode_using_fingerprint(dispatcher, aryscalar); + + /* Is it a structured scalar? */ + if (descr->type_num == NPY_VOID) { + typecode = get_cached_typecode(descr); + if (typecode == -1) { + /* Resolve through fallback then populate cache */ + typecode = typecode_fallback_keep_ref(dispatcher, aryscalar); + cache_typecode(descr, typecode); + } + Py_DECREF(descr); + return typecode; + } + + /* Is it one of the well-known basic types? */ + typecode = dtype_num_to_typecode(descr->type_num); + Py_DECREF(descr); + if (typecode == -1) + return typecode_using_fingerprint(dispatcher, aryscalar); + return BASIC_TYPECODES[typecode]; +} + +static +int typecode_devicendarray(PyObject *dispatcher, PyObject *ary) +{ + int typecode; + int dtype; + int ndim; + int layout = 0; + + PyObject* flags = PyObject_GetAttrString(ary, "flags"); + if (flags == NULL) + { + PyErr_Clear(); + goto FALLBACK; + } + + if (PyDict_GetItemString(flags, "C_CONTIGUOUS") == Py_True) { + layout = 1; + } else if (PyDict_GetItemString(flags, "F_CONTIGUOUS") == Py_True) { + layout = 2; + } + + Py_DECREF(flags); + + PyObject *ndim_obj = PyObject_GetAttrString(ary, "ndim"); + if (ndim_obj == NULL) { + /* If there's no ndim, try to proceed by clearing the error and using the + * fallback. */ + PyErr_Clear(); + goto FALLBACK; + } + + ndim = PyLong_AsLong(ndim_obj); + Py_DECREF(ndim_obj); + + if (PyErr_Occurred()) { + /* ndim wasn't an integer for some reason - unlikely to happen, but try + * the fallback. */ + PyErr_Clear(); + goto FALLBACK; + } + + if (ndim <= 0 || ndim > N_NDIM) + goto FALLBACK; + + PyObject* dtype_obj = PyObject_GetAttrString(ary, "dtype"); + if (dtype_obj == NULL) { + /* No dtype: try the fallback. 
*/ + PyErr_Clear(); + goto FALLBACK; + } + + PyObject* num_obj = PyObject_GetAttrString(dtype_obj, "num"); + Py_DECREF(dtype_obj); + + if (num_obj == NULL) { + /* This strange dtype has no num - try the fallback. */ + PyErr_Clear(); + goto FALLBACK; + } + + int dtype_num = PyLong_AsLong(num_obj); + Py_DECREF(num_obj); + + if (PyErr_Occurred()) { + /* num wasn't an integer for some reason - unlikely to happen, but try + * the fallback. */ + PyErr_Clear(); + goto FALLBACK; + } + + dtype = dtype_num_to_typecode(dtype_num); + if (dtype == -1) { + /* Not a dtype we have in the global lookup table. */ + goto FALLBACK; + } + + /* Fast path, using direct table lookup */ + assert(layout < N_LAYOUT); + assert(ndim <= N_NDIM); + assert(dtype < N_DTYPES); + typecode = cached_arycode[ndim - 1][layout][dtype]; + + if (typecode == -1) { + /* First use of this table entry, so it requires populating */ + typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary); + cached_arycode[ndim - 1][layout][dtype] = typecode; + } + + return typecode; + +FALLBACK: + /* Slower path, for non-trivial array types. At present this always uses + the fingerprinting to get the typecode. Future optimization might + implement a cache, but this would require some fast equivalent of + PyArray_DESCR for a device array. */ + + return typecode_using_fingerprint(dispatcher, (PyObject *) ary); +} + +int +typeof_typecode(PyObject *dispatcher, PyObject *val) +{ + PyTypeObject *tyobj = Py_TYPE(val); + int subtype_attr; + /* This needs to be kept in sync with Dispatcher.typeof_pyval(), + * otherwise funny things may happen. + */ + if (tyobj == &PyInt_Type || tyobj == &PyLong_Type) { +#if SIZEOF_VOID_P < 8 + /* On 32-bit platforms, choose between tc_intp (32-bit) and tc_int64 */ + PY_LONG_LONG ll = PyLong_AsLongLong(val); + if (ll == -1 && PyErr_Occurred()) { + /* The integer is too large, let us truncate it */ + PyErr_Clear(); + return tc_int64; + } + if ((ll & 0xffffffff) != ll) + return tc_int64; +#endif + return tc_intp; + } + else if (tyobj == &PyFloat_Type) + return tc_float64; + else if (tyobj == &PyComplex_Type) + return tc_complex128; + /* Array scalar handling */ + else if (PyArray_CheckScalar(val)) { + return typecode_arrayscalar(dispatcher, val); + } + /* Array handling */ + else if (tyobj == &PyArray_Type) { + return typecode_ndarray(dispatcher, (PyArrayObject*)val); + } + /* Subtype of CUDA device array */ + else if (PyType_IsSubtype(tyobj, &DeviceArrayType)) { + return typecode_devicendarray(dispatcher, val); + } + /* Subtypes of Array handling */ + else if (PyType_IsSubtype(tyobj, &PyArray_Type)) { + /* By default, Numba will treat all numpy.ndarray subtypes as if they + were the base numpy.ndarray type. In this way, ndarray subtypes + can easily use all of the support that Numba has for ndarray + methods. + EXPERIMENTAL: There may be cases where a programmer would NOT want + ndarray subtypes to be treated exactly like the base numpy.ndarray. + For this purpose, a currently experimental feature allows a + programmer to add an attribute named + __numba_array_subtype_dispatch__ to their ndarray subtype. This + attribute can have any value as Numba only checks for the presence + of the attribute and not its value. When present, a ndarray subtype + will NOT be typed by Numba as a regular ndarray but this code will + fallthrough to the typecode_using_fingerprint call, which will + create a new unique Numba typecode for this ndarray subtype. This + behavior has several significant effects. 
First, since this
+           ndarray subtype will be treated as a different type by Numba,
+           the Numba dispatcher would then specialize on this type. So, if
+           there was a function that had several parameters that were
+           expected to be either numpy.ndarray or a subtype of ndarray, then
+           Numba would compile a custom version of this function for each
+           combination of base and subtypes that were actually passed to the
+           function. Second, because this subtype would now be treated as
+           a totally separate type, it will cease to function in Numba unless
+           an implementation of that type is provided to Numba through the
+           Numba type extension mechanisms (e.g., overload). This would
+           typically start with defining a Numba type corresponding to the
+           ndarray subtype. This is the same concept as how Numba has a
+           corollary of numpy.ndarray in its type system as types.Array.
+           Next, one would typically define boxing and unboxing routines
+           and the associated memory model. Then, overloads for NumPy
+           functions on that type would be created. However,
+           if the same default array memory model is used then there are tricks
+           one can do to look at Numba's internal types.Array registries and
+           to quickly apply those to the subtype as well. In this manner,
+           only those cases where the base ndarray and the ndarray subtype
+           behavior differ would new custom functions need to be written for
+           the subtype. Finally,
+           after adding support for the new type, you would have a separate
+           ndarray subtype that could operate with other objects of the same
+           subtype but would not support interoperation with regular NumPy
+           ndarrays. In standard Python, this interoperation is provided
+           through the __array_ufunc__ magic method in the ndarray subtype
+           class and in that case the function operates on ndarrays or their
+           subtypes. This idea is extended into Numba such that
+           __array_ufunc__ can be present in a Numba array type object.
+           In this case, this function is consulted during Numba typing and
+           so the arguments to __array_ufunc__ are Numba types instead of
+           ndarray subtypes. The array type __array_ufunc__ returns the
+           type of the output of the given ufunc.
+        */
+        subtype_attr = PyObject_HasAttrString(val, "__numba_array_subtype_dispatch__");
+        if (!subtype_attr) {
+            return typecode_ndarray(dispatcher, (PyArrayObject*)val);
+        }
+    }
+
+    return typecode_using_fingerprint(dispatcher, val);
+}
+
+
+static
+void* wrap_import_array(void) {
+    import_array(); /* import array returns NULL on failure */
+    return (void*)1;
+}
+
+
+static
+int init_numpy(void) {
+    return wrap_import_array() != NULL;
+}
+
+
+/*
+ * typeof_init(omittedarg_type, typecode_dict)
+ * (called from dispatcher.py to fill in missing information)
+ */
+PyObject *
+typeof_init(PyObject *self, PyObject *args)
+{
+    PyObject *tmpobj;
+    PyObject *dict;
+    int index = 0;
+
+    if (!PyArg_ParseTuple(args, "O!O!:typeof_init",
+                          &PyType_Type, &omittedarg_type,
+                          &PyDict_Type, &dict))
+        return NULL;
+
+    /* Initialize Numpy API */
+    if ( ! 
init_numpy() ) { + return NULL; + } + + #define UNWRAP_TYPE(S) \ + if(!(tmpobj = PyDict_GetItemString(dict, #S))) return NULL; \ + else { tc_##S = PyLong_AsLong(tmpobj); \ + BASIC_TYPECODES[index++] = tc_##S; } + + UNWRAP_TYPE(int8) + UNWRAP_TYPE(int16) + UNWRAP_TYPE(int32) + UNWRAP_TYPE(int64) + + UNWRAP_TYPE(uint8) + UNWRAP_TYPE(uint16) + UNWRAP_TYPE(uint32) + UNWRAP_TYPE(uint64) + + UNWRAP_TYPE(float32) + UNWRAP_TYPE(float64) + + UNWRAP_TYPE(complex64) + UNWRAP_TYPE(complex128) + + switch(sizeof(void*)) { + case 4: + tc_intp = tc_int32; + break; + case 8: + tc_intp = tc_int64; + break; + default: + PyErr_SetString(PyExc_AssertionError, "sizeof(void*) != {4, 8}"); + return NULL; + } + + #undef UNWRAP_TYPE + + typecache = PyDict_New(); + ndarray_typecache = PyDict_New(); + structured_dtypes = PyDict_New(); + if (typecache == NULL || ndarray_typecache == NULL || + structured_dtypes == NULL) { + PyErr_SetString(PyExc_RuntimeError, "failed to create type cache"); + return NULL; + } + + fingerprint_hashtable = _Numba_hashtable_new(sizeof(int), + hash_writer, + compare_writer); + if (fingerprint_hashtable == NULL) { + PyErr_NoMemory(); + return NULL; + } + + /* initialize cached_arycode to all ones (in bits) */ + memset(cached_arycode, 0xFF, sizeof(cached_arycode)); + + str_typeof_pyval = PyString_InternFromString("typeof_pyval"); + str_value = PyString_InternFromString("value"); + str_numba_type = PyString_InternFromString("_numba_type_"); + if (!str_value || !str_typeof_pyval || !str_numba_type) + return NULL; + + Py_RETURN_NONE; +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.h new file mode 100644 index 000000000..6e0039b5f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_typeof.h @@ -0,0 +1,16 @@ +#ifndef NUMBA_TYPEOF_H_ +#define NUMBA_TYPEOF_H_ + +#ifdef __cplusplus + extern "C" { +#endif + +extern PyObject *typeof_init(PyObject *self, PyObject *args); +extern int typeof_typecode(PyObject *dispatcher, PyObject *val); +extern PyObject *typeof_compute_fingerprint(PyObject *val); + +#ifdef __cplusplus + } +#endif + +#endif /* NUMBA_TYPEOF_H_ */ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_unicodetype_db.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_unicodetype_db.h new file mode 100644 index 000000000..d4dca060d --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_unicodetype_db.h @@ -0,0 +1,6091 @@ +/* This file is from CPython: + * https://github.com/python/cpython/blob/3.7/Objects/unicodetype_db.h + * As of Commit SHA: 1d4b6ba19466aba0eb91c4ba01ba509acf18c723 + * + * Changes made include: + * - Renaming all functions and structures with a `numba` prefix to prevent + * collisions. + * + * NOTE: Numba devs, this may need updating from time to time as the unicode + * standard is updated. + */ + +#ifndef _UNICODETYPE_DB_H +#define _UNICODETYPE_DB_H + +/*Py_UCS4 definition from Include/unicodeobject.h */ +#define Py_UCS4 uint32_t + +typedef struct { + /* + These are either deltas to the character or offsets in + _PyUnicode_ExtendedCase. + */ + const int upper; + const int lower; + const int title; + /* Note if more flag space is needed, decimal and digit could be unified. 
*/ + const unsigned char decimal; + const unsigned char digit; + const unsigned short flags; +} numba_PyUnicode_TypeRecord; + +/* -------------------------------------------------------------------------- */ +/* CPython unicodetype_db.h definitions start here */ +/* -------------------------------------------------------------------------- */ + +/* this file was generated by Tools/unicode/makeunicodedata.py 3.2 */ + +/* a list of unique character type descriptors */ +const numba_PyUnicode_TypeRecord numba_PyUnicode_TypeRecords[] = { + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 32}, + {0, 0, 0, 0, 0, 48}, + {0, 0, 0, 0, 0, 1056}, + {0, 0, 0, 0, 0, 1024}, + {0, 0, 0, 0, 0, 5120}, + {0, 0, 0, 0, 0, 3590}, + {0, 0, 0, 1, 1, 3590}, + {0, 0, 0, 2, 2, 3590}, + {0, 0, 0, 3, 3, 3590}, + {0, 0, 0, 4, 4, 3590}, + {0, 0, 0, 5, 5, 3590}, + {0, 0, 0, 6, 6, 3590}, + {0, 0, 0, 7, 7, 3590}, + {0, 0, 0, 8, 8, 3590}, + {0, 0, 0, 9, 9, 3590}, + {0, 32, 0, 0, 0, 10113}, + {0, 0, 0, 0, 0, 1536}, + {-32, 0, -32, 0, 0, 9993}, + {0, 0, 0, 0, 0, 9993}, + {0, 0, 0, 0, 0, 4096}, + {0, 0, 0, 0, 2, 3076}, + {0, 0, 0, 0, 3, 3076}, + {16777218, 17825792, 16777218, 0, 0, 26377}, + {0, 0, 0, 0, 0, 5632}, + {0, 0, 0, 0, 1, 3076}, + {0, 0, 0, 0, 0, 3072}, + {33554438, 18874371, 33554440, 0, 0, 26377}, + {121, 0, 121, 0, 0, 9993}, + {0, 1, 0, 0, 0, 10113}, + {-1, 0, -1, 0, 0, 9993}, + {16777228, 33554442, 16777228, 0, 0, 26497}, + {-232, 0, -232, 0, 0, 9993}, + {33554448, 18874381, 33554448, 0, 0, 26377}, + {0, -121, 0, 0, 0, 10113}, + {16777236, 17825810, 16777236, 0, 0, 26377}, + {195, 0, 195, 0, 0, 9993}, + {0, 210, 0, 0, 0, 10113}, + {0, 206, 0, 0, 0, 10113}, + {0, 205, 0, 0, 0, 10113}, + {0, 79, 0, 0, 0, 10113}, + {0, 202, 0, 0, 0, 10113}, + {0, 203, 0, 0, 0, 10113}, + {0, 207, 0, 0, 0, 10113}, + {97, 0, 97, 0, 0, 9993}, + {0, 211, 0, 0, 0, 10113}, + {0, 209, 0, 0, 0, 10113}, + {163, 0, 163, 0, 0, 9993}, + {0, 213, 0, 0, 0, 10113}, + {130, 0, 130, 0, 0, 9993}, + {0, 214, 0, 0, 0, 10113}, + {0, 218, 0, 0, 0, 10113}, + {0, 217, 0, 0, 0, 10113}, + {0, 219, 0, 0, 0, 10113}, + {0, 0, 0, 0, 0, 1793}, + {56, 0, 56, 0, 0, 9993}, + {0, 2, 1, 0, 0, 10113}, + {-1, 1, 0, 0, 0, 10049}, + {-2, 0, -1, 0, 0, 9993}, + {-79, 0, -79, 0, 0, 9993}, + {33554456, 18874389, 33554456, 0, 0, 26377}, + {0, -97, 0, 0, 0, 10113}, + {0, -56, 0, 0, 0, 10113}, + {0, -130, 0, 0, 0, 10113}, + {0, 10795, 0, 0, 0, 10113}, + {0, -163, 0, 0, 0, 10113}, + {0, 10792, 0, 0, 0, 10113}, + {10815, 0, 10815, 0, 0, 9993}, + {0, -195, 0, 0, 0, 10113}, + {0, 69, 0, 0, 0, 10113}, + {0, 71, 0, 0, 0, 10113}, + {10783, 0, 10783, 0, 0, 9993}, + {10780, 0, 10780, 0, 0, 9993}, + {10782, 0, 10782, 0, 0, 9993}, + {-210, 0, -210, 0, 0, 9993}, + {-206, 0, -206, 0, 0, 9993}, + {-205, 0, -205, 0, 0, 9993}, + {-202, 0, -202, 0, 0, 9993}, + {-203, 0, -203, 0, 0, 9993}, + {42319, 0, 42319, 0, 0, 9993}, + {42315, 0, 42315, 0, 0, 9993}, + {-207, 0, -207, 0, 0, 9993}, + {42280, 0, 42280, 0, 0, 9993}, + {42308, 0, 42308, 0, 0, 9993}, + {-209, 0, -209, 0, 0, 9993}, + {-211, 0, -211, 0, 0, 9993}, + {10743, 0, 10743, 0, 0, 9993}, + {42305, 0, 42305, 0, 0, 9993}, + {10749, 0, 10749, 0, 0, 9993}, + {-213, 0, -213, 0, 0, 9993}, + {-214, 0, -214, 0, 0, 9993}, + {10727, 0, 10727, 0, 0, 9993}, + {-218, 0, -218, 0, 0, 9993}, + {42282, 0, 42282, 0, 0, 9993}, + {-69, 0, -69, 0, 0, 9993}, + {-217, 0, -217, 0, 0, 9993}, + {-71, 0, -71, 0, 0, 9993}, + {-219, 0, -219, 0, 0, 9993}, + {42261, 0, 42261, 0, 0, 9993}, + {42258, 0, 42258, 0, 0, 9993}, + {0, 0, 0, 0, 0, 14089}, + {0, 0, 0, 0, 
0, 5889}, + {16777244, 17825818, 16777244, 0, 0, 30216}, + {0, 0, 0, 0, 0, 13321}, + {0, 116, 0, 0, 0, 10113}, + {0, 38, 0, 0, 0, 10113}, + {0, 37, 0, 0, 0, 10113}, + {0, 64, 0, 0, 0, 10113}, + {0, 63, 0, 0, 0, 10113}, + {50331681, 19922973, 50331681, 0, 0, 26377}, + {-38, 0, -38, 0, 0, 9993}, + {-37, 0, -37, 0, 0, 9993}, + {50331688, 19922980, 50331688, 0, 0, 26377}, + {16777261, 17825835, 16777261, 0, 0, 26377}, + {-64, 0, -64, 0, 0, 9993}, + {-63, 0, -63, 0, 0, 9993}, + {0, 8, 0, 0, 0, 10113}, + {16777264, 17825838, 16777264, 0, 0, 26377}, + {16777267, 17825841, 16777267, 0, 0, 26377}, + {0, 0, 0, 0, 0, 10113}, + {16777270, 17825844, 16777270, 0, 0, 26377}, + {16777273, 17825847, 16777273, 0, 0, 26377}, + {-8, 0, -8, 0, 0, 9993}, + {16777276, 17825850, 16777276, 0, 0, 26377}, + {16777279, 17825853, 16777279, 0, 0, 26377}, + {7, 0, 7, 0, 0, 9993}, + {-116, 0, -116, 0, 0, 9993}, + {0, -60, 0, 0, 0, 10113}, + {16777282, 17825856, 16777282, 0, 0, 26377}, + {0, -7, 0, 0, 0, 10113}, + {0, 80, 0, 0, 0, 10113}, + {-80, 0, -80, 0, 0, 9993}, + {0, 15, 0, 0, 0, 10113}, + {-15, 0, -15, 0, 0, 9993}, + {0, 48, 0, 0, 0, 10113}, + {-48, 0, -48, 0, 0, 9993}, + {33554502, 18874435, 33554504, 0, 0, 26377}, + {0, 0, 0, 0, 0, 1537}, + {0, 7264, 0, 0, 0, 10113}, + {3008, 0, 0, 0, 0, 9993}, + {0, 0, 0, 0, 1, 3588}, + {0, 0, 0, 0, 2, 3588}, + {0, 0, 0, 0, 3, 3588}, + {0, 0, 0, 0, 4, 3588}, + {0, 0, 0, 0, 5, 3588}, + {0, 0, 0, 0, 6, 3588}, + {0, 0, 0, 0, 7, 3588}, + {0, 0, 0, 0, 8, 3588}, + {0, 0, 0, 0, 9, 3588}, + {16777292, 17825866, 16777292, 0, 0, 26497}, + {16777295, 17825869, 16777295, 0, 0, 26497}, + {16777298, 17825872, 16777298, 0, 0, 26497}, + {16777301, 17825875, 16777301, 0, 0, 26497}, + {16777304, 17825878, 16777304, 0, 0, 26497}, + {16777307, 17825881, 16777307, 0, 0, 26497}, + {16777310, 17825884, 16777310, 0, 0, 26497}, + {16777313, 17825887, 16777313, 0, 0, 26497}, + {16777316, 17825890, 16777316, 0, 0, 26497}, + {16777319, 17825893, 16777319, 0, 0, 26497}, + {16777322, 17825896, 16777322, 0, 0, 26497}, + {16777325, 17825899, 16777325, 0, 0, 26497}, + {16777328, 17825902, 16777328, 0, 0, 26497}, + {16777331, 17825905, 16777331, 0, 0, 26497}, + {16777334, 17825908, 16777334, 0, 0, 26497}, + {16777337, 17825911, 16777337, 0, 0, 26497}, + {16777340, 17825914, 16777340, 0, 0, 26497}, + {16777343, 17825917, 16777343, 0, 0, 26497}, + {16777346, 17825920, 16777346, 0, 0, 26497}, + {16777349, 17825923, 16777349, 0, 0, 26497}, + {16777352, 17825926, 16777352, 0, 0, 26497}, + {16777355, 17825929, 16777355, 0, 0, 26497}, + {16777358, 17825932, 16777358, 0, 0, 26497}, + {16777361, 17825935, 16777361, 0, 0, 26497}, + {16777364, 17825938, 16777364, 0, 0, 26497}, + {16777367, 17825941, 16777367, 0, 0, 26497}, + {16777370, 17825944, 16777370, 0, 0, 26497}, + {16777373, 17825947, 16777373, 0, 0, 26497}, + {16777376, 17825950, 16777376, 0, 0, 26497}, + {16777379, 17825953, 16777379, 0, 0, 26497}, + {16777382, 17825956, 16777382, 0, 0, 26497}, + {16777385, 17825959, 16777385, 0, 0, 26497}, + {16777388, 17825962, 16777388, 0, 0, 26497}, + {16777391, 17825965, 16777391, 0, 0, 26497}, + {16777394, 17825968, 16777394, 0, 0, 26497}, + {16777397, 17825971, 16777397, 0, 0, 26497}, + {16777400, 17825974, 16777400, 0, 0, 26497}, + {16777403, 17825977, 16777403, 0, 0, 26497}, + {16777406, 17825980, 16777406, 0, 0, 26497}, + {16777409, 17825983, 16777409, 0, 0, 26497}, + {16777412, 17825986, 16777412, 0, 0, 26497}, + {16777415, 17825989, 16777415, 0, 0, 26497}, + {16777418, 17825992, 16777418, 0, 0, 26497}, + {16777421, 
17825995, 16777421, 0, 0, 26497}, + {16777424, 17825998, 16777424, 0, 0, 26497}, + {16777427, 17826001, 16777427, 0, 0, 26497}, + {16777430, 17826004, 16777430, 0, 0, 26497}, + {16777433, 17826007, 16777433, 0, 0, 26497}, + {16777436, 17826010, 16777436, 0, 0, 26497}, + {16777439, 17826013, 16777439, 0, 0, 26497}, + {16777442, 17826016, 16777442, 0, 0, 26497}, + {16777445, 17826019, 16777445, 0, 0, 26497}, + {16777448, 17826022, 16777448, 0, 0, 26497}, + {16777451, 17826025, 16777451, 0, 0, 26497}, + {16777454, 17826028, 16777454, 0, 0, 26497}, + {16777457, 17826031, 16777457, 0, 0, 26497}, + {16777460, 17826034, 16777460, 0, 0, 26497}, + {16777463, 17826037, 16777463, 0, 0, 26497}, + {16777466, 17826040, 16777466, 0, 0, 26497}, + {16777469, 17826043, 16777469, 0, 0, 26497}, + {16777472, 17826046, 16777472, 0, 0, 26497}, + {16777475, 17826049, 16777475, 0, 0, 26497}, + {16777478, 17826052, 16777478, 0, 0, 26497}, + {16777481, 17826055, 16777481, 0, 0, 26497}, + {16777484, 17826058, 16777484, 0, 0, 26497}, + {16777487, 17826061, 16777487, 0, 0, 26497}, + {16777490, 17826064, 16777490, 0, 0, 26497}, + {16777493, 17826067, 16777493, 0, 0, 26497}, + {16777496, 17826070, 16777496, 0, 0, 26497}, + {16777499, 17826073, 16777499, 0, 0, 26497}, + {16777502, 17826076, 16777502, 0, 0, 26497}, + {16777505, 17826079, 16777505, 0, 0, 26497}, + {16777508, 17826082, 16777508, 0, 0, 26497}, + {16777511, 17826085, 16777511, 0, 0, 26497}, + {16777514, 17826088, 16777514, 0, 0, 26497}, + {16777517, 17826091, 16777517, 0, 0, 26497}, + {16777520, 17826094, 16777520, 0, 0, 26497}, + {16777523, 17826097, 16777523, 0, 0, 26497}, + {16777526, 17826100, 16777526, 0, 0, 26497}, + {16777529, 17826103, 16777529, 0, 0, 26497}, + {16777532, 17826106, 16777532, 0, 0, 26497}, + {16777535, 17826109, 16777535, 0, 0, 26497}, + {16777538, 17826112, 16777538, 0, 0, 26497}, + {16777541, 17826115, 16777541, 0, 0, 26497}, + {16777544, 17826118, 16777544, 0, 0, 26497}, + {16777547, 17826121, 16777547, 0, 0, 26497}, + {16777550, 17826124, 16777550, 0, 0, 26377}, + {16777553, 17826127, 16777553, 0, 0, 26377}, + {16777556, 17826130, 16777556, 0, 0, 26377}, + {16777559, 17826133, 16777559, 0, 0, 26377}, + {16777562, 17826136, 16777562, 0, 0, 26377}, + {16777565, 17826139, 16777565, 0, 0, 26377}, + {0, 0, 0, 0, 0, 3840}, + {0, 0, 0, 0, 0, 5888}, + {16777568, 17826142, 16777568, 0, 0, 26377}, + {16777571, 17826145, 16777571, 0, 0, 26377}, + {16777574, 17826148, 16777574, 0, 0, 26377}, + {16777577, 17826151, 16777577, 0, 0, 26377}, + {16777580, 17826154, 16777580, 0, 0, 26377}, + {16777583, 17826157, 16777583, 0, 0, 26377}, + {16777586, 17826160, 16777586, 0, 0, 26377}, + {16777589, 17826163, 16777589, 0, 0, 26377}, + {16777592, 17826166, 16777592, 0, 0, 26377}, + {0, -3008, 0, 0, 0, 10113}, + {35332, 0, 35332, 0, 0, 9993}, + {3814, 0, 3814, 0, 0, 9993}, + {33554812, 18874745, 33554812, 0, 0, 26377}, + {33554817, 18874750, 33554817, 0, 0, 26377}, + {33554822, 18874755, 33554822, 0, 0, 26377}, + {33554827, 18874760, 33554827, 0, 0, 26377}, + {33554832, 18874765, 33554832, 0, 0, 26377}, + {16777620, 17826194, 16777620, 0, 0, 26377}, + {16777624, 18874773, 16777624, 0, 0, 26497}, + {8, 0, 8, 0, 0, 9993}, + {0, -8, 0, 0, 0, 10113}, + {33554844, 18874777, 33554844, 0, 0, 26377}, + {50332066, 19923358, 50332066, 0, 0, 26377}, + {50332073, 19923365, 50332073, 0, 0, 26377}, + {50332080, 19923372, 50332080, 0, 0, 26377}, + {74, 0, 74, 0, 0, 9993}, + {86, 0, 86, 0, 0, 9993}, + {100, 0, 100, 0, 0, 9993}, + {128, 0, 128, 0, 0, 9993}, + {112, 0, 112, 
0, 0, 9993}, + {126, 0, 126, 0, 0, 9993}, + {33554870, 18874803, 16777656, 0, 0, 26377}, + {33554876, 18874809, 16777662, 0, 0, 26377}, + {33554882, 18874815, 16777668, 0, 0, 26377}, + {33554888, 18874821, 16777674, 0, 0, 26377}, + {33554894, 18874827, 16777680, 0, 0, 26377}, + {33554900, 18874833, 16777686, 0, 0, 26377}, + {33554906, 18874839, 16777692, 0, 0, 26377}, + {33554912, 18874845, 16777698, 0, 0, 26377}, + {33554918, 18874851, 16777704, 0, 0, 26433}, + {33554924, 18874857, 16777710, 0, 0, 26433}, + {33554930, 18874863, 16777716, 0, 0, 26433}, + {33554936, 18874869, 16777722, 0, 0, 26433}, + {33554942, 18874875, 16777728, 0, 0, 26433}, + {33554948, 18874881, 16777734, 0, 0, 26433}, + {33554954, 18874887, 16777740, 0, 0, 26433}, + {33554960, 18874893, 16777746, 0, 0, 26433}, + {33554966, 18874899, 16777752, 0, 0, 26377}, + {33554972, 18874905, 16777758, 0, 0, 26377}, + {33554978, 18874911, 16777764, 0, 0, 26377}, + {33554984, 18874917, 16777770, 0, 0, 26377}, + {33554990, 18874923, 16777776, 0, 0, 26377}, + {33554996, 18874929, 16777782, 0, 0, 26377}, + {33555002, 18874935, 16777788, 0, 0, 26377}, + {33555008, 18874941, 16777794, 0, 0, 26377}, + {33555014, 18874947, 16777800, 0, 0, 26433}, + {33555020, 18874953, 16777806, 0, 0, 26433}, + {33555026, 18874959, 16777812, 0, 0, 26433}, + {33555032, 18874965, 16777818, 0, 0, 26433}, + {33555038, 18874971, 16777824, 0, 0, 26433}, + {33555044, 18874977, 16777830, 0, 0, 26433}, + {33555050, 18874983, 16777836, 0, 0, 26433}, + {33555056, 18874989, 16777842, 0, 0, 26433}, + {33555062, 18874995, 16777848, 0, 0, 26377}, + {33555068, 18875001, 16777854, 0, 0, 26377}, + {33555074, 18875007, 16777860, 0, 0, 26377}, + {33555080, 18875013, 16777866, 0, 0, 26377}, + {33555086, 18875019, 16777872, 0, 0, 26377}, + {33555092, 18875025, 16777878, 0, 0, 26377}, + {33555098, 18875031, 16777884, 0, 0, 26377}, + {33555104, 18875037, 16777890, 0, 0, 26377}, + {33555110, 18875043, 16777896, 0, 0, 26433}, + {33555116, 18875049, 16777902, 0, 0, 26433}, + {33555122, 18875055, 16777908, 0, 0, 26433}, + {33555128, 18875061, 16777914, 0, 0, 26433}, + {33555134, 18875067, 16777920, 0, 0, 26433}, + {33555140, 18875073, 16777926, 0, 0, 26433}, + {33555146, 18875079, 16777932, 0, 0, 26433}, + {33555152, 18875085, 16777938, 0, 0, 26433}, + {33555158, 18875091, 33555160, 0, 0, 26377}, + {33555165, 18875098, 16777951, 0, 0, 26377}, + {33555171, 18875104, 33555173, 0, 0, 26377}, + {33555178, 18875111, 33555178, 0, 0, 26377}, + {50332400, 19923692, 50332403, 0, 0, 26377}, + {0, -74, 0, 0, 0, 10113}, + {33555193, 18875126, 16777979, 0, 0, 26433}, + {16777982, 17826556, 16777982, 0, 0, 26377}, + {33555202, 18875135, 33555204, 0, 0, 26377}, + {33555209, 18875142, 16777995, 0, 0, 26377}, + {33555215, 18875148, 33555217, 0, 0, 26377}, + {33555222, 18875155, 33555222, 0, 0, 26377}, + {50332444, 19923736, 50332447, 0, 0, 26377}, + {0, -86, 0, 0, 0, 10113}, + {33555237, 18875170, 16778023, 0, 0, 26433}, + {50332460, 19923752, 50332460, 0, 0, 26377}, + {50332467, 19923759, 50332467, 0, 0, 26377}, + {33555257, 18875190, 33555257, 0, 0, 26377}, + {50332479, 19923771, 50332479, 0, 0, 26377}, + {0, -100, 0, 0, 0, 10113}, + {50332486, 19923778, 50332486, 0, 0, 26377}, + {50332493, 19923785, 50332493, 0, 0, 26377}, + {33555283, 18875216, 33555283, 0, 0, 26377}, + {33555288, 18875221, 33555288, 0, 0, 26377}, + {50332510, 19923802, 50332510, 0, 0, 26377}, + {0, -112, 0, 0, 0, 10113}, + {33555300, 18875233, 33555302, 0, 0, 26377}, + {33555307, 18875240, 16778093, 0, 0, 26377}, + {33555313, 
18875246, 33555315, 0, 0, 26377}, + {33555320, 18875253, 33555320, 0, 0, 26377}, + {50332542, 19923834, 50332545, 0, 0, 26377}, + {0, -128, 0, 0, 0, 10113}, + {0, -126, 0, 0, 0, 10113}, + {33555335, 18875268, 16778121, 0, 0, 26433}, + {0, 0, 0, 0, 0, 3076}, + {0, 0, 0, 0, 4, 3076}, + {0, 0, 0, 0, 5, 3076}, + {0, 0, 0, 0, 6, 3076}, + {0, 0, 0, 0, 7, 3076}, + {0, 0, 0, 0, 8, 3076}, + {0, 0, 0, 0, 9, 3076}, + {0, 0, 0, 0, 0, 1792}, + {0, -7517, 0, 0, 0, 10113}, + {0, -8383, 0, 0, 0, 10113}, + {0, -8262, 0, 0, 0, 10113}, + {0, 28, 0, 0, 0, 10113}, + {-28, 0, -28, 0, 0, 9993}, + {0, 16, 0, 0, 0, 12160}, + {-16, 0, -16, 0, 0, 12040}, + {0, 26, 0, 0, 0, 9344}, + {-26, 0, -26, 0, 0, 9224}, + {0, -10743, 0, 0, 0, 10113}, + {0, -3814, 0, 0, 0, 10113}, + {0, -10727, 0, 0, 0, 10113}, + {-10795, 0, -10795, 0, 0, 9993}, + {-10792, 0, -10792, 0, 0, 9993}, + {0, -10780, 0, 0, 0, 10113}, + {0, -10749, 0, 0, 0, 10113}, + {0, -10783, 0, 0, 0, 10113}, + {0, -10782, 0, 0, 0, 10113}, + {0, -10815, 0, 0, 0, 10113}, + {-7264, 0, -7264, 0, 0, 9993}, + {0, 0, 0, 0, 0, 5121}, + {0, 0, 0, 0, 0, 3841}, + {0, -35332, 0, 0, 0, 10113}, + {0, -42280, 0, 0, 0, 10113}, + {0, -42308, 0, 0, 0, 10113}, + {0, -42319, 0, 0, 0, 10113}, + {0, -42315, 0, 0, 0, 10113}, + {0, -42305, 0, 0, 0, 10113}, + {0, -42258, 0, 0, 0, 10113}, + {0, -42282, 0, 0, 0, 10113}, + {0, -42261, 0, 0, 0, 10113}, + {0, 928, 0, 0, 0, 10113}, + {-928, 0, -928, 0, 0, 9993}, + {16778124, 17826698, 16778124, 0, 0, 26377}, + {16778127, 17826701, 16778127, 0, 0, 26377}, + {16778130, 17826704, 16778130, 0, 0, 26377}, + {16778133, 17826707, 16778133, 0, 0, 26377}, + {16778136, 17826710, 16778136, 0, 0, 26377}, + {16778139, 17826713, 16778139, 0, 0, 26377}, + {16778142, 17826716, 16778142, 0, 0, 26377}, + {16778145, 17826719, 16778145, 0, 0, 26377}, + {16778148, 17826722, 16778148, 0, 0, 26377}, + {16778151, 17826725, 16778151, 0, 0, 26377}, + {16778154, 17826728, 16778154, 0, 0, 26377}, + {16778157, 17826731, 16778157, 0, 0, 26377}, + {16778160, 17826734, 16778160, 0, 0, 26377}, + {16778163, 17826737, 16778163, 0, 0, 26377}, + {16778166, 17826740, 16778166, 0, 0, 26377}, + {16778169, 17826743, 16778169, 0, 0, 26377}, + {16778172, 17826746, 16778172, 0, 0, 26377}, + {16778175, 17826749, 16778175, 0, 0, 26377}, + {16778178, 17826752, 16778178, 0, 0, 26377}, + {16778181, 17826755, 16778181, 0, 0, 26377}, + {16778184, 17826758, 16778184, 0, 0, 26377}, + {16778187, 17826761, 16778187, 0, 0, 26377}, + {16778190, 17826764, 16778190, 0, 0, 26377}, + {16778193, 17826767, 16778193, 0, 0, 26377}, + {16778196, 17826770, 16778196, 0, 0, 26377}, + {16778199, 17826773, 16778199, 0, 0, 26377}, + {16778202, 17826776, 16778202, 0, 0, 26377}, + {16778205, 17826779, 16778205, 0, 0, 26377}, + {16778208, 17826782, 16778208, 0, 0, 26377}, + {16778211, 17826785, 16778211, 0, 0, 26377}, + {16778214, 17826788, 16778214, 0, 0, 26377}, + {16778217, 17826791, 16778217, 0, 0, 26377}, + {16778220, 17826794, 16778220, 0, 0, 26377}, + {16778223, 17826797, 16778223, 0, 0, 26377}, + {16778226, 17826800, 16778226, 0, 0, 26377}, + {16778229, 17826803, 16778229, 0, 0, 26377}, + {16778232, 17826806, 16778232, 0, 0, 26377}, + {16778235, 17826809, 16778235, 0, 0, 26377}, + {16778238, 17826812, 16778238, 0, 0, 26377}, + {16778241, 17826815, 16778241, 0, 0, 26377}, + {16778244, 17826818, 16778244, 0, 0, 26377}, + {16778247, 17826821, 16778247, 0, 0, 26377}, + {16778250, 17826824, 16778250, 0, 0, 26377}, + {16778253, 17826827, 16778253, 0, 0, 26377}, + {16778256, 17826830, 16778256, 0, 0, 26377}, + 
{16778259, 17826833, 16778259, 0, 0, 26377}, + {16778262, 17826836, 16778262, 0, 0, 26377}, + {16778265, 17826839, 16778265, 0, 0, 26377}, + {16778268, 17826842, 16778268, 0, 0, 26377}, + {16778271, 17826845, 16778271, 0, 0, 26377}, + {16778274, 17826848, 16778274, 0, 0, 26377}, + {16778277, 17826851, 16778277, 0, 0, 26377}, + {16778280, 17826854, 16778280, 0, 0, 26377}, + {16778283, 17826857, 16778283, 0, 0, 26377}, + {16778286, 17826860, 16778286, 0, 0, 26377}, + {16778289, 17826863, 16778289, 0, 0, 26377}, + {16778292, 17826866, 16778292, 0, 0, 26377}, + {16778295, 17826869, 16778295, 0, 0, 26377}, + {16778298, 17826872, 16778298, 0, 0, 26377}, + {16778301, 17826875, 16778301, 0, 0, 26377}, + {16778304, 17826878, 16778304, 0, 0, 26377}, + {16778307, 17826881, 16778307, 0, 0, 26377}, + {16778310, 17826884, 16778310, 0, 0, 26377}, + {16778313, 17826887, 16778313, 0, 0, 26377}, + {16778316, 17826890, 16778316, 0, 0, 26377}, + {16778319, 17826893, 16778319, 0, 0, 26377}, + {16778322, 17826896, 16778322, 0, 0, 26377}, + {16778325, 17826899, 16778325, 0, 0, 26377}, + {16778328, 17826902, 16778328, 0, 0, 26377}, + {16778331, 17826905, 16778331, 0, 0, 26377}, + {16778334, 17826908, 16778334, 0, 0, 26377}, + {16778337, 17826911, 16778337, 0, 0, 26377}, + {16778340, 17826914, 16778340, 0, 0, 26377}, + {16778343, 17826917, 16778343, 0, 0, 26377}, + {16778346, 17826920, 16778346, 0, 0, 26377}, + {16778349, 17826923, 16778349, 0, 0, 26377}, + {16778352, 17826926, 16778352, 0, 0, 26377}, + {16778355, 17826929, 16778355, 0, 0, 26377}, + {16778358, 17826932, 16778358, 0, 0, 26377}, + {16778361, 17826935, 16778361, 0, 0, 26377}, + {33555581, 18875514, 33555583, 0, 0, 26377}, + {33555588, 18875521, 33555590, 0, 0, 26377}, + {33555595, 18875528, 33555597, 0, 0, 26377}, + {50332819, 19924111, 50332822, 0, 0, 26377}, + {50332829, 19924121, 50332832, 0, 0, 26377}, + {33555622, 18875555, 33555624, 0, 0, 26377}, + {33555629, 18875562, 33555631, 0, 0, 26377}, + {33555636, 18875569, 33555638, 0, 0, 26377}, + {33555643, 18875576, 33555645, 0, 0, 26377}, + {33555650, 18875583, 33555652, 0, 0, 26377}, + {33555657, 18875590, 33555659, 0, 0, 26377}, + {33555664, 18875597, 33555666, 0, 0, 26377}, + {0, 0, 0, 0, 0, 1025}, + {0, 0, 0, 0, 0, 5633}, + {0, 40, 0, 0, 0, 10113}, + {-40, 0, -40, 0, 0, 9993}, + {0, 34, 0, 0, 0, 10113}, + {-34, 0, -34, 0, 0, 9993}, + {0, 0, 0, 0, 0, 9344}, +}; + +/* extended case mappings */ + +const Py_UCS4 numba_PyUnicode_ExtendedCase[] = { + 181, + 956, + 924, + 223, + 115, + 115, + 83, + 83, + 83, + 115, + 105, + 775, + 304, + 329, + 700, + 110, + 700, + 78, + 383, + 115, + 83, + 496, + 106, + 780, + 74, + 780, + 837, + 953, + 921, + 912, + 953, + 776, + 769, + 921, + 776, + 769, + 944, + 965, + 776, + 769, + 933, + 776, + 769, + 962, + 963, + 931, + 976, + 946, + 914, + 977, + 952, + 920, + 981, + 966, + 934, + 982, + 960, + 928, + 1008, + 954, + 922, + 1009, + 961, + 929, + 1013, + 949, + 917, + 1415, + 1381, + 1410, + 1333, + 1362, + 1333, + 1410, + 43888, + 5024, + 5024, + 43889, + 5025, + 5025, + 43890, + 5026, + 5026, + 43891, + 5027, + 5027, + 43892, + 5028, + 5028, + 43893, + 5029, + 5029, + 43894, + 5030, + 5030, + 43895, + 5031, + 5031, + 43896, + 5032, + 5032, + 43897, + 5033, + 5033, + 43898, + 5034, + 5034, + 43899, + 5035, + 5035, + 43900, + 5036, + 5036, + 43901, + 5037, + 5037, + 43902, + 5038, + 5038, + 43903, + 5039, + 5039, + 43904, + 5040, + 5040, + 43905, + 5041, + 5041, + 43906, + 5042, + 5042, + 43907, + 5043, + 5043, + 43908, + 5044, + 5044, + 43909, + 5045, + 5045, 
+ 43910, + 5046, + 5046, + 43911, + 5047, + 5047, + 43912, + 5048, + 5048, + 43913, + 5049, + 5049, + 43914, + 5050, + 5050, + 43915, + 5051, + 5051, + 43916, + 5052, + 5052, + 43917, + 5053, + 5053, + 43918, + 5054, + 5054, + 43919, + 5055, + 5055, + 43920, + 5056, + 5056, + 43921, + 5057, + 5057, + 43922, + 5058, + 5058, + 43923, + 5059, + 5059, + 43924, + 5060, + 5060, + 43925, + 5061, + 5061, + 43926, + 5062, + 5062, + 43927, + 5063, + 5063, + 43928, + 5064, + 5064, + 43929, + 5065, + 5065, + 43930, + 5066, + 5066, + 43931, + 5067, + 5067, + 43932, + 5068, + 5068, + 43933, + 5069, + 5069, + 43934, + 5070, + 5070, + 43935, + 5071, + 5071, + 43936, + 5072, + 5072, + 43937, + 5073, + 5073, + 43938, + 5074, + 5074, + 43939, + 5075, + 5075, + 43940, + 5076, + 5076, + 43941, + 5077, + 5077, + 43942, + 5078, + 5078, + 43943, + 5079, + 5079, + 43944, + 5080, + 5080, + 43945, + 5081, + 5081, + 43946, + 5082, + 5082, + 43947, + 5083, + 5083, + 43948, + 5084, + 5084, + 43949, + 5085, + 5085, + 43950, + 5086, + 5086, + 43951, + 5087, + 5087, + 43952, + 5088, + 5088, + 43953, + 5089, + 5089, + 43954, + 5090, + 5090, + 43955, + 5091, + 5091, + 43956, + 5092, + 5092, + 43957, + 5093, + 5093, + 43958, + 5094, + 5094, + 43959, + 5095, + 5095, + 43960, + 5096, + 5096, + 43961, + 5097, + 5097, + 43962, + 5098, + 5098, + 43963, + 5099, + 5099, + 43964, + 5100, + 5100, + 43965, + 5101, + 5101, + 43966, + 5102, + 5102, + 43967, + 5103, + 5103, + 5112, + 5104, + 5104, + 5113, + 5105, + 5105, + 5114, + 5106, + 5106, + 5115, + 5107, + 5107, + 5116, + 5108, + 5108, + 5117, + 5109, + 5109, + 5112, + 5104, + 5104, + 5113, + 5105, + 5105, + 5114, + 5106, + 5106, + 5115, + 5107, + 5107, + 5116, + 5108, + 5108, + 5117, + 5109, + 5109, + 7296, + 1074, + 1042, + 7297, + 1076, + 1044, + 7298, + 1086, + 1054, + 7299, + 1089, + 1057, + 7300, + 1090, + 1058, + 7301, + 1090, + 1058, + 7302, + 1098, + 1066, + 7303, + 1123, + 1122, + 7304, + 42571, + 42570, + 7830, + 104, + 817, + 72, + 817, + 7831, + 116, + 776, + 84, + 776, + 7832, + 119, + 778, + 87, + 778, + 7833, + 121, + 778, + 89, + 778, + 7834, + 97, + 702, + 65, + 702, + 7835, + 7777, + 7776, + 223, + 115, + 115, + 7838, + 8016, + 965, + 787, + 933, + 787, + 8018, + 965, + 787, + 768, + 933, + 787, + 768, + 8020, + 965, + 787, + 769, + 933, + 787, + 769, + 8022, + 965, + 787, + 834, + 933, + 787, + 834, + 8064, + 7936, + 953, + 7944, + 921, + 8072, + 8065, + 7937, + 953, + 7945, + 921, + 8073, + 8066, + 7938, + 953, + 7946, + 921, + 8074, + 8067, + 7939, + 953, + 7947, + 921, + 8075, + 8068, + 7940, + 953, + 7948, + 921, + 8076, + 8069, + 7941, + 953, + 7949, + 921, + 8077, + 8070, + 7942, + 953, + 7950, + 921, + 8078, + 8071, + 7943, + 953, + 7951, + 921, + 8079, + 8064, + 7936, + 953, + 7944, + 921, + 8072, + 8065, + 7937, + 953, + 7945, + 921, + 8073, + 8066, + 7938, + 953, + 7946, + 921, + 8074, + 8067, + 7939, + 953, + 7947, + 921, + 8075, + 8068, + 7940, + 953, + 7948, + 921, + 8076, + 8069, + 7941, + 953, + 7949, + 921, + 8077, + 8070, + 7942, + 953, + 7950, + 921, + 8078, + 8071, + 7943, + 953, + 7951, + 921, + 8079, + 8080, + 7968, + 953, + 7976, + 921, + 8088, + 8081, + 7969, + 953, + 7977, + 921, + 8089, + 8082, + 7970, + 953, + 7978, + 921, + 8090, + 8083, + 7971, + 953, + 7979, + 921, + 8091, + 8084, + 7972, + 953, + 7980, + 921, + 8092, + 8085, + 7973, + 953, + 7981, + 921, + 8093, + 8086, + 7974, + 953, + 7982, + 921, + 8094, + 8087, + 7975, + 953, + 7983, + 921, + 8095, + 8080, + 7968, + 953, + 7976, + 921, + 8088, + 8081, + 7969, + 953, + 7977, + 
921, + 8089, + 8082, + 7970, + 953, + 7978, + 921, + 8090, + 8083, + 7971, + 953, + 7979, + 921, + 8091, + 8084, + 7972, + 953, + 7980, + 921, + 8092, + 8085, + 7973, + 953, + 7981, + 921, + 8093, + 8086, + 7974, + 953, + 7982, + 921, + 8094, + 8087, + 7975, + 953, + 7983, + 921, + 8095, + 8096, + 8032, + 953, + 8040, + 921, + 8104, + 8097, + 8033, + 953, + 8041, + 921, + 8105, + 8098, + 8034, + 953, + 8042, + 921, + 8106, + 8099, + 8035, + 953, + 8043, + 921, + 8107, + 8100, + 8036, + 953, + 8044, + 921, + 8108, + 8101, + 8037, + 953, + 8045, + 921, + 8109, + 8102, + 8038, + 953, + 8046, + 921, + 8110, + 8103, + 8039, + 953, + 8047, + 921, + 8111, + 8096, + 8032, + 953, + 8040, + 921, + 8104, + 8097, + 8033, + 953, + 8041, + 921, + 8105, + 8098, + 8034, + 953, + 8042, + 921, + 8106, + 8099, + 8035, + 953, + 8043, + 921, + 8107, + 8100, + 8036, + 953, + 8044, + 921, + 8108, + 8101, + 8037, + 953, + 8045, + 921, + 8109, + 8102, + 8038, + 953, + 8046, + 921, + 8110, + 8103, + 8039, + 953, + 8047, + 921, + 8111, + 8114, + 8048, + 953, + 8122, + 921, + 8122, + 837, + 8115, + 945, + 953, + 913, + 921, + 8124, + 8116, + 940, + 953, + 902, + 921, + 902, + 837, + 8118, + 945, + 834, + 913, + 834, + 8119, + 945, + 834, + 953, + 913, + 834, + 921, + 913, + 834, + 837, + 8115, + 945, + 953, + 913, + 921, + 8124, + 8126, + 953, + 921, + 8130, + 8052, + 953, + 8138, + 921, + 8138, + 837, + 8131, + 951, + 953, + 919, + 921, + 8140, + 8132, + 942, + 953, + 905, + 921, + 905, + 837, + 8134, + 951, + 834, + 919, + 834, + 8135, + 951, + 834, + 953, + 919, + 834, + 921, + 919, + 834, + 837, + 8131, + 951, + 953, + 919, + 921, + 8140, + 8146, + 953, + 776, + 768, + 921, + 776, + 768, + 8147, + 953, + 776, + 769, + 921, + 776, + 769, + 8150, + 953, + 834, + 921, + 834, + 8151, + 953, + 776, + 834, + 921, + 776, + 834, + 8162, + 965, + 776, + 768, + 933, + 776, + 768, + 8163, + 965, + 776, + 769, + 933, + 776, + 769, + 8164, + 961, + 787, + 929, + 787, + 8166, + 965, + 834, + 933, + 834, + 8167, + 965, + 776, + 834, + 933, + 776, + 834, + 8178, + 8060, + 953, + 8186, + 921, + 8186, + 837, + 8179, + 969, + 953, + 937, + 921, + 8188, + 8180, + 974, + 953, + 911, + 921, + 911, + 837, + 8182, + 969, + 834, + 937, + 834, + 8183, + 969, + 834, + 953, + 937, + 834, + 921, + 937, + 834, + 837, + 8179, + 969, + 953, + 937, + 921, + 8188, + 43888, + 5024, + 5024, + 43889, + 5025, + 5025, + 43890, + 5026, + 5026, + 43891, + 5027, + 5027, + 43892, + 5028, + 5028, + 43893, + 5029, + 5029, + 43894, + 5030, + 5030, + 43895, + 5031, + 5031, + 43896, + 5032, + 5032, + 43897, + 5033, + 5033, + 43898, + 5034, + 5034, + 43899, + 5035, + 5035, + 43900, + 5036, + 5036, + 43901, + 5037, + 5037, + 43902, + 5038, + 5038, + 43903, + 5039, + 5039, + 43904, + 5040, + 5040, + 43905, + 5041, + 5041, + 43906, + 5042, + 5042, + 43907, + 5043, + 5043, + 43908, + 5044, + 5044, + 43909, + 5045, + 5045, + 43910, + 5046, + 5046, + 43911, + 5047, + 5047, + 43912, + 5048, + 5048, + 43913, + 5049, + 5049, + 43914, + 5050, + 5050, + 43915, + 5051, + 5051, + 43916, + 5052, + 5052, + 43917, + 5053, + 5053, + 43918, + 5054, + 5054, + 43919, + 5055, + 5055, + 43920, + 5056, + 5056, + 43921, + 5057, + 5057, + 43922, + 5058, + 5058, + 43923, + 5059, + 5059, + 43924, + 5060, + 5060, + 43925, + 5061, + 5061, + 43926, + 5062, + 5062, + 43927, + 5063, + 5063, + 43928, + 5064, + 5064, + 43929, + 5065, + 5065, + 43930, + 5066, + 5066, + 43931, + 5067, + 5067, + 43932, + 5068, + 5068, + 43933, + 5069, + 5069, + 43934, + 5070, + 5070, + 43935, + 5071, + 5071, + 
43936, + 5072, + 5072, + 43937, + 5073, + 5073, + 43938, + 5074, + 5074, + 43939, + 5075, + 5075, + 43940, + 5076, + 5076, + 43941, + 5077, + 5077, + 43942, + 5078, + 5078, + 43943, + 5079, + 5079, + 43944, + 5080, + 5080, + 43945, + 5081, + 5081, + 43946, + 5082, + 5082, + 43947, + 5083, + 5083, + 43948, + 5084, + 5084, + 43949, + 5085, + 5085, + 43950, + 5086, + 5086, + 43951, + 5087, + 5087, + 43952, + 5088, + 5088, + 43953, + 5089, + 5089, + 43954, + 5090, + 5090, + 43955, + 5091, + 5091, + 43956, + 5092, + 5092, + 43957, + 5093, + 5093, + 43958, + 5094, + 5094, + 43959, + 5095, + 5095, + 43960, + 5096, + 5096, + 43961, + 5097, + 5097, + 43962, + 5098, + 5098, + 43963, + 5099, + 5099, + 43964, + 5100, + 5100, + 43965, + 5101, + 5101, + 43966, + 5102, + 5102, + 43967, + 5103, + 5103, + 64256, + 102, + 102, + 70, + 70, + 70, + 102, + 64257, + 102, + 105, + 70, + 73, + 70, + 105, + 64258, + 102, + 108, + 70, + 76, + 70, + 108, + 64259, + 102, + 102, + 105, + 70, + 70, + 73, + 70, + 102, + 105, + 64260, + 102, + 102, + 108, + 70, + 70, + 76, + 70, + 102, + 108, + 64261, + 115, + 116, + 83, + 84, + 83, + 116, + 64262, + 115, + 116, + 83, + 84, + 83, + 116, + 64275, + 1396, + 1398, + 1348, + 1350, + 1348, + 1398, + 64276, + 1396, + 1381, + 1348, + 1333, + 1348, + 1381, + 64277, + 1396, + 1387, + 1348, + 1339, + 1348, + 1387, + 64278, + 1406, + 1398, + 1358, + 1350, + 1358, + 1398, + 64279, + 1396, + 1389, + 1348, + 1341, + 1348, + 1389, +}; + +/* type indexes */ +#define SHIFT 7 +static unsigned short index1[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 34, 35, 36, 37, + 38, 39, 34, 34, 34, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 64, 64, 64, 65, 66, 64, + 64, 64, 64, 67, 68, 64, 64, 64, 64, 64, 64, 69, 70, 71, 72, 73, 74, 75, + 76, 64, 77, 78, 79, 80, 81, 82, 83, 64, 64, 84, 85, 34, 34, 34, 34, 34, + 34, 86, 34, 34, 34, 34, 34, 87, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 88, 89, 90, 91, 34, 34, 34, 92, 34, 34, + 34, 93, 94, 34, 34, 34, 34, 34, 95, 34, 34, 34, 96, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 97, 98, 99, 34, 34, 34, 34, 34, 34, 100, 101, 34, 34, + 34, 34, 34, 34, 34, 34, 102, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 103, 34, 34, 34, 34, 34, 34, 34, 34, 104, 34, 34, 34, 34, + 100, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 103, 34, 34, 34, 34, 34, 34, 105, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 106, 107, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 108, 109, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 110, 111, 34, 34, 34, 34, 34, + 34, 34, 34, 112, 34, 34, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 123, 124, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 125, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, + 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, + 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, + 126, 126, 
126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, + 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 126, 127, 128, 129, + 130, 131, 132, 133, 34, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 144, 34, 34, 151, 144, 152, 153, 154, + 155, 156, 157, 158, 159, 160, 161, 162, 144, 163, 144, 164, 144, 165, + 166, 167, 168, 169, 170, 171, 144, 172, 173, 144, 174, 175, 176, 177, + 144, 178, 179, 144, 144, 180, 181, 144, 144, 182, 183, 184, 185, 144, + 186, 144, 144, 34, 34, 34, 34, 34, 34, 34, 187, 188, 34, 189, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 34, 34, 34, 34, 34, 34, 34, 34, 190, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 34, 34, 34, 34, 191, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 34, 34, 34, 34, 192, 193, 194, 195, 144, 144, 144, 144, 196, + 197, 198, 199, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 200, 34, 34, + 34, 34, 34, 201, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 34, 34, 202, 34, 34, 203, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 204, 205, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 64, + 206, 207, 208, 209, 210, 211, 144, 212, 213, 214, 215, 216, 217, 218, + 219, 64, 64, 64, 64, 220, 221, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 222, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 34, 223, 224, 144, 144, 144, 144, 144, 225, 226, 144, + 144, 227, 228, 144, 144, 229, 230, 231, 232, 233, 144, 64, 234, 64, 64, + 64, 64, 64, 235, 236, 237, 238, 239, 240, 241, 242, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 243, 244, 245, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 86, 246, 34, 247, 248, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 249, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 250, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 251, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 252, 34, 34, 
34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 253, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 254, 34, + 255, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 256, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 257, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 34, 249, 34, 34, 258, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 
144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 
144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 
+    /* ...continuation of the first-stage index table: long generated runs
+       of block 144 followed by block 126, interrupted only by the isolated
+       block values 259, 260, 261 and 262 (machine-generated data,
+       collapsed here for readability)... */
+};
+
+static unsigned short index2[] = {
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 3, 3, 3, 2, 4, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6, 5,
+    7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 6, 5, 5, 5, 5, 5, 5, 17, 17, 17,
+    /* ...several thousand further generated second-stage entries follow
+       (property-record indices per code point within a block); the
+       array continues below... */
55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, + 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 386, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 102, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 102, 102, 102, 102, 102, 102, 5, + 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 102, 5, 5, 5, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 55, 25, 6, 6, 6, + 5, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 102, 30, 31, 30, 31, 30, + 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, + 31, 30, 31, 30, 31, 101, 
101, 25, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 25, 25, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 6, 6, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 30, 31, 20, 20, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 101, 20, 20, 20, + 20, 20, 20, 20, 20, 30, 31, 30, 31, 387, 30, 31, 30, 31, 30, 31, 30, 31, + 30, 31, 102, 6, 6, 30, 31, 388, 20, 55, 30, 31, 30, 31, 20, 20, 30, 31, + 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, 30, 31, + 389, 390, 391, 392, 389, 20, 393, 394, 395, 396, 30, 31, 30, 31, 30, 31, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 101, 101, 20, 55, 55, 55, 55, + 55, 55, 55, 25, 55, 55, 55, 25, 55, 55, 55, 55, 25, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 18, 18, 25, 25, 18, 5, 5, 5, 5, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 5, 5, + 5, 5, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 18, 18, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 55, 55, 55, 55, + 55, 55, 5, 5, 5, 55, 5, 55, 55, 25, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, + 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 18, + 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 0, 0, 0, 25, 25, 25, 18, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 25, 18, 18, 25, 25, 25, 25, 18, 18, 25, 18, 18, 18, 18, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 102, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 0, 0, 0, 0, 5, 5, 55, 55, 55, 55, 55, 25, 102, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 55, 55, 55, 55, + 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 
55, 55, 25, 25, 25, 25, 25, 25, 18, 18, 25, 25, 18, 18, + 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 25, 55, 55, 55, 55, 55, + 55, 55, 55, 25, 18, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 5, + 5, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 102, 55, 55, 55, 55, 55, 55, 5, 5, 5, 55, 18, 25, 18, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 55, 25, 25, 25, 55, 55, 25, 25, + 55, 55, 55, 55, 55, 25, 25, 55, 25, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 102, 5, 5, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 18, 25, 25, 18, 18, 5, 5, 55, 102, 102, 18, + 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, + 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 0, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 397, 20, 20, 20, + 20, 20, 20, 20, 6, 101, 101, 101, 101, 20, 20, 20, 20, 20, 20, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, + 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, + 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, + 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, + 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, + 25, 18, 18, 25, 18, 18, 5, 18, 25, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, + 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 
55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 386, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 478, 479, 480, 481, 482, 483, 484, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 485, 486, 487, 488, 489, 0, 0, 0, 0, 0, 55, 25, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 0, 55, 0, 55, 55, 0, 55, 55, 0, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 490, 490, 490, 490, 490, 490, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 490, 490, 5, 5, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 18, 18, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 18, 18, 18, 5, 5, 6, 0, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 0, 5, 5, 5, 5, 0, 0, 0, 0, 490, 55, 490, 55, 490, 0, 490, 55, + 490, 55, 490, 55, 490, 55, 490, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 21, 0, 5, 5, 5, 5, 5, 5, 6, 5, 5, + 5, 5, 5, 5, 6, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 6, 5, 5, 5, 5, 5, + 5, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 5, 5, 5, 6, 18, 6, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 102, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 491, 491, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 0, 0, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 0, 0, 55, + 55, 55, 0, 0, 0, 5, 5, 5, 6, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 21, 21, 21, 5, 5, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, + 0, 0, 5, 5, 5, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 242, 242, 242, 242, 242, 
242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 27, 27, 27, 27, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 27, 27, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 25, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, + 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 27, 27, 27, + 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 242, 55, 55, 55, 55, 55, 55, 55, + 55, 242, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 5, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 55, 55, 55, + 55, 55, 55, 55, 55, 5, 242, 242, 242, 242, 242, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 492, 492, 492, 492, 492, 492, 492, 492, + 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, + 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, + 492, 492, 492, 492, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, + 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, + 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, + 493, 493, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 0, 0, 0, 0, 0, 0, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, + 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, + 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 492, 0, 
0, 0, 0, + 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, + 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, 493, + 493, 493, 493, 493, 493, 493, 493, 493, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, + 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 0, 0, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, 0, 0, 55, + 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 0, 5, 27, 27, 27, 27, 27, 27, 27, 27, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 5, 5, 27, 27, 27, 27, 27, 27, 27, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 0, 55, 55, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 27, + 27, 27, 27, 27, 27, 0, 0, 0, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, + 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, + 0, 27, 27, 55, 55, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 55, 25, 25, 25, + 0, 25, 25, 0, 0, 0, 0, 0, 25, 25, 25, 25, 55, 55, 55, 55, 0, 55, 55, 55, + 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 25, 25, 25, 0, 0, + 0, 0, 25, 26, 22, 23, 358, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 0, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 27, 27, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 27, + 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 5, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 0, 0, 0, 0, 27, 27, 27, 27, 27, + 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 5, 5, 5, 5, 5, + 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, + 0, 27, 27, 27, 27, 27, 27, 27, 27, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, 108, + 108, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, + 115, 115, 115, 115, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, + 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 22, + 23, 358, 359, 360, 361, 362, 363, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 27, 27, 27, 27, 27, 
27, 27, 27, 27, 27, 55, 0, 0, + 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 27, 27, 27, 27, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 18, 25, 18, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 26, 22, 23, 358, 359, 360, 361, 362, + 363, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 18, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, 18, 25, 25, 25, 25, 18, 18, + 25, 25, 5, 5, 21, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 0, 0, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 0, 0, 0, 0, 0, 0, 25, 25, 25, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 18, 25, 25, + 25, 25, 25, 25, 25, 25, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 5, 5, 5, + 5, 55, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 5, 5, 55, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 25, 25, 18, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, + 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, 18, 18, 55, 55, 55, 55, 5, 5, 5, + 5, 25, 25, 25, 25, 5, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 55, 5, + 55, 5, 5, 5, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 18, 18, 18, 25, 25, 25, 18, 18, 25, 18, 25, 25, 5, 5, 5, + 5, 5, 5, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, + 55, 55, 55, 55, 55, 0, 55, 0, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 5, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 25, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 25, 25, 18, 18, 0, 55, 55, + 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, + 55, 55, 55, 55, 55, 0, 55, 55, 0, 55, 55, 55, 55, 55, 0, 25, 25, 55, 18, + 18, 25, 18, 18, 18, 18, 0, 
0, 18, 18, 0, 0, 18, 18, 18, 0, 0, 55, 0, 0, + 0, 0, 0, 0, 18, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 18, 18, 0, 0, 25, 25, + 25, 25, 25, 25, 25, 0, 0, 0, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 18, 18, 25, 25, 25, 18, + 25, 55, 55, 55, 55, 5, 5, 5, 5, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 0, 5, 0, 5, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 18, 18, 18, 25, 25, 25, 25, 25, 25, 18, 25, 18, 18, 18, + 18, 25, 25, 18, 25, 25, 55, 55, 5, 55, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, 18, 25, 25, 25, 25, 0, 0, + 18, 18, 18, 18, 25, 25, 18, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 55, 55, 55, 55, 25, 25, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, + 18, 25, 25, 25, 25, 25, 25, 25, 25, 18, 18, 25, 18, 25, 25, 5, 5, 5, 55, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, + 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, + 18, 25, 18, 18, 25, 25, 25, 25, 25, 25, 18, 25, 0, 0, 0, 0, 0, 0, 0, 0, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 0, 0, 25, 25, 25, 18, 18, 25, 25, 25, 25, 18, 25, 25, 25, + 25, 25, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 27, 27, 5, 5, 5, + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, 18, 25, 25, + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 55, 55, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 25, 25, 25, 25, 25, 25, 18, 55, 25, 25, 25, 25, 5, 5, 5, 5, 5, 5, + 5, 5, 25, 0, 0, 0, 0, 0, 0, 0, 0, 55, 25, 25, 25, 25, 25, 25, 18, 18, 25, + 25, 25, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 18, 25, 25, 5, 5, 5, 55, 5, 5, 5, 5, 5, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 18, 25, 25, 25, 25, 25, 25, 25, 0, 25, 25, 25, 25, 25, 25, 18, + 25, 55, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 0, 0, 0, 5, 5, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 0, 18, 25, 25, 25, 25, 25, 25, 25, 18, + 25, 25, 18, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 25, 25, 25, 25, 25, 25, 0, 0, 0, 25, 0, 25, 25, 0, 25, 25, 25, 25, 25, + 25, 25, 55, 25, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 0, 55, 55, 0, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 18, 18, 18, 18, 18, 0, 25, + 25, 0, 18, 18, 25, 18, 25, 55, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 
+ 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 25, 25, 18, + 18, 5, 5, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, + 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 242, 0, + 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 0, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, + 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 25, 25, + 25, 25, 25, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 102, 102, 102, 102, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 0, 27, 27, 27, 27, 27, 27, 27, 0, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, + 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 55, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 102, 102, 102, 102, 102, 102, 102, 102, + 102, 102, 102, 102, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 102, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 5, 25, 25, 5, 21, 21, 21, + 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 18, 18, 25, 25, 25, 5, 5, 5, 18, 18, 18, + 18, 18, 18, 21, 21, 21, 21, 21, 21, 21, 21, 25, 25, 25, 25, 25, 25, 25, + 25, 5, 5, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 25, 25, 25, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 25, 25, 25, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 0, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 20, 20, 20, 20, 20, 20, 20, 0, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 120, 0, 120, + 120, 0, 0, 120, 0, 0, 120, 120, 0, 0, 120, 120, 120, 120, 0, 120, 120, + 120, 120, 120, 120, 120, 120, 20, 20, 20, 20, 0, 20, 0, 20, 20, 20, 20, + 20, 20, 20, 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 120, 120, 0, 120, 120, 120, 120, 0, 0, 120, 120, 120, 120, 120, 120, + 120, 120, 0, 120, 120, 120, 120, 120, 120, 120, 0, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 120, 120, 0, 120, 120, 120, 120, 0, 120, 120, 120, 120, 120, + 0, 120, 0, 0, 0, 120, 120, 120, 120, 120, 120, 120, 0, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 
+ 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 5, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 5, 20, 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 5, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 5, 20, 20, 20, 20, + 20, 20, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 5, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 5, 20, 20, 20, 20, 20, 20, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 5, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 5, 20, 20, + 20, 20, 20, 20, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 5, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 5, 20, 20, 20, 20, 20, 20, 120, 20, 0, 0, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 5, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 0, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, + 25, 25, 25, 25, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 25, 25, 0, 25, 25, + 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, + 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, 494, + 494, 494, 494, 494, 494, 494, 494, 494, 494, 495, 495, 495, 495, 495, + 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, + 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, + 495, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 5, 27, 27, 27, 5, 27, + 27, 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 0, 55, 55, 0, 55, 0, 0, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 0, 55, 55, 55, 55, 0, 55, 0, 55, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0, 0, + 55, 0, 55, 0, 55, 0, 55, 55, 55, 0, 55, 55, 0, 55, 0, 0, 55, 0, 55, 0, + 55, 0, 55, 0, 55, 0, 55, 55, 0, 55, 0, 0, 55, 55, 55, 55, 0, 55, 55, 55, + 55, 55, 55, 55, 0, 55, 55, 55, 55, 0, 55, 55, 55, 55, 0, 55, 0, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 55, 55, 55, 0, 55, 55, 55, + 55, 55, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 357, 357, 26, 22, 23, 358, 359, + 360, 361, 362, 363, 27, 27, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 496, 496, + 
496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, + 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 5, 5, 5, 5, 5, 5, 496, + 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, + 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 5, 5, 0, 0, 0, 0, + 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, + 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 496, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, + 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, + 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, + 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 5, 5, 5, + 5, 0, 0, 0, 5, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, + 0, 0, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, + 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 
55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 386, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 
55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, +}; + +/* Returns the numeric value as double for Unicode characters + * having this property, -1.0 otherwise. + */ +double numba_PyUnicode_ToNumeric(Py_UCS4 ch) +{ + switch (ch) { + case 0x0F33: + return (double) -1.0/2.0; + case 0x0030: + case 0x0660: + case 0x06F0: + case 0x07C0: + case 0x0966: + case 0x09E6: + case 0x0A66: + case 0x0AE6: + case 0x0B66: + case 0x0BE6: + case 0x0C66: + case 0x0C78: + case 0x0CE6: + case 0x0D66: + case 0x0DE6: + case 0x0E50: + case 0x0ED0: + case 0x0F20: + case 0x1040: + case 0x1090: + case 0x17E0: + case 0x17F0: + case 0x1810: + case 0x1946: + case 0x19D0: + case 0x1A80: + case 0x1A90: + case 0x1B50: + case 0x1BB0: + case 0x1C40: + case 0x1C50: + case 0x2070: + case 0x2080: + case 0x2189: + case 0x24EA: + case 0x24FF: + case 0x3007: + case 0x96F6: + case 0xA620: + case 0xA6EF: + case 0xA8D0: + case 0xA900: + case 0xA9D0: + case 0xA9F0: + case 0xAA50: + case 0xABF0: + case 0xF9B2: + case 0xFF10: + case 0x1018A: + case 0x104A0: + case 0x10D30: + case 0x11066: + case 0x110F0: + case 0x11136: + case 0x111D0: + case 0x112F0: + case 0x11450: + case 0x114D0: + case 0x11650: + case 0x116C0: + case 0x11730: + case 0x118E0: + case 0x11C50: + case 0x11D50: + case 0x11DA0: + case 0x16A60: + case 0x16B50: + case 0x16E80: + case 0x1D2E0: + case 0x1D7CE: + case 0x1D7D8: + case 0x1D7E2: + case 0x1D7EC: + case 0x1D7F6: + case 0x1E950: + case 0x1F100: + case 0x1F101: + case 0x1F10B: + case 0x1F10C: + return (double) 0.0; + case 0x0031: + case 0x00B9: + case 0x0661: + case 0x06F1: + case 0x07C1: + case 0x0967: + case 0x09E7: + case 0x0A67: + case 0x0AE7: + case 0x0B67: + case 0x0BE7: + case 0x0C67: + case 0x0C79: + case 0x0C7C: + case 0x0CE7: + case 0x0D67: + case 0x0DE7: + case 0x0E51: + case 0x0ED1: + case 0x0F21: + case 0x1041: + case 0x1091: + case 0x1369: + case 0x17E1: + case 0x17F1: + case 0x1811: + case 0x1947: + case 0x19D1: + case 0x19DA: + case 0x1A81: + case 0x1A91: + case 0x1B51: + case 0x1BB1: + case 0x1C41: + case 0x1C51: + case 0x2081: + case 0x215F: + case 0x2160: + case 0x2170: + case 0x2460: + case 0x2474: + case 0x2488: + case 0x24F5: + case 0x2776: + case 0x2780: + case 0x278A: + case 0x3021: + case 0x3192: + case 0x3220: + case 0x3280: + case 0x4E00: + case 0x58F1: + case 0x58F9: + case 0x5E7A: + case 0x5F0C: + case 0xA621: + case 0xA6E6: + case 0xA8D1: + case 0xA901: + case 0xA9D1: + case 0xA9F1: + case 0xAA51: + case 0xABF1: + case 0xFF11: + case 0x10107: + case 0x10142: + case 0x10158: + case 0x10159: + case 0x1015A: + case 0x102E1: + case 0x10320: + case 0x103D1: + case 0x104A1: + case 0x10858: + case 0x10879: + case 0x108A7: + case 0x108FB: + case 0x10916: + case 0x109C0: + case 0x10A40: + case 0x10A7D: + case 0x10A9D: + case 0x10AEB: + case 0x10B58: + case 0x10B78: + case 0x10BA9: + case 0x10CFA: + case 0x10D31: + case 0x10E60: + case 0x10F1D: + case 0x10F51: + case 0x11052: + 
case 0x11067: + case 0x110F1: + case 0x11137: + case 0x111D1: + case 0x111E1: + case 0x112F1: + case 0x11451: + case 0x114D1: + case 0x11651: + case 0x116C1: + case 0x11731: + case 0x118E1: + case 0x11C51: + case 0x11C5A: + case 0x11D51: + case 0x11DA1: + case 0x12415: + case 0x1241E: + case 0x1242C: + case 0x12434: + case 0x1244F: + case 0x12458: + case 0x16A61: + case 0x16B51: + case 0x16E81: + case 0x16E94: + case 0x1D2E1: + case 0x1D360: + case 0x1D372: + case 0x1D377: + case 0x1D7CF: + case 0x1D7D9: + case 0x1D7E3: + case 0x1D7ED: + case 0x1D7F7: + case 0x1E8C7: + case 0x1E951: + case 0x1EC71: + case 0x1ECA3: + case 0x1ECB1: + case 0x1F102: + case 0x2092A: + return (double) 1.0; + case 0x0D5C: + case 0x2152: + return (double) 1.0/10.0; + case 0x109F6: + return (double) 1.0/12.0; + case 0x09F4: + case 0x0B75: + case 0x0D76: + case 0xA833: + return (double) 1.0/16.0; + case 0x0D58: + return (double) 1.0/160.0; + case 0x00BD: + case 0x0B73: + case 0x0D74: + case 0x0F2A: + case 0x2CFD: + case 0xA831: + case 0x10141: + case 0x10175: + case 0x10176: + case 0x109BD: + case 0x10A48: + case 0x10E7B: + case 0x10F26: + case 0x12464: + case 0x1ECAE: + return (double) 1.0/2.0; + case 0x0D5B: + return (double) 1.0/20.0; + case 0x2153: + case 0x10E7D: + case 0x1245A: + case 0x1245D: + case 0x12465: + return (double) 1.0/3.0; + case 0x00BC: + case 0x09F7: + case 0x0B72: + case 0x0D73: + case 0xA830: + case 0x10140: + case 0x1018B: + case 0x10E7C: + case 0x12460: + case 0x12462: + case 0x12463: + case 0x1ECAD: + return (double) 1.0/4.0; + case 0x0D59: + return (double) 1.0/40.0; + case 0x0D5E: + case 0x2155: + return (double) 1.0/5.0; + case 0x2159: + case 0x12461: + return (double) 1.0/6.0; + case 0x2150: + return (double) 1.0/7.0; + case 0x09F5: + case 0x0B76: + case 0x0D77: + case 0x215B: + case 0xA834: + case 0x1245F: + return (double) 1.0/8.0; + case 0x2151: + return (double) 1.0/9.0; + case 0x0BF0: + case 0x0D70: + case 0x1372: + case 0x2169: + case 0x2179: + case 0x2469: + case 0x247D: + case 0x2491: + case 0x24FE: + case 0x277F: + case 0x2789: + case 0x2793: + case 0x3038: + case 0x3229: + case 0x3248: + case 0x3289: + case 0x4EC0: + case 0x5341: + case 0x62FE: + case 0xF973: + case 0xF9FD: + case 0x10110: + case 0x10149: + case 0x10150: + case 0x10157: + case 0x10160: + case 0x10161: + case 0x10162: + case 0x10163: + case 0x10164: + case 0x102EA: + case 0x10322: + case 0x103D3: + case 0x1085B: + case 0x1087E: + case 0x108AD: + case 0x108FD: + case 0x10917: + case 0x109C9: + case 0x10A44: + case 0x10A9E: + case 0x10AED: + case 0x10B5C: + case 0x10B7C: + case 0x10BAD: + case 0x10CFC: + case 0x10E69: + case 0x10F22: + case 0x10F52: + case 0x1105B: + case 0x111EA: + case 0x1173A: + case 0x118EA: + case 0x11C63: + case 0x16B5B: + case 0x16E8A: + case 0x1D2EA: + case 0x1D369: + case 0x1EC7A: + return (double) 10.0; + case 0x109FF: + return (double) 10.0/12.0; + case 0x0BF1: + case 0x0D71: + case 0x137B: + case 0x216D: + case 0x217D: + case 0x4F70: + case 0x767E: + case 0x964C: + case 0x10119: + case 0x1014B: + case 0x10152: + case 0x1016A: + case 0x102F3: + case 0x103D5: + case 0x1085D: + case 0x108AF: + case 0x108FF: + case 0x10919: + case 0x109D2: + case 0x10A46: + case 0x10AEF: + case 0x10B5E: + case 0x10B7E: + case 0x10BAF: + case 0x10CFE: + case 0x10E72: + case 0x10F25: + case 0x10F54: + case 0x11064: + case 0x111F3: + case 0x11C6C: + case 0x16B5C: + case 0x1EC83: + return (double) 100.0; + case 0x0BF2: + case 0x0D72: + case 0x216F: + case 0x217F: + case 0x2180: + case 0x4EDF: + case 0x5343: + 
case 0x9621: + case 0x10122: + case 0x1014D: + case 0x10154: + case 0x10171: + case 0x1085E: + case 0x109DB: + case 0x10A47: + case 0x10B5F: + case 0x10B7F: + case 0x10CFF: + case 0x11065: + case 0x111F4: + case 0x1EC8C: + return (double) 1000.0; + case 0x137C: + case 0x2182: + case 0x4E07: + case 0x842C: + case 0x1012B: + case 0x10155: + case 0x1085F: + case 0x109E4: + case 0x16B5D: + case 0x1EC95: + case 0x1ECB3: + return (double) 10000.0; + case 0x2188: + case 0x109ED: + case 0x1EC9E: + case 0x1ECA0: + case 0x1ECB4: + return (double) 100000.0; + case 0x16B5E: + return (double) 1000000.0; + case 0x1ECA1: + return (double) 10000000.0; + case 0x4EBF: + case 0x5104: + case 0x16B5F: + return (double) 100000000.0; + case 0x16B60: + return (double) 10000000000.0; + case 0x5146: + case 0x16B61: + return (double) 1000000000000.0; + case 0x216A: + case 0x217A: + case 0x246A: + case 0x247E: + case 0x2492: + case 0x24EB: + case 0x16E8B: + case 0x1D2EB: + return (double) 11.0; + case 0x109BC: + return (double) 11.0/12.0; + case 0x0F2F: + return (double) 11.0/2.0; + case 0x216B: + case 0x217B: + case 0x246B: + case 0x247F: + case 0x2493: + case 0x24EC: + case 0x16E8C: + case 0x1D2EC: + return (double) 12.0; + case 0x246C: + case 0x2480: + case 0x2494: + case 0x24ED: + case 0x16E8D: + case 0x1D2ED: + return (double) 13.0; + case 0x0F30: + return (double) 13.0/2.0; + case 0x246D: + case 0x2481: + case 0x2495: + case 0x24EE: + case 0x16E8E: + case 0x1D2EE: + return (double) 14.0; + case 0x246E: + case 0x2482: + case 0x2496: + case 0x24EF: + case 0x16E8F: + case 0x1D2EF: + return (double) 15.0; + case 0x0F31: + return (double) 15.0/2.0; + case 0x09F9: + case 0x246F: + case 0x2483: + case 0x2497: + case 0x24F0: + case 0x16E90: + case 0x1D2F0: + return (double) 16.0; + case 0x16EE: + case 0x2470: + case 0x2484: + case 0x2498: + case 0x24F1: + case 0x16E91: + case 0x1D2F1: + return (double) 17.0; + case 0x0F32: + return (double) 17.0/2.0; + case 0x16EF: + case 0x2471: + case 0x2485: + case 0x2499: + case 0x24F2: + case 0x16E92: + case 0x1D2F2: + return (double) 18.0; + case 0x16F0: + case 0x2472: + case 0x2486: + case 0x249A: + case 0x24F3: + case 0x16E93: + case 0x1D2F3: + return (double) 19.0; + case 0x0032: + case 0x00B2: + case 0x0662: + case 0x06F2: + case 0x07C2: + case 0x0968: + case 0x09E8: + case 0x0A68: + case 0x0AE8: + case 0x0B68: + case 0x0BE8: + case 0x0C68: + case 0x0C7A: + case 0x0C7D: + case 0x0CE8: + case 0x0D68: + case 0x0DE8: + case 0x0E52: + case 0x0ED2: + case 0x0F22: + case 0x1042: + case 0x1092: + case 0x136A: + case 0x17E2: + case 0x17F2: + case 0x1812: + case 0x1948: + case 0x19D2: + case 0x1A82: + case 0x1A92: + case 0x1B52: + case 0x1BB2: + case 0x1C42: + case 0x1C52: + case 0x2082: + case 0x2161: + case 0x2171: + case 0x2461: + case 0x2475: + case 0x2489: + case 0x24F6: + case 0x2777: + case 0x2781: + case 0x278B: + case 0x3022: + case 0x3193: + case 0x3221: + case 0x3281: + case 0x3483: + case 0x4E8C: + case 0x5169: + case 0x5F0D: + case 0x5F10: + case 0x8CAE: + case 0x8CB3: + case 0x8D30: + case 0xA622: + case 0xA6E7: + case 0xA8D2: + case 0xA902: + case 0xA9D2: + case 0xA9F2: + case 0xAA52: + case 0xABF2: + case 0xF978: + case 0xFF12: + case 0x10108: + case 0x1015B: + case 0x1015C: + case 0x1015D: + case 0x1015E: + case 0x102E2: + case 0x103D2: + case 0x104A2: + case 0x10859: + case 0x1087A: + case 0x108A8: + case 0x1091A: + case 0x109C1: + case 0x10A41: + case 0x10B59: + case 0x10B79: + case 0x10BAA: + case 0x10D32: + case 0x10E61: + case 0x10F1E: + case 0x11053: + case 
0x11068: + case 0x110F2: + case 0x11138: + case 0x111D2: + case 0x111E2: + case 0x112F2: + case 0x11452: + case 0x114D2: + case 0x11652: + case 0x116C2: + case 0x11732: + case 0x118E2: + case 0x11C52: + case 0x11C5B: + case 0x11D52: + case 0x11DA2: + case 0x12400: + case 0x12416: + case 0x1241F: + case 0x12423: + case 0x1242D: + case 0x12435: + case 0x1244A: + case 0x12450: + case 0x12456: + case 0x12459: + case 0x16A62: + case 0x16B52: + case 0x16E82: + case 0x16E95: + case 0x1D2E2: + case 0x1D361: + case 0x1D373: + case 0x1D7D0: + case 0x1D7DA: + case 0x1D7E4: + case 0x1D7EE: + case 0x1D7F8: + case 0x1E8C8: + case 0x1E952: + case 0x1EC72: + case 0x1ECA4: + case 0x1ECB2: + case 0x1F103: + case 0x22390: + return (double) 2.0; + case 0x109F7: + return (double) 2.0/12.0; + case 0x2154: + case 0x10177: + case 0x10E7E: + case 0x1245B: + case 0x1245E: + case 0x12466: + return (double) 2.0/3.0; + case 0x2156: + return (double) 2.0/5.0; + case 0x1373: + case 0x2473: + case 0x2487: + case 0x249B: + case 0x24F4: + case 0x3039: + case 0x3249: + case 0x5344: + case 0x5EFF: + case 0x10111: + case 0x102EB: + case 0x103D4: + case 0x1085C: + case 0x1087F: + case 0x108AE: + case 0x108FE: + case 0x10918: + case 0x109CA: + case 0x10A45: + case 0x10A9F: + case 0x10AEE: + case 0x10B5D: + case 0x10B7D: + case 0x10BAE: + case 0x10E6A: + case 0x10F23: + case 0x10F53: + case 0x1105C: + case 0x111EB: + case 0x1173B: + case 0x118EB: + case 0x11C64: + case 0x1D36A: + case 0x1EC7B: + return (double) 20.0; + case 0x1011A: + case 0x102F4: + case 0x109D3: + case 0x10E73: + case 0x1EC84: + return (double) 200.0; + case 0x10123: + case 0x109DC: + case 0x1EC8D: + return (double) 2000.0; + case 0x1012C: + case 0x109E5: + case 0x1EC96: + return (double) 20000.0; + case 0x109EE: + case 0x1EC9F: + return (double) 200000.0; + case 0x1ECA2: + return (double) 20000000.0; + case 0x3251: + return (double) 21.0; + case 0x12432: + return (double) 216000.0; + case 0x3252: + return (double) 22.0; + case 0x3253: + return (double) 23.0; + case 0x3254: + return (double) 24.0; + case 0x3255: + return (double) 25.0; + case 0x3256: + return (double) 26.0; + case 0x3257: + return (double) 27.0; + case 0x3258: + return (double) 28.0; + case 0x3259: + return (double) 29.0; + case 0x0033: + case 0x00B3: + case 0x0663: + case 0x06F3: + case 0x07C3: + case 0x0969: + case 0x09E9: + case 0x0A69: + case 0x0AE9: + case 0x0B69: + case 0x0BE9: + case 0x0C69: + case 0x0C7B: + case 0x0C7E: + case 0x0CE9: + case 0x0D69: + case 0x0DE9: + case 0x0E53: + case 0x0ED3: + case 0x0F23: + case 0x1043: + case 0x1093: + case 0x136B: + case 0x17E3: + case 0x17F3: + case 0x1813: + case 0x1949: + case 0x19D3: + case 0x1A83: + case 0x1A93: + case 0x1B53: + case 0x1BB3: + case 0x1C43: + case 0x1C53: + case 0x2083: + case 0x2162: + case 0x2172: + case 0x2462: + case 0x2476: + case 0x248A: + case 0x24F7: + case 0x2778: + case 0x2782: + case 0x278C: + case 0x3023: + case 0x3194: + case 0x3222: + case 0x3282: + case 0x4E09: + case 0x4EE8: + case 0x53C1: + case 0x53C2: + case 0x53C3: + case 0x53C4: + case 0x5F0E: + case 0xA623: + case 0xA6E8: + case 0xA8D3: + case 0xA903: + case 0xA9D3: + case 0xA9F3: + case 0xAA53: + case 0xABF3: + case 0xF96B: + case 0xFF13: + case 0x10109: + case 0x102E3: + case 0x104A3: + case 0x1085A: + case 0x1087B: + case 0x108A9: + case 0x1091B: + case 0x109C2: + case 0x10A42: + case 0x10B5A: + case 0x10B7A: + case 0x10BAB: + case 0x10D33: + case 0x10E62: + case 0x10F1F: + case 0x11054: + case 0x11069: + case 0x110F3: + case 0x11139: + case 0x111D3: + 
case 0x111E3: + case 0x112F3: + case 0x11453: + case 0x114D3: + case 0x11653: + case 0x116C3: + case 0x11733: + case 0x118E3: + case 0x11C53: + case 0x11C5C: + case 0x11D53: + case 0x11DA3: + case 0x12401: + case 0x12408: + case 0x12417: + case 0x12420: + case 0x12424: + case 0x12425: + case 0x1242E: + case 0x1242F: + case 0x12436: + case 0x12437: + case 0x1243A: + case 0x1243B: + case 0x1244B: + case 0x12451: + case 0x12457: + case 0x16A63: + case 0x16B53: + case 0x16E83: + case 0x16E96: + case 0x1D2E3: + case 0x1D362: + case 0x1D374: + case 0x1D7D1: + case 0x1D7DB: + case 0x1D7E5: + case 0x1D7EF: + case 0x1D7F9: + case 0x1E8C9: + case 0x1E953: + case 0x1EC73: + case 0x1ECA5: + case 0x1F104: + case 0x20AFD: + case 0x20B19: + case 0x22998: + case 0x23B1B: + return (double) 3.0; + case 0x109F8: + return (double) 3.0/12.0; + case 0x09F6: + case 0x0B77: + case 0x0D78: + case 0xA835: + return (double) 3.0/16.0; + case 0x0F2B: + return (double) 3.0/2.0; + case 0x0D5D: + return (double) 3.0/20.0; + case 0x00BE: + case 0x09F8: + case 0x0B74: + case 0x0D75: + case 0xA832: + case 0x10178: + case 0x1ECAF: + return (double) 3.0/4.0; + case 0x2157: + return (double) 3.0/5.0; + case 0x215C: + return (double) 3.0/8.0; + case 0x0D5A: + return (double) 3.0/80.0; + case 0x1374: + case 0x303A: + case 0x324A: + case 0x325A: + case 0x5345: + case 0x10112: + case 0x10165: + case 0x102EC: + case 0x109CB: + case 0x10E6B: + case 0x10F24: + case 0x1105D: + case 0x111EC: + case 0x118EC: + case 0x11C65: + case 0x1D36B: + case 0x1EC7C: + case 0x20983: + return (double) 30.0; + case 0x1011B: + case 0x1016B: + case 0x102F5: + case 0x109D4: + case 0x10E74: + case 0x1EC85: + return (double) 300.0; + case 0x10124: + case 0x109DD: + case 0x1EC8E: + return (double) 3000.0; + case 0x1012D: + case 0x109E6: + case 0x1EC97: + return (double) 30000.0; + case 0x109EF: + return (double) 300000.0; + case 0x325B: + return (double) 31.0; + case 0x325C: + return (double) 32.0; + case 0x325D: + return (double) 33.0; + case 0x325E: + return (double) 34.0; + case 0x325F: + return (double) 35.0; + case 0x32B1: + return (double) 36.0; + case 0x32B2: + return (double) 37.0; + case 0x32B3: + return (double) 38.0; + case 0x32B4: + return (double) 39.0; + case 0x0034: + case 0x0664: + case 0x06F4: + case 0x07C4: + case 0x096A: + case 0x09EA: + case 0x0A6A: + case 0x0AEA: + case 0x0B6A: + case 0x0BEA: + case 0x0C6A: + case 0x0CEA: + case 0x0D6A: + case 0x0DEA: + case 0x0E54: + case 0x0ED4: + case 0x0F24: + case 0x1044: + case 0x1094: + case 0x136C: + case 0x17E4: + case 0x17F4: + case 0x1814: + case 0x194A: + case 0x19D4: + case 0x1A84: + case 0x1A94: + case 0x1B54: + case 0x1BB4: + case 0x1C44: + case 0x1C54: + case 0x2074: + case 0x2084: + case 0x2163: + case 0x2173: + case 0x2463: + case 0x2477: + case 0x248B: + case 0x24F8: + case 0x2779: + case 0x2783: + case 0x278D: + case 0x3024: + case 0x3195: + case 0x3223: + case 0x3283: + case 0x4E96: + case 0x56DB: + case 0x8086: + case 0xA624: + case 0xA6E9: + case 0xA8D4: + case 0xA904: + case 0xA9D4: + case 0xA9F4: + case 0xAA54: + case 0xABF4: + case 0xFF14: + case 0x1010A: + case 0x102E4: + case 0x104A4: + case 0x1087C: + case 0x108AA: + case 0x108AB: + case 0x109C3: + case 0x10A43: + case 0x10B5B: + case 0x10B7B: + case 0x10BAC: + case 0x10D34: + case 0x10E63: + case 0x10F20: + case 0x11055: + case 0x1106A: + case 0x110F4: + case 0x1113A: + case 0x111D4: + case 0x111E4: + case 0x112F4: + case 0x11454: + case 0x114D4: + case 0x11654: + case 0x116C4: + case 0x11734: + case 0x118E4: + case 
0x11C54: + case 0x11C5D: + case 0x11D54: + case 0x11DA4: + case 0x12402: + case 0x12409: + case 0x1240F: + case 0x12418: + case 0x12421: + case 0x12426: + case 0x12430: + case 0x12438: + case 0x1243C: + case 0x1243D: + case 0x1243E: + case 0x1243F: + case 0x1244C: + case 0x12452: + case 0x12453: + case 0x12469: + case 0x16A64: + case 0x16B54: + case 0x16E84: + case 0x1D2E4: + case 0x1D363: + case 0x1D375: + case 0x1D7D2: + case 0x1D7DC: + case 0x1D7E6: + case 0x1D7F0: + case 0x1D7FA: + case 0x1E8CA: + case 0x1E954: + case 0x1EC74: + case 0x1ECA6: + case 0x1F105: + case 0x20064: + case 0x200E2: + case 0x2626D: + return (double) 4.0; + case 0x109F9: + return (double) 4.0/12.0; + case 0x2158: + return (double) 4.0/5.0; + case 0x1375: + case 0x324B: + case 0x32B5: + case 0x534C: + case 0x10113: + case 0x102ED: + case 0x109CC: + case 0x10E6C: + case 0x1105E: + case 0x111ED: + case 0x118ED: + case 0x11C66: + case 0x12467: + case 0x1D36C: + case 0x1EC7D: + case 0x2098C: + case 0x2099C: + return (double) 40.0; + case 0x1011C: + case 0x102F6: + case 0x109D5: + case 0x10E75: + case 0x1EC86: + return (double) 400.0; + case 0x10125: + case 0x109DE: + case 0x1EC8F: + return (double) 4000.0; + case 0x1012E: + case 0x109E7: + case 0x1EC98: + return (double) 40000.0; + case 0x109F0: + return (double) 400000.0; + case 0x32B6: + return (double) 41.0; + case 0x32B7: + return (double) 42.0; + case 0x32B8: + return (double) 43.0; + case 0x12433: + return (double) 432000.0; + case 0x32B9: + return (double) 44.0; + case 0x32BA: + return (double) 45.0; + case 0x32BB: + return (double) 46.0; + case 0x32BC: + return (double) 47.0; + case 0x32BD: + return (double) 48.0; + case 0x32BE: + return (double) 49.0; + case 0x0035: + case 0x0665: + case 0x06F5: + case 0x07C5: + case 0x096B: + case 0x09EB: + case 0x0A6B: + case 0x0AEB: + case 0x0B6B: + case 0x0BEB: + case 0x0C6B: + case 0x0CEB: + case 0x0D6B: + case 0x0DEB: + case 0x0E55: + case 0x0ED5: + case 0x0F25: + case 0x1045: + case 0x1095: + case 0x136D: + case 0x17E5: + case 0x17F5: + case 0x1815: + case 0x194B: + case 0x19D5: + case 0x1A85: + case 0x1A95: + case 0x1B55: + case 0x1BB5: + case 0x1C45: + case 0x1C55: + case 0x2075: + case 0x2085: + case 0x2164: + case 0x2174: + case 0x2464: + case 0x2478: + case 0x248C: + case 0x24F9: + case 0x277A: + case 0x2784: + case 0x278E: + case 0x3025: + case 0x3224: + case 0x3284: + case 0x3405: + case 0x382A: + case 0x4E94: + case 0x4F0D: + case 0xA625: + case 0xA6EA: + case 0xA8D5: + case 0xA905: + case 0xA9D5: + case 0xA9F5: + case 0xAA55: + case 0xABF5: + case 0xFF15: + case 0x1010B: + case 0x10143: + case 0x10148: + case 0x1014F: + case 0x1015F: + case 0x10173: + case 0x102E5: + case 0x10321: + case 0x104A5: + case 0x1087D: + case 0x108AC: + case 0x108FC: + case 0x109C4: + case 0x10AEC: + case 0x10CFB: + case 0x10D35: + case 0x10E64: + case 0x10F21: + case 0x11056: + case 0x1106B: + case 0x110F5: + case 0x1113B: + case 0x111D5: + case 0x111E5: + case 0x112F5: + case 0x11455: + case 0x114D5: + case 0x11655: + case 0x116C5: + case 0x11735: + case 0x118E5: + case 0x11C55: + case 0x11C5E: + case 0x11D55: + case 0x11DA5: + case 0x12403: + case 0x1240A: + case 0x12410: + case 0x12419: + case 0x12422: + case 0x12427: + case 0x12431: + case 0x12439: + case 0x1244D: + case 0x12454: + case 0x12455: + case 0x1246A: + case 0x16A65: + case 0x16B55: + case 0x16E85: + case 0x1D2E5: + case 0x1D364: + case 0x1D376: + case 0x1D378: + case 0x1D7D3: + case 0x1D7DD: + case 0x1D7E7: + case 0x1D7F1: + case 0x1D7FB: + case 0x1E8CB: + case 
0x1E955: + case 0x1EC75: + case 0x1ECA7: + case 0x1F106: + case 0x20121: + return (double) 5.0; + case 0x109FA: + return (double) 5.0/12.0; + case 0x0F2C: + return (double) 5.0/2.0; + case 0x215A: + case 0x1245C: + return (double) 5.0/6.0; + case 0x215D: + return (double) 5.0/8.0; + case 0x1376: + case 0x216C: + case 0x217C: + case 0x2186: + case 0x324C: + case 0x32BF: + case 0x10114: + case 0x10144: + case 0x1014A: + case 0x10151: + case 0x10166: + case 0x10167: + case 0x10168: + case 0x10169: + case 0x10174: + case 0x102EE: + case 0x10323: + case 0x109CD: + case 0x10A7E: + case 0x10CFD: + case 0x10E6D: + case 0x1105F: + case 0x111EE: + case 0x118EE: + case 0x11C67: + case 0x12468: + case 0x1D36D: + case 0x1EC7E: + return (double) 50.0; + case 0x216E: + case 0x217E: + case 0x1011D: + case 0x10145: + case 0x1014C: + case 0x10153: + case 0x1016C: + case 0x1016D: + case 0x1016E: + case 0x1016F: + case 0x10170: + case 0x102F7: + case 0x109D6: + case 0x10E76: + case 0x1EC87: + return (double) 500.0; + case 0x2181: + case 0x10126: + case 0x10146: + case 0x1014E: + case 0x10172: + case 0x109DF: + case 0x1EC90: + return (double) 5000.0; + case 0x2187: + case 0x1012F: + case 0x10147: + case 0x10156: + case 0x109E8: + case 0x1EC99: + return (double) 50000.0; + case 0x109F1: + return (double) 500000.0; + case 0x0036: + case 0x0666: + case 0x06F6: + case 0x07C6: + case 0x096C: + case 0x09EC: + case 0x0A6C: + case 0x0AEC: + case 0x0B6C: + case 0x0BEC: + case 0x0C6C: + case 0x0CEC: + case 0x0D6C: + case 0x0DEC: + case 0x0E56: + case 0x0ED6: + case 0x0F26: + case 0x1046: + case 0x1096: + case 0x136E: + case 0x17E6: + case 0x17F6: + case 0x1816: + case 0x194C: + case 0x19D6: + case 0x1A86: + case 0x1A96: + case 0x1B56: + case 0x1BB6: + case 0x1C46: + case 0x1C56: + case 0x2076: + case 0x2086: + case 0x2165: + case 0x2175: + case 0x2185: + case 0x2465: + case 0x2479: + case 0x248D: + case 0x24FA: + case 0x277B: + case 0x2785: + case 0x278F: + case 0x3026: + case 0x3225: + case 0x3285: + case 0x516D: + case 0x9646: + case 0x9678: + case 0xA626: + case 0xA6EB: + case 0xA8D6: + case 0xA906: + case 0xA9D6: + case 0xA9F6: + case 0xAA56: + case 0xABF6: + case 0xF9D1: + case 0xF9D3: + case 0xFF16: + case 0x1010C: + case 0x102E6: + case 0x104A6: + case 0x109C5: + case 0x10D36: + case 0x10E65: + case 0x11057: + case 0x1106C: + case 0x110F6: + case 0x1113C: + case 0x111D6: + case 0x111E6: + case 0x112F6: + case 0x11456: + case 0x114D6: + case 0x11656: + case 0x116C6: + case 0x11736: + case 0x118E6: + case 0x11C56: + case 0x11C5F: + case 0x11D56: + case 0x11DA6: + case 0x12404: + case 0x1240B: + case 0x12411: + case 0x1241A: + case 0x12428: + case 0x12440: + case 0x1244E: + case 0x1246B: + case 0x16A66: + case 0x16B56: + case 0x16E86: + case 0x1D2E6: + case 0x1D365: + case 0x1D7D4: + case 0x1D7DE: + case 0x1D7E8: + case 0x1D7F2: + case 0x1D7FC: + case 0x1E8CC: + case 0x1E956: + case 0x1EC76: + case 0x1ECA8: + case 0x1F107: + case 0x20AEA: + return (double) 6.0; + case 0x109FB: + return (double) 6.0/12.0; + case 0x1377: + case 0x324D: + case 0x10115: + case 0x102EF: + case 0x109CE: + case 0x10E6E: + case 0x11060: + case 0x111EF: + case 0x118EF: + case 0x11C68: + case 0x1D36E: + case 0x1EC7F: + return (double) 60.0; + case 0x1011E: + case 0x102F8: + case 0x109D7: + case 0x10E77: + case 0x1EC88: + return (double) 600.0; + case 0x10127: + case 0x109E0: + case 0x1EC91: + return (double) 6000.0; + case 0x10130: + case 0x109E9: + case 0x1EC9A: + return (double) 60000.0; + case 0x109F2: + return (double) 600000.0; + case 
0x0037: + case 0x0667: + case 0x06F7: + case 0x07C7: + case 0x096D: + case 0x09ED: + case 0x0A6D: + case 0x0AED: + case 0x0B6D: + case 0x0BED: + case 0x0C6D: + case 0x0CED: + case 0x0D6D: + case 0x0DED: + case 0x0E57: + case 0x0ED7: + case 0x0F27: + case 0x1047: + case 0x1097: + case 0x136F: + case 0x17E7: + case 0x17F7: + case 0x1817: + case 0x194D: + case 0x19D7: + case 0x1A87: + case 0x1A97: + case 0x1B57: + case 0x1BB7: + case 0x1C47: + case 0x1C57: + case 0x2077: + case 0x2087: + case 0x2166: + case 0x2176: + case 0x2466: + case 0x247A: + case 0x248E: + case 0x24FB: + case 0x277C: + case 0x2786: + case 0x2790: + case 0x3027: + case 0x3226: + case 0x3286: + case 0x3B4D: + case 0x4E03: + case 0x67D2: + case 0x6F06: + case 0xA627: + case 0xA6EC: + case 0xA8D7: + case 0xA907: + case 0xA9D7: + case 0xA9F7: + case 0xAA57: + case 0xABF7: + case 0xFF17: + case 0x1010D: + case 0x102E7: + case 0x104A7: + case 0x109C6: + case 0x10D37: + case 0x10E66: + case 0x11058: + case 0x1106D: + case 0x110F7: + case 0x1113D: + case 0x111D7: + case 0x111E7: + case 0x112F7: + case 0x11457: + case 0x114D7: + case 0x11657: + case 0x116C7: + case 0x11737: + case 0x118E7: + case 0x11C57: + case 0x11C60: + case 0x11D57: + case 0x11DA7: + case 0x12405: + case 0x1240C: + case 0x12412: + case 0x1241B: + case 0x12429: + case 0x12441: + case 0x12442: + case 0x12443: + case 0x1246C: + case 0x16A67: + case 0x16B57: + case 0x16E87: + case 0x1D2E7: + case 0x1D366: + case 0x1D7D5: + case 0x1D7DF: + case 0x1D7E9: + case 0x1D7F3: + case 0x1D7FD: + case 0x1E8CD: + case 0x1E957: + case 0x1EC77: + case 0x1ECA9: + case 0x1F108: + case 0x20001: + return (double) 7.0; + case 0x109FC: + return (double) 7.0/12.0; + case 0x0F2D: + return (double) 7.0/2.0; + case 0x215E: + return (double) 7.0/8.0; + case 0x1378: + case 0x324E: + case 0x10116: + case 0x102F0: + case 0x109CF: + case 0x10E6F: + case 0x11061: + case 0x111F0: + case 0x118F0: + case 0x11C69: + case 0x1D36F: + case 0x1EC80: + return (double) 70.0; + case 0x1011F: + case 0x102F9: + case 0x109D8: + case 0x10E78: + case 0x1EC89: + return (double) 700.0; + case 0x10128: + case 0x109E1: + case 0x1EC92: + return (double) 7000.0; + case 0x10131: + case 0x109EA: + case 0x1EC9B: + return (double) 70000.0; + case 0x109F3: + return (double) 700000.0; + case 0x0038: + case 0x0668: + case 0x06F8: + case 0x07C8: + case 0x096E: + case 0x09EE: + case 0x0A6E: + case 0x0AEE: + case 0x0B6E: + case 0x0BEE: + case 0x0C6E: + case 0x0CEE: + case 0x0D6E: + case 0x0DEE: + case 0x0E58: + case 0x0ED8: + case 0x0F28: + case 0x1048: + case 0x1098: + case 0x1370: + case 0x17E8: + case 0x17F8: + case 0x1818: + case 0x194E: + case 0x19D8: + case 0x1A88: + case 0x1A98: + case 0x1B58: + case 0x1BB8: + case 0x1C48: + case 0x1C58: + case 0x2078: + case 0x2088: + case 0x2167: + case 0x2177: + case 0x2467: + case 0x247B: + case 0x248F: + case 0x24FC: + case 0x277D: + case 0x2787: + case 0x2791: + case 0x3028: + case 0x3227: + case 0x3287: + case 0x516B: + case 0x634C: + case 0xA628: + case 0xA6ED: + case 0xA8D8: + case 0xA908: + case 0xA9D8: + case 0xA9F8: + case 0xAA58: + case 0xABF8: + case 0xFF18: + case 0x1010E: + case 0x102E8: + case 0x104A8: + case 0x109C7: + case 0x10D38: + case 0x10E67: + case 0x11059: + case 0x1106E: + case 0x110F8: + case 0x1113E: + case 0x111D8: + case 0x111E8: + case 0x112F8: + case 0x11458: + case 0x114D8: + case 0x11658: + case 0x116C8: + case 0x11738: + case 0x118E8: + case 0x11C58: + case 0x11C61: + case 0x11D58: + case 0x11DA8: + case 0x12406: + case 0x1240D: + case 0x12413: + 
case 0x1241C: + case 0x1242A: + case 0x12444: + case 0x12445: + case 0x1246D: + case 0x16A68: + case 0x16B58: + case 0x16E88: + case 0x1D2E8: + case 0x1D367: + case 0x1D7D6: + case 0x1D7E0: + case 0x1D7EA: + case 0x1D7F4: + case 0x1D7FE: + case 0x1E8CE: + case 0x1E958: + case 0x1EC78: + case 0x1ECAA: + case 0x1F109: + return (double) 8.0; + case 0x109FD: + return (double) 8.0/12.0; + case 0x1379: + case 0x324F: + case 0x10117: + case 0x102F1: + case 0x10E70: + case 0x11062: + case 0x111F1: + case 0x118F1: + case 0x11C6A: + case 0x1D370: + case 0x1EC81: + return (double) 80.0; + case 0x10120: + case 0x102FA: + case 0x109D9: + case 0x10E79: + case 0x1EC8A: + return (double) 800.0; + case 0x10129: + case 0x109E2: + case 0x1EC93: + return (double) 8000.0; + case 0x10132: + case 0x109EB: + case 0x1EC9C: + return (double) 80000.0; + case 0x109F4: + return (double) 800000.0; + case 0x0039: + case 0x0669: + case 0x06F9: + case 0x07C9: + case 0x096F: + case 0x09EF: + case 0x0A6F: + case 0x0AEF: + case 0x0B6F: + case 0x0BEF: + case 0x0C6F: + case 0x0CEF: + case 0x0D6F: + case 0x0DEF: + case 0x0E59: + case 0x0ED9: + case 0x0F29: + case 0x1049: + case 0x1099: + case 0x1371: + case 0x17E9: + case 0x17F9: + case 0x1819: + case 0x194F: + case 0x19D9: + case 0x1A89: + case 0x1A99: + case 0x1B59: + case 0x1BB9: + case 0x1C49: + case 0x1C59: + case 0x2079: + case 0x2089: + case 0x2168: + case 0x2178: + case 0x2468: + case 0x247C: + case 0x2490: + case 0x24FD: + case 0x277E: + case 0x2788: + case 0x2792: + case 0x3029: + case 0x3228: + case 0x3288: + case 0x4E5D: + case 0x5EFE: + case 0x7396: + case 0xA629: + case 0xA6EE: + case 0xA8D9: + case 0xA909: + case 0xA9D9: + case 0xA9F9: + case 0xAA59: + case 0xABF9: + case 0xFF19: + case 0x1010F: + case 0x102E9: + case 0x104A9: + case 0x109C8: + case 0x10D39: + case 0x10E68: + case 0x1105A: + case 0x1106F: + case 0x110F9: + case 0x1113F: + case 0x111D9: + case 0x111E9: + case 0x112F9: + case 0x11459: + case 0x114D9: + case 0x11659: + case 0x116C9: + case 0x11739: + case 0x118E9: + case 0x11C59: + case 0x11C62: + case 0x11D59: + case 0x11DA9: + case 0x12407: + case 0x1240E: + case 0x12414: + case 0x1241D: + case 0x1242B: + case 0x12446: + case 0x12447: + case 0x12448: + case 0x12449: + case 0x1246E: + case 0x16A69: + case 0x16B59: + case 0x16E89: + case 0x1D2E9: + case 0x1D368: + case 0x1D7D7: + case 0x1D7E1: + case 0x1D7EB: + case 0x1D7F5: + case 0x1D7FF: + case 0x1E8CF: + case 0x1E959: + case 0x1EC79: + case 0x1ECAB: + case 0x1F10A: + case 0x2F890: + return (double) 9.0; + case 0x109FE: + return (double) 9.0/12.0; + case 0x0F2E: + return (double) 9.0/2.0; + case 0x137A: + case 0x10118: + case 0x102F2: + case 0x10341: + case 0x10E71: + case 0x11063: + case 0x111F2: + case 0x118F2: + case 0x11C6B: + case 0x1D371: + case 0x1EC82: + return (double) 90.0; + case 0x10121: + case 0x102FB: + case 0x1034A: + case 0x109DA: + case 0x10E7A: + case 0x1EC8B: + return (double) 900.0; + case 0x1012A: + case 0x109E3: + case 0x1EC94: + return (double) 9000.0; + case 0x10133: + case 0x109EC: + case 0x1EC9D: + return (double) 90000.0; + case 0x109F5: + return (double) 900000.0; + } + return -1.0; +} + +/* Returns 1 for Unicode characters having the bidirectional + * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. 
+ */
+int numba_PyUnicode_IsWhitespace(const Py_UCS4 ch)
+{
+    switch (ch) {
+    case 0x0009:
+    case 0x000A:
+    case 0x000B:
+    case 0x000C:
+    case 0x000D:
+    case 0x001C:
+    case 0x001D:
+    case 0x001E:
+    case 0x001F:
+    case 0x0020:
+    case 0x0085:
+    case 0x00A0:
+    case 0x1680:
+    case 0x2000:
+    case 0x2001:
+    case 0x2002:
+    case 0x2003:
+    case 0x2004:
+    case 0x2005:
+    case 0x2006:
+    case 0x2007:
+    case 0x2008:
+    case 0x2009:
+    case 0x200A:
+    case 0x2028:
+    case 0x2029:
+    case 0x202F:
+    case 0x205F:
+    case 0x3000:
+        return 1;
+    }
+    return 0;
+}
+
+/* Returns 1 for Unicode characters having the line break
+ * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
+ * type 'B', 0 otherwise.
+ */
+int numba_PyUnicode_IsLinebreak(const Py_UCS4 ch)
+{
+    switch (ch) {
+    case 0x000A:
+    case 0x000B:
+    case 0x000C:
+    case 0x000D:
+    case 0x001C:
+    case 0x001D:
+    case 0x001E:
+    case 0x0085:
+    case 0x2028:
+    case 0x2029:
+        return 1;
+    }
+    return 0;
+}
+
+#endif /* _UNICODETYPE_DB_H */
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_version.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_version.py
new file mode 100644
index 000000000..06d328843
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/_version.py
@@ -0,0 +1,238 @@
+# This file helps to compute a version number in source trees obtained from
+# a git-archive tarball (such as those provided by GitHub's download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.14 (https://github.com/warner/python-versioneer)
+
+import errno
+import os
+import re
+import subprocess
+import sys
+
+# these strings will be replaced by git during git-archive
+git_refnames = "$Format:%d$"
+git_full = "$Format:%H$"
+
+# these strings are filled in when 'setup.py versioneer' creates _version.py
+tag_prefix = ""
+parentdir_prefix = "numba-"
+versionfile_source = "numba/_version.py"
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
+    assert isinstance(commands, list)
+    p = None
+    for c in commands:
+        try:
+            # remember shell=False, so use git.cmd on windows, not just git
+            p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE,
+                                 stderr=(subprocess.PIPE if hide_stderr
+                                         else None))
+            break
+        except EnvironmentError:
+            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % args[0])
+                print(e)
+            return None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None
+    stdout = p.communicate()[0].strip()
+    if sys.version_info[0] >= 3:
+        stdout = stdout.decode()
+    if p.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % args[0])
+        return None
+    return stdout
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose=False):
+    # Source tarballs conventionally unpack into a directory that includes
+    # both the project name and a version string.
+    dirname = os.path.basename(root)
+    if not dirname.startswith(parentdir_prefix):
+        if verbose:
+            print("guessing rootdir is '%s', but '%s' doesn't start with "
+                  "prefix '%s'" % (root, dirname, parentdir_prefix))
+        return None
+    return {"version": dirname[len(parentdir_prefix):], "full": ""}
+
+
+def git_get_keywords(versionfile_abs):
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
+    try:
+        f = open(versionfile_abs, "r")
+        for line in f.readlines():
+            if line.strip().startswith("git_refnames ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["refnames"] = mo.group(1)
+            if line.strip().startswith("git_full ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["full"] = mo.group(1)
+        f.close()
+    except EnvironmentError:
+        pass
+    return keywords
+
+
+def git_versions_from_keywords(keywords, tag_prefix, verbose=False):
+    if not keywords:
+        return {}  # keyword-finding function failed to find keywords
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        return {}  # unexpanded, so not in an unpacked git-archive tarball
+    refs = set([r.strip() for r in refnames.strip("()").split(",")])
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = set([r for r in refs if re.search(r'\d', r)])
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs-tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full": keywords["full"].strip()}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full": keywords["full"].strip()}
+
+
+def git_parse_vcs_describe(git_describe, tag_prefix, verbose=False):
+    # TAG-NUM-gHEX[-dirty] or HEX[-dirty] . TAG might have hyphens.
+
+    # dirty
+    dirty = git_describe.endswith("-dirty")
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    dirty_suffix = ".dirty" if dirty else ""
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" not in git_describe:  # just HEX
+        return "0+untagged.g"+git_describe+dirty_suffix, dirty
+
+    # just TAG-NUM-gHEX
+    mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+    if not mo:
+        # unparseable. Maybe git-describe is misbehaving?
+        return "0+unparseable"+dirty_suffix, dirty
+
+    # tag
+    full_tag = mo.group(1)
+    if not full_tag.startswith(tag_prefix):
+        if verbose:
+            fmt = "tag '%s' doesn't start with prefix '%s'"
+            print(fmt % (full_tag, tag_prefix))
+        return None, dirty
+    tag = full_tag[len(tag_prefix):]
+
+    # distance: number of commits since tag
+    distance = int(mo.group(2))
+
+    # commit: short hex revision ID
+    commit = mo.group(3)
+
+    # now build up version string, with post-release "local version
+    # identifier". Our goal: TAG[+NUM.gHEX[.dirty]] . Note that if you get a
+    # tagged build and then dirty it, you'll get TAG+0.gHEX.dirty . So you
So you + # can always test version.endswith(".dirty"). + version = tag + if distance or dirty: + version += "+%d.g%s" % (distance, commit) + dirty_suffix + + return version, dirty + + +def git_versions_from_vcs(tag_prefix, root, verbose=False): + # this runs 'git' from the root of the source tree. This only gets called + # if the git-archive 'subst' keywords were *not* expanded, and + # _version.py hasn't already been rewritten with a short version string, + # meaning we're inside a checked out source tree. + + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print("no .git in %s" % root) + return {} # get_versions() will try next method + + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + stdout = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long"], + cwd=root) + # --long was added in git-1.5.5 + if stdout is None: + return {} # try next method + version, dirty = git_parse_vcs_describe(stdout, tag_prefix, verbose) + + # build "full", which is FULLHEX[.dirty] + stdout = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if stdout is None: + return {} + full = stdout.strip() + if dirty: + full += ".dirty" + + return {"version": version, "full": full} + + +def get_versions(default={"version": "0+unknown", "full": ""}, verbose=False): + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + keywords = {"refnames": git_refnames, "full": git_full} + ver = git_versions_from_keywords(keywords, tag_prefix, verbose) + if ver: + return ver + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
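+        # Illustrative note for this copy (not from upstream versioneer):
+        # with a versionfile_source such as "numba/_version.py", the loop
+        # below applies os.path.dirname() once per path component, peeling
+        # off "_version.py" and then "numba" to reach the source root.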
+        for i in versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        return default
+
+    return (git_versions_from_vcs(tag_prefix, root, verbose)
+            or versions_from_parentdir(parentdir_prefix, root, verbose)
+            or default)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/capsulethunk.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/capsulethunk.h
new file mode 100644
index 000000000..4bdf5b41f
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/capsulethunk.h
@@ -0,0 +1,108 @@
+/**
+
+   This is a modified version of capsulethunk.h for use in llvmpy
+
+**/
+
+#ifndef __CAPSULETHUNK_H
+#define __CAPSULETHUNK_H
+
+#if (    (PY_VERSION_HEX < 0x02070000) \
+     || ((PY_VERSION_HEX >= 0x03000000) \
+      && (PY_VERSION_HEX < 0x03010000)) )
+
+//#define Assert(X) do_assert(!!(X), #X, __FILE__, __LINE__)
+#define Assert(X)
+
+static
+void do_assert(int cond, const char * msg, const char *file, unsigned line){
+    if (!cond) {
+        fprintf(stderr, "Assertion failed %s:%d\n%s\n", file, line, msg);
+        exit(1);
+    }
+}
+
+typedef void (*PyCapsule_Destructor)(PyObject *);
+
+struct FakePyCapsule_Desc {
+    const char *name;
+    void *context;
+    PyCapsule_Destructor dtor;
+    PyObject *parent;
+
+    FakePyCapsule_Desc() : name(0), context(0), dtor(0) {}
+};
+
+static
+FakePyCapsule_Desc* get_pycobj_desc(PyObject *p){
+    void *desc = ((PyCObject*)p)->desc;
+    Assert(desc && "No desc in PyCObject");
+    return static_cast<FakePyCapsule_Desc*>(desc);
+}
+
+static
+void pycobject_pycapsule_dtor(void *p, void *desc){
+    Assert(desc);
+    Assert(p);
+    FakePyCapsule_Desc *fpc_desc = static_cast<FakePyCapsule_Desc*>(desc);
+    Assert(fpc_desc->parent);
+    Assert(PyCObject_Check(fpc_desc->parent));
+    fpc_desc->dtor(static_cast<PyObject*>(fpc_desc->parent));
+    delete fpc_desc;
+}
+
+static
+PyObject* PyCapsule_New(void* ptr, const char *name, PyCapsule_Destructor dtor)
+{
+    FakePyCapsule_Desc *desc = new FakePyCapsule_Desc;
+    desc->name = name;
+    desc->context = NULL;
+    desc->dtor = dtor;
+    PyObject *p = PyCObject_FromVoidPtrAndDesc(ptr, desc,
+                                               pycobject_pycapsule_dtor);
+    desc->parent = p;
+    return p;
+}
+
+static
+int PyCapsule_CheckExact(PyObject *p)
+{
+    return PyCObject_Check(p);
+}
+
+static
+void* PyCapsule_GetPointer(PyObject *p, const char *name)
+{
+    Assert(PyCapsule_CheckExact(p));
+    if (strcmp(get_pycobj_desc(p)->name, name) != 0) {
+        PyErr_SetString(PyExc_ValueError, "Invalid PyCapsule object");
+    }
+    return PyCObject_AsVoidPtr(p);
+}
+
+static
+void* PyCapsule_GetContext(PyObject *p)
+{
+    Assert(p);
+    Assert(PyCapsule_CheckExact(p));
+    return get_pycobj_desc(p)->context;
+}
+
+static
+int PyCapsule_SetContext(PyObject *p, void *context)
+{
+    Assert(PyCapsule_CheckExact(p));
+    get_pycobj_desc(p)->context = context;
+    return 0;
+}
+
+static
+const char * PyCapsule_GetName(PyObject *p)
+{
+//    Assert(PyCapsule_CheckExact(p));
+    return get_pycobj_desc(p)->name;
+}
+
+#endif /* #if PY_VERSION_HEX < 0x02070000 */
+
+#endif /* __CAPSULETHUNK_H */
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/__init__.py
new file mode 100644
index 000000000..0ff54a4d6
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/__init__.py
@@ -0,0 +1,23 @@
+"""
+Utilities for getting information about Numba C extensions
+"""
+
+import os
+
+
+def get_extension_libs():
+    """Return the .c files in the `numba.cext` directory.
+ """ + libs = [] + base = get_path() + for fn in os.listdir(base): + if fn.endswith('.c'): + fn = os.path.join(base, fn) + libs.append(fn) + return libs + + +def get_path(): + """Returns the path to the directory for `numba.cext`. + """ + return os.path.abspath(os.path.join(os.path.dirname(__file__))) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/cext.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/cext.h new file mode 100644 index 000000000..88188dc85 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/cext.h @@ -0,0 +1,18 @@ +#ifndef NUMBA_EXTENSION_HELPER_H_ +#define NUMBA_EXTENSION_HELPER_H_ + +#include "Python.h" +#include "../_numba_common.h" + +/* Define all runtime-required symbols in this C module, but do not + export them outside the shared library if possible. */ +#define NUMBA_EXPORT_FUNC(_rettype) VISIBILITY_HIDDEN _rettype +#define NUMBA_EXPORT_DATA(_vartype) VISIBILITY_HIDDEN _vartype + +NUMBA_EXPORT_FUNC(Py_ssize_t) +aligned_size(Py_ssize_t sz); + +#include "dictobject.h" +#include "listobject.h" + +#endif // end NUMBA_EXTENSION_HELPER_H_ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.c new file mode 100644 index 000000000..37db7593b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.c @@ -0,0 +1,1191 @@ +/* The following is adapted from CPython3.7. +The exact commit is: + +- https://github.com/python/cpython/blob/44467e8ea4cea390b0718702291b4cfe8ddd67ed/Objects/dictobject.c + + +*/ + +/* Dictionary object implementation using a hash table */ + +/* The distribution includes a separate file, Objects/dictnotes.txt, + describing explorations into dictionary design and optimization. + It covers typical dictionary use patterns, the parameters for + tuning dictionaries, and several ideas for possible optimizations. +*/ + +/* PyDictKeysObject + +This implements the dictionary's hashtable. + +As of Python 3.6, this is compact and ordered. Basic idea is described here: +* https://mail.python.org/pipermail/python-dev/2012-December/123028.html +* https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html + +layout: + ++---------------+ +| dk_refcnt | +| dk_size | +| dk_lookup | +| dk_usable | +| dk_nentries | ++---------------+ +| dk_indices | +| | ++---------------+ +| dk_entries | +| | ++---------------+ + +dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1) +or DKIX_DUMMY(-2). +Size of indices is dk_size. Type of each index in indices is vary on dk_size: + +* int8 for dk_size <= 128 +* int16 for 256 <= dk_size <= 2**15 +* int32 for 2**16 <= dk_size <= 2**31 +* int64 for 2**32 <= dk_size + +dk_entries is array of PyDictKeyEntry. It's size is USABLE_FRACTION(dk_size). +DK_ENTRIES(dk) can be used to get pointer to entries. + +NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of +dk_indices entry is signed integer and int16 is used for table which +dk_size == 256. +*/ + + +/* +The DictObject can be in one of two forms. + +Either: + A combined table: + ma_values == NULL, dk_refcnt == 1. + Values are stored in the me_value field of the PyDictKeysObject. +Or: + + (Numba dev notes: split table logic is removed) + + A split table: + ma_values != NULL, dk_refcnt >= 1 + Values are stored in the ma_values array. + Only string (unicode) keys are allowed. + All dicts sharing same key must have same insertion order. 
+ +There are four kinds of slots in the table (slot is index, and +DK_ENTRIES(keys)[index] if index >= 0): + +1. Unused. index == DKIX_EMPTY + Does not hold an active (key, value) pair now and never did. Unused can + transition to Active upon key insertion. This is each slot's initial state. + +2. Active. index >= 0, me_key != NULL and me_value != NULL + Holds an active (key, value) pair. Active can transition to Dummy or + Pending upon key deletion (for combined and split tables respectively). + This is the only case in which me_value != NULL. + +3. Dummy. index == DKIX_DUMMY (combined only) + Previously held an active (key, value) pair, but that was deleted and an + active pair has not yet overwritten the slot. Dummy can transition to + Active upon key insertion. Dummy slots cannot be made Unused again + else the probe sequence in case of collision would have no way to know + they were once active. + +4. Pending. index >= 0, key != NULL, and value == NULL (split only) + Not yet inserted in split-table. +*/ + +/* +Preserving insertion order + +It's simple for combined table. Since dk_entries is mostly append only, we can +get insertion order by just iterating dk_entries. + +One exception is .popitem(). It removes last item in dk_entries and decrement +dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in +dk_indices, we can't increment dk_usable even though dk_nentries is +decremented. + +In split table, inserting into pending entry is allowed only for dk_entries[ix] +where ix == mp->ma_used. Inserting into other index and deleting item cause +converting the dict to the combined table. +*/ + + +/* D_MINSIZE (adapted from PyDict_MINSIZE) + * is the starting size for any new dict. + * 8 allows dicts with no more than 5 active entries; experiments suggested + * this suffices for the majority of dicts (consisting mostly of usually-small + * dicts created to pass keyword arguments). + * Making this 8, rather than 4 reduces the number of resizes for most + * dictionaries, without any significant extra memory use. + */ +#define D_MINSIZE 8 + +#include "dictobject.h" + + +#if defined(_MSC_VER) +# if _MSC_VER <= 1900 /* Visual Studio 2014 */ + typedef __int8 int8_t; + typedef __int16 int16_t; + typedef __int32 int32_t; + typedef __int64 int64_t; +# endif + /* Use _alloca() to dynamically allocate on the stack on MSVC */ + #define STACK_ALLOC(Type, Name, Size) Type * const Name = _alloca(Size); +#else + #define STACK_ALLOC(Type, Name, Size) Type Name[Size]; +#endif + + +/*[clinic input] +class dict "PyDictObject *" "&PyDict_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/ + + +/* +To ensure the lookup algorithm terminates, there must be at least one Unused +slot (NULL key) in the table. +To avoid slowing down lookups on a near-full table, we resize the table when +it's USABLE_FRACTION (currently two-thirds) full. +*/ + +#define PERTURB_SHIFT 5 + +/* +Major subtleties ahead: Most hash schemes depend on having a "good" hash +function, in the sense of simulating randomness. Python doesn't: its most +important hash functions (for ints) are very regular in common +cases: + + >>>[hash(i) for i in range(4)] + [0, 1, 2, 3] + +This isn't necessarily bad! To the contrary, in a table of size 2**i, taking +the low-order i bits as the initial table index is extremely fast, and there +are no collisions at all for dicts indexed by a contiguous range of ints. 
So +this gives better-than-random behavior in common cases, and that's very +desirable. + +OTOH, when collisions occur, the tendency to fill contiguous slices of the +hash table makes a good collision resolution strategy crucial. Taking only +the last i bits of the hash code is also vulnerable: for example, consider +the list [i << 16 for i in range(20000)] as a set of keys. Since ints are +their own hash codes, and this fits in a dict of size 2**15, the last 15 bits + of every hash code are all 0: they *all* map to the same table index. + +But catering to unusual cases should not slow the usual ones, so we just take +the last i bits anyway. It's up to collision resolution to do the rest. If +we *usually* find the key we're looking for on the first try (and, it turns +out, we usually do -- the table load factor is kept under 2/3, so the odds +are solidly in our favor), then it makes best sense to keep the initial index +computation dirt cheap. + +The first half of collision resolution is to visit table indices via this +recurrence: + + j = ((5*j) + 1) mod 2**i + +For any initial j in range(2**i), repeating that 2**i times generates each +int in range(2**i) exactly once (see any text on random-number generation for +proof). By itself, this doesn't help much: like linear probing (setting +j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed +order. This would be bad, except that's not the only thing we do, and it's +actually *good* in the common cases where hash keys are consecutive. In an +example that's really too small to make this entirely clear, for a table of +size 2**3 the order of indices is: + + 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating] + +If two things come in at index 5, the first place we look after is index 2, +not 6, so if another comes in at index 6 the collision at 5 didn't hurt it. +Linear probing is deadly in this case because there the fixed probe order +is the *same* as the order consecutive keys are likely to arrive. But it's +extremely unlikely hash codes will follow a 5*j+1 recurrence by accident, +and certain that consecutive hash codes do not. + +The other half of the strategy is to get the other bits of the hash code +into play. This is done by initializing a (unsigned) vrbl "perturb" to the +full hash code, and changing the recurrence to: + + perturb >>= PERTURB_SHIFT; + j = (5*j) + 1 + perturb; + use j % 2**i as the next table index; + +Now the probe sequence depends (eventually) on every bit in the hash code, +and the pseudo-scrambling property of recurring on 5*j+1 is more valuable, +because it quickly magnifies small differences in the bits that didn't affect +the initial index. Note that because perturb is unsigned, if the recurrence +is executed often enough perturb eventually becomes and remains 0. At that +point (very rarely reached) the recurrence is on (just) 5*j+1 again, and +that's certain to find an empty slot eventually (since it generates every int +in range(2**i), and we make sure there's always at least one empty slot). + +Selecting a good value for PERTURB_SHIFT is a balancing act. You want it +small so that the high bits of the hash code continue to affect the probe +sequence across iterations; but you want it large so that in really bad cases +the high-order hash bits have an effect on early iterations. 5 was "the +best" in minimizing total collisions across experiments Tim Peters ran (on +both normal and pathological cases), but 4 and 6 weren't significantly worse. 
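+
+A small worked trace (added for this port, not from CPython): for a
+table of size 2**3 == 8 (mask == 7) and hash == 23130, the recurrence
+
+    perturb = hash; j = hash & mask;
+    perturb >>= PERTURB_SHIFT; j = (5*j + 1 + perturb) & mask;
+
+probes slots 2, 5, 0, 1, 6, ...; once perturb decays to zero, the plain
+5*j+1 recurrence takes over and is guaranteed to visit every slot.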
+ +Historical: Reimer Behrends contributed the idea of using a polynomial-based +approach, using repeated multiplication by x in GF(2**n) where an irreducible +polynomial for each table size was chosen such that x was a primitive root. +Christian Tismer later extended that to use division by x instead, as an +efficient way to get the high bits of the hash code into play. This scheme +also gave excellent collision statistics, but was more expensive: two +if-tests were required inside the loop; computing "the next" index took about +the same number of operations but without as much potential parallelism +(e.g., computing 5*j can go on at the same time as computing 1+perturb in the +above, and then shifting perturb can be done while the table index is being +masked); and the PyDictObject struct required a member to hold the table's +polynomial. In Tim's experiments the current scheme ran faster, produced +equally good collision statistics, needed less code & used less memory. + +*/ + +#define DKIX_EMPTY (-1) +#define DKIX_DUMMY (-2) /* Used internally */ +#define DKIX_ERROR (-3) + +typedef enum { + OK = 0, + OK_REPLACED = 1, + ERR_NO_MEMORY = -1, + ERR_DICT_MUTATED = -2, + ERR_ITER_EXHAUSTED = -3, + ERR_DICT_EMPTY = -4, + ERR_CMP_FAILED = -5, +} Status; + + +#ifndef NDEBUG +static +int mem_cmp_zeros(void *obj, size_t n){ + int diff = 0; + char *mem = obj; + char *it; + for (it = mem; it < mem + n; ++it) { + if (*it != 0) diff += 1; + } + return diff; +} +#endif + +#define D_MASK(dk) ((dk)->size-1) +#define D_GROWTH_RATE(d) ((d)->used*3) + +static int +ix_size(Py_ssize_t size) { + if ( size < 0xff ) return 1; + if ( size < 0xffff ) return 2; + if ( size < 0xffffffff ) return 4; + return sizeof(int64_t); +} + +#ifndef NDEBUG +/* NOTE: This function is only used in assert()s */ +/* Align pointer *ptr* to pointer size */ +static void* +aligned_pointer(void *ptr) { + return (void*)aligned_size((size_t)ptr); +} +#endif + +/* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */ +static Py_ssize_t +get_index(NB_DictKeys *dk, Py_ssize_t i) +{ + Py_ssize_t s = dk->size; + Py_ssize_t ix; + + if (s <= 0xff) { + int8_t *indices = (int8_t*)(dk->indices); + assert (i < dk->size); + ix = indices[i]; + } + else if (s <= 0xffff) { + int16_t *indices = (int16_t*)(dk->indices); + ix = indices[i]; + } +#if SIZEOF_VOID_P > 4 + else if (s > 0xffffffff) { + int64_t *indices = (int64_t*)(dk->indices); + ix = indices[i]; + } +#endif + else { + int32_t *indices = (int32_t*)(dk->indices); + ix = indices[i]; + } + assert(ix >= DKIX_DUMMY); + return ix; +} + +/* write to indices. */ +static void +set_index(NB_DictKeys *dk, Py_ssize_t i, Py_ssize_t ix) +{ + Py_ssize_t s = dk->size; + + assert(ix >= DKIX_DUMMY); + + if (s <= 0xff) { + int8_t *indices = (int8_t*)(dk->indices); + assert(ix <= 0x7f); + indices[i] = (char)ix; + } + else if (s <= 0xffff) { + int16_t *indices = (int16_t*)(dk->indices); + assert(ix <= 0x7fff); + indices[i] = (int16_t)ix; + } +#if SIZEOF_VOID_P > 4 + else if (s > 0xffffffff) { + int64_t *indices = (int64_t*)(dk->indices); + indices[i] = ix; + } +#endif + else { + int32_t *indices = (int32_t*)(dk->indices); + assert(ix <= 0x7fffffff); + indices[i] = (int32_t)ix; + } +} + + +/* USABLE_FRACTION is the maximum dictionary load. + * Increasing this ratio makes dictionaries more dense resulting in more + * collisions. Decreasing it improves sparseness at the expense of spreading + * indices over more cache lines and at the cost of total memory consumed. 
+ * + * USABLE_FRACTION must obey the following: + * (0 < USABLE_FRACTION(n) < n) for all n >= 2 + * + * USABLE_FRACTION should be quick to calculate. + * Fractions around 1/2 to 2/3 seem to work well in practice. + */ +#define USABLE_FRACTION(n) (((n) << 1)/3) + +/* Alternative fraction that is otherwise close enough to 2n/3 to make + * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. + * 32 * 2/3 = 21, 32 * 5/8 = 20. + * Its advantage is that it is faster to compute on machines with slow division. + * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) + */ + +/* GROWTH_RATE. Growth rate upon hitting maximum load. + * Currently set to used*3. + * This means that dicts double in size when growing without deletions, + * but have more head room when the number of deletions is on a par with the + * number of insertions. See also bpo-17563 and bpo-33205. + * + * GROWTH_RATE was set to used*4 up to version 3.2. + * GROWTH_RATE was set to used*2 in version 3.3.0 + * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0. + */ +#define GROWTH_RATE(d) ((d)->ma_used*3) + + +static NB_DictEntry* +get_entry(NB_DictKeys *dk, Py_ssize_t idx) { + Py_ssize_t offset; + char *ptr; + + assert (idx < dk->size); + offset = idx * dk->entry_size; + ptr = dk->indices + dk->entry_offset + offset; + return (NB_DictEntry*)ptr; +} + +static void +zero_key(NB_DictKeys *dk, char *data){ + memset(data, 0, dk->key_size); +} + +static void +zero_val(NB_DictKeys *dk, char *data){ + memset(data, 0, dk->val_size); +} + +static void +copy_key(NB_DictKeys *dk, char *dst, const char *src){ + memcpy(dst, src, dk->key_size); +} + +static void +copy_val(NB_DictKeys *dk, char *dst, const char *src){ + memcpy(dst, src, dk->val_size); +} + +/* Returns -1 for error; 0 for not equal; 1 for equal */ +static int +key_equal(NB_DictKeys *dk, const char *lhs, const char *rhs) { + if ( dk->methods.key_equal ) { + return dk->methods.key_equal(lhs, rhs); + } else { + return memcmp(lhs, rhs, dk->key_size) == 0; + } +} + +static char * +entry_get_key(NB_DictKeys *dk, NB_DictEntry* entry) { + char * out = entry->keyvalue; + assert (out == aligned_pointer(out)); + return out; +} + +static char * +entry_get_val(NB_DictKeys *dk, NB_DictEntry* entry) { + char * out = entry_get_key(dk, entry) + aligned_size(dk->key_size); + assert (out == aligned_pointer(out)); + return out; +} + +static void +dk_incref_key(NB_DictKeys *dk, const char *key) { + if ( dk->methods.key_incref ) { + dk->methods.key_incref(key); + } +} + +static void +dk_decref_key(NB_DictKeys *dk, const char *key) { + if ( dk->methods.key_decref ) { + dk->methods.key_decref(key); + } +} + +static void +dk_incref_val(NB_DictKeys *dk, const char *val) { + if ( dk->methods.value_incref ) { + dk->methods.value_incref(val); + } +} + +static void +dk_decref_val(NB_DictKeys *dk, const char *val) { + if ( dk->methods.value_decref ) { + dk->methods.value_decref(val); + } +} + + +void +numba_dictkeys_free(NB_DictKeys *dk) { + /* Clear all references from the entries */ + Py_ssize_t i; + NB_DictEntry *ep; + + for (i = 0; i < dk->nentries; i++) { + ep = get_entry(dk, i); + if (ep->hash != DKIX_EMPTY) { + dk_decref_key(dk, entry_get_key(dk, ep)); + dk_decref_val(dk, entry_get_val(dk, ep)); + } + } + /* Deallocate */ + free(dk); +} + +void +numba_dict_free(NB_Dict *d) { + numba_dictkeys_free(d->keys); + free(d); +} + +Py_ssize_t +numba_dict_length(NB_Dict *d) { + return d->used; +} + +/* Allocate new dictionary keys + +Adapted from CPython's new_keys_object(). 
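+
+A worked sizing example for this port (illustrative, assuming 8-byte
+pointer alignment and the key_size == 4, val_size == 8 pair used by the
+C self-test): entry_size == aligned_size(sizeof(NB_DictEntry) +
+aligned_size(4) + aligned_size(8)) == 24 bytes, the index array for
+size == 8 occupies aligned_size(ix_size(8) * 8) == 8 bytes, and
+USABLE_FRACTION(8) == 5 entries are laid out after it.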
+*/ +int +numba_dictkeys_new(NB_DictKeys **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) { + Py_ssize_t usable = USABLE_FRACTION(size); + Py_ssize_t index_size = ix_size(size); + Py_ssize_t entry_size = aligned_size(sizeof(NB_DictEntry) + aligned_size(key_size) + aligned_size(val_size)); + Py_ssize_t entry_offset = aligned_size(index_size * size); + Py_ssize_t alloc_size = sizeof(NB_DictKeys) + entry_offset + entry_size * usable; + + NB_DictKeys *dk = malloc(aligned_size(alloc_size)); + if (!dk) return ERR_NO_MEMORY; + + assert ( size >= D_MINSIZE ); + + dk->size = size; + dk->usable = usable; + dk->nentries = 0; + dk->key_size = key_size; + dk->val_size = val_size; + dk->entry_offset = entry_offset; + dk->entry_size = entry_size; + + assert (aligned_pointer(dk->indices) == dk->indices ); + /* Ensure that the method table is all nulls */ + memset(&dk->methods, 0x00, sizeof(type_based_methods_table)); + /* Ensure hash is (-1) for empty entry */ + memset(dk->indices, 0xff, entry_offset + entry_size * usable); + + *out = dk; + return OK; +} + + +/* Allocate new dictionary */ +int +numba_dict_new(NB_Dict **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size) { + NB_DictKeys* dk; + NB_Dict *d; + int status = numba_dictkeys_new(&dk, size, key_size, val_size); + if (status != OK) return status; + + d = malloc(sizeof(NB_Dict)); + if (!d) { + numba_dictkeys_free(dk); + return ERR_NO_MEMORY; + } + + d->used = 0; + d->keys = dk; + *out = d; + return OK; +} + +/* +Adapted from CPython lookdict_index(). + +Search index of hash table from offset of entry table +*/ +static Py_ssize_t +lookdict_index(NB_DictKeys *dk, Py_hash_t hash, Py_ssize_t index) +{ + size_t mask = D_MASK(dk); + size_t perturb = (size_t)hash; + size_t i = (size_t)hash & mask; + + for (;;) { + Py_ssize_t ix = get_index(dk, i); + if (ix == index) { + return i; + } + if (ix == DKIX_EMPTY) { + return DKIX_EMPTY; + } + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); + } + assert(0 && "unreachable"); +} + +/* + +Adapted from the CPython3.7 lookdict(). + +The basic lookup function used by all operations. +This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. +Open addressing is preferred over chaining since the link overhead for +chaining would be substantial (100% with typical malloc overhead). + +The initial probe index is computed as hash mod the table size. Subsequent +probe indices are computed as explained earlier. + +All arithmetic on hash should ignore overflow. + +The details in this version are due to Tim Peters, building on many past +contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and +Christian Tismer. + +lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a +comparison raises an exception. +lookdict_unicode() below is specialized to string keys, comparison of which can +never raise an exception; that function can never return DKIX_ERROR when key +is string. Otherwise, it falls back to lookdict(). +lookdict_unicode_nodummy is further specialized for string keys that cannot be +the value. +For both, when the key isn't found a DKIX_EMPTY is returned. 
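+
+A usage sketch for this port (the buffer name and size are illustrative,
+not part of the API): the caller supplies a scratch buffer of val_size
+bytes to receive a copy of the value,
+
+    char scratch[8];                   // val_size bytes (illustrative)
+    Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, scratch);
+    if (ix >= 0)               { ... } // found; scratch holds the value
+    else if (ix == DKIX_EMPTY) { ... } // missing; scratch is zeroed
+    else                       { ... } // DKIX_ERROR: key comparison failed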
+*/ +Py_ssize_t +numba_dict_lookup(NB_Dict *d, const char *key_bytes, Py_hash_t hash, char *oldval_bytes) +{ + NB_DictKeys *dk = d->keys; + size_t mask = D_MASK(dk); + size_t perturb = hash; + size_t i = (size_t)hash & mask; + + for (;;) { + Py_ssize_t ix = get_index(dk, i); + if (ix == DKIX_EMPTY) { + zero_val(dk, oldval_bytes); + return ix; + } + if (ix >= 0) { + NB_DictEntry *ep = get_entry(dk, ix); + const char *startkey = NULL; + if (ep->hash == hash) { + int cmp; + + startkey = entry_get_key(dk, ep); + cmp = key_equal(dk, startkey, key_bytes); + if (cmp < 0) { + // error'ed in comparison + memset(oldval_bytes, 0, dk->val_size); + return DKIX_ERROR; + } + if (cmp > 0) { + // key is equal; retrieve the value. + copy_val(dk, oldval_bytes, entry_get_val(dk, ep)); + return ix; + } + } + } + perturb >>= PERTURB_SHIFT; + i = (i*5 + perturb + 1) & mask; + } + assert(0 && "unreachable"); +} + + +/* Internal function to find slot for an item from its hash + when it is known that the key is not present in the dict. + + The dict must be combined. */ +static Py_ssize_t +find_empty_slot(NB_DictKeys *dk, Py_hash_t hash){ + size_t mask; + size_t i; + Py_ssize_t ix; + size_t perturb; + + assert(dk != NULL); + + mask = D_MASK(dk); + i = hash & mask; + ix = get_index(dk, i); + for (perturb = hash; ix >= 0;) { + perturb >>= PERTURB_SHIFT; + i = (i*5 + perturb + 1) & mask; + ix = get_index(dk, i); + } + return i; +} + +static int +insertion_resize(NB_Dict *d) +{ + return numba_dict_resize(d, D_GROWTH_RATE(d)); +} + +int +numba_dict_insert( + NB_Dict *d, + const char *key_bytes, + Py_hash_t hash, + const char *val_bytes, + char *oldval_bytes + ) +{ + + NB_DictKeys *dk = d->keys; + + Py_ssize_t ix = numba_dict_lookup(d, key_bytes, hash, oldval_bytes); + if (ix == DKIX_ERROR) { + // exception in key comparison in lookup. + return ERR_CMP_FAILED; + } + + if (ix == DKIX_EMPTY) { + /* Insert into new slot */ + Py_ssize_t hashpos; + NB_DictEntry *ep; + + if (dk->usable <= 0) { + /* Need to resize */ + if (insertion_resize(d) != OK) + return ERR_NO_MEMORY; + else + dk = d->keys; // reload + } + hashpos = find_empty_slot(dk, hash); + ep = get_entry(dk, dk->nentries); + set_index(dk, hashpos, dk->nentries); + copy_key(dk, entry_get_key(dk, ep), key_bytes); + assert ( hash != -1 ); + ep->hash = hash; + copy_val(dk, entry_get_val(dk, ep), val_bytes); + + /* incref */ + dk_incref_key(dk, key_bytes); + dk_incref_val(dk, val_bytes); + + d->used += 1; + dk->usable -= 1; + dk->nentries += 1; + assert (dk->usable >= 0); + return OK; + } else { + /* Replace existing value in the slot at ix */ + /* decref old value */ + dk_decref_val(dk, oldval_bytes); + // Replace the previous value + copy_val(dk, entry_get_val(dk, get_entry(dk, ix)), val_bytes); + + /* incref */ + dk_incref_val(dk, val_bytes); + return OK_REPLACED; + } +} + +/* +Adapted from build_indices(). +Internal routine used by dictresize() to build a hashtable of entries. +*/ +void +build_indices(NB_DictKeys *keys, Py_ssize_t n) { + size_t mask = (size_t)D_MASK(keys); + Py_ssize_t ix; + for (ix = 0; ix != n; ix++) { + size_t perturb; + Py_hash_t hash = get_entry(keys, ix)->hash; + size_t i = hash & mask; + for (perturb = hash; get_index(keys, i) != DKIX_EMPTY;) { + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); + } + set_index(keys, i, ix); + } +} + +/* + +Adapted from CPython dictresize(). + +Restructure the table by allocating a new table and reinserting all +items again. 
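+
+(Worked example, added for this port: a dict holding 5 entries that has
+exhausted its usable slots calls insertion_resize() with
+D_GROWTH_RATE(d) == 5*3 == 15, which rounds up to the next power of
+two, 16, giving USABLE_FRACTION(16) == 10 usable entries; the C
+self-test below exercises exactly this transition.)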
+When entries have been deleted, the new table may
+actually be smaller than the old one.
+If a table is split (its keys and hashes are shared, its values are not),
+then the values are temporarily copied into the table, it is resized as
+a combined table, then the me_value slots in the old table are NULLed out.
+After resizing a table is always combined,
+but can be resplit by make_keys_shared().
+*/
+int
+numba_dict_resize(NB_Dict *d, Py_ssize_t minsize) {
+    Py_ssize_t newsize, numentries;
+    NB_DictKeys *oldkeys;
+    int status;
+
+    /* Find the smallest table size > minused. */
+    for (newsize = D_MINSIZE;
+         newsize < minsize && newsize > 0;
+         newsize <<= 1)
+        ;
+    if (newsize <= 0) {
+        return ERR_NO_MEMORY;
+    }
+    oldkeys = d->keys;
+
+    /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
+     * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
+     * TODO: Try reusing oldkeys when reimplement odict.
+     */
+
+    /* Allocate a new table. */
+    status = numba_dictkeys_new(
+        &d->keys, newsize, oldkeys->key_size, oldkeys->val_size
+    );
+    if (status != OK) {
+        d->keys = oldkeys;
+        return status;
+    }
+    // New table must be large enough.
+    assert(d->keys->usable >= d->used);
+    // Copy method table
+    memcpy(&d->keys->methods, &oldkeys->methods, sizeof(type_based_methods_table));
+
+    numentries = d->used;
+
+    if (oldkeys->nentries == numentries) {
+        NB_DictEntry *oldentries, *newentries;
+
+        oldentries = get_entry(oldkeys, 0);
+        newentries = get_entry(d->keys, 0);
+        memcpy(newentries, oldentries, numentries * oldkeys->entry_size);
+        // to avoid decref
+        memset(oldentries, 0xff, numentries * oldkeys->entry_size);
+    }
+    else {
+        Py_ssize_t i;
+        size_t epi = 0;
+        for (i=0; i<numentries; ++i) {
+            /*
+               ep->hash == (-1) hash means it is empty
+
+               Here, we skip until a non empty entry is encountered.
+ */ + while( get_entry(oldkeys, epi)->hash == DKIX_EMPTY ) { + assert( mem_cmp_zeros(entry_get_val(oldkeys, get_entry(oldkeys, epi)), oldkeys->val_size) == 0 ); + epi += 1; + } + memcpy( + get_entry(d->keys, i), + get_entry(oldkeys, epi), + oldkeys->entry_size + ); + get_entry(oldkeys, epi)->hash = DKIX_EMPTY; // to avoid decref + epi += 1; + + } + + } + numba_dictkeys_free(oldkeys); + + build_indices(d->keys, numentries); + d->keys->usable -= numentries; + d->keys->nentries = numentries; + return OK; +} + +/* + Adapted from CPython delitem_common + */ +int +numba_dict_delitem(NB_Dict *d, Py_hash_t hash, Py_ssize_t ix) +{ + Py_ssize_t hashpos; + NB_DictEntry *ep; + NB_DictKeys *dk = d->keys; + + hashpos = lookdict_index(dk, hash, ix); + assert(hashpos >= 0); + + d->used -= 1; + ep = get_entry(dk, ix); + set_index(dk, hashpos, DKIX_DUMMY); + + /* decref */ + dk_decref_key(dk, entry_get_key(dk, ep)); + dk_decref_val(dk, entry_get_val(dk, ep)); + + /* zero the entries */ + zero_key(dk, entry_get_key(dk, ep)); + zero_val(dk, entry_get_val(dk, ep)); + ep->hash = DKIX_EMPTY; // to mark it as empty; + + return OK; +} + + +/** + * Adapted from dict_popitem + * + */ +int +numba_dict_popitem(NB_Dict *d, char *key_bytes, char *val_bytes) +{ + Py_ssize_t i, j; + char *key_ptr, *val_ptr; + NB_DictEntry *ep = NULL; + + if (d->used == 0) { + return ERR_DICT_EMPTY; + } + + /* Pop last item */ + i = d->keys->nentries - 1; + while (i >= 0 && (ep = get_entry(d->keys, i))->hash == DKIX_EMPTY ) { + i--; + } + assert(i >= 0); + + j = lookdict_index(d->keys, ep->hash, i); + assert(j >= 0); + assert(get_index(d->keys, j) == i); + set_index(d->keys, j, DKIX_DUMMY); + + key_ptr = entry_get_key(d->keys, ep); + val_ptr = entry_get_val(d->keys, ep); + + copy_key(d->keys, key_bytes, key_ptr); + copy_val(d->keys, val_bytes, val_ptr); + + zero_key(d->keys, key_ptr); + zero_val(d->keys, val_ptr); + + /* We can't dk_usable++ since there is DKIX_DUMMY in indices */ + d->keys->nentries = i; + d->used--; + + return OK; +} + +void +numba_dict_dump(NB_Dict *d) { + long long i, j, k; + long long size, n; + char *cp; + NB_DictEntry *ep; + NB_DictKeys *dk = d->keys; + + n = d->used; + size = dk->nentries; + + printf("Dict dump\n"); + printf(" key_size = %lld\n", (long long)d->keys->key_size); + printf(" val_size = %lld\n", (long long)d->keys->val_size); + + for (i = 0, j = 0; i < size; i++) { + ep = get_entry(dk, i); + if (ep->hash != DKIX_EMPTY) { + long long hash = ep->hash; + printf(" key="); + for (cp=entry_get_key(dk, ep), k=0; k < d->keys->key_size; ++k, ++cp){ + printf("%02x ", ((int)*cp) & 0xff); + } + printf(" hash=%llu value=", hash); + for (cp=entry_get_val(dk, ep), k=0; k < d->keys->val_size; ++k, ++cp){ + printf("%02x ", ((int)*cp) & 0xff); + } + printf("\n"); + j++; + } + } + printf("j = %lld; n = %lld\n", j, n); + assert(j == n); +} + +size_t +numba_dict_iter_sizeof() { + return sizeof(NB_DictIter); +} + +void +numba_dict_iter(NB_DictIter *it, NB_Dict *d) { + it->parent = d; + it->parent_keys = d->keys; + it->size = d->used; + it->pos = 0; +} + +int +numba_dict_iter_next(NB_DictIter *it, const char **key_ptr, const char **val_ptr) { + /* Detect dictionary mutation during iteration */ + NB_DictKeys *dk; + if (it->parent->keys != it->parent_keys || + it->parent->used != it->size) { + return ERR_DICT_MUTATED; + } + dk = it->parent_keys; + while ( it->pos < dk->nentries ) { + NB_DictEntry *ep = get_entry(dk, it->pos++); + if ( ep->hash != DKIX_EMPTY ) { + *key_ptr = entry_get_key(dk, ep); + *val_ptr = entry_get_val(dk, 
ep); + return OK; + } + } + return ERR_ITER_EXHAUSTED; +} + +int +numba_dict_insert_ez( + NB_Dict *d, + const char *key_bytes, + Py_hash_t hash, + const char *val_bytes + ) +{ + STACK_ALLOC(char, old, d->keys->val_size); + return numba_dict_insert(d, key_bytes, hash, val_bytes, old); +} + +int +numba_dict_new_minsize(NB_Dict **out, Py_ssize_t key_size, Py_ssize_t val_size) +{ + return numba_dict_new(out, D_MINSIZE, key_size, val_size); +} + +void +numba_dict_set_method_table(NB_Dict *d, type_based_methods_table *methods) +{ + memcpy(&d->keys->methods, methods, sizeof(type_based_methods_table)); +} + + +#define CHECK(CASE) { \ + if ( !(CASE) ) { \ + printf("'%s' failed file %s:%d\n", #CASE, __FILE__, __LINE__); \ + return 1; \ + } \ +} + +int +numba_test_dict(void) { + NB_Dict *d; + int status; + Py_ssize_t ix; + Py_ssize_t usable; + Py_ssize_t it_count; + const char *it_key, *it_val; + NB_DictIter iter; + +#if defined(_MSC_VER) + /* So that VS2008 compiler is happy */ + char *got_key, *got_value; + got_key = _alloca(4); + got_value = _alloca(8); +#else + char got_key[4]; + char got_value[8]; +#endif + puts("test_dict"); + + status = numba_dict_new(&d, D_MINSIZE, 4, 8); + CHECK(status == OK); + CHECK(d->keys->size == D_MINSIZE); + CHECK(d->keys->key_size == 4); + CHECK(d->keys->val_size == 8); + CHECK(ix_size(d->keys->size) == 1); + printf("aligned_size(index_size * size) = %d\n", (int)(aligned_size(ix_size(d->keys->size) * d->keys->size))); + + printf("d %p\n", d); + printf("d->usable = %u\n", (int)d->keys->usable); + usable = d->keys->usable; + printf("d[0] %d\n", (int)((char*)get_entry(d->keys, 0) - (char*)d->keys)); + CHECK ((char*)get_entry(d->keys, 0) - (char*)d->keys->indices == d->keys->entry_offset); + printf("d[1] %d\n", (int)((char*)get_entry(d->keys, 1) - (char*)d->keys)); + CHECK ((char*)get_entry(d->keys, 1) - (char*)d->keys->indices == d->keys->entry_offset + d->keys->entry_size); + + ix = numba_dict_lookup(d, "bef", 0xbeef, got_value); + printf("ix = %d\n", (int)ix); + CHECK (ix == DKIX_EMPTY); + + // insert 1st key + status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value); + CHECK (status == OK); + CHECK (d->used == 1); + CHECK (d->keys->usable == usable - d->used); + + // insert same key + status = numba_dict_insert(d, "bef", 0xbeef, "1234567", got_value); + CHECK (status == OK_REPLACED); + printf("got_value %s\n", got_value); + CHECK (d->used == 1); + CHECK (d->keys->usable == usable - d->used); + + // insert 2nd key + status = numba_dict_insert(d, "beg", 0xbeef, "1234568", got_value); + CHECK (status == OK); + CHECK (d->used == 2); + CHECK (d->keys->usable == usable - d->used); + + // insert 3rd key + status = numba_dict_insert(d, "beh", 0xcafe, "1234569", got_value); + CHECK (status == OK); + CHECK (d->used == 3); + CHECK (d->keys->usable == usable - d->used); + + // replace key "bef"'s value + status = numba_dict_insert(d, "bef", 0xbeef, "7654321", got_value); + CHECK (status == OK_REPLACED); + CHECK (d->used == 3); + CHECK (d->keys->usable == usable - d->used); + + // insert 4th key + status = numba_dict_insert(d, "bei", 0xcafe, "0_0_0_1", got_value); + CHECK (status == OK); + CHECK (d->used == 4); + CHECK (d->keys->usable == usable - d->used); + + // insert 5th key + status = numba_dict_insert(d, "bej", 0xcafe, "0_0_0_2", got_value); + CHECK (status == OK); + CHECK (d->used == 5); + CHECK (d->keys->usable == usable - d->used); + + // insert 6th key & triggers resize + status = numba_dict_insert(d, "bek", 0xcafe, "0_0_0_3", got_value); + CHECK (status == 
OK);
+    CHECK (d->used == 6);
+    CHECK (d->keys->usable == USABLE_FRACTION(d->keys->size) - d->used);
+
+    // Dump
+    numba_dict_dump(d);
+
+    // Make sure everything is still in there
+    ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
+    CHECK (ix >= 0);
+    CHECK (memcmp(got_value, "7654321", d->keys->val_size) == 0);
+
+    ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
+    CHECK (ix >= 0);
+    CHECK (memcmp(got_value, "1234568", d->keys->val_size) == 0);
+
+    ix = numba_dict_lookup(d, "beh", 0xcafe, got_value);
+    printf("ix = %d\n", (int)ix);
+    CHECK (ix >= 0);
+    CHECK (memcmp(got_value, "1234569", d->keys->val_size) == 0);
+
+    ix = numba_dict_lookup(d, "bei", 0xcafe, got_value);
+    CHECK (ix >= 0);
+    CHECK (memcmp(got_value, "0_0_0_1", d->keys->val_size) == 0);
+
+    ix = numba_dict_lookup(d, "bej", 0xcafe, got_value);
+    CHECK (ix >= 0);
+    CHECK (memcmp(got_value, "0_0_0_2", d->keys->val_size) == 0);
+
+    ix = numba_dict_lookup(d, "bek", 0xcafe, got_value);
+    CHECK (ix >= 0);
+    CHECK (memcmp(got_value, "0_0_0_3", d->keys->val_size) == 0);
+
+    // Test delete
+    ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
+    status = numba_dict_delitem(d, 0xbeef, ix);
+    CHECK (status == OK);
+
+    ix = numba_dict_lookup(d, "beg", 0xbeef, got_value);
+    CHECK (ix == DKIX_EMPTY); // not found
+
+    ix = numba_dict_lookup(d, "bef", 0xbeef, got_value);
+    CHECK (ix >= 0);
+    ix = numba_dict_lookup(d, "beh", 0xcafe, got_value);
+    CHECK (ix >= 0);
+
+
+    // Test popitem
+    // They are always the last item
+    status = numba_dict_popitem(d, got_key, got_value);
+    CHECK(status == OK);
+    CHECK(memcmp("bek", got_key, d->keys->key_size) == 0);
+    CHECK(memcmp("0_0_0_3", got_value, d->keys->val_size) == 0);
+
+    status = numba_dict_popitem(d, got_key, got_value);
+    CHECK(status == OK);
+    CHECK(memcmp("bej", got_key, d->keys->key_size) == 0);
+    CHECK(memcmp("0_0_0_2", got_value, d->keys->val_size) == 0);
+
+    // Test iterator
+    CHECK( d->used > 0 );
+    numba_dict_iter(&iter, d);
+    it_count = 0;
+    while ( (status = numba_dict_iter_next(&iter, &it_key, &it_val)) == OK) {
+        it_count += 1; // valid items
+        CHECK(it_key != NULL);
+        CHECK(it_val != NULL);
+    }
+
+    CHECK(status == ERR_ITER_EXHAUSTED);
+    CHECK(d->used == it_count);
+
+    numba_dict_free(d);
+    return 0;
+
+}
+
+#undef CHECK
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.h
new file mode 100644
index 000000000..74b555a02
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/dictobject.h
@@ -0,0 +1,222 @@
+/* Adapted from CPython3.7 Objects/dict-common.h */
+#include "Python.h"
+#include "../_pymodule.h"
+#include "cext.h"
+
+#ifndef NUMBA_DICT_COMMON_H
+#define NUMBA_DICT_COMMON_H
+
+typedef struct {
+    /* Uses Py_ssize_t instead of Py_hash_t to guarantee word size alignment */
+    Py_ssize_t  hash;
+    char        keyvalue[];
+} NB_DictEntry;
+
+
+typedef int (*dict_key_comparator_t)(const char *lhs, const char *rhs);
+typedef void (*dict_refcount_op_t)(const void*);
+
+
+typedef struct {
+    dict_key_comparator_t   key_equal;
+    dict_refcount_op_t      key_incref;
+    dict_refcount_op_t      key_decref;
+    dict_refcount_op_t      value_incref;
+    dict_refcount_op_t      value_decref;
+} type_based_methods_table;
+
+
+typedef struct {
+    /* hash table size */
+    Py_ssize_t      size;
+    /* Usable size of the hash table.
+ Also, size of the entries */ + Py_ssize_t usable; + /* hash table used entries */ + Py_ssize_t nentries; + /* Entry info + - key_size is the sizeof key type + - val_size is the sizeof value type + - entry_size is key_size + val_size + alignment + */ + Py_ssize_t key_size, val_size, entry_size; + /* Byte offset from indices to the first entry. */ + Py_ssize_t entry_offset; + + /* Method table for type-dependent operations. */ + type_based_methods_table methods; + + /* hash table */ + char indices[]; +} NB_DictKeys; + + +typedef struct { + /* num of elements in the hashtable */ + Py_ssize_t used; + NB_DictKeys *keys; +} NB_Dict; + + +typedef struct { + /* parent dictionary */ + NB_Dict *parent; + /* parent keys object */ + NB_DictKeys *parent_keys; + /* dict size */ + Py_ssize_t size; + /* iterator position; indicates the next position to read */ + Py_ssize_t pos; +} NB_DictIter; + + + +/* A test function for the dict +Returns 0 for OK; 1 for failure. +*/ +NUMBA_EXPORT_FUNC(int) +numba_test_dict(void); + +/* Allocate a new dict +Parameters +- NB_Dict **out + Output for the new dictionary. +- Py_ssize_t size + Hashtable size. Must be power of two. +- Py_ssize_t key_size + Size of a key entry. +- Py_ssize_t val_size + Size of a value entry. +*/ +NUMBA_EXPORT_FUNC(int) +numba_dict_new(NB_Dict **out, Py_ssize_t size, Py_ssize_t key_size, Py_ssize_t val_size); + +/* Free a dict */ +NUMBA_EXPORT_FUNC(void) +numba_dict_free(NB_Dict *d); + +/* Returns length of a dict */ +NUMBA_EXPORT_FUNC(Py_ssize_t) +numba_dict_length(NB_Dict *d); + +/* Allocates a new dict at the minimal size +See numba_dict_new(). +*/ +NUMBA_EXPORT_FUNC(int) +numba_dict_new_minsize(NB_Dict **out, Py_ssize_t key_size, Py_ssize_t val_size); + +/* Set the method table for type specific operations +*/ +NUMBA_EXPORT_FUNC(void) +numba_dict_set_method_table(NB_Dict *d, type_based_methods_table *methods); + +/* Lookup a key + +Parameters +- NB_Dict *d + The dictionary object. +- const char *key_bytes + The key as a byte buffer. +- Py_hash_t hash + The precomputed hash of the key. +- char *oldval_bytes + An output parameter to store the associated value if the key is found. + Must point to memory of sufficient size to store the value. +*/ +NUMBA_EXPORT_FUNC(Py_ssize_t) +numba_dict_lookup(NB_Dict *d, const char *key_bytes, Py_hash_t hash, char *oldval_bytes); + +/* Resize the dict to at least *minsize*. +*/ +NUMBA_EXPORT_FUNC(int) +numba_dict_resize(NB_Dict *d, Py_ssize_t minsize); + +/* Insert to the dict + +Parameters +- NB_Dict *d + The dictionary object. +- const char *key_bytes + The key as a byte buffer. +- Py_hash_t hash + The precomputed hash of key. +- const char *val_bytes + The value as a byte buffer. +- char *oldval_bytes + An output buffer to store the replaced value. + Must point to memory of sufficient size to store the value. + +Returns +- < 0 for error +- 0 for ok +- 1 for ok and oldval_bytes has a copy of the replaced value. +*/ +NUMBA_EXPORT_FUNC(int) +numba_dict_insert(NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes, char *oldval_bytes); + +/* Same as numba_dict_insert() but oldval_bytes is not needed */ +NUMBA_EXPORT_FUNC(int) +numba_dict_insert_ez(NB_Dict *d, const char *key_bytes, Py_hash_t hash, const char *val_bytes); + +/* Delete an entry from the dict +Parameters +- NB_Dict *d + The dictionary +- Py_hash_t hash + Precomputed hash of the key to be deleted +- Py_ssize_t ix + Precomputed entry index of the key to be deleted. + Usually results of numba_dict_lookup(). 
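+
+    (Sketch of the intended call sequence for this port, mirroring the C
+    self-test in dictobject.c; names are illustrative:
+        ix = numba_dict_lookup(d, key_bytes, hash, scratch);
+        if (ix >= 0) numba_dict_delitem(d, hash, ix);
+    )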
+*/
+NUMBA_EXPORT_FUNC(int)
+numba_dict_delitem(NB_Dict *d, Py_hash_t hash, Py_ssize_t ix);
+
+/* Remove an item from the dict
+Parameters
+- NB_Dict *d
+    The dictionary
+- char *key_bytes
+    Output. The key as a byte buffer
+- char *val_bytes
+    Output. The value as a byte buffer
+*/
+NUMBA_EXPORT_FUNC(int)
+numba_dict_popitem(NB_Dict *d, char *key_bytes, char *val_bytes);
+
+/* Returns the sizeof a dictionary iterator
+*/
+NUMBA_EXPORT_FUNC(size_t)
+numba_dict_iter_sizeof(void);
+
+/* Fill a NB_DictIter for a dictionary to begin iteration
+Parameters
+- NB_DictIter *it
+    Output. Must point to memory of size at least `numba_dict_iter_sizeof()`.
+- NB_Dict *d
+    The dictionary to be iterated.
+*/
+NUMBA_EXPORT_FUNC(void)
+numba_dict_iter(NB_DictIter *it, NB_Dict *d);
+
+/* Advance the iterator
+Parameters
+- NB_DictIter *it
+    The iterator
+- const char **key_ptr
+    Output pointer for the key. Points to data in the dictionary.
+- const char **val_ptr
+    Output pointer for the value. Points to data in the dictionary.
+
+Returns
+- 0 for success; valid key_ptr and val_ptr
+- ERR_ITER_EXHAUSTED for end of iterator.
+- ERR_DICT_MUTATED for detected dictionary mutation.
+*/
+NUMBA_EXPORT_FUNC(int)
+numba_dict_iter_next(NB_DictIter *it, const char **key_ptr, const char **val_ptr);
+
+
+NUMBA_EXPORT_FUNC(void)
+numba_dict_dump(NB_Dict *);
+
+#endif
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.c
new file mode 100644
index 000000000..9de03f5ff
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.c
@@ -0,0 +1,977 @@
+#include "listobject.h"
+
+/* This implements the C component of the Numba typed list. It is loosely
+ * inspired by the list implementation of the cpython list with some parts
+ * taken from the cpython slice implementation. The exact commit-id of the
+ * relevant files are:
+ *
+ * https://github.com/python/cpython/blob/51ddab8dae056867f3595ab3400bffc93f67c8d4/Objects/listobject.c
+ * https://github.com/python/cpython/blob/51ddab8dae056867f3595ab3400bffc93f67c8d4/Objects/sliceobject.c
+ *
+ * Algorithmically, this list is very similar to the cpython implementation so
+ * it should have the same performance (Big-O) characteristics for accessing,
+ * adding and removing elements/items. Specifically, it implements the same
+ * algorithms for list overallocation and growth. However, it never deals with
+ * PyObject types and instead must be typed with a type-size. As a result, the
+ * typed-list is type homogeneous and in contrast to the cpython version can
+ * not store a mixture of arbitrarily typed objects. Reference counting via the
+ * Numba Runtime (NRT) is supported and incrementing and decrementing functions
+ * are stored as part of the struct and can be set up from the compiler level.
+ *
+ * Importantly, only a very limited subset of the cpython c functions have been
+ * ported over and the rest have been implemented (in Python) at the compiler
+ * level using the c functions provided. Additionally, initialization of, and
+ * iteration over, a ListIter is provided.
+ *
+ * The following functions are implemented for the list:
+ *
+ * - Check valid index          valid_index
+ * - Creation                   numba_list_new
+ * - Deletion                   numba_list_free
+ * - Accessing the length       numba_list_length
+ * - Appending to the list      numba_list_append
+ * - Getting an item            numba_list_getitem
+ * - Setting an item            numba_list_setitem
+ * - Resizing the list          numba_list_resize
+ * - Deleting an item           numba_list_delitem
+ * - Deleting a slice           numba_list_delete_slice
+ *
+ * As you can see, only a single function for slices is implemented. The rest
+ * is all done entirely at the compiler level which then calls the c functions
+ * to mutate the list accordingly. Since slicing allows for replace, insert and
+ * delete operations over multiple items, we can simply implement those using
+ * the basic functions above.
+ *
+ * The following additional functions are implemented for the list; these are
+ * needed to make the list work within Numba.
+ *
+ * - Accessing the allocation   numba_list_allocated
+ * - Copying an item            copy_item
+ * - Calling incref on item     list_incref_item
+ * - Calling decref on item     list_decref_item
+ * - Set method table           numba_list_set_method_table
+ *
+ * The following functions are implemented for the iterator:
+ *
+ * - Size of the iterator       numba_list_iter_sizeof
+ * - Initialization of iter     numba_list_iter
+ * - Get next item from iter    numba_list_iter_next
+ *
+ * Two methods are provided to query and set the 'is_mutable':
+ *
+ * - Query                      numba_list_is_mutable
+ * - Set                        numba_list_set_is_mutable
+ *
+ * Lastly a set of pure C level tests are provided which come in handy when
+ * needing to use valgrind and friends.
+ *
+ */
+
+
+/* Return status for the list functions.
+ */
+typedef enum {
+    LIST_OK = 0,
+    LIST_ERR_INDEX = -1,
+    LIST_ERR_NO_MEMORY = -2,
+    LIST_ERR_MUTATED = -3,
+    LIST_ERR_ITER_EXHAUSTED = -4,
+    LIST_ERR_IMMUTABLE = -5,
+} ListStatus;
+
+/* Copy an item from a list.
+ *
+ * lp: a list
+ * dst: destination pointer
+ * src: source pointer
+ */
+static void
+copy_item(NB_List *lp, char *dst, const char *src){
+    memcpy(dst, src, lp->item_size);
+}
+
+/* Increment a reference to an item in a list.
+ *
+ * lp: a list
+ * item: the item to increment the reference for
+ */
+static void
+list_incref_item(NB_List *lp, const char *item){
+    if (lp->methods.item_incref) {
+        lp->methods.item_incref(item);
+    }
+}
+
+/* Decrement a reference to an item in a list.
+ *
+ * lp: a list
+ * item: the item to decrement the reference for
+ */
+static void
+list_decref_item(NB_List *lp, const char *item){
+    if (lp->methods.item_decref) {
+        lp->methods.item_decref(item);
+    }
+}
+
+/* Setup the method table for a list.
+ *
+ * This function is used from the compiler level to initialize the internal
+ * method table.
+ *
+ * lp: a list
+ * methods: the methods table to set up
+ */
+void
+numba_list_set_method_table(NB_List *lp, list_type_based_methods_table *methods)
+{
+    memcpy(&lp->methods, methods, sizeof(list_type_based_methods_table));
+}
+
+/* Check if a list index is valid.
+ *
+ * i: the index to check
+ * limit: the size of a list
+ *
+ * Adapted from CPython's valid_index().
+ *
+ * FIXME: need to find a way to inline this, even for Python 2.7 on Windows
+ */
+static int
+valid_index(Py_ssize_t i, Py_ssize_t limit){
+    /* The cast to size_t lets us use just a single comparison
+       to check whether i is in the range: 0 <= i < limit.
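+
+       (Note added for this port: a negative i converts to a huge
+       size_t value, so the single unsigned comparison also rejects
+       negative indices.)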
+ + See: Section 14.2 "Bounds Checking" in the Agner Fog + optimization manual found at: + https://www.agner.org/optimize/optimizing_cpp.pdf + */ + return (size_t) i < (size_t) limit; +} + +/* Initialize a new list. + * + * out: pointer to hold an initialized list + * item_size: the size in bytes of the items in the list + * allocated: preallocation of the list in items + * + * This will allocate sufficient memory to hold the list structure and any + * items if requested (allocated != 0). See _listobject.h for more information + * on the NB_List struct. + */ +int +numba_list_new(NB_List **out, Py_ssize_t item_size, Py_ssize_t allocated){ + NB_List *lp; + char *items; + // allocate memory to hold the struct + lp = malloc(aligned_size(sizeof(NB_List))); + if (lp == NULL) { + return LIST_ERR_NO_MEMORY; + } + // set up members + lp->size = 0; + lp->item_size = item_size; + lp->allocated = allocated; + lp->is_mutable = 1; + // set method table to zero */ + memset(&lp->methods, 0x00, sizeof(list_type_based_methods_table)); + // allocate memory to hold items, if requested + if (allocated != 0) { + items = malloc(aligned_size(lp->item_size * allocated)); + // allocated was definitely not zero, if malloc returns NULL + // this is definitely an error + if (items == NULL) { + // free previously allocated struct to avoid leaking memory + free(lp); + return LIST_ERR_NO_MEMORY; + } + lp->items = items; + } + else { + // be explicit + lp->items = NULL; + } + *out = lp; + return LIST_OK; +} + +/* Free the memory associated with a list. + * + * lp: a list + */ +void +numba_list_free(NB_List *lp) { + // decref all items, if needed + Py_ssize_t i; + if (lp->methods.item_decref) { + for (i = 0; i < lp->size; i++) { + char *item = lp->items + lp->item_size * i; + list_decref_item(lp, item); + } + } + // free items and list + if (lp->items != NULL) { + free(lp->items); + } + free(lp); +} + +/* Return the base pointer of the list items. + */ +char * +numba_list_base_ptr(NB_List *lp) +{ + return lp->items; +} + +/* Return the address of the list size. + */ +Py_ssize_t +numba_list_size_address(NB_List *lp) +{ + return (Py_ssize_t)&lp->size; +} + + +/* Return the length of a list. + * + * lp: a list + */ +Py_ssize_t +numba_list_length(NB_List *lp) { + return lp->size; +} + +/* Return the current allocation of a list. + * + * lp: a list + */ +Py_ssize_t +numba_list_allocated(NB_List *lp) { + return lp->allocated; +} + +/* Return the mutability status of the list + * + * lp: a list + * + */ +int +numba_list_is_mutable(NB_List *lp){ + return lp->is_mutable; +} + +/* Set the is_mutable attribute + * + * lp: a list + * is_mutable: an int, 0(False) or 1(True) + * + */ +void +numba_list_set_is_mutable(NB_List *lp, int is_mutable){ + lp->is_mutable = is_mutable; +} + +/* Set an item in a list. + * + * lp: a list + * index: the index of the item to set (must be in range 0 <= index < len(list)) + * item: the item to set + * + * This assume there is already an element at the given index that will be + * overwritten and thereby have its reference decremented. DO NOT use this to + * write to an unassigned location. 
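+ *
+ * (Illustrative sketch, added for this port: after
+ *     numba_list_append(lp, item);
+ * a later
+ *     numba_list_setitem(lp, lp->size - 1, other);
+ * overwrites that slot, decref'ing the old item and incref'ing the new
+ * one via the method table.)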
+ */ +int +numba_list_setitem(NB_List *lp, Py_ssize_t index, const char *item) { + char *loc; + // check for mutability + if (!lp->is_mutable) { + return LIST_ERR_IMMUTABLE; + } + // check index is valid + // FIXME: this can be (and probably is) checked at the compiler level + if (!valid_index(index, lp->size)) { + return LIST_ERR_INDEX; + } + // set item at desired location + loc = lp->items + lp-> item_size * index; + list_decref_item(lp, loc); + copy_item(lp, loc, item); + list_incref_item(lp, loc); + return LIST_OK; +} + +/* Get an item from a list. + * + * lp: a list + * index: the index of the item to get (must be in range 0 <= index < len(list)) + * out: a pointer to hold the item + */ +int +numba_list_getitem(NB_List *lp, Py_ssize_t index, char *out) { + char *loc; + // check index is valid + // FIXME: this can be (and probably is) checked at the compiler level + if (!valid_index(index, lp->size)) { + return LIST_ERR_INDEX; + } + // get item at desired location + loc = lp->items + lp->item_size * index; + copy_item(lp, out, loc); + return LIST_OK; +} + +/* Append an item to the end of a list. + * + * lp: a list + * item: the item to append. + */ +int +numba_list_append(NB_List *lp, const char *item) { + char *loc; + // check for mutability + if (!lp->is_mutable) { + return LIST_ERR_IMMUTABLE; + } + // resize by one, will change list size + int result = numba_list_resize(lp, lp->size + 1); + if(result < LIST_OK) { + return result; + } + // insert item at index: original size before resize + loc = lp->items + lp->item_size * (lp->size - 1); + copy_item(lp, loc, item); + list_incref_item(lp, loc); + return LIST_OK; +} + +/* Resize a list. + * + * lp: a list + * newsize: the desired new size of the list. + * + * This will increase or decrease the size of the list, including reallocating + * the required memory and increasing the total allocation (additional free + * space to hold new items). + * + * + * Adapted from CPython's list_resize(). + * + * Ensure lp->items has room for at least newsize elements, and set + * lp->size to newsize. If newsize > lp->size on entry, the content + * of the new slots at exit is undefined heap trash; it's the caller's + * responsibility to overwrite them with sane values. + * The number of allocated elements may grow, shrink, or stay the same. + * Failure is impossible if newsize <= lp->allocated on entry, although + * that partly relies on an assumption that the system realloc() never + * fails when passed a number of bytes <= the number of bytes last + * allocated (the C standard doesn't guarantee this, but it's hard to + * imagine a realloc implementation where it wouldn't be true). + * Note that lp->items may change, and even if newsize is less + * than lp->size on entry. + */ +int +numba_list_resize(NB_List *lp, Py_ssize_t newsize) { + char * items; + // check for mutability + if (!lp->is_mutable) { + return LIST_ERR_IMMUTABLE; + } + size_t new_allocated, num_allocated_bytes; + /* Bypass realloc() when a previous overallocation is large enough + to accommodate the newsize. If the newsize falls lower than half + the allocated size, then proceed with the realloc() to shrink the list. + */ + if (lp->allocated >= newsize && newsize >= (lp->allocated >> 1)) { + assert(lp->items != NULL || newsize == 0); + lp->size = newsize; + return LIST_OK; + } + /* This over-allocates proportional to the list size, making room + * for additional growth. 
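+ * (Worked numbers, added for this port: the request below is
+ * newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6), so the first append
+ * asks for 1 + 0 + 3 == 4 slots and a full 8-slot list grows to
+ * 9 + 1 + 6 == 16, matching the 0, 4, 8, 16, 25, ... growth pattern
+ * mentioned below.)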
+     * The over-allocation is mild, but is
+     * enough to give linear-time amortized behavior over a long
+     * sequence of appends() in the presence of a poorly-performing
+     * system realloc().
+     * The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
+     * Note: new_allocated won't overflow because the largest possible value
+     *       is PY_SSIZE_T_MAX * (9 / 8) + 6 which always fits in a size_t.
+     */
+    new_allocated = (size_t)newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6);
+    if (new_allocated > (size_t)PY_SSIZE_T_MAX / lp->item_size) {
+        return LIST_ERR_NO_MEMORY;
+    }
+
+    if (newsize == 0)
+        new_allocated = 0;
+    num_allocated_bytes = new_allocated * lp->item_size;
+    items = realloc(lp->items, aligned_size(num_allocated_bytes));
+    // realloc may return NULL if requested size is 0
+    if (num_allocated_bytes != 0 && items == NULL) {
+        return LIST_ERR_NO_MEMORY;
+    }
+    lp->items = items;
+    lp->size = newsize;
+    lp->allocated = (Py_ssize_t)new_allocated;
+    return LIST_OK;
+}
+
+/* Delete a single item.
+ *
+ * lp: a list
+ * index: the index of the item to delete
+ *        (must be in range 0 <= index < len(list))
+ */
+int
+numba_list_delitem(NB_List *lp, Py_ssize_t index) {
+    int result;
+    char *loc, *new_loc;
+    Py_ssize_t leftover_bytes;
+    // check for mutability
+    if (!lp->is_mutable) {
+        return LIST_ERR_IMMUTABLE;
+    }
+    // check index is valid
+    // FIXME: this can be (and probably is) checked at the compiler level
+    if (!valid_index(index, lp->size)) {
+        return LIST_ERR_INDEX;
+    }
+    // obtain item and decref if needed
+    loc = lp->items + lp->item_size * index;
+    list_decref_item(lp, loc);
+    if (index != lp->size - 1) {
+        // delitem from somewhere other than the end, incur the memory copy
+        leftover_bytes = (lp->size - 1 - index) * lp->item_size;
+        new_loc = lp->items + (lp->item_size * (index + 1));
+        // use memmove instead of memcpy since we may be dealing with
+        // overlapping regions of memory and the behaviour of memcpy is
+        // undefined in such situations (C99).
+        memmove(loc, new_loc, leftover_bytes);
+    }
+    // finally, shrink list by one
+    result = numba_list_resize(lp, lp->size - 1);
+    if (result < LIST_OK) {
+        // Since we are decreasing the size, this should never happen
+        return result;
+    }
+    return LIST_OK;
+}
+
+/* Delete a slice.
+ *
+ * start: the start index of the slice
+ * stop: the stop index of the slice (not included)
+ * step: the step to take
+ *
+ * This function assumes that start and stop were clipped appropriately,
+ * i.e. if step > 0 then start >= 0 and stop <= len(l), and
+ * if step < 0 then start <= length and stop >= -1.
+ * step must be != 0 and no Python negative indexing is allowed.
+ *
+ * This code was copied and edited from the relevant section in
+ * list_ass_subscript from the cpython implementation, see the top of this
+ * file for the exact source.
+ */
+int
+numba_list_delete_slice(NB_List *lp,
+                        Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step) {
+    int result, i, slicelength, new_length;
+    char *loc, *new_loc;
+    Py_ssize_t leftover_bytes, cur, lim;
+    // check for mutability
+    if (!lp->is_mutable) {
+        return LIST_ERR_IMMUTABLE;
+    }
+    // calculate the slicelength, taken from PySlice_AdjustIndices, see the
+    // top of this file for the exact source
+    if (step > 0) {
+        slicelength = start < stop ? (stop - start - 1) / step + 1 : 0;
+    } else {
+        slicelength = stop < start ?
(start - stop - 1) / -step + 1 : 0; + } + if (slicelength <= 0){ + return LIST_OK; + } + new_length = lp->size - slicelength; + // reverse step and indices + if (step < 0) { + stop = start + 1; + start = stop + step * (slicelength - 1) - 1; + step = -step; + } + if (step == 1) { + // decref if needed + if (lp->methods.item_decref) { + for (i = start ; i < stop ; i++){ + loc = lp->items + lp->item_size * i; + lp->methods.item_decref(loc); + } + } + // memmove items into place + leftover_bytes = (lp->size - stop) * lp->item_size; + loc = lp->items + lp->item_size * start; + new_loc = lp->items + lp->item_size * stop; + memmove(loc, new_loc, leftover_bytes); + } + else { // step != 1 + /* drawing pictures might help understand these for + * loops. Basically, we memmove the parts of the + * list that are *not* part of the slice: step-1 + * items for each item that is part of the slice, + * and then tail end of the list that was not + * covered by the slice + * + * */ + for (cur = start, // index of item to be deleted + i = 0; // counter of total items deleted so far + cur < stop; + cur += step, + i++) { + lim = step - 1; // number of leftover items after deletion of item + // clip limit, in case we are at the end of the slice, and there + // are now less than step-1 items to be moved + if (cur + step >= lp->size) { + lim = lp->size - cur - 1; + } + // decref item being removed + loc = lp->items + lp->item_size * cur; + list_decref_item(lp, loc); + /* memmove the aforementioned step-1 (or less) items + * dst : index of deleted item minus total deleted sofar + * src : index of deleted item plus one (next item) + */ + memmove(lp->items + lp->item_size * (cur - i), + lp->items + lp->item_size * (cur + 1), + lim * lp->item_size); + } + // memmove tail of the list + cur = start + slicelength * step; + if (cur < lp->size) { + memmove(lp->items + lp->item_size * (cur - slicelength), + lp->items + lp->item_size * cur, + (lp->size - cur) * lp->item_size); + } + } + // resize to correct size + result = numba_list_resize(lp, new_length); + if(result < LIST_OK) { + // Since we are decreasing the size, this should never happen + return result; + } + return LIST_OK; +} + + +/* Return the size of the list iterator (NB_ListIter) struct. + */ +size_t +numba_list_iter_sizeof() { + return sizeof(NB_ListIter); +} + +/* Initialize a list iterator (NB_ListIter). + * + * it: an iterator + * lp: a list to iterate over + */ +void +numba_list_iter(NB_ListIter *it, NB_List *lp) { + // set members of iterator + it->parent = lp; + it->size = lp->size; + it->pos = 0; +} + +/* Obtain the next item from a list iterator. + * + * it: an iterator + * item_ptr: pointer to hold the next item + */ +int +numba_list_iter_next(NB_ListIter *it, const char **item_ptr) { + NB_List *lp; + lp = it->parent; + /* FIXME: Detect list mutation during iteration */ + if (lp->size != it->size) { + return LIST_ERR_MUTATED; + } + // get next element + if (it->pos < lp->size) { + *item_ptr = lp->items + lp->item_size * it->pos++; + return LIST_OK; + }else{ + return LIST_ERR_ITER_EXHAUSTED; + } +} + + +#define CHECK(CASE) { \ + if ( !(CASE) ) { \ + printf("'%s' failed file %s:%d\n", #CASE, __FILE__, __LINE__); \ + return -1; \ + } \ +} + +/* Basic C based tests for the list. 
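+ * (Note: the CHECK macro defined above prints the failing expression
+ * together with its file and line, then makes the test return -1; a
+ * minimal, dependency-free stand-in for assert().)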
+ */ +int +numba_test_list(void) { + NB_List *lp = NULL; + int status, i; + Py_ssize_t it_count; + const char *it_item = NULL; + NB_ListIter iter; + char got_item[4] = "\x00\x00\x00\x00"; + const char *test_items_1 = NULL, *test_items_2 = NULL; + char *test_items_3 = NULL; + puts("test_list"); + + + status = numba_list_new(&lp, 4, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 4); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + CHECK(lp->is_mutable == 1); + + // flip and check the is_mutable bit + CHECK(numba_list_is_mutable(lp) == 1); + numba_list_set_is_mutable(lp, 0); + CHECK(numba_list_is_mutable(lp) == 0); + numba_list_set_is_mutable(lp, 1); + CHECK(numba_list_is_mutable(lp) == 1); + + // append 1st item, this will cause a realloc + status = numba_list_append(lp, "abc"); + CHECK(status == LIST_OK); + CHECK(lp->size == 1); + CHECK(lp->allocated == 4); + status = numba_list_getitem(lp, 0, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "abc", 4) == 0); + + // append 2nd item + status = numba_list_append(lp, "def"); + CHECK(status == LIST_OK); + CHECK(lp->size == 2); + CHECK(lp->allocated == 4); + status = numba_list_getitem(lp, 1, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "def", 4) == 0); + + // append 3rd item + status = numba_list_append(lp, "ghi"); + CHECK(status == LIST_OK); + CHECK(lp->size == 3); + CHECK(lp->allocated == 4); + status = numba_list_getitem(lp, 2, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "ghi", 4) == 0); + + // append 4th item + status = numba_list_append(lp, "jkl"); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + CHECK(lp->allocated == 4); + status = numba_list_getitem(lp, 3, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "jkl", 4) == 0); + + // append 5th item, this will cause another realloc + status = numba_list_append(lp, "mno"); + CHECK(status == LIST_OK); + CHECK(lp->size == 5); + CHECK(lp->allocated == 8); + status = numba_list_getitem(lp, 4, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "mno", 4) == 0); + + // overwrite 1st item + status = numba_list_setitem(lp, 0, "pqr"); + CHECK(status == LIST_OK); + CHECK(lp->size == 5); + CHECK(lp->allocated == 8); + status = numba_list_getitem(lp, 0, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "pqr", 4) == 0); + + // get and del 1st item, check item shift + status = numba_list_getitem(lp, 0, got_item); + status = numba_list_delitem(lp, 0); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + CHECK(lp->allocated == 8); + CHECK(memcmp(got_item, "pqr", 4) == 0); + CHECK(memcmp(lp->items, "def\x00ghi\x00jkl\x00mno\x00", 16) == 0); + + // get and del last (4th) item, no shift since only last item affected + status = numba_list_getitem(lp, 3, got_item); + status = numba_list_delitem(lp, 3); + CHECK(status == LIST_OK); + CHECK(lp->size == 3); + CHECK(lp->allocated == 6); // this also shrinks the allocation + CHECK(memcmp(got_item, "mno", 4) == 0); + CHECK(memcmp(lp->items, "def\x00ghi\x00jkl\x00", 12) == 0); + + // flip and check the is_mutable member + CHECK(numba_list_is_mutable(lp) == 1); + numba_list_set_is_mutable(lp, 0); + CHECK(numba_list_is_mutable(lp) == 0); + + // ensure that any attempts to mutate an immutable list fail + CHECK(numba_list_setitem(lp, 0, "zzz") == LIST_ERR_IMMUTABLE); + CHECK(numba_list_append(lp, "zzz") == LIST_ERR_IMMUTABLE); + CHECK(numba_list_delitem(lp, 0) == LIST_ERR_IMMUTABLE); + CHECK(numba_list_resize(lp, 23) == LIST_ERR_IMMUTABLE); + 
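+    // The rejected calls above must bail out before touching lp->items,
+    // so the stored bytes are unchanged (illustrative extra check)
+    CHECK(memcmp(lp->items, "def\x00ghi\x00jkl\x00", 12) == 0);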
CHECK(numba_list_delete_slice(lp, 0, 3, 1) == LIST_ERR_IMMUTABLE); + + // ensure that all attempts to query/read from and immutable list succeed + CHECK(numba_list_length(lp) == 3); + status = numba_list_getitem(lp, 0, got_item); + CHECK(status == LIST_OK); + CHECK(memcmp(got_item, "def", 4) == 0); + + // flip the is_mutable member back and check + numba_list_set_is_mutable(lp, 1); + CHECK(numba_list_is_mutable(lp) == 1); + + // test iterator + CHECK(lp->size > 0); + numba_list_iter(&iter, lp); + it_count = 0; + CHECK(iter.parent == lp); + CHECK(iter.pos == it_count); + + // current contents of list + test_items_1 = "def\x00ghi\x00jkl\x00"; + while ( (status = numba_list_iter_next(&iter, &it_item)) == LIST_OK) { + it_count += 1; + CHECK(iter.pos == it_count); // check iterator position + CHECK(it_item != NULL); // quick check item is non-null + // go fishing in test_items_1 + CHECK(memcmp((const char *)test_items_1 + ((it_count - 1) * 4), it_item, 4) == 0); + } + + CHECK(status == LIST_ERR_ITER_EXHAUSTED); + CHECK(lp->size == it_count); + + // free existing list + numba_list_free(lp); + + // test growth upon append and shrink during delitem + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + + // first, grow the list + // Use exactly 17 elements, should go through the allocation pattern: + // 0, 4, 8, 16, 25 + for (i = 0; i < 17 ; i++) { + switch(i) { + // Check the allocation before + case 0: CHECK(lp->allocated == 0); break; + case 4: CHECK(lp->allocated == 4); break; + case 8: CHECK(lp->allocated == 8); break; + case 16: CHECK(lp->allocated == 16); break; + } + status = numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + switch(i) { + // Check that the growth happened accordingly + case 0: CHECK(lp->allocated == 4); break; + case 4: CHECK(lp->allocated == 8); break; + case 8: CHECK(lp->allocated == 16); break; + case 16: CHECK(lp->allocated == 25); break; + } + } + CHECK(lp->size == 17); + + // Check current contents of list + test_items_2 = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_2, 17) == 0); + + // Now, delete them again and check that list shrinks + for (i = 17; i > 0 ; i--) { + switch(i) { + // Check the allocation before delitem + case 17: CHECK(lp->allocated == 25); break; + case 12: CHECK(lp->allocated == 25); break; + case 9: CHECK(lp->allocated == 18); break; + case 6: CHECK(lp->allocated == 12); break; + case 4: CHECK(lp->allocated == 8); break; + case 3: CHECK(lp->allocated == 6); break; + case 2: CHECK(lp->allocated == 5); break; + case 1: CHECK(lp->allocated == 4); break; + } + status = numba_list_getitem(lp, i-1, got_item); + status = numba_list_delitem(lp, i-1); + CHECK(status == LIST_OK); + switch(i) { + // Check that the shrink happened accordingly + case 17: CHECK(lp->allocated == 25); break; + case 12: CHECK(lp->allocated == 18); break; + case 9: CHECK(lp->allocated == 12); break; + case 6: CHECK(lp->allocated == 8); break; + case 4: CHECK(lp->allocated == 6); break; + case 3: CHECK(lp->allocated == 5); break; + case 2: CHECK(lp->allocated == 4); break; + case 1: CHECK(lp->allocated == 0); break; + } + } + // free existing list + numba_list_free(lp); + + + // Setup list for testing delete_slice + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + for (i = 0; i < 17 ; i++) { + status = 
numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + } + CHECK(lp->size == 17); + test_items_3 = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_3, 17) == 0); + + // delete multiple elements from the middle + status = numba_list_delete_slice(lp, 2, 5, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 14); + test_items_3 = "\x00\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_3, 14) == 0); + + // delete single element from start + status = numba_list_delete_slice(lp, 0, 1, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 13); + test_items_3 = "\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"; + CHECK(memcmp(lp->items, test_items_3, 13) == 0); + + // delete single element from end + status = numba_list_delete_slice(lp, 12, 13, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 12); + test_items_3 = "\x01\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"; + CHECK(memcmp(lp->items, test_items_3, 12) == 0); + + // delete single element from middle + status = numba_list_delete_slice(lp, 4, 5, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 11); + test_items_3 = "\x01\x05\x06\x07\x09\x0a\x0b\x0c\x0d\x0e\x0f"; + CHECK(memcmp(lp->items, test_items_3, 11) == 0); + + // delete all elements except first and last + status = numba_list_delete_slice(lp, 1, 10, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 2); + test_items_3 = "\x01\x0f"; + CHECK(memcmp(lp->items, test_items_3, 2) == 0); + + // delete all remaining elements + status = numba_list_delete_slice(lp, 0, lp->size, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 0); + test_items_3 = ""; + CHECK(memcmp(lp->items, test_items_3, 0) == 0); + + // free existing list + numba_list_free(lp); + + // Setup list for testing delete_slice with non unary step + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + CHECK(lp->size == 0); + CHECK(lp->allocated == 0); + for (i = 0; i < 17 ; i++) { + status = numba_list_append(lp, (const char*)&i); + CHECK(status == LIST_OK); + } + CHECK(lp->size == 17); + + // delete all items with odd index + status = numba_list_delete_slice(lp, 0, 17, 2); + CHECK(status == LIST_OK); + CHECK(lp->size == 8); + test_items_3 = "\x01\x03\x05\x07\x09\x0b\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 8) == 0); + + // delete with a step of 4, starting at index 1 + status = numba_list_delete_slice(lp, 1, 8, 4); + CHECK(status == LIST_OK); + CHECK(lp->size == 6); + test_items_3 = "\x01\x05\x07\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 6) == 0); + + // delete with a step of 2, but finish before end of list + status = numba_list_delete_slice(lp, 0, 4, 2); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + test_items_3 = "\x05\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 4) == 0); + + // no-op on empty slice + status = numba_list_delete_slice(lp, 0, 0, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + test_items_3 = "\x05\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 4) == 0); + + // no-op on empty slice, non-zero index + status = numba_list_delete_slice(lp, 2, 2, 1); + CHECK(status == LIST_OK); + CHECK(lp->size == 4); + test_items_3 = "\x05\x09\x0d\x0f"; + CHECK(memcmp(lp->items, test_items_3, 4) == 0); + + // free list and return 0 + numba_list_free(lp); + + // Setup list for testing delete_slice with negative step + status = numba_list_new(&lp, 1, 0); + CHECK(status == LIST_OK); + CHECK(lp->item_size == 1); + 
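+    // (Worked example for the step < 0 path of numba_list_delete_slice:
+    // the (start=16, stop=-1, step=-2) call below on 17 items computes
+    // slicelength == 9; the index reversal then rewrites it to the
+    // equivalent forward slice start=0, stop=17, step=2, deleting the
+    // even indices and keeping the 8 odd ones.)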
    CHECK(lp->size == 0);
+    CHECK(lp->allocated == 0);
+    for (i = 0; i < 17 ; i++) {
+        status = numba_list_append(lp, (const char*)&i);
+        CHECK(status == LIST_OK);
+    }
+    CHECK(lp->size == 17);
+
+    // delete all items using unary negative slice
+    status = numba_list_delete_slice(lp, 16, -1, -1);
+    CHECK(status == LIST_OK);
+    CHECK(lp->size == 0);
+
+    // refill list
+    for (i = 0; i < 17 ; i++) {
+        status = numba_list_append(lp, (const char*)&i);
+        CHECK(status == LIST_OK);
+    }
+
+    // delete every other item using a negative slice with step -2;
+    // need to start at the index of the last item (16) and
+    // go beyond the first item, i.e. to -1 in C
+    status = numba_list_delete_slice(lp, 16, -1, -2);
+    CHECK(status == LIST_OK);
+    CHECK(lp->size == 8);
+    test_items_3 = "\x01\x03\x05\x07\x09\x0b\x0d\x0f";
+    CHECK(memcmp(lp->items, test_items_3, 8) == 0);
+
+    // free list and return 0
+    numba_list_free(lp);
+    return 0;
+}
+
+#undef CHECK
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.h
new file mode 100644
index 000000000..848ffe640
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/listobject.h
@@ -0,0 +1,135 @@
+/* Adapted from CPython3.7 Include/listobject.h
+ *
+ * The exact commit-id of the relevant file is:
+ *
+ * https://github.com/python/cpython/blob/51ddab8dae056867f3595ab3400bffc93f67c8d4/Include/listobject.h
+ */
+
+#ifndef NUMBA_LIST_H
+#define NUMBA_LIST_H
+
+#include "Python.h"
+#include "cext.h"
+
+typedef void (*list_refcount_op_t)(const void*);
+
+typedef struct {
+    list_refcount_op_t item_incref;
+    list_refcount_op_t item_decref;
+} list_type_based_methods_table;
+
+/* This is the struct for the Numba typed list. It is largely inspired by the
+ * CPython list struct in listobject.h. In essence the list is a homogeneously
+ * typed container that can grow and shrink upon insertion and deletion. This
+ * means that appending an item to, or removing an item from, the end of the
+ * list has an O(1) amortized runtime. This matches the behaviour of the
+ * CPython list type and it will grow with the same increments.
+ *
+ * 'items' contains space for 'allocated' elements. The number
+ * currently in use is 'size'. The size in bytes of the items stored in the
+ * list is given by 'item_size'.
+ *
+ * Invariants:
+ *     0 <= size <= allocated
+ *     len(list) == size
+ *     items == NULL implies size == allocated == 0
+ *
+ * FIXME: list.sort() temporarily sets allocated to -1 to detect mutations.
+ *
+ * Items must normally not be NULL, except during construction when
+ * the list is not yet visible outside the function that builds it.
+ *
+ * Additionally, this list has a boolean member 'is_mutable' that can be used
+ * to set a list as immutable. Two functions to query and set this member are
+ * provided. Any attempt to mutate an immutable list will result in a status
+ * of LIST_ERR_IMMUTABLE.
+ */
+typedef struct {
+    /* size of the list in items */
+    Py_ssize_t size;
+    /* size of the list items in bytes */
+    Py_ssize_t item_size;
+    /* total allocated slots in items */
+    Py_ssize_t allocated;
+    /* is the list mutable */
+    int is_mutable;
+    /* method table for type-dependent operations */
+    list_type_based_methods_table methods;
+    /* array/pointer for items.
Interpretation is governed by item_size */ + char * items; +} NB_List; + + +typedef struct { + /* parent list */ + NB_List *parent; + /* list size */ + Py_ssize_t size; + /* iterator position; indicates the next position to read */ + Py_ssize_t pos; +} NB_ListIter; + +NUMBA_EXPORT_FUNC(void) +numba_list_set_method_table(NB_List *lp, list_type_based_methods_table *methods); + +NUMBA_EXPORT_FUNC(int) +numba_list_new(NB_List **out, Py_ssize_t item_size, Py_ssize_t allocated); + +NUMBA_EXPORT_FUNC(void) +numba_list_free(NB_List *lp); + +NUMBA_EXPORT_FUNC(char *) +numba_list_base_ptr(NB_List *lp); + +NUMBA_EXPORT_FUNC(Py_ssize_t) +numba_list_size_address(NB_List *lp); + +NUMBA_EXPORT_FUNC(Py_ssize_t) +numba_list_length(NB_List *lp); + +NUMBA_EXPORT_FUNC(Py_ssize_t) +numba_list_allocated(NB_List *lp); + +NUMBA_EXPORT_FUNC(int) +numba_list_is_mutable(NB_List *lp); + +NUMBA_EXPORT_FUNC(void) +numba_list_set_is_mutable(NB_List *lp, int is_mutable); + +NUMBA_EXPORT_FUNC(int) +numba_list_setitem(NB_List *lp, Py_ssize_t index, const char *item); + +NUMBA_EXPORT_FUNC(int) +numba_list_getitem(NB_List *lp, Py_ssize_t index, char *out); + +NUMBA_EXPORT_FUNC(int) +numba_list_append(NB_List *lp, const char *item); + +// FIXME: should this be public? +NUMBA_EXPORT_FUNC(int) +numba_list_resize(NB_List *lp, Py_ssize_t newsize); + +NUMBA_EXPORT_FUNC(int) +numba_list_delitem(NB_List *lp, Py_ssize_t index); + +NUMBA_EXPORT_FUNC(int) +numba_list_delete_slice(NB_List *lp, + Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step); + +NUMBA_EXPORT_FUNC(size_t) +numba_list_iter_sizeof(void); + +NUMBA_EXPORT_FUNC(void) +numba_list_iter(NB_ListIter *it, NB_List *l); + +NUMBA_EXPORT_FUNC(int) +numba_list_iter_next(NB_ListIter *it, const char **item_ptr); + +NUMBA_EXPORT_FUNC(int) +numba_test_list(void); + +#endif diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/utils.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/utils.c new file mode 100644 index 000000000..17f4992f4 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cext/utils.c @@ -0,0 +1,8 @@ +#include "cext.h" + +/* Align size *sz* to pointer width */ +Py_ssize_t +aligned_size(Py_ssize_t sz) { + Py_ssize_t alignment = sizeof(void*); + return sz + (alignment - sz % alignment) % alignment; +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/__init__.py new file mode 100644 index 000000000..231a09795 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/__init__.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +# NOTE: The following imports are adapted to use as a vendored subpackage. +# from https://github.com/cloudpipe/cloudpickle/blob/d3279a0689b769d5315fc6ff00cd0f5897844526/cloudpickle/init.py +from .cloudpickle import * # noqa +from .cloudpickle_fast import CloudPickler, dumps, dump # noqa + +# Conform to the convention used by python serialization libraries, which +# expose their Pickler subclass at top-level under the "Pickler" name. 
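+# For example (illustrative sketch), code written against the stdlib
+# pattern
+#     pickle.Pickler(file, protocol).dump(obj)
+# can be pointed at this module unchanged:
+#     cloudpickle.Pickler(file, protocol).dump(obj)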
+Pickler = CloudPickler + +__version__ = '1.6.0' diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle.py new file mode 100644 index 000000000..f7fd4dcbf --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle.py @@ -0,0 +1,848 @@ +""" +This class is defined to override standard pickle functionality + +The goals of it follow: +-Serialize lambdas and nested functions to compiled byte code +-Deal with main module correctly +-Deal with other non-serializable objects + +It does not include an unpickler, as standard python unpickling suffices. + +This module was extracted from the `cloud` package, developed by `PiCloud, Inc. +`_. + +Copyright (c) 2012, Regents of the University of California. +Copyright (c) 2009 `PiCloud, Inc. `_. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the University of California, Berkeley nor the + names of its contributors may be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+""" +from __future__ import print_function + +import builtins +import dis +import opcode +import platform +import sys +import types +import weakref +import uuid +import threading +import typing +import warnings + +from .compat import pickle +from typing import Generic, Union, Tuple, Callable +from pickle import _getattribute +from importlib._bootstrap import _find_spec + +try: # pragma: no branch + import typing_extensions as _typing_extensions + from typing_extensions import Literal, Final +except ImportError: + _typing_extensions = Literal = Final = None + +if sys.version_info >= (3, 5, 3): + from typing import ClassVar +else: # pragma: no cover + ClassVar = None + +if sys.version_info >= (3, 8): + from types import CellType +else: + def f(): + a = 1 + + def g(): + return a + return g + CellType = type(f().__closure__[0]) + + +# cloudpickle is meant for inter process communication: we expect all +# communicating processes to run the same Python version hence we favor +# communication speed over compatibility: +DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL + +# Track the provenance of reconstructed dynamic classes to make it possible to +# reconstruct instances from the matching singleton class definition when +# appropriate and preserve the usual "isinstance" semantics of Python objects. +_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() +_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() +_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() +_DYNAMIC_CLASS_TRACKER_REUSING = weakref.WeakSet() + +PYPY = platform.python_implementation() == "PyPy" + +builtin_code_type = None +if PYPY: + # builtin-code objects only exist in pypy + builtin_code_type = type(float.__new__.__code__) + +_extract_code_globals_cache = weakref.WeakKeyDictionary() + + +def _get_or_create_tracker_id(class_def): + with _DYNAMIC_CLASS_TRACKER_LOCK: + class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) + if class_tracker_id is None: + class_tracker_id = uuid.uuid4().hex + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def + return class_tracker_id + + +def _lookup_class_or_track(class_tracker_id, class_def): + if class_tracker_id is not None: + with _DYNAMIC_CLASS_TRACKER_LOCK: + orig_class_def = class_def + class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault( + class_tracker_id, class_def) + _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id + # Check if we are reusing a previous class_def + if orig_class_def is not class_def: + # Remember the class_def is being reused + _DYNAMIC_CLASS_TRACKER_REUSING.add(class_def) + return class_def + + +def _whichmodule(obj, name): + """Find the module an object belongs to. + + This function differs from ``pickle.whichmodule`` in two ways: + - it does not mangle the cases where obj's module is __main__ and obj was + not found in any module. + - Errors arising during module introspection are ignored, as those errors + are considered unwanted side effects. + """ + if sys.version_info[:2] < (3, 7) and isinstance(obj, typing.TypeVar): # pragma: no branch # noqa + # Workaround bug in old Python versions: prior to Python 3.7, + # T.__module__ would always be set to "typing" even when the TypeVar T + # would be defined in a different module. + # + # For such older Python versions, we ignore the __module__ attribute of + # TypeVar instances and instead exhaustively lookup those instances in + # all currently imported modules. 
+        module_name = None
+    else:
+        module_name = getattr(obj, '__module__', None)
+
+    if module_name is not None:
+        return module_name
+    # Protect the iteration by using a copy of sys.modules against dynamic
+    # modules that trigger imports of other modules upon calls to getattr or
+    # other threads importing at the same time.
+    for module_name, module in sys.modules.copy().items():
+        # Some modules such as coverage can inject non-module objects inside
+        # sys.modules
+        if (
+                module_name == '__main__' or
+                module is None or
+                not isinstance(module, types.ModuleType)
+        ):
+            continue
+        try:
+            if _getattribute(module, name)[0] is obj:
+                return module_name
+        except Exception:
+            pass
+    return None
+
+
+def _is_importable(obj, name=None):
+    """Dispatcher utility to test the importability of various constructs."""
+    if isinstance(obj, types.FunctionType):
+        return _lookup_module_and_qualname(obj, name=name) is not None
+    elif issubclass(type(obj), type):
+        return _lookup_module_and_qualname(obj, name=name) is not None
+    elif isinstance(obj, types.ModuleType):
+        # We assume that sys.modules is primarily used as a cache mechanism
+        # for the Python import machinery. Checking if a module has been
+        # added to sys.modules is therefore a cheap and simple heuristic to
+        # tell us whether we can assume that a given module could be
+        # imported by name in another Python process.
+        return obj.__name__ in sys.modules
+    else:
+        raise TypeError(
+            "cannot check importability of {} instances".format(
+                type(obj).__name__)
+        )
+
+
+def _lookup_module_and_qualname(obj, name=None):
+    if name is None:
+        name = getattr(obj, '__qualname__', None)
+    if name is None:  # pragma: no cover
+        # This used to be needed for Python 2.7 support but is probably not
+        # needed anymore. However we keep the __name__ introspection in case
+        # users of cloudpickle rely on this old behavior for unknown reasons.
+        name = getattr(obj, '__name__', None)
+
+    module_name = _whichmodule(obj, name)
+
+    if module_name is None:
+        # In this case, obj.__module__ is None AND obj was not found in any
+        # imported module. obj is thus treated as dynamic.
+        return None
+
+    if module_name == "__main__":
+        return None
+
+    # Note: if module_name is in sys.modules, the corresponding module is
+    # assumed importable at unpickling time. See #357
+    module = sys.modules.get(module_name, None)
+    if module is None:
+        # The main reason why obj's module would not be imported is that this
+        # module has been dynamically created, using for example
+        # types.ModuleType. The other possibility is that module was removed
+        # from sys.modules after obj was created/imported. But this case is
+        # not supported, as the standard pickle does not support it either.
+        return None
+
+    try:
+        obj2, parent = _getattribute(module, name)
+    except AttributeError:
+        # obj was not found inside the module it points to
+        return None
+    if obj2 is not obj:
+        return None
+    return module, name
+
+
+def _extract_code_globals(co):
+    """
+    Find all global names read or written to by codeblock co
+    """
+    out_names = _extract_code_globals_cache.get(co)
+    if out_names is None:
+        names = co.co_names
+        out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}
+
+        # Declaring a function inside another one using the "def ..."
+        # syntax generates a constant code object corresponding to the
+        # nested function. As the nested function may itself need global
+        # variables, we need to introspect its code, extract its globals
+        # (look for code objects in its co_consts attribute..)
and + # add the result to code_globals + if co.co_consts: + for const in co.co_consts: + if isinstance(const, types.CodeType): + out_names |= _extract_code_globals(const) + + _extract_code_globals_cache[co] = out_names + + return out_names + + +def _find_imported_submodules(code, top_level_dependencies): + """ + Find currently imported submodules used by a function. + + Submodules used by a function need to be detected and referenced for the + function to work correctly at depickling time. Because submodules can be + referenced as attribute of their parent package (``package.submodule``), we + need a special introspection technique that does not rely on GLOBAL-related + opcodes to find references of them in a code object. + + Example: + ``` + import concurrent.futures + import cloudpickle + def func(): + x = concurrent.futures.ThreadPoolExecutor + if __name__ == '__main__': + cloudpickle.dumps(func) + ``` + The globals extracted by cloudpickle in the function's state include the + concurrent package, but not its submodule (here, concurrent.futures), which + is the module used by func. Find_imported_submodules will detect the usage + of concurrent.futures. Saving this module alongside with func will ensure + that calling func once depickled does not fail due to concurrent.futures + not being imported + """ + + subimports = [] + # check if any known dependency is an imported package + for x in top_level_dependencies: + if (isinstance(x, types.ModuleType) and + hasattr(x, '__package__') and x.__package__): + # check if the package has any currently loaded sub-imports + prefix = x.__name__ + '.' + # A concurrent thread could mutate sys.modules, + # make sure we iterate over a copy to avoid exceptions + for name in list(sys.modules): + # Older versions of pytest will add a "None" module to + # sys.modules. + if name is not None and name.startswith(prefix): + # check whether the function can address the sub-module + tokens = set(name[len(prefix):].split('.')) + if not tokens - set(code.co_names): + subimports.append(sys.modules[name]) + return subimports + + +def cell_set(cell, value): + """Set the value of a closure cell. + + The point of this function is to set the cell_contents attribute of a cell + after its creation. This operation is necessary in case the cell contains a + reference to the function the cell belongs to, as when calling the + function's constructor + ``f = types.FunctionType(code, globals, name, argdefs, closure)``, + closure will not be able to contain the yet-to-be-created f. + + In Python3.7, cell_contents is writeable, so setting the contents of a cell + can be done simply using + >>> cell.cell_contents = value + + In earlier Python3 versions, the cell_contents attribute of a cell is read + only, but this limitation can be worked around by leveraging the Python 3 + ``nonlocal`` keyword. + + In Python2 however, this attribute is read only, and there is no + ``nonlocal`` keyword. For this reason, we need to come up with more + complicated hacks to set this attribute. + + The chosen approach is to create a function with a STORE_DEREF opcode, + which sets the content of a closure variable. Typically: + + >>> def inner(value): + ... lambda: cell # the lambda makes cell a closure + ... cell = value # cell is a closure, so this triggers a STORE_DEREF + + (Note that in Python2, A STORE_DEREF can never be triggered from an inner + function. The function g for example here + >>> def f(var): + ... def g(): + ... var += 1 + ... 
return g + + will not modify the closure variable ``var```inplace, but instead try to + load a local variable var and increment it. As g does not assign the local + variable ``var`` any initial value, calling f(1)() will fail at runtime.) + + Our objective is to set the value of a given cell ``cell``. So we need to + somewhat reference our ``cell`` object into the ``inner`` function so that + this object (and not the smoke cell of the lambda function) gets affected + by the STORE_DEREF operation. + + In inner, ``cell`` is referenced as a cell variable (an enclosing variable + that is referenced by the inner function). If we create a new function + cell_set with the exact same code as ``inner``, but with ``cell`` marked as + a free variable instead, the STORE_DEREF will be applied on its closure - + ``cell``, which we can specify explicitly during construction! The new + cell_set variable thus actually sets the contents of a specified cell! + + Note: we do not make use of the ``nonlocal`` keyword to set the contents of + a cell in early python3 versions to limit possible syntax errors in case + test and checker libraries decide to parse the whole file. + """ + + if sys.version_info[:2] >= (3, 7): # pragma: no branch + cell.cell_contents = value + else: + _cell_set = types.FunctionType( + _cell_set_template_code, {}, '_cell_set', (), (cell,),) + _cell_set(value) + + +def _make_cell_set_template_code(): + def _cell_set_factory(value): + lambda: cell + cell = value + + co = _cell_set_factory.__code__ + + _cell_set_template_code = types.CodeType( + co.co_argcount, + co.co_kwonlyargcount, # Python 3 only argument + co.co_nlocals, + co.co_stacksize, + co.co_flags, + co.co_code, + co.co_consts, + co.co_names, + co.co_varnames, + co.co_filename, + co.co_name, + co.co_firstlineno, + co.co_lnotab, + co.co_cellvars, # co_freevars is initialized with co_cellvars + (), # co_cellvars is made empty + ) + return _cell_set_template_code + + +if sys.version_info[:2] < (3, 7): + _cell_set_template_code = _make_cell_set_template_code() + +# relevant opcodes +STORE_GLOBAL = opcode.opmap['STORE_GLOBAL'] +DELETE_GLOBAL = opcode.opmap['DELETE_GLOBAL'] +LOAD_GLOBAL = opcode.opmap['LOAD_GLOBAL'] +GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL) +HAVE_ARGUMENT = dis.HAVE_ARGUMENT +EXTENDED_ARG = dis.EXTENDED_ARG + + +_BUILTIN_TYPE_NAMES = {} +for k, v in types.__dict__.items(): + if type(v) is type: + _BUILTIN_TYPE_NAMES[v] = k + + +def _builtin_type(name): + if name == "ClassType": # pragma: no cover + # Backward compat to load pickle files generated with cloudpickle + # < 1.3 even if loading pickle files from older versions is not + # officially supported. + return type + return getattr(types, name) + + +def _walk_global_ops(code): + """ + Yield (opcode, argument number) tuples for all + global-referencing instructions in *code*. 
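+
+    (For example, ``def f(): print(x)`` compiles to two LOAD_GLOBAL
+    instructions, so this yields two tuples whose arg indices name
+    ``print`` and ``x`` in ``co_names``.)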
+ """ + for instr in dis.get_instructions(code): + op = instr.opcode + if op in GLOBAL_OPS: + yield op, instr.arg + + +def _extract_class_dict(cls): + """Retrieve a copy of the dict of a class without the inherited methods""" + clsdict = dict(cls.__dict__) # copy dict proxy to a dict + if len(cls.__bases__) == 1: + inherited_dict = cls.__bases__[0].__dict__ + else: + inherited_dict = {} + for base in reversed(cls.__bases__): + inherited_dict.update(base.__dict__) + to_remove = [] + for name, value in clsdict.items(): + try: + base_value = inherited_dict[name] + if value is base_value: + to_remove.append(name) + except KeyError: + pass + for name in to_remove: + clsdict.pop(name) + return clsdict + + +if sys.version_info[:2] < (3, 7): # pragma: no branch + def _is_parametrized_type_hint(obj): + # This is very cheap but might generate false positives. + # general typing Constructs + is_typing = getattr(obj, '__origin__', None) is not None + + # typing_extensions.Literal + is_litteral = getattr(obj, '__values__', None) is not None + + # typing_extensions.Final + is_final = getattr(obj, '__type__', None) is not None + + # typing.Union/Tuple for old Python 3.5 + is_union = getattr(obj, '__union_params__', None) is not None + is_tuple = getattr(obj, '__tuple_params__', None) is not None + is_callable = ( + getattr(obj, '__result__', None) is not None and + getattr(obj, '__args__', None) is not None + ) + return any((is_typing, is_litteral, is_final, is_union, is_tuple, + is_callable)) + + def _create_parametrized_type_hint(origin, args): + return origin[args] +else: + _is_parametrized_type_hint = None + _create_parametrized_type_hint = None + + +def parametrized_type_hint_getinitargs(obj): + # The distorted type check semantic for typing construct becomes: + # ``type(obj) is type(TypeHint)``, which means "obj is a + # parametrized TypeHint" + if type(obj) is type(Literal): # pragma: no branch + initargs = (Literal, obj.__values__) + elif type(obj) is type(Final): # pragma: no branch + initargs = (Final, obj.__type__) + elif type(obj) is type(ClassVar): + initargs = (ClassVar, obj.__type__) + elif type(obj) is type(Generic): + parameters = obj.__parameters__ + if len(obj.__parameters__) > 0: + # in early Python 3.5, __parameters__ was sometimes + # preferred to __args__ + initargs = (obj.__origin__, parameters) + + else: + initargs = (obj.__origin__, obj.__args__) + elif type(obj) is type(Union): + if sys.version_info < (3, 5, 3): # pragma: no cover + initargs = (Union, obj.__union_params__) + else: + initargs = (Union, obj.__args__) + elif type(obj) is type(Tuple): + if sys.version_info < (3, 5, 3): # pragma: no cover + initargs = (Tuple, obj.__tuple_params__) + else: + initargs = (Tuple, obj.__args__) + elif type(obj) is type(Callable): + if sys.version_info < (3, 5, 3): # pragma: no cover + args = obj.__args__ + result = obj.__result__ + if args != Ellipsis: + if isinstance(args, tuple): + args = list(args) + else: + args = [args] + else: + (*args, result) = obj.__args__ + if len(args) == 1 and args[0] is Ellipsis: + args = Ellipsis + else: + args = list(args) + initargs = (Callable, (args, result)) + else: # pragma: no cover + raise pickle.PicklingError( + "Cloudpickle Error: Unknown type {}".format(type(obj)) + ) + return initargs + + +# Tornado support + +def is_tornado_coroutine(func): + """ + Return whether *func* is a Tornado coroutine function. + Running coroutines are not supported. 
+ """ + if 'tornado.gen' not in sys.modules: + return False + gen = sys.modules['tornado.gen'] + if not hasattr(gen, "is_coroutine_function"): + # Tornado version is too old + return False + return gen.is_coroutine_function(func) + + +def _rebuild_tornado_coroutine(func): + from tornado import gen + return gen.coroutine(func) + + +# including pickles unloading functions in this namespace +load = pickle.load +loads = pickle.loads + + +# hack for __import__ not working as desired +def subimport(name): + __import__(name) + return sys.modules[name] + + +def dynamic_subimport(name, vars): + mod = types.ModuleType(name) + mod.__dict__.update(vars) + mod.__dict__['__builtins__'] = builtins.__dict__ + return mod + + +def _gen_ellipsis(): + return Ellipsis + + +def _gen_not_implemented(): + return NotImplemented + + +def _get_cell_contents(cell): + try: + return cell.cell_contents + except ValueError: + # sentinel used by ``_fill_function`` which will leave the cell empty + return _empty_cell_value + + +def instance(cls): + """Create a new instance of a class. + + Parameters + ---------- + cls : type + The class to create an instance of. + + Returns + ------- + instance : cls + A new instance of ``cls``. + """ + return cls() + + +@instance +class _empty_cell_value(object): + """sentinel for empty closures + """ + @classmethod + def __reduce__(cls): + return cls.__name__ + + +def _fill_function(*args): + """Fills in the rest of function data into the skeleton function object + + The skeleton itself is create by _make_skel_func(). + """ + if len(args) == 2: + func = args[0] + state = args[1] + elif len(args) == 5: + # Backwards compat for cloudpickle v0.4.0, after which the `module` + # argument was introduced + func = args[0] + keys = ['globals', 'defaults', 'dict', 'closure_values'] + state = dict(zip(keys, args[1:])) + elif len(args) == 6: + # Backwards compat for cloudpickle v0.4.1, after which the function + # state was passed as a dict to the _fill_function it-self. + func = args[0] + keys = ['globals', 'defaults', 'dict', 'module', 'closure_values'] + state = dict(zip(keys, args[1:])) + else: + raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) + + # - At pickling time, any dynamic global variable used by func is + # serialized by value (in state['globals']). + # - At unpickling time, func's __globals__ attribute is initialized by + # first retrieving an empty isolated namespace that will be shared + # with other functions pickled from the same original module + # by the same CloudPickler instance and then updated with the + # content of state['globals'] to populate the shared isolated + # namespace with all the global variables that are specifically + # referenced for this function. + func.__globals__.update(state['globals']) + + func.__defaults__ = state['defaults'] + func.__dict__ = state['dict'] + if 'annotations' in state: + func.__annotations__ = state['annotations'] + if 'doc' in state: + func.__doc__ = state['doc'] + if 'name' in state: + func.__name__ = state['name'] + if 'module' in state: + func.__module__ = state['module'] + if 'qualname' in state: + func.__qualname__ = state['qualname'] + if 'kwdefaults' in state: + func.__kwdefaults__ = state['kwdefaults'] + # _cloudpickle_subimports is a set of submodules that must be loaded for + # the pickled function to work correctly at unpickling time. 
Now that these + # submodules are depickled (hence imported), they can be removed from the + # object's state (the object state only served as a reference holder to + # these submodules) + if '_cloudpickle_submodules' in state: + state.pop('_cloudpickle_submodules') + + cells = func.__closure__ + if cells is not None: + for cell, value in zip(cells, state['closure_values']): + if value is not _empty_cell_value: + cell_set(cell, value) + + return func + + +def _make_empty_cell(): + if False: + # trick the compiler into creating an empty cell in our lambda + cell = None + raise AssertionError('this route should not be executed') + + return (lambda: cell).__closure__[0] + + +def _make_cell(value=_empty_cell_value): + cell = _make_empty_cell() + if value is not _empty_cell_value: + cell_set(cell, value) + return cell + + +def _make_skel_func(code, cell_count, base_globals=None): + """ Creates a skeleton function object that contains just the provided + code and the correct number of cells in func_closure. All other + func attributes (e.g. func_globals) are empty. + """ + # This function is deprecated and should be removed in cloudpickle 1.7 + warnings.warn( + "A pickle file created using an old (<=1.4.1) version of cloudpicke " + "is currently being loaded. This is not supported by cloudpickle and " + "will break in cloudpickle 1.7", category=UserWarning + ) + # This is backward-compatibility code: for cloudpickle versions between + # 0.5.4 and 0.7, base_globals could be a string or None. base_globals + # should now always be a dictionary. + if base_globals is None or isinstance(base_globals, str): + base_globals = {} + + base_globals['__builtins__'] = __builtins__ + + closure = ( + tuple(_make_empty_cell() for _ in range(cell_count)) + if cell_count >= 0 else + None + ) + return types.FunctionType(code, base_globals, None, None, closure) + + +def _make_skeleton_class(type_constructor, name, bases, type_kwargs, + class_tracker_id, extra): + """Build dynamic class with an empty __dict__ to be filled once memoized + + If class_tracker_id is not None, try to lookup an existing class definition + matching that id. If none is found, track a newly reconstructed class + definition under that id so that other instances stemming from the same + class id will also reuse this class definition. + + The "extra" variable is meant to be a dict (or None) that can be used for + forward compatibility shall the need arise. + """ + skeleton_class = types.new_class( + name, bases, {'metaclass': type_constructor}, + lambda ns: ns.update(type_kwargs) + ) + return _lookup_class_or_track(class_tracker_id, skeleton_class) + + +def _rehydrate_skeleton_class(skeleton_class, class_dict): + """Put attributes from `class_dict` back on `skeleton_class`. + + See CloudPickler.save_dynamic_class for more info. + """ + registry = None + for attrname, attr in class_dict.items(): + if attrname == "_abc_impl": + registry = attr + else: + setattr(skeleton_class, attrname, attr) + if registry is not None: + for subclass in registry: + skeleton_class.register(subclass) + + return skeleton_class + + +def _make_skeleton_enum(bases, name, qualname, members, module, + class_tracker_id, extra): + """Build dynamic enum with an empty __dict__ to be filled once memoized + + The creation of the enum class is inspired by the code of + EnumMeta._create_. + + If class_tracker_id is not None, try to lookup an existing enum definition + matching that id. 
If none is found, track a newly reconstructed enum + definition under that id so that other instances stemming from the same + class id will also reuse this enum definition. + + The "extra" variable is meant to be a dict (or None) that can be used for + forward compatibility shall the need arise. + """ + # enums always inherit from their base Enum class at the last position in + # the list of base classes: + enum_base = bases[-1] + metacls = enum_base.__class__ + classdict = metacls.__prepare__(name, bases) + + for member_name, member_value in members.items(): + classdict[member_name] = member_value + enum_class = metacls.__new__(metacls, name, bases, classdict) + enum_class.__module__ = module + enum_class.__qualname__ = qualname + + return _lookup_class_or_track(class_tracker_id, enum_class) + + +def _make_typevar(name, bound, constraints, covariant, contravariant, + class_tracker_id): + tv = typing.TypeVar( + name, *constraints, bound=bound, + covariant=covariant, contravariant=contravariant + ) + if class_tracker_id is not None: + return _lookup_class_or_track(class_tracker_id, tv) + else: # pragma: nocover + # Only for Python 3.5.3 compat. + return tv + + +def _decompose_typevar(obj): + try: + class_tracker_id = _get_or_create_tracker_id(obj) + except TypeError: # pragma: nocover + # TypeVar instances are not weakref-able in Python 3.5.3 + class_tracker_id = None + return ( + obj.__name__, obj.__bound__, obj.__constraints__, + obj.__covariant__, obj.__contravariant__, + class_tracker_id, + ) + + +def _typevar_reduce(obj): + # TypeVar instances have no __qualname__ hence we pass the name explicitly. + module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + if module_and_name is None: + return (_make_typevar, _decompose_typevar(obj)) + return (getattr, module_and_name) + + +def _get_bases(typ): + if hasattr(typ, '__orig_bases__'): + # For generic types (see PEP 560) + bases_attr = '__orig_bases__' + else: + # For regular class objects + bases_attr = '__bases__' + return getattr(typ, bases_attr) + + +def _make_dict_keys(obj): + return dict.fromkeys(obj).keys() + + +def _make_dict_values(obj): + return {i: _ for i, _ in enumerate(obj)}.values() + + +def _make_dict_items(obj): + return obj.items() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle_fast.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle_fast.py new file mode 100644 index 000000000..216450495 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/cloudpickle_fast.py @@ -0,0 +1,775 @@ +""" +New, fast version of the CloudPickler. + +This new CloudPickler class can now extend the fast C Pickler instead of the +previous Python implementation of the Pickler class. Because this functionality +is only available for Python versions 3.8+, a lot of backward-compatibility +code is also removed. + +Note that the C Pickler sublassing API is CPython-specific. 
Therefore, some +guards present in cloudpickle.py that were written to handle PyPy specificities +are not present in cloudpickle_fast.py +""" +import _collections_abc +import abc +import copyreg +import io +import itertools +import logging +import sys +import struct +import types +import weakref +import typing + +from enum import Enum +from collections import ChainMap + +from .compat import pickle, Pickler +from .cloudpickle import ( + _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, + _find_imported_submodules, _get_cell_contents, _is_importable, + _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, + _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, + _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, + _is_parametrized_type_hint, PYPY, cell_set, + parametrized_type_hint_getinitargs, _create_parametrized_type_hint, + builtin_code_type, + _make_dict_keys, _make_dict_values, _make_dict_items, + _DYNAMIC_CLASS_TRACKER_REUSING, +) + + +if pickle.HIGHEST_PROTOCOL >= 5 and not PYPY: + # Shorthands similar to pickle.dump/pickle.dumps + + def dump(obj, file, protocol=None, buffer_callback=None): + """Serialize obj as bytes streamed into file + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + CloudPickler( + file, protocol=protocol, buffer_callback=buffer_callback + ).dump(obj) + + def dumps(obj, protocol=None, buffer_callback=None): + """Serialize obj as a string of bytes allocated in memory + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + with io.BytesIO() as file: + cp = CloudPickler( + file, protocol=protocol, buffer_callback=buffer_callback + ) + cp.dump(obj) + return file.getvalue() + +else: + # Shorthands similar to pickle.dump/pickle.dumps + def dump(obj, file, protocol=None): + """Serialize obj as bytes streamed into file + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. + """ + CloudPickler(file, protocol=protocol).dump(obj) + + def dumps(obj, protocol=None): + """Serialize obj as a string of bytes allocated in memory + + protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to + pickle.HIGHEST_PROTOCOL. This setting favors maximum communication + speed between processes running the same Python version. + + Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure + compatibility with older versions of Python. 
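+
+        A minimal round-trip sketch (same interpreter; ``loads`` is the
+        alias defined just below):
+
+        >>> payload = dumps(lambda x: x + 1)
+        >>> loads(payload)(41)
+        42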
+ """ + with io.BytesIO() as file: + cp = CloudPickler(file, protocol=protocol) + cp.dump(obj) + return file.getvalue() + + +load, loads = pickle.load, pickle.loads + + +# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS +# ------------------------------------------------- + +def _class_getnewargs(obj): + type_kwargs = {} + if "__slots__" in obj.__dict__: + type_kwargs["__slots__"] = obj.__slots__ + + __dict__ = obj.__dict__.get('__dict__', None) + if isinstance(__dict__, property): + type_kwargs['__dict__'] = __dict__ + + return (type(obj), obj.__name__, _get_bases(obj), type_kwargs, + _get_or_create_tracker_id(obj), None) + + +def _enum_getnewargs(obj): + members = dict((e.name, e.value) for e in obj) + return (obj.__bases__, obj.__name__, obj.__qualname__, members, + obj.__module__, _get_or_create_tracker_id(obj), None) + + +# COLLECTION OF OBJECTS RECONSTRUCTORS +# ------------------------------------ +def _file_reconstructor(retval): + return retval + + +# COLLECTION OF OBJECTS STATE GETTERS +# ----------------------------------- +def _function_getstate(func): + # - Put func's dynamic attributes (stored in func.__dict__) in state. These + # attributes will be restored at unpickling time using + # f.__dict__.update(state) + # - Put func's members into slotstate. Such attributes will be restored at + # unpickling time by iterating over slotstate and calling setattr(func, + # slotname, slotvalue) + slotstate = { + "__name__": func.__name__, + "__qualname__": func.__qualname__, + "__annotations__": func.__annotations__, + "__kwdefaults__": func.__kwdefaults__, + "__defaults__": func.__defaults__, + "__module__": func.__module__, + "__doc__": func.__doc__, + "__closure__": func.__closure__, + } + + f_globals_ref = _extract_code_globals(func.__code__) + f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in + func.__globals__} + + closure_values = ( + list(map(_get_cell_contents, func.__closure__)) + if func.__closure__ is not None else () + ) + + # Extract currently-imported submodules used by func. Storing these modules + # in a smoke _cloudpickle_subimports attribute of the object's state will + # trigger the side effect of importing these modules at unpickling time + # (which is necessary for func to work correctly once depickled) + slotstate["_cloudpickle_submodules"] = _find_imported_submodules( + func.__code__, itertools.chain(f_globals.values(), closure_values)) + slotstate["__globals__"] = f_globals + + state = func.__dict__ + return state, slotstate + + +def _class_getstate(obj): + clsdict = _extract_class_dict(obj) + clsdict.pop('__weakref__', None) + + if issubclass(type(obj), abc.ABCMeta): + # If obj is an instance of an ABCMeta subclass, dont pickle the + # cache/negative caches populated during isinstance/issubclass + # checks, but pickle the list of registered subclasses of obj. 
+ clsdict.pop('_abc_cache', None) + clsdict.pop('_abc_negative_cache', None) + clsdict.pop('_abc_negative_cache_version', None) + registry = clsdict.pop('_abc_registry', None) + if registry is None: + # in Python3.7+, the abc caches and registered subclasses of a + # class are bundled into the single _abc_impl attribute + clsdict.pop('_abc_impl', None) + (registry, _, _, _) = abc._get_dump(obj) + + clsdict["_abc_impl"] = [subclass_weakref() + for subclass_weakref in registry] + else: + # In the above if clause, registry is a set of weakrefs -- in + # this case, registry is a WeakSet + clsdict["_abc_impl"] = [type_ for type_ in registry] + + if "__slots__" in clsdict: + # pickle string length optimization: member descriptors of obj are + # created automatically from obj's __slots__ attribute, no need to + # save them in obj's state + if isinstance(obj.__slots__, str): + clsdict.pop(obj.__slots__) + else: + for k in obj.__slots__: + clsdict.pop(k, None) + + clsdict.pop('__dict__', None) # unpicklable property object + + return (clsdict, {}) + + +def _enum_getstate(obj): + clsdict, slotstate = _class_getstate(obj) + + members = dict((e.name, e.value) for e in obj) + # Cleanup the clsdict that will be passed to _rehydrate_skeleton_class: + # Those attributes are already handled by the metaclass. + for attrname in ["_generate_next_value_", "_member_names_", + "_member_map_", "_member_type_", + "_value2member_map_"]: + clsdict.pop(attrname, None) + for member in members: + clsdict.pop(member) + # Special handling of Enum subclasses + return clsdict, slotstate + + +# COLLECTIONS OF OBJECTS REDUCERS +# ------------------------------- +# A reducer is a function taking a single argument (obj), and that returns a +# tuple with all the necessary data to re-construct obj. Apart from a few +# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to +# correctly pickle an object. +# While many built-in objects (Exceptions objects, instances of the "object" +# class, etc), are shipped with their own built-in reducer (invoked using +# obj.__reduce__), some do not. The following methods were created to "fill +# these holes". 
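The reducer contract sketched in the comment above is the same one the standard library exposes through `copyreg`. As a minimal illustration (stdlib only, not part of this patch), a hand-written reducer for a custom type has exactly the shape returned by the `_*_reduce` helpers that follow:

```python
# Minimal sketch of the reducer contract described above, using only the
# stdlib: a reducer maps obj -> (reconstructor, args), the same shape the
# _*_reduce helpers in this file return.
import copyreg
import pickle

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

def _point_reduce(p):
    # same shape as e.g. _weakset_reduce / _mappingproxy_reduce below
    return Point, (p.x, p.y)

copyreg.pickle(Point, _point_reduce)

q = pickle.loads(pickle.dumps(Point(1, 2)))
assert (q.x, q.y) == (1, 2)
```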
+ +def _code_reduce(obj): + """codeobject reducer""" + if hasattr(obj, "co_posonlyargcount"): # pragma: no branch + args = ( + obj.co_argcount, obj.co_posonlyargcount, + obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize, + obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, + obj.co_varnames, obj.co_filename, obj.co_name, + obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, + obj.co_cellvars + ) + else: + args = ( + obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, + obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, + obj.co_names, obj.co_varnames, obj.co_filename, + obj.co_name, obj.co_firstlineno, obj.co_lnotab, + obj.co_freevars, obj.co_cellvars + ) + return types.CodeType, args + + +def _cell_reduce(obj): + """Cell (containing values of a function's free variables) reducer""" + try: + obj.cell_contents + except ValueError: # cell is empty + return _make_empty_cell, () + else: + return _make_cell, (obj.cell_contents, ) + + +def _classmethod_reduce(obj): + orig_func = obj.__func__ + return type(obj), (orig_func,) + + +def _file_reduce(obj): + """Save a file""" + import io + + if not hasattr(obj, "name") or not hasattr(obj, "mode"): + raise pickle.PicklingError( + "Cannot pickle files that do not map to an actual file" + ) + if obj is sys.stdout: + return getattr, (sys, "stdout") + if obj is sys.stderr: + return getattr, (sys, "stderr") + if obj is sys.stdin: + raise pickle.PicklingError("Cannot pickle standard input") + if obj.closed: + raise pickle.PicklingError("Cannot pickle closed files") + if hasattr(obj, "isatty") and obj.isatty(): + raise pickle.PicklingError( + "Cannot pickle files that map to tty objects" + ) + if "r" not in obj.mode and "+" not in obj.mode: + raise pickle.PicklingError( + "Cannot pickle files that are not opened for reading: %s" + % obj.mode + ) + + name = obj.name + + retval = io.StringIO() + + try: + # Read the whole file + curloc = obj.tell() + obj.seek(0) + contents = obj.read() + obj.seek(curloc) + except IOError as e: + raise pickle.PicklingError( + "Cannot pickle file %s as it cannot be read" % name + ) from e + retval.write(contents) + retval.seek(curloc) + + retval.name = name + return _file_reconstructor, (retval,) + + +def _getset_descriptor_reduce(obj): + return getattr, (obj.__objclass__, obj.__name__) + + +def _mappingproxy_reduce(obj): + return types.MappingProxyType, (dict(obj),) + + +def _memoryview_reduce(obj): + return bytes, (obj.tobytes(),) + + +def _module_reduce(obj): + if _is_importable(obj): + return subimport, (obj.__name__,) + else: + obj.__dict__.pop('__builtins__', None) + return dynamic_subimport, (obj.__name__, vars(obj)) + + +def _method_reduce(obj): + return (types.MethodType, (obj.__func__, obj.__self__)) + + +def _logger_reduce(obj): + return logging.getLogger, (obj.name,) + + +def _root_logger_reduce(obj): + return logging.getLogger, () + + +def _property_reduce(obj): + return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) + + +def _weakset_reduce(obj): + return weakref.WeakSet, (list(obj),) + + +def _dynamic_class_reduce(obj): + """ + Save a class that can't be stored as module global. + + This method is used to serialize classes that are defined inside + functions, or that otherwise can't be serialized as attribute lookups + from global modules. 
+ """ + if Enum is not None and issubclass(obj, Enum): + return ( + _make_skeleton_enum, _enum_getnewargs(obj), _enum_getstate(obj), + None, None, _class_setstate + ) + else: + return ( + _make_skeleton_class, _class_getnewargs(obj), _class_getstate(obj), + None, None, _class_setstate + ) + + +def _class_reduce(obj): + """Select the reducer depending on the dynamic nature of the class obj""" + if obj is type(None): # noqa + return type, (None,) + elif obj is type(Ellipsis): + return type, (Ellipsis,) + elif obj is type(NotImplemented): + return type, (NotImplemented,) + elif obj in _BUILTIN_TYPE_NAMES: + return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) + elif not _is_importable(obj): + return _dynamic_class_reduce(obj) + return NotImplemented + + +def _dict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), ) + + +def _dict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), ) + + +def _dict_items_reduce(obj): + return _make_dict_items, (dict(obj), ) + + +# COLLECTIONS OF OBJECTS STATE SETTERS +# ------------------------------------ +# state setters are called at unpickling time, once the object is created and +# it has to be updated to how it was at unpickling time. + + +def _function_setstate(obj, state): + """Update the state of a dynaamic function. + + As __closure__ and __globals__ are readonly attributes of a function, we + cannot rely on the native setstate routine of pickle.load_build, that calls + setattr on items of the slotstate. Instead, we have to modify them inplace. + """ + state, slotstate = state + obj.__dict__.update(state) + + obj_globals = slotstate.pop("__globals__") + obj_closure = slotstate.pop("__closure__") + # _cloudpickle_subimports is a set of submodules that must be loaded for + # the pickled function to work correctly at unpickling time. Now that these + # submodules are depickled (hence imported), they can be removed from the + # object's state (the object state only served as a reference holder to + # these submodules) + slotstate.pop("_cloudpickle_submodules") + + obj.__globals__.update(obj_globals) + obj.__globals__["__builtins__"] = __builtins__ + + if obj_closure is not None: + for i, cell in enumerate(obj_closure): + try: + value = cell.cell_contents + except ValueError: # cell is empty + continue + cell_set(obj.__closure__[i], value) + + for k, v in slotstate.items(): + setattr(obj, k, v) + + +def _class_setstate(obj, state): + # Check if class is being reused and needs bypass setstate logic. 
+def _class_setstate(obj, state):
+    # Check whether the class is being reused and needs to bypass the
+    # setstate logic.
+    if obj in _DYNAMIC_CLASS_TRACKER_REUSING:
+        return obj
+
+    state, slotstate = state
+    registry = None
+    for attrname, attr in state.items():
+        if attrname == "_abc_impl":
+            registry = attr
+        else:
+            setattr(obj, attrname, attr)
+    if registry is not None:
+        for subclass in registry:
+            obj.register(subclass)
+
+    return obj
+
+
+class CloudPickler(Pickler):
+    # set of reducers defined and used by cloudpickle (private)
+    _dispatch_table = {}
+    _dispatch_table[classmethod] = _classmethod_reduce
+    _dispatch_table[io.TextIOWrapper] = _file_reduce
+    _dispatch_table[logging.Logger] = _logger_reduce
+    _dispatch_table[logging.RootLogger] = _root_logger_reduce
+    _dispatch_table[memoryview] = _memoryview_reduce
+    _dispatch_table[property] = _property_reduce
+    _dispatch_table[staticmethod] = _classmethod_reduce
+    _dispatch_table[CellType] = _cell_reduce
+    _dispatch_table[types.CodeType] = _code_reduce
+    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
+    _dispatch_table[types.ModuleType] = _module_reduce
+    _dispatch_table[types.MethodType] = _method_reduce
+    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
+    _dispatch_table[weakref.WeakSet] = _weakset_reduce
+    _dispatch_table[typing.TypeVar] = _typevar_reduce
+    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
+    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
+    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
+
+    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)
+
+    # function reducers are defined as instance methods of CloudPickler
+    # objects, as they rely on a CloudPickler attribute (globals_ref)
+    def _dynamic_function_reduce(self, func):
+        """Reduce a function that is not pickleable via attribute lookup."""
+        newargs = self._function_getnewargs(func)
+        state = _function_getstate(func)
+        return (types.FunctionType, newargs, state, None, None,
+                _function_setstate)
+
+    def _function_reduce(self, obj):
+        """Reducer for function objects.
+
+        If obj is a top-level attribute of a file-backed module, this
+        reducer returns NotImplemented, making the CloudPickler fall back to
+        the traditional _pickle.Pickler routines to save obj. Otherwise, it
+        reduces obj using a custom cloudpickle reducer designed specifically
+        to handle dynamic functions.
+
+        As opposed to cloudpickle.py, there is no special handling for
+        builtin PyPy functions because cloudpickle_fast is CPython-specific.
+        """
+        if _is_importable(obj):
+            return NotImplemented
+        else:
+            return self._dynamic_function_reduce(obj)
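The globals-sharing caveat that `_function_getnewargs` below works around is visible from user code; a hedged sketch (assumed `numba.cloudpickle` import path, run as a script so the functions are pickled by value):

```python
# Hedged sketch of the caveat documented below: functions share one
# reconstructed namespace only when pickled in the same dumps() call.
from numba import cloudpickle  # assumed import path for this vendored copy

counter = {"n": 0}

def bump():
    counter["n"] += 1

def read():
    return counter["n"]

bump2, read2 = cloudpickle.loads(cloudpickle.dumps((bump, read)))
bump2()
assert read2() == 1   # one shared `counter` in the rebuilt globals

bump3 = cloudpickle.loads(cloudpickle.dumps(bump))
read3 = cloudpickle.loads(cloudpickle.dumps(read))
bump3()
assert read3() == 0   # separate dumps() calls, separate namespaces
```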
+ for k in ["__package__", "__name__", "__path__", "__file__"]: + if k in func.__globals__: + base_globals[k] = func.__globals__[k] + + # Do not bind the free variables before the function is created to + # avoid infinite recursion. + if func.__closure__ is None: + closure = None + else: + closure = tuple( + _make_empty_cell() for _ in range(len(code.co_freevars))) + + return code, base_globals, None, None, closure + + def dump(self, obj): + try: + return Pickler.dump(self, obj) + except RuntimeError as e: + if "recursion" in e.args[0]: + msg = ( + "Could not pickle object as excessively deep recursion " + "required." + ) + raise pickle.PicklingError(msg) from e + else: + raise + + if pickle.HIGHEST_PROTOCOL >= 5: + # `CloudPickler.dispatch` is only left for backward compatibility - note + # that when using protocol 5, `CloudPickler.dispatch` is not an + # extension of `Pickler.dispatch` dictionary, because CloudPickler + # subclasses the C-implemented Pickler, which does not expose a + # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler + # used `CloudPickler.dispatch` as a class-level attribute storing all + # reducers implemented by cloudpickle, but the attribute name was not a + # great choice given the meaning of `Cloudpickler.dispatch` when + # `CloudPickler` extends the pure-python pickler. + dispatch = dispatch_table + + # Implementation of the reducer_override callback, in order to + # efficiently serialize dynamic functions and classes by subclassing + # the C-implemented Pickler. + # TODO: decorrelate reducer_override (which is tied to CPython's + # implementation - would it make sense to backport it to pypy? - and + # pickle's protocol 5 which is implementation agnostic. Currently, the + # availability of both notions coincide on CPython's pickle and the + # pickle5 backport, but it may not be the case anymore when pypy + # implements protocol 5 + def __init__(self, file, protocol=None, buffer_callback=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__( + self, file, protocol=protocol, buffer_callback=buffer_callback + ) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + self.proto = int(protocol) + + def reducer_override(self, obj): + """Type-agnostic reducing callback for function and classes. + + For performance reasons, subclasses of the C _pickle.Pickler class + cannot register custom reducers for functions and classes in the + dispatch_table. Reducer for such types must instead implemented in + the special reducer_override method. + + Note that method will be called for any object except a few + builtin-types (int, lists, dicts etc.), which differs from reducers + in the Pickler's dispatch_table, each of them being invoked for + objects of a specific type only. + + This property comes in handy for classes: although most classes are + instances of the ``type`` metaclass, some of them can be instances + of other custom metaclasses (such as enum.EnumMeta for example). In + particular, the metaclass will likely not be known in advance, and + thus cannot be special-cased using an entry in the dispatch_table. + reducer_override, among other things, allows us to register a + reducer that will be called for any class, independently of its + type. + + + Notes: + + * reducer_override has the priority over dispatch_table-registered + reducers. 
+ * reducer_override can be used to fix other limitations of + cloudpickle for other types that suffered from type-specific + reducers, such as Exceptions. See + https://github.com/cloudpipe/cloudpickle/issues/248 + """ + if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch + return ( + _create_parametrized_type_hint, + parametrized_type_hint_getinitargs(obj) + ) + t = type(obj) + try: + is_anyclass = issubclass(t, type) + except TypeError: # t is not a class (old Boost; see SF #502085) + is_anyclass = False + + if is_anyclass: + return _class_reduce(obj) + elif isinstance(obj, types.FunctionType): + return self._function_reduce(obj) + else: + # fallback to save_global, including the Pickler's + # distpatch_table + return NotImplemented + + else: + # When reducer_override is not available, hack the pure-Python + # Pickler's types.FunctionType and type savers. Note: the type saver + # must override Pickler.save_global, because pickle.py contains a + # hard-coded call to save_global when pickling meta-classes. + dispatch = Pickler.dispatch.copy() + + def __init__(self, file, protocol=None): + if protocol is None: + protocol = DEFAULT_PROTOCOL + Pickler.__init__(self, file, protocol=protocol) + # map functions __globals__ attribute ids, to ensure that functions + # sharing the same global namespace at pickling time also share + # their global namespace at unpickling time. + self.globals_ref = {} + assert hasattr(self, 'proto') + + def _save_reduce_pickle5(self, func, args, state=None, listitems=None, + dictitems=None, state_setter=None, obj=None): + save = self.save + write = self.write + self.save_reduce( + func, args, state=None, listitems=listitems, + dictitems=dictitems, obj=obj + ) + # backport of the Python 3.8 state_setter pickle operations + save(state_setter) + save(obj) # simple BINGET opcode as obj is already memoized. + save(state) + write(pickle.TUPLE2) + # Trigger a state_setter(obj, state) function call. + write(pickle.REDUCE) + # The purpose of state_setter is to carry-out an + # inplace modification of obj. We do not care about what the + # method might return, so its output is eventually removed from + # the stack. + write(pickle.POP) + + def save_global(self, obj, name=None, pack=struct.pack): + """ + Save a "global". + + The name of this method is somewhat misleading: all types get + dispatched here. + """ + if obj is type(None): # noqa + return self.save_reduce(type, (None,), obj=obj) + elif obj is type(Ellipsis): + return self.save_reduce(type, (Ellipsis,), obj=obj) + elif obj is type(NotImplemented): + return self.save_reduce(type, (NotImplemented,), obj=obj) + elif obj in _BUILTIN_TYPE_NAMES: + return self.save_reduce( + _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + + if sys.version_info[:2] < (3, 7) and _is_parametrized_type_hint(obj): # noqa # pragma: no branch + # Parametrized typing constructs in Python < 3.7 are not + # compatible with type checks and ``isinstance`` semantics. For + # this reason, it is easier to detect them using a + # duck-typing-based check (``_is_parametrized_type_hint``) than + # to populate the Pickler's dispatch with type-specific savers. 
+ self.save_reduce( + _create_parametrized_type_hint, + parametrized_type_hint_getinitargs(obj), + obj=obj + ) + elif name is not None: + Pickler.save_global(self, obj, name=name) + elif not _is_importable(obj, name=name): + self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) + else: + Pickler.save_global(self, obj, name=name) + dispatch[type] = save_global + + def save_function(self, obj, name=None): + """ Registered with the dispatch to handle all function types. + + Determines what kind of function obj is (e.g. lambda, defined at + interactive prompt, etc) and handles the pickling appropriately. + """ + if _is_importable(obj, name=name): + return Pickler.save_global(self, obj, name=name) + elif PYPY and isinstance(obj.__code__, builtin_code_type): + return self.save_pypy_builtin_func(obj) + else: + return self._save_reduce_pickle5( + *self._dynamic_function_reduce(obj), obj=obj + ) + + def save_pypy_builtin_func(self, obj): + """Save pypy equivalent of builtin functions. + PyPy does not have the concept of builtin-functions. Instead, + builtin-functions are simple function instances, but with a + builtin-code attribute. + Most of the time, builtin functions should be pickled by attribute. + But PyPy has flaky support for __qualname__, so some builtin + functions such as float.__new__ will be classified as dynamic. For + this reason only, we created this special routine. Because + builtin-functions are not expected to have closure or globals, + there is no additional hack (compared the one already implemented + in pickle) to protect ourselves from reference cycles. A simple + (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note + also that PyPy improved their support for __qualname__ in v3.6, so + this routing should be removed when cloudpickle supports only PyPy + 3.6 and later. 
+ """ + rv = (types.FunctionType, (obj.__code__, {}, obj.__name__, + obj.__defaults__, obj.__closure__), + obj.__dict__) + self.save_reduce(*rv, obj=obj) + + dispatch[types.FunctionType] = save_function diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/compat.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/compat.py new file mode 100644 index 000000000..afa285f62 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/cloudpickle/compat.py @@ -0,0 +1,13 @@ +import sys + + +if sys.version_info < (3, 8): + try: + import pickle5 as pickle # noqa: F401 + from pickle5 import Pickler # noqa: F401 + except ImportError: + import pickle # noqa: F401 + from pickle import _Pickler as Pickler # noqa: F401 +else: + import pickle # noqa: F401 + from _pickle import Pickler # noqa: F401 diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/analysis.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/analysis.py new file mode 100644 index 000000000..5b65599c5 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/analysis.py @@ -0,0 +1,722 @@ +""" +Utils for IR analysis +""" +import operator +from functools import reduce +from collections import namedtuple, defaultdict + +from .controlflow import CFGraph +from numba.core import types, errors, ir, consts +from numba.misc import special + +# +# Analysis related to variable lifetime +# + +_use_defs_result = namedtuple('use_defs_result', 'usemap,defmap') + +# other packages that define new nodes add calls for finding defs +# format: {type:function} +ir_extension_usedefs = {} + + +def compute_use_defs(blocks): + """ + Find variable use/def per block. + """ + + var_use_map = {} # { block offset -> set of vars } + var_def_map = {} # { block offset -> set of vars } + for offset, ir_block in blocks.items(): + var_use_map[offset] = use_set = set() + var_def_map[offset] = def_set = set() + for stmt in ir_block.body: + if type(stmt) in ir_extension_usedefs: + func = ir_extension_usedefs[type(stmt)] + func(stmt, use_set, def_set) + continue + if isinstance(stmt, ir.Assign): + if isinstance(stmt.value, ir.Inst): + rhs_set = set(var.name for var in stmt.value.list_vars()) + elif isinstance(stmt.value, ir.Var): + rhs_set = set([stmt.value.name]) + elif isinstance(stmt.value, (ir.Arg, ir.Const, ir.Global, + ir.FreeVar)): + rhs_set = () + else: + raise AssertionError('unreachable', type(stmt.value)) + # If lhs not in rhs of the assignment + if stmt.target.name not in rhs_set: + def_set.add(stmt.target.name) + + for var in stmt.list_vars(): + # do not include locally defined vars to use-map + if var.name not in def_set: + use_set.add(var.name) + + return _use_defs_result(usemap=var_use_map, defmap=var_def_map) + + +def compute_live_map(cfg, blocks, var_use_map, var_def_map): + """ + Find variables that must be alive at the ENTRY of each block. + We use a simple fix-point algorithm that iterates until the set of + live variables is unchanged for each block. + """ + def fix_point_progress(dct): + """Helper function to determine if a fix-point has been reached. + """ + return tuple(len(v) for v in dct.values()) + + def fix_point(fn, dct): + """Helper function to run fix-point algorithm. 
+ """ + old_point = None + new_point = fix_point_progress(dct) + while old_point != new_point: + fn(dct) + old_point = new_point + new_point = fix_point_progress(dct) + + def def_reach(dct): + """Find all variable definition reachable at the entry of a block + """ + for offset in var_def_map: + used_or_defined = var_def_map[offset] | var_use_map[offset] + dct[offset] |= used_or_defined + # Propagate to outgoing nodes + for out_blk, _ in cfg.successors(offset): + dct[out_blk] |= dct[offset] + + def liveness(dct): + """Find live variables. + + Push var usage backward. + """ + for offset in dct: + # Live vars here + live_vars = dct[offset] + for inc_blk, _data in cfg.predecessors(offset): + # Reachable at the predecessor + reachable = live_vars & def_reach_map[inc_blk] + # But not defined in the predecessor + dct[inc_blk] |= reachable - var_def_map[inc_blk] + + live_map = {} + for offset in blocks.keys(): + live_map[offset] = set(var_use_map[offset]) + + def_reach_map = defaultdict(set) + fix_point(def_reach, def_reach_map) + fix_point(liveness, live_map) + return live_map + + +_dead_maps_result = namedtuple('dead_maps_result', 'internal,escaping,combined') + + +def compute_dead_maps(cfg, blocks, live_map, var_def_map): + """ + Compute the end-of-live information for variables. + `live_map` contains a mapping of block offset to all the living + variables at the ENTRY of the block. + """ + # The following three dictionaries will be + # { block offset -> set of variables to delete } + # all vars that should be deleted at the start of the successors + escaping_dead_map = defaultdict(set) + # all vars that should be deleted within this block + internal_dead_map = defaultdict(set) + # all vars that should be deleted after the function exit + exit_dead_map = defaultdict(set) + + for offset, ir_block in blocks.items(): + # live vars WITHIN the block will include all the locally + # defined variables + cur_live_set = live_map[offset] | var_def_map[offset] + # vars alive in the outgoing blocks + outgoing_live_map = dict((out_blk, live_map[out_blk]) + for out_blk, _data in cfg.successors(offset)) + # vars to keep alive for the terminator + terminator_liveset = set(v.name + for v in ir_block.terminator.list_vars()) + # vars to keep alive in the successors + combined_liveset = reduce(operator.or_, outgoing_live_map.values(), + set()) + # include variables used in terminator + combined_liveset |= terminator_liveset + # vars that are dead within the block because they are not + # propagated to any outgoing blocks + internal_set = cur_live_set - combined_liveset + internal_dead_map[offset] = internal_set + # vars that escape this block + escaping_live_set = cur_live_set - internal_set + for out_blk, new_live_set in outgoing_live_map.items(): + # successor should delete the unused escaped vars + new_live_set = new_live_set | var_def_map[out_blk] + escaping_dead_map[out_blk] |= escaping_live_set - new_live_set + + # if no outgoing blocks + if not outgoing_live_map: + # insert var used by terminator + exit_dead_map[offset] = terminator_liveset + + # Verify that the dead maps cover all live variables + all_vars = reduce(operator.or_, live_map.values(), set()) + internal_dead_vars = reduce(operator.or_, internal_dead_map.values(), + set()) + escaping_dead_vars = reduce(operator.or_, escaping_dead_map.values(), + set()) + exit_dead_vars = reduce(operator.or_, exit_dead_map.values(), set()) + dead_vars = (internal_dead_vars | escaping_dead_vars | exit_dead_vars) + missing_vars = all_vars - dead_vars + if 
missing_vars: + # There are no exit points + if not cfg.exit_points(): + # We won't be able to verify this + pass + else: + msg = 'liveness info missing for vars: {0}'.format(missing_vars) + raise RuntimeError(msg) + + combined = dict((k, internal_dead_map[k] | escaping_dead_map[k]) + for k in blocks) + + return _dead_maps_result(internal=internal_dead_map, + escaping=escaping_dead_map, + combined=combined) + + +def compute_live_variables(cfg, blocks, var_def_map, var_dead_map): + """ + Compute the live variables at the beginning of each block + and at each yield point. + The ``var_def_map`` and ``var_dead_map`` indicates the variable defined + and deleted at each block, respectively. + """ + # live var at the entry per block + block_entry_vars = defaultdict(set) + + def fix_point_progress(): + return tuple(map(len, block_entry_vars.values())) + + old_point = None + new_point = fix_point_progress() + + # Propagate defined variables and still live the successors. + # (note the entry block automatically gets an empty set) + + # Note: This is finding the actual available variables at the entry + # of each block. The algorithm in compute_live_map() is finding + # the variable that must be available at the entry of each block. + # This is top-down in the dataflow. The other one is bottom-up. + while old_point != new_point: + # We iterate until the result stabilizes. This is necessary + # because of loops in the graphself. + for offset in blocks: + # vars available + variable defined + avail = block_entry_vars[offset] | var_def_map[offset] + # subtract variables deleted + avail -= var_dead_map[offset] + # add ``avail`` to each successors + for succ, _data in cfg.successors(offset): + block_entry_vars[succ] |= avail + + old_point = new_point + new_point = fix_point_progress() + + return block_entry_vars + + +# +# Analysis related to controlflow +# + +def compute_cfg_from_blocks(blocks): + cfg = CFGraph() + for k in blocks: + cfg.add_node(k) + + for k, b in blocks.items(): + term = b.terminator + for target in term.get_targets(): + cfg.add_edge(k, target) + + cfg.set_entry_point(min(blocks)) + cfg.process() + return cfg + + +def find_top_level_loops(cfg): + """ + A generator that yields toplevel loops given a control-flow-graph + """ + blocks_in_loop = set() + # get loop bodies + for loop in cfg.loops().values(): + insiders = set(loop.body) | set(loop.entries) | set(loop.exits) + insiders.discard(loop.header) + blocks_in_loop |= insiders + # find loop that is not part of other loops + for loop in cfg.loops().values(): + if loop.header not in blocks_in_loop: + yield _fix_loop_exit(cfg, loop) + + +def _fix_loop_exit(cfg, loop): + """ + Fixes loop.exits for Py3.8 bytecode CFG changes. + This is to handle `break` inside loops. + """ + # Computes the common postdoms of exit nodes + postdoms = cfg.post_dominators() + exits = reduce( + operator.and_, + [postdoms[b] for b in loop.exits], + loop.exits, + ) + if exits: + # Put the non-common-exits as body nodes + body = loop.body | loop.exits - exits + return loop._replace(exits=exits, body=body) + else: + return loop + + +# Used to describe a nullified condition in dead branch pruning +nullified = namedtuple('nullified', 'condition, taken_br, rewrite_stmt') + + +# Functions to manipulate IR +def dead_branch_prune(func_ir, called_args): + """ + Removes dead branches based on constant inference from function args. + This directly mutates the IR. 
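The liveness and reachability passes above all share one fix-point shape: re-run a transfer function until the per-key set sizes stop changing. A self-contained toy version of that iteration (plain dicts and sets, no numba IR, written for this note) behaves like this:

```python
# Toy fix-point in the same shape as the helpers above: iterate a transfer
# function until the size of every set stops changing.
from collections import defaultdict

def fix_point(fn, dct):
    def progress():
        return tuple(len(v) for v in dct.values())
    old, new = None, progress()
    while old != new:
        fn(dct)
        old, new = new, progress()

# Toy CFG: block 0 -> 1, block 1 -> {1, 2} (self loop), block 2 exits.
succs = {0: [1], 1: [1, 2], 2: []}
reach = defaultdict(set, {0: {"a"}})   # "a" is defined in block 0

def propagate(dct):
    # Forward transfer: push each block's facts to its successors.
    for blk, outs in succs.items():
        for out in outs:
            dct[out] |= dct[blk]

fix_point(propagate, reach)
print(dict(reach))   # {0: {'a'}, 1: {'a'}, 2: {'a'}}
```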
+ + func_ir is the IR + called_args are the actual arguments with which the function is called + """ + from numba.core.ir_utils import (get_definition, guard, find_const, + GuardException) + + DEBUG = 0 + + def find_branches(func_ir): + # find *all* branches + branches = [] + for blk in func_ir.blocks.values(): + branch_or_jump = blk.body[-1] + if isinstance(branch_or_jump, ir.Branch): + branch = branch_or_jump + pred = guard(get_definition, func_ir, branch.cond.name) + if pred is not None and pred.op == "call": + function = guard(get_definition, func_ir, pred.func) + if (function is not None and + isinstance(function, ir.Global) and + function.value is bool): + condition = guard(get_definition, func_ir, pred.args[0]) + if condition is not None: + branches.append((branch, condition, blk)) + return branches + + def do_prune(take_truebr, blk): + keep = branch.truebr if take_truebr else branch.falsebr + # replace the branch with a direct jump + jmp = ir.Jump(keep, loc=branch.loc) + blk.body[-1] = jmp + return 1 if keep == branch.truebr else 0 + + def prune_by_type(branch, condition, blk, *conds): + # this prunes a given branch and fixes up the IR + # at least one needs to be a NoneType + lhs_cond, rhs_cond = conds + lhs_none = isinstance(lhs_cond, types.NoneType) + rhs_none = isinstance(rhs_cond, types.NoneType) + if lhs_none or rhs_none: + try: + take_truebr = condition.fn(lhs_cond, rhs_cond) + except Exception: + return False, None + if DEBUG > 0: + kill = branch.falsebr if take_truebr else branch.truebr + print("Pruning %s" % kill, branch, lhs_cond, rhs_cond, + condition.fn) + taken = do_prune(take_truebr, blk) + return True, taken + return False, None + + def prune_by_value(branch, condition, blk, *conds): + lhs_cond, rhs_cond = conds + try: + take_truebr = condition.fn(lhs_cond, rhs_cond) + except Exception: + return False, None + if DEBUG > 0: + kill = branch.falsebr if take_truebr else branch.truebr + print("Pruning %s" % kill, branch, lhs_cond, rhs_cond, condition.fn) + taken = do_prune(take_truebr, blk) + return True, taken + + def prune_by_predicate(branch, pred, blk): + try: + # Just to prevent accidents, whilst already guarded, ensure this + # is an ir.Const + if not isinstance(pred, (ir.Const, ir.FreeVar, ir.Global)): + raise TypeError('Expected constant Numba IR node') + take_truebr = bool(pred.value) + except TypeError: + return False, None + if DEBUG > 0: + kill = branch.falsebr if take_truebr else branch.truebr + print("Pruning %s" % kill, branch, pred) + taken = do_prune(take_truebr, blk) + return True, taken + + class Unknown(object): + pass + + def resolve_input_arg_const(input_arg_idx): + """ + Resolves an input arg to a constant (if possible) + """ + input_arg_ty = called_args[input_arg_idx] + + # comparing to None? + if isinstance(input_arg_ty, types.NoneType): + return input_arg_ty + + # is it a kwarg default + if isinstance(input_arg_ty, types.Omitted): + val = input_arg_ty.value + if isinstance(val, types.NoneType): + return val + elif val is None: + return types.NoneType('none') + + # literal type, return the type itself so comparisons like `x == None` + # still work as e.g. 
x = types.int64 will never be None/NoneType so + # the branch can still be pruned + return getattr(input_arg_ty, 'literal_type', Unknown()) + + if DEBUG > 1: + print("before".center(80, '-')) + print(func_ir.dump()) + + phi2lbl = dict() + phi2asgn = dict() + for lbl, blk in func_ir.blocks.items(): + for stmt in blk.body: + if isinstance(stmt, ir.Assign): + if isinstance(stmt.value, ir.Expr) and stmt.value.op == 'phi': + phi2lbl[stmt.value] = lbl + phi2asgn[stmt.value] = stmt + + # This looks for branches where: + # at least one arg of the condition is in input args and const + # at least one an arg of the condition is a const + # if the condition is met it will replace the branch with a jump + branch_info = find_branches(func_ir) + # stores conditions that have no impact post prune + nullified_conditions = [] + + for branch, condition, blk in branch_info: + const_conds = [] + if isinstance(condition, ir.Expr) and condition.op == 'binop': + prune = prune_by_value + for arg in [condition.lhs, condition.rhs]: + resolved_const = Unknown() + arg_def = guard(get_definition, func_ir, arg) + if isinstance(arg_def, ir.Arg): + # it's an e.g. literal argument to the function + resolved_const = resolve_input_arg_const(arg_def.index) + prune = prune_by_type + else: + # it's some const argument to the function, cannot use guard + # here as the const itself may be None + try: + resolved_const = find_const(func_ir, arg) + if resolved_const is None: + resolved_const = types.NoneType('none') + except GuardException: + pass + + if not isinstance(resolved_const, Unknown): + const_conds.append(resolved_const) + + # lhs/rhs are consts + if len(const_conds) == 2: + # prune the branch, switch the branch for an unconditional jump + prune_stat, taken = prune(branch, condition, blk, *const_conds) + if(prune_stat): + # add the condition to the list of nullified conditions + nullified_conditions.append(nullified(condition, taken, + True)) + else: + # see if this is a branch on a constant value predicate + resolved_const = Unknown() + try: + pred_call = get_definition(func_ir, branch.cond) + resolved_const = find_const(func_ir, pred_call.args[0]) + if resolved_const is None: + resolved_const = types.NoneType('none') + except GuardException: + pass + + if not isinstance(resolved_const, Unknown): + prune_stat, taken = prune_by_predicate(branch, condition, blk) + if(prune_stat): + # add the condition to the list of nullified conditions + nullified_conditions.append(nullified(condition, taken, + False)) + + # 'ERE BE DRAGONS... + # It is the evaluation of the condition expression that often trips up type + # inference, so ideally it would be removed as it is effectively rendered + # dead by the unconditional jump if a branch was pruned. However, there may + # be references to the condition that exist in multiple places (e.g. dels) + # and we cannot run DCE here as typing has not taken place to give enough + # information to run DCE safely. Upshot of all this is the condition gets + # rewritten below into a benign const that typing will be happy with and DCE + # can remove it and its reference post typing when it is safe to do so + # (if desired). It is required that the const is assigned a value that + # indicates the branch taken as its mutated value would be read in the case + # of object mode fall back in place of the condition itself. For + # completeness the func_ir._definitions and ._consts are also updated to + # make the IR state self consistent. 
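At user level, this pruning is what lets an Optional-style default survive type inference; a hedged sketch of the observable behavior (details depend on the numba version this patch targets):

```python
# Hedged, user-level sketch of what this pass enables: the `maybe is None`
# branch is pruned per specialization, so the dead arm never has to type.
import numba

@numba.njit
def pick(x, maybe=None):
    if maybe is None:
        return x
    return x + maybe

print(pick(1))      # 1 -> compiled with maybe omitted, false arm pruned
print(pick(1, 2))   # 3 -> compiled with an int maybe, true arm pruned
```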
+ + deadcond = [x.condition for x in nullified_conditions] + for _, cond, blk in branch_info: + if cond in deadcond: + for x in blk.body: + if isinstance(x, ir.Assign) and x.value is cond: + # rewrite the condition as a true/false bit + nullified_info = nullified_conditions[deadcond.index(cond)] + # only do a rewrite of conditions, predicates need to retain + # their value as they may be used later. + if nullified_info.rewrite_stmt: + branch_bit = nullified_info.taken_br + x.value = ir.Const(branch_bit, loc=x.loc) + # update the specific definition to the new const + defns = func_ir._definitions[x.target.name] + repl_idx = defns.index(cond) + defns[repl_idx] = x.value + + # Check post dominators of dead nodes from in the original CFG for use of + # vars that are being removed in the dead blocks which might be referred to + # by phi nodes. + # + # Multiple things to fix up: + # + # 1. Cases like: + # + # A A + # |\ | + # | B --> B + # |/ | + # C C + # + # i.e. the branch is dead but the block is still alive. In this case CFG + # simplification will fuse A-B-C and any phi in C can be updated as an + # direct assignment from the last assigned version in the dominators of the + # fused block. + # + # 2. Cases like: + # + # A A + # / \ | + # B C --> B + # \ / | + # D D + # + # i.e. the block C is dead. In this case the phis in D need updating to + # reflect the collapse of the phi condition. This should result in a direct + # assignment of the surviving version in B to the LHS of the phi in D. + + new_cfg = compute_cfg_from_blocks(func_ir.blocks) + dead_blocks = new_cfg.dead_nodes() + + # for all phis that are still in live blocks. + for phi, lbl in phi2lbl.items(): + if lbl in dead_blocks: + continue + new_incoming = [x[0] for x in new_cfg.predecessors(lbl)] + if set(new_incoming) != set(phi.incoming_blocks): + # Something has changed in the CFG... + if len(new_incoming) == 1: + # There's now just one incoming. Replace the PHI node by a + # direct assignment + idx = phi.incoming_blocks.index(new_incoming[0]) + phi2asgn[phi].value = phi.incoming_values[idx] + else: + # There's more than one incoming still, then look through the + # incoming and remove dead + ic_val_tmp = [] + ic_blk_tmp = [] + for ic_val, ic_blk in zip(phi.incoming_values, + phi.incoming_blocks): + if ic_blk in dead_blocks: + continue + else: + ic_val_tmp.append(ic_val) + ic_blk_tmp.append(ic_blk) + phi.incoming_values.clear() + phi.incoming_values.extend(ic_val_tmp) + phi.incoming_blocks.clear() + phi.incoming_blocks.extend(ic_blk_tmp) + + # Remove dead blocks, this is safe as it relies on the CFG only. + for dead in dead_blocks: + del func_ir.blocks[dead] + + # if conditions were nullified then consts were rewritten, update + if nullified_conditions: + func_ir._consts = consts.ConstantInference(func_ir) + + if DEBUG > 1: + print("after".center(80, '-')) + print(func_ir.dump()) + + +def rewrite_semantic_constants(func_ir, called_args): + """ + This rewrites values known to be constant by their semantics as ir.Const + nodes, this is to give branch pruning the best chance possible of killing + branches. An example might be rewriting len(tuple) as the literal length. 
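A hedged, user-level sketch of the rewrites this function performs: `array.ndim` and `len(tuple_arg)` become compile-time constants, which in turn lets the branch pruner above remove arms that could never type:

```python
# Hedged sketch: ndim and len(tuple) are folded to ir.Const by this pass,
# so the `ndim == 1` arm is statically dead for a 2d argument.
import numba
import numpy as np

@numba.njit
def describe(a, tup):
    if a.ndim == 1:
        return len(tup)
    return a.ndim + len(tup)

print(describe(np.zeros((2, 3)), (1, 2, 3)))   # 2 + 3 = 5
```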
+ + func_ir is the IR + called_args are the actual arguments with which the function is called + """ + DEBUG = 0 + + if DEBUG > 1: + print(("rewrite_semantic_constants: " + + func_ir.func_id.func_name).center(80, '-')) + print("before".center(80, '*')) + func_ir.dump() + + def rewrite_statement(func_ir, stmt, new_val): + """ + Rewrites the stmt as a ir.Const new_val and fixes up the entries in + func_ir._definitions + """ + stmt.value = ir.Const(new_val, stmt.loc) + defns = func_ir._definitions[stmt.target.name] + repl_idx = defns.index(val) + defns[repl_idx] = stmt.value + + def rewrite_array_ndim(val, func_ir, called_args): + # rewrite Array.ndim as const(ndim) + if getattr(val, 'op', None) == 'getattr': + if val.attr == 'ndim': + arg_def = guard(get_definition, func_ir, val.value) + if isinstance(arg_def, ir.Arg): + argty = called_args[arg_def.index] + if isinstance(argty, types.Array): + rewrite_statement(func_ir, stmt, argty.ndim) + + def rewrite_tuple_len(val, func_ir, called_args): + # rewrite len(tuple) as const(len(tuple)) + if getattr(val, 'op', None) == 'call': + func = guard(get_definition, func_ir, val.func) + if (func is not None and isinstance(func, ir.Global) and + getattr(func, 'value', None) is len): + + (arg,) = val.args + arg_def = guard(get_definition, func_ir, arg) + if isinstance(arg_def, ir.Arg): + argty = called_args[arg_def.index] + if isinstance(argty, types.BaseTuple): + rewrite_statement(func_ir, stmt, argty.count) + elif (isinstance(arg_def, ir.Expr) and + arg_def.op == 'typed_getitem'): + argty = arg_def.dtype + if isinstance(argty, types.BaseTuple): + rewrite_statement(func_ir, stmt, argty.count) + + from numba.core.ir_utils import get_definition, guard + for blk in func_ir.blocks.values(): + for stmt in blk.body: + if isinstance(stmt, ir.Assign): + val = stmt.value + if isinstance(val, ir.Expr): + rewrite_array_ndim(val, func_ir, called_args) + rewrite_tuple_len(val, func_ir, called_args) + + if DEBUG > 1: + print("after".center(80, '*')) + func_ir.dump() + print('-' * 80) + + +def find_literally_calls(func_ir, argtypes): + """An analysis to find `numba.literally` call inside the given IR. + When an unsatisfied literal typing request is found, a `ForceLiteralArg` + exception is raised. + + Parameters + ---------- + + func_ir : numba.ir.FunctionIR + + argtypes : Sequence[numba.types.Type] + The argument types. 
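What this analysis reacts to is the public `numba.literally` request; a hedged sketch of the user-visible behavior:

```python
# Hedged sketch: numba.literally(x) forces re-compilation with a Literal
# type for x, which the dispatcher arranges via ForceLiteralArg.
import numba
from numba import literally

@numba.njit
def add_lit(x):
    return literally(x) + 1

print(add_lit(4))          # 5, specialized for the literal value 4
print(add_lit.signatures)  # e.g. [(Literal[int](4),)]
```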
+ """ + from numba.core import ir_utils + + marked_args = set() + first_loc = {} + # Scan for literally calls + for blk in func_ir.blocks.values(): + for assign in blk.find_exprs(op='call'): + var = ir_utils.guard(ir_utils.get_definition, func_ir, assign.func) + if isinstance(var, (ir.Global, ir.FreeVar)): + fnobj = var.value + else: + fnobj = ir_utils.guard(ir_utils.resolve_func_from_module, + func_ir, var) + if fnobj is special.literally: + # Found + [arg] = assign.args + defarg = func_ir.get_definition(arg) + if isinstance(defarg, ir.Arg): + argindex = defarg.index + marked_args.add(argindex) + first_loc.setdefault(argindex, assign.loc) + # Signal the dispatcher to force literal typing + for pos in marked_args: + query_arg = argtypes[pos] + do_raise = (isinstance(query_arg, types.InitialValue) and + query_arg.initial_value is None) + if do_raise: + loc = first_loc[pos] + raise errors.ForceLiteralArg(marked_args, loc=loc) + + if not isinstance(query_arg, (types.Literal, types.InitialValue)): + loc = first_loc[pos] + raise errors.ForceLiteralArg(marked_args, loc=loc) + + +ir_extension_use_alloca = {} + + +def must_use_alloca(blocks): + """ + Analyzes a dictionary of blocks to find variables that must be + stack allocated with alloca. For each statement in the blocks, + determine if that statement requires certain variables to be + stack allocated. This function uses the extension point + ir_extension_use_alloca to allow other IR node types like parfors + to register to be processed by this analysis function. At the + moment, parfors are the only IR node types that may require + something to be stack allocated. + """ + use_alloca_vars = set() + + for ir_block in blocks.values(): + for stmt in ir_block.body: + if type(stmt) in ir_extension_use_alloca: + func = ir_extension_use_alloca[type(stmt)] + func(stmt, use_alloca_vars) + continue + + return use_alloca_vars diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/pretty_annotate.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/pretty_annotate.py new file mode 100644 index 000000000..6e4f43b91 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/pretty_annotate.py @@ -0,0 +1,283 @@ +""" +This module implements code highlighting of numba function annotations. +""" + +from warnings import warn + +warn("The pretty_annotate functionality is experimental and might change API", + FutureWarning) + +def hllines(code, style): + try: + from pygments import highlight + from pygments.lexers import PythonLexer + from pygments.formatters import HtmlFormatter + except ImportError: + raise ImportError("please install the 'pygments' package") + pylex = PythonLexer() + "Given a code string, return a list of html-highlighted lines" + hf = HtmlFormatter(noclasses=True, style=style, nowrap=True) + res = highlight(code, pylex, hf) + return res.splitlines() + + +def htlines(code, style): + try: + from pygments import highlight + from pygments.lexers import PythonLexer + # TerminalFormatter does not support themes, Terminal256 should, + # but seem to not work. 
+ from pygments.formatters import TerminalFormatter + except ImportError: + raise ImportError("please install the 'pygments' package") + pylex = PythonLexer() + "Given a code string, return a list of ANSI-highlighted lines" + hf = TerminalFormatter(style=style) + res = highlight(code, pylex, hf) + return res.splitlines() + +def get_ansi_template(): + try: + from jinja2 import Template + except ImportError: + raise ImportError("please install the 'jinja2' package") + return Template(""" + {%- for func_key in func_data.keys() -%} + Function name: \x1b[34m{{func_data[func_key]['funcname']}}\x1b[39;49;00m + {%- if func_data[func_key]['filename'] -%} + {{'\n'}}In file: \x1b[34m{{func_data[func_key]['filename'] -}}\x1b[39;49;00m + {%- endif -%} + {{'\n'}}With signature: \x1b[34m{{func_key[1]}}\x1b[39;49;00m + {{- "\n" -}} + {%- for num, line, hl, hc in func_data[func_key]['pygments_lines'] -%} + {{-'\n'}}{{ num}}: {{hc-}} + {%- if func_data[func_key]['ir_lines'][num] -%} + {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} + {{-'\n'}}--{{- ' '*func_data[func_key]['python_indent'][num]}} + {{- ' '*(func_data[func_key]['ir_indent'][num][loop.index0]+4) + }}{{ir_line }}\x1b[41m{{ir_line_type-}}\x1b[39;49;00m + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + """) + return ansi_template + +def get_html_template(): + try: + from jinja2 import Template + except ImportError: + raise ImportError("please install the 'jinja2' package") + return Template(""" + + + + + + + {% for func_key in func_data.keys() %} + +
+ + {%- for num, line, hl, hc in func_data[func_key]['pygments_lines'] -%} + {%- if func_data[func_key]['ir_lines'][num] %} + + {% else -%} + + {%- endif -%} + {%- endfor -%} +
+
+ + + {{num}}: + {{' '*func_data[func_key]['python_indent'][num]}}{{hl}} + + + + + {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} + + + + {%- endfor -%} + +
+   + {{- ' '*func_data[func_key]['python_indent'][num]}} + {{ ' '*func_data[func_key]['ir_indent'][num][loop.index0]}}{{ir_line|e -}} + {{ir_line_type}} + +
+
+
+ + {{num}}: + {{' '*func_data[func_key]['python_indent'][num]}}{{hl}} + +
+
+ {% endfor %} + + + """) + + +def reform_code(annotation): + """ + Extract the code from the Numba annotation datastructure. + + Pygments can only highlight full multi-line strings, the Numba + annotation is list of single lines, with indentation removed. + """ + ident_dict = annotation['python_indent'] + s= '' + for n,l in annotation['python_lines']: + s = s+' '*ident_dict[n]+l+'\n' + return s + + +class Annotate: + """ + Construct syntax highlighted annotation for a given jitted function: + + Example: + + >>> import numba + >>> from numba.pretty_annotate import Annotate + >>> @numba.jit + ... def test(q): + ... res = 0 + ... for i in range(q): + ... res += i + ... return res + ... + >>> test(10) + 45 + >>> Annotate(test) + + The last line will return an HTML and/or ANSI representation that will be + displayed accordingly in Jupyter/IPython. + + Function annotations persist across compilation for newly encountered + type signatures and as a result annotations are shown for all signatures + by default. + + Annotations for a specific signature can be shown by using the + ``signature`` parameter. + + >>> @numba.jit + ... def add(x, y): + ... return x + y + ... + >>> add(1, 2) + 3 + >>> add(1.3, 5.7) + 7.0 + >>> add.signatures + [(int64, int64), (float64, float64)] + >>> Annotate(add, signature=add.signatures[1]) # annotation for (float64, float64) + """ + def __init__(self, function, signature=None, **kwargs): + + style = kwargs.get('style', 'default') + if not function.signatures: + raise ValueError('function need to be jitted for at least one signature') + ann = function.get_annotation_info(signature=signature) + self.ann = ann + + for k,v in ann.items(): + res = hllines(reform_code(v), style) + rest = htlines(reform_code(v), style) + v['pygments_lines'] = [(a,b,c, d) for (a,b),c, d in zip(v['python_lines'], res, rest)] + + def _repr_html_(self): + return get_html_template().render(func_data=self.ann) + + def __repr__(self): + return get_ansi_template().render(func_data=self.ann) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/template.html b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/template.html new file mode 100644 index 000000000..73e2f6f85 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/template.html @@ -0,0 +1,144 @@ + + + + + + + + + + + {% for func_key in func_data.keys() %} + + {% set loop1 = loop %} + + + +
+ + + {%- for num, line in func_data[func_key]['python_lines'] -%} + {%- if func_data[func_key]['ir_lines'][num] %} + + {% else -%} + + {%- endif -%} + {%- endfor -%} +
+
+ + + {{num}}: + {{func_data[func_key]['python_indent'][num]}}{{line|e}} + + + + + {%- for ir_line, ir_line_type in func_data[func_key]['ir_lines'][num] %} + + + + {%- endfor -%} + +
  + {{- func_data[func_key]['python_indent'][num]}} + {{func_data[func_key]['ir_indent'][num][loop.index0]}}{{ir_line|e -}} + {{ir_line_type}} + +
+
+
+ + {{num}}: + {{func_data[func_key]['python_indent'][num]}}{{line|e}} + +
+
+ +


+ + {% endfor %} + + + + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/type_annotations.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/type_annotations.py new file mode 100644 index 000000000..47bd01250 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/annotations/type_annotations.py @@ -0,0 +1,283 @@ +from collections import defaultdict, OrderedDict +from collections.abc import Mapping +from contextlib import closing +import copy +import inspect +import os +import re +import sys +import textwrap +from io import StringIO + +import numba.core.dispatcher +from numba.core import ir + + +class SourceLines(Mapping): + def __init__(self, func): + + try: + lines, startno = inspect.getsourcelines(func) + except OSError: + self.lines = () + self.startno = 0 + else: + self.lines = textwrap.dedent(''.join(lines)).splitlines() + self.startno = startno + + def __getitem__(self, lineno): + try: + return self.lines[lineno - self.startno].rstrip() + except IndexError: + return '' + + def __iter__(self): + return iter((self.startno + i) for i in range(len(self.lines))) + + def __len__(self): + return len(self.lines) + + @property + def avail(self): + return bool(self.lines) + + +class TypeAnnotation(object): + + # func_data dict stores annotation data for all functions that are + # compiled. We store the data in the TypeAnnotation class since a new + # TypeAnnotation instance is created for each function that is compiled. + # For every function that is compiled, we add the type annotation data to + # this dict and write the html annotation file to disk (rewrite the html + # file for every function since we don't know if this is the last function + # to be compiled). + func_data = OrderedDict() + + def __init__(self, func_ir, typemap, calltypes, lifted, lifted_from, + args, return_type, html_output=None): + self.func_id = func_ir.func_id + self.blocks = func_ir.blocks + self.typemap = typemap + self.calltypes = calltypes + self.filename = func_ir.loc.filename + self.linenum = str(func_ir.loc.line) + self.signature = str(args) + ' -> ' + str(return_type) + + # lifted loop information + self.lifted = lifted + self.num_lifted_loops = len(lifted) + + # If this is a lifted loop function that is being compiled, lifted_from + # points to annotation data from function that this loop lifted function + # was lifted from. This is used to stick lifted loop annotations back + # into original function. + self.lifted_from = lifted_from + + def prepare_annotations(self): + # Prepare annotations + groupedinst = defaultdict(list) + found_lifted_loop = False + #for blkid, blk in self.blocks.items(): + for blkid in sorted(self.blocks.keys()): + blk = self.blocks[blkid] + groupedinst[blk.loc.line].append("label %s" % blkid) + for inst in blk.body: + lineno = inst.loc.line + + if isinstance(inst, ir.Assign): + if found_lifted_loop: + atype = 'XXX Lifted Loop XXX' + found_lifted_loop = False + elif (isinstance(inst.value, ir.Expr) and + inst.value.op == 'call'): + atype = self.calltypes[inst.value] + elif (isinstance(inst.value, ir.Const) and + isinstance(inst.value.value, numba.core.dispatcher.LiftedLoop)): + atype = 'XXX Lifted Loop XXX' + found_lifted_loop = True + else: + # TODO: fix parfor lowering so that typemap is valid. 
+ atype = self.typemap.get(inst.target.name, "") + + aline = "%s = %s :: %s" % (inst.target, inst.value, atype) + elif isinstance(inst, ir.SetItem): + atype = self.calltypes[inst] + aline = "%s :: %s" % (inst, atype) + else: + aline = "%s" % inst + groupedinst[lineno].append(" %s" % aline) + return groupedinst + + def annotate(self): + source = SourceLines(self.func_id.func) + # if not source.avail: + # return "Source code unavailable" + + groupedinst = self.prepare_annotations() + + # Format annotations + io = StringIO() + with closing(io): + if source.avail: + print("# File: %s" % self.filename, file=io) + for num in source: + srcline = source[num] + ind = _getindent(srcline) + print("%s# --- LINE %d --- " % (ind, num), file=io) + for inst in groupedinst[num]: + print('%s# %s' % (ind, inst), file=io) + print(file=io) + print(srcline, file=io) + print(file=io) + if self.lifted: + print("# The function contains lifted loops", file=io) + for loop in self.lifted: + print("# Loop at line %d" % loop.get_source_location(), + file=io) + print("# Has %d overloads" % len(loop.overloads), + file=io) + for cres in loop.overloads.values(): + print(cres.type_annotation, file=io) + else: + print("# Source code unavailable", file=io) + for num in groupedinst: + for inst in groupedinst[num]: + print('%s' % (inst,), file=io) + print(file=io) + + return io.getvalue() + + def html_annotate(self, outfile): + # ensure that annotation information is assembled + self.annotate_raw() + # make a deep copy ahead of the pending mutations + func_data = copy.deepcopy(self.func_data) + + key = 'python_indent' + for this_func in func_data.values(): + if key in this_func: + idents = {} + for line, amount in this_func[key].items(): + idents[line] = ' ' * amount + this_func[key] = idents + + key = 'ir_indent' + for this_func in func_data.values(): + if key in this_func: + idents = {} + for line, ir_id in this_func[key].items(): + idents[line] = [' ' * amount for amount in ir_id] + this_func[key] = idents + + + + try: + from jinja2 import Template + except ImportError: + raise ImportError("please install the 'jinja2' package") + + root = os.path.join(os.path.dirname(__file__)) + template_filename = os.path.join(root, 'template.html') + with open(template_filename, 'r') as template: + html = template.read() + + template = Template(html) + rendered = template.render(func_data=func_data) + outfile.write(rendered) + + def annotate_raw(self): + """ + This returns "raw" annotation information i.e. it has no output format + specific markup included. + """ + python_source = SourceLines(self.func_id.func) + ir_lines = self.prepare_annotations() + line_nums = [num for num in python_source] + lifted_lines = [l.get_source_location() for l in self.lifted] + + def add_ir_line(func_data, line): + line_str = line.strip() + line_type = '' + if line_str.endswith('pyobject'): + line_str = line_str.replace('pyobject', '') + line_type = 'pyobject' + func_data['ir_lines'][num].append((line_str, line_type)) + indent_len = len(_getindent(line)) + func_data['ir_indent'][num].append(indent_len) + + func_key = (self.func_id.filename + ':' + str(self.func_id.firstlineno + 1), + self.signature) + if self.lifted_from is not None and self.lifted_from[1]['num_lifted_loops'] > 0: + # This is a lifted loop function that is being compiled. Get the + # numba ir for lines in loop function to use for annotating + # original python function that the loop was lifted from. 
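The annotation text assembled here is what users reach through the dispatcher's `inspect_types()`; a hedged usage sketch:

```python
# Hedged sketch: inspect_types() prints the per-line annotation text that
# TypeAnnotation.annotate() above assembles ("# --- LINE n ---" blocks).
import numba

@numba.njit
def total(n):
    s = 0
    for i in range(n):
        s += i
    return s

total(10)              # compile at least one specialization first
total.inspect_types()  # dumps annotated source for each compiled signature
```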
+ func_data = self.lifted_from[1] + for num in line_nums: + if num not in ir_lines.keys(): + continue + func_data['ir_lines'][num] = [] + func_data['ir_indent'][num] = [] + for line in ir_lines[num]: + add_ir_line(func_data, line) + if line.strip().endswith('pyobject'): + func_data['python_tags'][num] = 'object_tag' + # If any pyobject line is found, make sure original python + # line that was marked as a lifted loop start line is tagged + # as an object line instead. Lifted loop start lines should + # only be marked as lifted loop lines if the lifted loop + # was successfully compiled in nopython mode. + func_data['python_tags'][self.lifted_from[0]] = 'object_tag' + + # We're done with this lifted loop, so decrement lifted loop counter. + # When lifted loop counter hits zero, that means we're ready to write + # out annotations to html file. + self.lifted_from[1]['num_lifted_loops'] -= 1 + + elif func_key not in TypeAnnotation.func_data.keys(): + TypeAnnotation.func_data[func_key] = {} + func_data = TypeAnnotation.func_data[func_key] + + for i, loop in enumerate(self.lifted): + # Make sure that when we process each lifted loop function later, + # we'll know where it originally came from. + loop.lifted_from = (lifted_lines[i], func_data) + func_data['num_lifted_loops'] = self.num_lifted_loops + + func_data['filename'] = self.filename + func_data['funcname'] = self.func_id.func_name + func_data['python_lines'] = [] + func_data['python_indent'] = {} + func_data['python_tags'] = {} + func_data['ir_lines'] = {} + func_data['ir_indent'] = {} + + for num in line_nums: + func_data['python_lines'].append((num, python_source[num].strip())) + indent_len = len(_getindent(python_source[num])) + func_data['python_indent'][num] = indent_len + func_data['python_tags'][num] = '' + func_data['ir_lines'][num] = [] + func_data['ir_indent'][num] = [] + + for line in ir_lines[num]: + add_ir_line(func_data, line) + if num in lifted_lines: + func_data['python_tags'][num] = 'lifted_tag' + elif line.strip().endswith('pyobject'): + func_data['python_tags'][num] = 'object_tag' + return self.func_data + + + def __str__(self): + return self.annotate() + + +re_longest_white_prefix = re.compile(r'^\s*') + + +def _getindent(text): + m = re_longest_white_prefix.match(text) + if not m: + return '' + else: + return ' ' * len(m.group(0)) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/base.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/base.py new file mode 100644 index 000000000..9622a3f09 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/base.py @@ -0,0 +1,1255 @@ +from collections import defaultdict +import copy +import sys +from itertools import permutations, takewhile +from contextlib import contextmanager + +from llvmlite import ir as llvmir +from llvmlite.ir import Constant +import llvmlite.binding as ll + +from numba.core import types, utils, datamodel, debuginfo, funcdesc, config, cgutils, imputils +from numba.core import event, errors, targetconfig +from numba import _dynfunc, _helperlib +from numba.core.compiler_lock import global_compiler_lock +from numba.core.pythonapi import PythonAPI +from numba.core.imputils import (user_function, user_generator, + builtin_registry, impl_ret_borrowed, + RegistryLoader) +from numba.cpython import builtins + +GENERIC_POINTER = llvmir.PointerType(llvmir.IntType(8)) +PYOBJECT = GENERIC_POINTER +void_ptr = GENERIC_POINTER + + +class OverloadSelector(object): + """ + An object matching an actual signature against a registry 
of formal + signatures and choosing the best candidate, if any. + + In the current implementation: + - a "signature" is a tuple of type classes or type instances + - the "best candidate" is the most specific match + """ + + def __init__(self): + # A list of (formal args tuple, value) + self.versions = [] + self._cache = {} + + def find(self, sig): + out = self._cache.get(sig) + if out is None: + out = self._find(sig) + self._cache[sig] = out + return out + + def _find(self, sig): + candidates = self._select_compatible(sig) + if candidates: + return candidates[self._best_signature(candidates)] + else: + raise errors.NumbaNotImplementedError(f'{self}, {sig}') + + def _select_compatible(self, sig): + """ + Select all compatible signatures and their implementation. + """ + out = {} + for ver_sig, impl in self.versions: + if self._match_arglist(ver_sig, sig): + out[ver_sig] = impl + return out + + def _best_signature(self, candidates): + """ + Returns the best signature out of the candidates + """ + ordered, genericity = self._sort_signatures(candidates) + # check for ambiguous signatures + if len(ordered) > 1: + firstscore = genericity[ordered[0]] + same = list(takewhile(lambda x: genericity[x] == firstscore, + ordered)) + if len(same) > 1: + msg = ["{n} ambiguous signatures".format(n=len(same))] + for sig in same: + msg += ["{0} => {1}".format(sig, candidates[sig])] + raise errors.NumbaTypeError('\n'.join(msg)) + return ordered[0] + + def _sort_signatures(self, candidates): + """ + Sort signatures in ascending level of genericity. + + Returns a 2-tuple: + + * ordered list of signatures + * dictionary containing genericity scores + """ + # score by genericity + genericity = defaultdict(int) + for this, other in permutations(candidates.keys(), r=2): + matched = self._match_arglist(formal_args=this, actual_args=other) + if matched: + # genericity score +1 for every another compatible signature + genericity[this] += 1 + # order candidates in ascending level of genericity + ordered = sorted(candidates.keys(), key=lambda x: genericity[x]) + return ordered, genericity + + def _match_arglist(self, formal_args, actual_args): + """ + Returns True if the signature is "matching". + A formal signature is "matching" if the actual signature matches exactly + or if the formal signature is a compatible generic signature. + """ + # normalize VarArg + if formal_args and isinstance(formal_args[-1], types.VarArg): + ndiff = len(actual_args) - len(formal_args) + 1 + formal_args = formal_args[:-1] + (formal_args[-1].dtype,) * ndiff + + if len(formal_args) != len(actual_args): + return False + + for formal, actual in zip(formal_args, actual_args): + if not self._match(formal, actual): + return False + + return True + + def _match(self, formal, actual): + if formal == actual: + # formal argument matches actual arguments + return True + elif types.Any == formal: + # formal argument is any + return True + elif isinstance(formal, type) and issubclass(formal, types.Type): + if isinstance(actual, type) and issubclass(actual, formal): + # formal arg is a type class and actual arg is a subclass + return True + elif isinstance(actual, formal): + # formal arg is a type class of which actual arg is an instance + return True + + def append(self, value, sig): + """ + Add a formal signature and its associated value. 
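+
+        Illustrative sketch (the ``*_impl`` values are hypothetical)::
+
+            sel = OverloadSelector()
+            sel.append(int_impl, (types.Integer,))   # specific signature
+            sel.append(any_impl, (types.Any,))       # generic signature
+            sel.find((types.int64,))                 # -> int_impl, the least
+                                                     #    generic match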
+ """ + assert isinstance(sig, tuple), (value, sig) + self.versions.append((sig, value)) + self._cache.clear() + + +@utils.runonce +def _load_global_helpers(): + """ + Execute once to install special symbols into the LLVM symbol table. + """ + # This is Py_None's real C name + ll.add_symbol("_Py_NoneStruct", id(None)) + + # Add Numba C helper functions + for c_helpers in (_helperlib.c_helpers, _dynfunc.c_helpers): + for py_name, c_address in c_helpers.items(): + c_name = "numba_" + py_name + ll.add_symbol(c_name, c_address) + + # Add Numpy C helpers (npy_XXX) + for c_name, c_address in _helperlib.npymath_exports.items(): + ll.add_symbol(c_name, c_address) + + # Add all built-in exception classes + for obj in utils.builtins.__dict__.values(): + if isinstance(obj, type) and issubclass(obj, BaseException): + ll.add_symbol("PyExc_%s" % (obj.__name__), id(obj)) + + +class BaseContext(object): + """ + + Notes on Structure + ------------------ + + Most objects are lowered as plain-old-data structure in the generated + llvm. They are passed around by reference (a pointer to the structure). + Only POD structure can live across function boundaries by copying the + data. + """ + # True if the target requires strict alignment + # Causes exception to be raised if the record members are not aligned. + strict_alignment = False + + # Force powi implementation as math.pow call + implement_powi_as_math_call = False + implement_pow_as_math_call = False + + # Emit Debug info + enable_debuginfo = False + DIBuilder = debuginfo.DIBuilder + + # Bound checking + @property + def enable_boundscheck(self): + if config.BOUNDSCHECK is not None: + return config.BOUNDSCHECK + return self._boundscheck + + @enable_boundscheck.setter + def enable_boundscheck(self, value): + self._boundscheck = value + + # NRT + enable_nrt = False + + # Auto parallelization + auto_parallel = False + + # PYCC + aot_mode = False + + # Error model for various operations (only FP exceptions currently) + error_model = None + + # Whether dynamic globals (CPU runtime addresses) is allowed + allow_dynamic_globals = False + + # Fast math flags + fastmath = False + + # python execution environment + environment = None + + # the function descriptor + fndesc = None + + def __init__(self, typing_context, target): + _load_global_helpers() + + self.address_size = utils.MACHINE_BITS + self.typing_context = typing_context + from numba.core.target_extension import target_registry + self.target_name = target + self.target = target_registry[target] + + # A mapping of installed registries to their loaders + self._registries = {} + # Declarations loaded from registries and other sources + self._defns = defaultdict(OverloadSelector) + self._getattrs = defaultdict(OverloadSelector) + self._setattrs = defaultdict(OverloadSelector) + self._casts = OverloadSelector() + self._get_constants = OverloadSelector() + # Other declarations + self._generators = {} + self.special_ops = {} + self.cached_internal_func = {} + self._pid = None + self._codelib_stack = [] + + self._boundscheck = False + + self.data_model_manager = datamodel.default_manager + + # Initialize + self.init() + + def init(self): + """ + For subclasses to add initializer + """ + + def refresh(self): + """ + Refresh context with new declarations from known registries. + Useful for third-party extensions. 
+ """ + # load target specific registries + self.load_additional_registries() + + # Populate the builtin registry, this has to happen after loading + # additional registries as some of the "additional" registries write + # their implementations into the builtin_registry and would be missed if + # this ran first. + self.install_registry(builtin_registry) + + # Also refresh typing context, since @overload declarations can + # affect it. + self.typing_context.refresh() + + def load_additional_registries(self): + """ + Load target-specific registries. Can be overridden by subclasses. + """ + + def mangler(self, name, types, *, abi_tags=(), uid=None): + """ + Perform name mangling. + """ + return funcdesc.default_mangler(name, types, abi_tags=abi_tags, uid=uid) + + def get_env_name(self, fndesc): + """Get the environment name given a FunctionDescriptor. + + Use this instead of the ``fndesc.env_name`` so that the target-context + can provide necessary mangling of the symbol to meet ABI requirements. + """ + return fndesc.env_name + + def declare_env_global(self, module, envname): + """Declare the Environment pointer as a global of the module. + + The pointer is initialized to NULL. It must be filled by the runtime + with the actual address of the Env before the associated function + can be executed. + + Parameters + ---------- + module : + The LLVM Module + envname : str + The name of the global variable. + """ + if envname not in module.globals: + gv = llvmir.GlobalVariable(module, cgutils.voidptr_t, name=envname) + gv.linkage = 'common' + gv.initializer = cgutils.get_null_value(gv.type.pointee) + + return module.globals[envname] + + def get_arg_packer(self, fe_args): + return datamodel.ArgPacker(self.data_model_manager, fe_args) + + def get_data_packer(self, fe_types): + return datamodel.DataPacker(self.data_model_manager, fe_types) + + @property + def target_data(self): + raise NotImplementedError + + @utils.cached_property + def nonconst_module_attrs(self): + """ + All module attrs are constant for targets using BaseContext. + """ + return tuple() + + @utils.cached_property + def nrt(self): + from numba.core.runtime.context import NRTContext + return NRTContext(self, self.enable_nrt) + + def subtarget(self, **kws): + obj = copy.copy(self) # shallow copy + for k, v in kws.items(): + if not hasattr(obj, k): + raise NameError("unknown option {0!r}".format(k)) + setattr(obj, k, v) + if obj.codegen() is not self.codegen(): + # We can't share functions across different codegens + obj.cached_internal_func = {} + return obj + + def install_registry(self, registry): + """ + Install a *registry* (a imputils.Registry instance) of function + and attribute implementations. 
+ """ + try: + loader = self._registries[registry] + except KeyError: + loader = RegistryLoader(registry) + self._registries[registry] = loader + self.insert_func_defn(loader.new_registrations('functions')) + self._insert_getattr_defn(loader.new_registrations('getattrs')) + self._insert_setattr_defn(loader.new_registrations('setattrs')) + self._insert_cast_defn(loader.new_registrations('casts')) + self._insert_get_constant_defn(loader.new_registrations('constants')) + + def insert_func_defn(self, defns): + for impl, func, sig in defns: + self._defns[func].append(impl, sig) + + def _insert_getattr_defn(self, defns): + for impl, attr, sig in defns: + self._getattrs[attr].append(impl, sig) + + def _insert_setattr_defn(self, defns): + for impl, attr, sig in defns: + self._setattrs[attr].append(impl, sig) + + def _insert_cast_defn(self, defns): + for impl, sig in defns: + self._casts.append(impl, sig) + + def _insert_get_constant_defn(self, defns): + for impl, sig in defns: + self._get_constants.append(impl, sig) + + def insert_user_function(self, func, fndesc, libs=()): + impl = user_function(fndesc, libs) + self._defns[func].append(impl, impl.signature) + + def insert_generator(self, genty, gendesc, libs=()): + assert isinstance(genty, types.Generator) + impl = user_generator(gendesc, libs) + self._generators[genty] = gendesc, impl + + def remove_user_function(self, func): + """ + Remove user function *func*. + KeyError is raised if the function isn't known to us. + """ + del self._defns[func] + + def get_external_function_type(self, fndesc): + argtypes = [self.get_argument_type(aty) + for aty in fndesc.argtypes] + # don't wrap in pointer + restype = self.get_argument_type(fndesc.restype) + fnty = llvmir.FunctionType(restype, argtypes) + return fnty + + def declare_function(self, module, fndesc): + fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) + fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) + self.call_conv.decorate_function(fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias) + if fndesc.inline: + fn.attributes.add('alwaysinline') + # alwaysinline overrides optnone + fn.attributes.discard('noinline') + fn.attributes.discard('optnone') + return fn + + def declare_external_function(self, module, fndesc): + fnty = self.get_external_function_type(fndesc) + fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name) + assert fn.is_declaration + for ak, av in zip(fndesc.args, fn.args): + av.name = "arg.%s" % ak + return fn + + def insert_const_string(self, mod, string): + """ + Insert constant *string* (a str object) into module *mod*. + """ + stringtype = GENERIC_POINTER + name = ".const.%s" % string + text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00") + gv = self.insert_unique_const(mod, name, text) + return Constant.bitcast(gv, stringtype) + + def insert_const_bytes(self, mod, bytes, name=None): + """ + Insert constant *byte* (a `bytes` object) into module *mod*. + """ + stringtype = GENERIC_POINTER + name = ".bytes.%s" % (name or hash(bytes)) + text = cgutils.make_bytearray(bytes) + gv = self.insert_unique_const(mod, name, text) + return Constant.bitcast(gv, stringtype) + + def insert_unique_const(self, mod, name, val): + """ + Insert a unique internal constant named *name*, with LLVM value + *val*, into module *mod*. 
+ """ + try: + gv = mod.get_global(name) + except KeyError: + return cgutils.global_constant(mod, name, val) + else: + return gv + + def get_argument_type(self, ty): + return self.data_model_manager[ty].get_argument_type() + + def get_return_type(self, ty): + return self.data_model_manager[ty].get_return_type() + + def get_data_type(self, ty): + """ + Get a LLVM data representation of the Numba type *ty* that is safe + for storage. Record data are stored as byte array. + + The return value is a llvmlite.ir.Type object, or None if the type + is an opaque pointer (???). + """ + return self.data_model_manager[ty].get_data_type() + + def get_value_type(self, ty): + return self.data_model_manager[ty].get_value_type() + + def pack_value(self, builder, ty, value, ptr, align=None): + """ + Pack value into the array storage at *ptr*. + If *align* is given, it is the guaranteed alignment for *ptr* + (by default, the standard ABI alignment). + """ + dataval = self.data_model_manager[ty].as_data(builder, value) + builder.store(dataval, ptr, align=align) + + def unpack_value(self, builder, ty, ptr, align=None): + """ + Unpack value from the array storage at *ptr*. + If *align* is given, it is the guaranteed alignment for *ptr* + (by default, the standard ABI alignment). + """ + dm = self.data_model_manager[ty] + return dm.load_from_data_pointer(builder, ptr, align) + + def get_constant_generic(self, builder, ty, val): + """ + Return a LLVM constant representing value *val* of Numba type *ty*. + """ + try: + impl = self._get_constants.find((ty,)) + return impl(self, builder, ty, val) + except NotImplementedError: + raise NotImplementedError("Cannot lower constant of type '%s'" % (ty,)) + + def get_constant(self, ty, val): + """ + Same as get_constant_generic(), but without specifying *builder*. + Works only for simple types. + """ + # HACK: pass builder=None to preserve get_constant() API + return self.get_constant_generic(None, ty, val) + + def get_constant_undef(self, ty): + lty = self.get_value_type(ty) + return Constant(lty, llvmir.Undefined) + + def get_constant_null(self, ty): + lty = self.get_value_type(ty) + return Constant(lty, None) + + def get_function(self, fn, sig, _firstcall=True): + """ + Return the implementation of function *fn* for signature *sig*. + The return value is a callable with the signature (builder, args). + """ + assert sig is not None + sig = sig.as_function() + if isinstance(fn, types.Callable): + key = fn.get_impl_key(sig) + overloads = self._defns[key] + else: + key = fn + overloads = self._defns[key] + + try: + return _wrap_impl(overloads.find(sig.args), self, sig) + except errors.NumbaNotImplementedError: + pass + if isinstance(fn, types.Type): + # It's a type instance => try to find a definition for the type class + try: + return self.get_function(type(fn), sig) + except NotImplementedError: + # Raise exception for the type instance, for a better error message + pass + + # Automatically refresh the context to load new registries if we are + # calling the first time. 
+ if _firstcall: + self.refresh() + return self.get_function(fn, sig, _firstcall=False) + + raise NotImplementedError("No definition for lowering %s%s" % (key, sig)) + + def get_generator_desc(self, genty): + """ + """ + return self._generators[genty][0] + + def get_generator_impl(self, genty): + """ + """ + res = self._generators[genty][1] + self.add_linking_libs(getattr(res, 'libs', ())) + return res + + def get_bound_function(self, builder, obj, ty): + assert self.get_value_type(ty) == obj.type + return obj + + def get_getattr(self, typ, attr): + """ + Get the getattr() implementation for the given type and attribute name. + The return value is a callable with the signature + (context, builder, typ, val, attr). + """ + const_attr = (typ, attr) not in self.nonconst_module_attrs + is_module = isinstance(typ, types.Module) + if is_module and const_attr: + # Implement getattr for module-level globals that we treat as + # constants. + # XXX We shouldn't have to retype this + attrty = self.typing_context.resolve_module_constants(typ, attr) + if attrty is None or isinstance(attrty, types.Dummy): + # No implementation required for dummies (functions, modules...), + # which are dealt with later + return None + else: + pyval = getattr(typ.pymod, attr) + def imp(context, builder, typ, val, attr): + llval = self.get_constant_generic(builder, attrty, pyval) + return impl_ret_borrowed(context, builder, attrty, llval) + return imp + + # Lookup specific getattr implementation for this type and attribute + overloads = self._getattrs[attr] + try: + return overloads.find((typ,)) + except errors.NumbaNotImplementedError: + pass + # Lookup generic getattr implementation for this type + overloads = self._getattrs[None] + try: + return overloads.find((typ,)) + except errors.NumbaNotImplementedError: + pass + + raise NotImplementedError("No definition for lowering %s.%s" % (typ, attr)) + + def get_setattr(self, attr, sig): + """ + Get the setattr() implementation for the given attribute name + and signature. + The return value is a callable with the signature (builder, args). 
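+
+        Illustrative sketch (value names hypothetical)::
+
+            setattr_impl = context.get_setattr('x', sig)
+            setattr_impl(builder, (target_val, value_val))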
+ """ + assert len(sig.args) == 2 + typ = sig.args[0] + valty = sig.args[1] + + def wrap_setattr(impl): + def wrapped(builder, args): + return impl(self, builder, sig, args, attr) + return wrapped + + # Lookup specific setattr implementation for this type and attribute + overloads = self._setattrs[attr] + try: + return wrap_setattr(overloads.find((typ, valty))) + except errors.NumbaNotImplementedError: + pass + # Lookup generic setattr implementation for this type + overloads = self._setattrs[None] + try: + return wrap_setattr(overloads.find((typ, valty))) + except errors.NumbaNotImplementedError: + pass + + raise NotImplementedError("No definition for lowering %s.%s = %s" + % (typ, attr, valty)) + + def get_argument_value(self, builder, ty, val): + """ + Argument representation to local value representation + """ + return self.data_model_manager[ty].from_argument(builder, val) + + def get_returned_value(self, builder, ty, val): + """ + Return value representation to local value representation + """ + return self.data_model_manager[ty].from_return(builder, val) + + def get_return_value(self, builder, ty, val): + """ + Local value representation to return type representation + """ + return self.data_model_manager[ty].as_return(builder, val) + + def get_value_as_argument(self, builder, ty, val): + """Prepare local value representation as argument type representation + """ + return self.data_model_manager[ty].as_argument(builder, val) + + def get_value_as_data(self, builder, ty, val): + return self.data_model_manager[ty].as_data(builder, val) + + def get_data_as_value(self, builder, ty, val): + return self.data_model_manager[ty].from_data(builder, val) + + def pair_first(self, builder, val, ty): + """ + Extract the first element of a heterogeneous pair. + """ + pair = self.make_helper(builder, ty, val) + return pair.first + + def pair_second(self, builder, val, ty): + """ + Extract the second element of a heterogeneous pair. + """ + pair = self.make_helper(builder, ty, val) + return pair.second + + def cast(self, builder, val, fromty, toty): + """ + Cast a value of type *fromty* to type *toty*. + This implements implicit conversions as can happen due to the + granularity of the Numba type system, or lax Python semantics. + """ + if fromty == toty or toty == types.Any: + return val + try: + impl = self._casts.find((fromty, toty)) + return impl(self, builder, fromty, toty, val) + except errors.NumbaNotImplementedError: + raise errors.NumbaNotImplementedError( + "Cannot cast %s to %s: %s" % (fromty, toty, val)) + + def generic_compare(self, builder, key, argtypes, args): + """ + Compare the given LLVM values of the given Numba types using + the comparison *key* (e.g. '=='). The values are first cast to + a common safe conversion type. 
+ """ + at, bt = argtypes + av, bv = args + ty = self.typing_context.unify_types(at, bt) + assert ty is not None + cav = self.cast(builder, av, at, ty) + cbv = self.cast(builder, bv, bt, ty) + fnty = self.typing_context.resolve_value_type(key) + # the sig is homogeneous in the unified casted type + cmpsig = fnty.get_call_type(self.typing_context, (ty, ty), {}) + cmpfunc = self.get_function(fnty, cmpsig) + self.add_linking_libs(getattr(cmpfunc, 'libs', ())) + return cmpfunc(builder, (cav, cbv)) + + def make_optional_none(self, builder, valtype): + optval = self.make_helper(builder, types.Optional(valtype)) + optval.valid = cgutils.false_bit + return optval._getvalue() + + def make_optional_value(self, builder, valtype, value): + optval = self.make_helper(builder, types.Optional(valtype)) + optval.valid = cgutils.true_bit + optval.data = value + return optval._getvalue() + + def is_true(self, builder, typ, val): + """ + Return the truth value of a value of the given Numba type. + """ + fnty = self.typing_context.resolve_value_type(bool) + sig = fnty.get_call_type(self.typing_context, (typ,), {}) + impl = self.get_function(fnty, sig) + return impl(builder, (val,)) + + def get_c_value(self, builder, typ, name, dllimport=False): + """ + Get a global value through its C-accessible *name*, with the given + LLVM type. + If *dllimport* is true, the symbol will be marked as imported + from a DLL (necessary for AOT compilation under Windows). + """ + module = builder.function.module + try: + gv = module.globals[name] + except KeyError: + gv = cgutils.add_global_variable(module, typ, name) + if dllimport and self.aot_mode and sys.platform == 'win32': + gv.storage_class = "dllimport" + return gv + + def call_external_function(self, builder, callee, argtys, args): + args = [self.get_value_as_argument(builder, ty, arg) + for ty, arg in zip(argtys, args)] + retval = builder.call(callee, args) + return retval + + def get_function_pointer_type(self, typ): + return self.data_model_manager[typ].get_data_type() + + def call_function_pointer(self, builder, funcptr, args, cconv=None): + return builder.call(funcptr, args, cconv=cconv) + + def print_string(self, builder, text): + mod = builder.module + cstring = GENERIC_POINTER + fnty = llvmir.FunctionType(llvmir.IntType(32), [cstring]) + puts = cgutils.get_or_insert_function(mod, fnty, "puts") + return builder.call(puts, [text]) + + def debug_print(self, builder, text): + mod = builder.module + cstr = self.insert_const_string(mod, str(text)) + self.print_string(builder, cstr) + + def printf(self, builder, format_string, *args): + mod = builder.module + if isinstance(format_string, str): + cstr = self.insert_const_string(mod, format_string) + else: + cstr = format_string + fnty = llvmir.FunctionType(llvmir.IntType(32), (GENERIC_POINTER,), var_arg=True) + fn = cgutils.get_or_insert_function(mod, fnty, "printf") + return builder.call(fn, (cstr,) + tuple(args)) + + def get_struct_type(self, struct): + """ + Get the LLVM struct type for the given Structure class *struct*. + """ + fields = [self.get_value_type(v) for _, v in struct._fields] + return llvmir.LiteralStructType(fields) + + def get_dummy_value(self): + return Constant(self.get_dummy_type(), None) + + def get_dummy_type(self): + return GENERIC_POINTER + + def _compile_subroutine_no_cache(self, builder, impl, sig, locals={}, + flags=None): + """ + Invoke the compiler to compile a function to be used inside a + nopython function, but without generating code to call that + function. 
+ + Note this context's flags are not inherited. + """ + # Compile + from numba.core import compiler + + with global_compiler_lock: + codegen = self.codegen() + library = codegen.create_library(impl.__name__) + if flags is None: + + cstk = targetconfig.ConfigStack() + flags = compiler.Flags() + if cstk: + tls_flags = cstk.top() + if tls_flags.is_set("nrt") and tls_flags.nrt: + flags.nrt = True + + flags.no_compile = True + flags.no_cpython_wrapper = True + flags.no_cfunc_wrapper = True + + cres = compiler.compile_internal(self.typing_context, self, + library, + impl, sig.args, + sig.return_type, flags, + locals=locals) + + # Allow inlining the function inside callers. + self.active_code_library.add_linking_library(cres.library) + return cres + + def compile_subroutine(self, builder, impl, sig, locals={}, flags=None, + caching=True): + """ + Compile the function *impl* for the given *sig* (in nopython mode). + Return an instance of CompileResult. + + If *caching* evaluates True, the function keeps the compiled function + for reuse in *.cached_internal_func*. + """ + cache_key = (impl.__code__, sig, type(self.error_model)) + if not caching: + cached = None + else: + if impl.__closure__: + # XXX This obviously won't work if a cell's value is + # unhashable. + cache_key += tuple(c.cell_contents for c in impl.__closure__) + cached = self.cached_internal_func.get(cache_key) + if cached is None: + cres = self._compile_subroutine_no_cache(builder, impl, sig, + locals=locals, + flags=flags) + self.cached_internal_func[cache_key] = cres + + cres = self.cached_internal_func[cache_key] + # Allow inlining the function inside callers. + self.active_code_library.add_linking_library(cres.library) + return cres + + def compile_internal(self, builder, impl, sig, args, locals={}): + """ + Like compile_subroutine(), but also call the function with the given + *args*. + """ + cres = self.compile_subroutine(builder, impl, sig, locals) + return self.call_internal(builder, cres.fndesc, sig, args) + + def call_internal(self, builder, fndesc, sig, args): + """ + Given the function descriptor of an internally compiled function, + emit a call to that function with the given arguments. + """ + status, res = self.call_internal_no_propagate(builder, fndesc, sig, args) + with cgutils.if_unlikely(builder, status.is_error): + self.call_conv.return_status_propagate(builder, status) + + res = imputils.fix_returning_optional(self, builder, sig, status, res) + return res + + def call_internal_no_propagate(self, builder, fndesc, sig, args): + """Similar to `.call_internal()` but does not handle or propagate + the return status automatically. + """ + # Add call to the generated function + llvm_mod = builder.module + fn = self.declare_function(llvm_mod, fndesc) + status, res = self.call_conv.call_function(builder, fn, sig.return_type, + sig.args, args) + return status, res + + def call_unresolved(self, builder, name, sig, args): + """ + Insert a function call to an unresolved symbol with the given *name*. + + Note: this is used for recursive call. + + In the mutual recursion case:: + + @njit + def foo(): + ... # calls bar() + + @njit + def bar(): + ... # calls foo() + + foo() + + When foo() is called, the compilation of bar() is fully completed + (codegen'ed and loaded) before foo() is. 
Since MCJIT's eager compilation + doesn't allow loading modules with declare-only functions (which is + needed for foo() in bar()), the call_unresolved injects a global + variable that the "linker" can update even after the module is loaded by + MCJIT. The linker would allocate space for the global variable before + the bar() module is loaded. When later foo() module is defined, it will + update bar()'s reference to foo(). + + The legacy lazy JIT and the new ORC JIT would allow a declare-only + function be used in a module as long as it is defined by the time of its + first use. + """ + # Insert an unresolved reference to the function being called. + codegen = self.codegen() + fnty = self.call_conv.get_function_type(sig.return_type, sig.args) + fn = codegen.insert_unresolved_ref(builder, fnty, name) + # Normal call sequence + status, res = self.call_conv.call_function(builder, fn, sig.return_type, + sig.args, args) + with cgutils.if_unlikely(builder, status.is_error): + self.call_conv.return_status_propagate(builder, status) + + res = imputils.fix_returning_optional(self, builder, sig, status, res) + return res + + def get_executable(self, func, fndesc, env): + raise NotImplementedError + + def get_python_api(self, builder): + return PythonAPI(self, builder) + + def sentry_record_alignment(self, rectyp, attr): + """ + Assumes offset starts from a properly aligned location + """ + if self.strict_alignment: + offset = rectyp.offset(attr) + elemty = rectyp.typeof(attr) + if isinstance(elemty, types.NestedArray): + # For a NestedArray we need to consider the data type of + # elements of the array for alignment, not the array structure + # itself + elemty = elemty.dtype + align = self.get_abi_alignment(self.get_data_type(elemty)) + if offset % align: + msg = "{rec}.{attr} of type {type} is not aligned".format( + rec=rectyp, attr=attr, type=elemty) + raise TypeError(msg) + + def get_helper_class(self, typ, kind='value'): + """ + Get a helper class for the given *typ*. + """ + # XXX handle all types: complex, array, etc. + # XXX should it be a method on the model instead? this would allow a default kind... + return cgutils.create_struct_proxy(typ, kind) + + def _make_helper(self, builder, typ, value=None, ref=None, kind='value'): + cls = self.get_helper_class(typ, kind) + return cls(self, builder, value=value, ref=ref) + + def make_helper(self, builder, typ, value=None, ref=None): + """ + Get a helper object to access the *typ*'s members, + for the given value or reference. + """ + return self._make_helper(builder, typ, value, ref, kind='value') + + def make_data_helper(self, builder, typ, ref=None): + """ + As make_helper(), but considers the value as stored in memory, + rather than a live value. + """ + return self._make_helper(builder, typ, ref=ref, kind='data') + + def make_array(self, typ): + from numba.np import arrayobj + return arrayobj.make_array(typ) + + def populate_array(self, arr, **kwargs): + """ + Populate array structure. + """ + from numba.np import arrayobj + return arrayobj.populate_array(arr, **kwargs) + + def make_complex(self, builder, typ, value=None): + """ + Get a helper object to access the given complex numbers' members. + """ + assert isinstance(typ, types.Complex), typ + return self.make_helper(builder, typ, value) + + def make_tuple(self, builder, typ, values): + """ + Create a tuple of the given *typ* containing the *values*. 
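+
+        Illustrative sketch (element values hypothetical)::
+
+            pair_ty = types.UniTuple(types.int64, 2)
+            pair = context.make_tuple(builder, pair_ty, (first, second))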
+ """ + tup = self.get_constant_undef(typ) + for i, val in enumerate(values): + tup = builder.insert_value(tup, val, i) + return tup + + def make_constant_array(self, builder, typ, ary): + """ + Create an array structure reifying the given constant array. + A low-level contiguous array constant is created in the LLVM IR. + """ + datatype = self.get_data_type(typ.dtype) + # don't freeze ary of non-contig or bigger than 1MB + size_limit = 10**6 + + if (self.allow_dynamic_globals and + (typ.layout not in 'FC' or ary.nbytes > size_limit)): + # get pointer from the ary + dataptr = ary.ctypes.data + data = self.add_dynamic_addr(builder, dataptr, info=str(type(dataptr))) + rt_addr = self.add_dynamic_addr(builder, id(ary), info=str(type(ary))) + else: + # Handle data: reify the flattened array in "C" or "F" order as a + # global array of bytes. + flat = ary.flatten(order=typ.layout) + # Note: we use `bytearray(flat.data)` instead of `bytearray(flat)` to + # workaround issue #1850 which is due to numpy issue #3147 + consts = cgutils.create_constant_array(llvmir.IntType(8), bytearray(flat.data)) + data = cgutils.global_constant(builder, ".const.array.data", consts) + # Ensure correct data alignment (issue #1933) + data.align = self.get_abi_alignment(datatype) + # No reference to parent ndarray + rt_addr = None + + # Handle shape + llintp = self.get_value_type(types.intp) + shapevals = [self.get_constant(types.intp, s) for s in ary.shape] + cshape = cgutils.create_constant_array(llintp, shapevals) + + # Handle strides + stridevals = [self.get_constant(types.intp, s) for s in ary.strides] + cstrides = cgutils.create_constant_array(llintp, stridevals) + + # Create array structure + cary = self.make_array(typ)(self, builder) + + intp_itemsize = self.get_constant(types.intp, ary.dtype.itemsize) + self.populate_array(cary, + data=builder.bitcast(data, cary.data.type), + shape=cshape, + strides=cstrides, + itemsize=intp_itemsize, + parent=rt_addr, + meminfo=None) + + return cary._getvalue() + + def add_dynamic_addr(self, builder, intaddr, info): + """ + Returns dynamic address as a void pointer `i8*`. + + Internally, a global variable is added to inform the lowerer about + the usage of dynamic addresses. Caching will be disabled. + """ + assert self.allow_dynamic_globals, "dyn globals disabled in this target" + assert isinstance(intaddr, int), 'dyn addr not of int type' + mod = builder.module + llvoidptr = self.get_value_type(types.voidptr) + addr = self.get_constant(types.uintp, intaddr).inttoptr(llvoidptr) + # Use a unique name by embedding the address value + symname = 'numba.dynamic.globals.{:x}'.format(intaddr) + gv = cgutils.add_global_variable(mod, llvoidptr, symname) + # Use linkonce linkage to allow merging with other GV of the same name. + # And, avoid optimization from assuming its value. + gv.linkage = 'linkonce' + gv.initializer = addr + return builder.load(gv) + + def get_abi_sizeof(self, ty): + """ + Get the ABI size of LLVM type *ty*. + """ + assert isinstance(ty, llvmir.Type), "Expected LLVM type" + return ty.get_abi_size(self.target_data) + + def get_abi_alignment(self, ty): + """ + Get the ABI alignment of LLVM type *ty*. + """ + assert isinstance(ty, llvmir.Type), "Expected LLVM type" + return ty.get_abi_alignment(self.target_data) + + def get_preferred_array_alignment(context, ty): + """ + Get preferred array alignment for Numba type *ty*. 
+ """ + # AVX prefers 32-byte alignment + return 32 + + def post_lowering(self, mod, library): + """Run target specific post-lowering transformation here. + """ + + def create_module(self, name): + """Create a LLVM module + + The default implementation in BaseContext always raises a + ``NotImplementedError`` exception. Subclasses should implement + this method. + """ + raise NotImplementedError + + @property + def active_code_library(self): + """Get the active code library + """ + return self._codelib_stack[-1] + + @contextmanager + def push_code_library(self, lib): + """Push the active code library for the context + """ + self._codelib_stack.append(lib) + try: + yield + finally: + self._codelib_stack.pop() + + def add_linking_libs(self, libs): + """Add iterable of linking libraries to the *active_code_library*. + """ + colib = self.active_code_library + for lib in libs: + colib.add_linking_library(lib) + + def get_ufunc_info(self, ufunc_key): + """Get the ufunc implementation for a given ufunc object. + + The default implementation in BaseContext always raises a + ``NotImplementedError`` exception. Subclasses may raise ``KeyError`` + to signal that the given ``ufunc_key`` is not available. + + Parameters + ---------- + ufunc_key : NumPy ufunc + + Returns + ------- + res : dict[str, callable] + A mapping of a NumPy ufunc type signature to a lower-level + implementation. + """ + raise NotImplementedError(f"{self} does not support ufunc") + +class _wrap_impl(object): + """ + A wrapper object to call an implementation function with some predefined + (context, signature) arguments. + The wrapper also forwards attribute queries, which is important. + """ + + def __init__(self, imp, context, sig): + self._callable = _wrap_missing_loc(imp) + self._imp = self._callable() + self._context = context + self._sig = sig + + def __call__(self, builder, args, loc=None): + res = self._imp(self._context, builder, self._sig, args, loc=loc) + self._context.add_linking_libs(getattr(self, 'libs', ())) + return res + + def __getattr__(self, item): + return getattr(self._imp, item) + + def __repr__(self): + return "" % repr(self._callable) + +def _has_loc(fn): + """Does function *fn* take ``loc`` argument? + """ + sig = utils.pysignature(fn) + return 'loc' in sig.parameters + + +class _wrap_missing_loc(object): + + def __init__(self, fn): + self.func = fn # store this to help with debug + + def __call__(self): + """Wrap function for missing ``loc`` keyword argument. + Otherwise, return the original *fn*. + """ + fn = self.func + if not _has_loc(fn): + def wrapper(*args, **kwargs): + kwargs.pop('loc') # drop unused loc + return fn(*args, **kwargs) + + # Copy the following attributes from the wrapped. 
+ # Following similar implementation as functools.wraps but + # ignore attributes if not available (i.e fix py2.7) + attrs = '__name__', 'libs' + for attr in attrs: + try: + val = getattr(fn, attr) + except AttributeError: + pass + else: + setattr(wrapper, attr, val) + + return wrapper + else: + return fn + + def __repr__(self): + return "" % self.func + + +@utils.runonce +def _initialize_llvm_lock_event(): + """Initial event triggers for LLVM lock + """ + def enter_fn(): + event.start_event("numba:llvm_lock") + + def exit_fn(): + event.end_event("numba:llvm_lock") + + ll.ffi.register_lock_callback(enter_fn, exit_fn) + + +_initialize_llvm_lock_event() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/boxing.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/boxing.py new file mode 100644 index 000000000..011d3a87b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/boxing.py @@ -0,0 +1,1317 @@ +""" +Boxing and unboxing of native Numba values to / from CPython objects. +""" + +from llvmlite import ir + +from numba.core import types, cgutils +from numba.core.pythonapi import box, unbox, reflect, NativeValue +from numba.core.errors import NumbaNotImplementedError +from numba.core.typing.typeof import typeof, Purpose + +from numba.cpython import setobj, listobj +from numba.np import numpy_support +from contextlib import contextmanager, ExitStack + + +# +# Scalar types +# + +@box(types.Boolean) +def box_bool(typ, val, c): + return c.pyapi.bool_from_bool(val) + +@unbox(types.Boolean) +def unbox_boolean(typ, obj, c): + istrue = c.pyapi.object_istrue(obj) + zero = ir.Constant(istrue.type, 0) + val = c.builder.icmp_signed('!=', istrue, zero) + return NativeValue(val, is_error=c.pyapi.c_api_error()) + + +@box(types.IntegerLiteral) +@box(types.BooleanLiteral) +def box_literal_integer(typ, val, c): + val = c.context.cast(c.builder, val, typ, typ.literal_type) + return c.box(typ.literal_type, val) + + +@box(types.Integer) +def box_integer(typ, val, c): + if typ.signed: + ival = c.builder.sext(val, c.pyapi.longlong) + return c.pyapi.long_from_longlong(ival) + else: + ullval = c.builder.zext(val, c.pyapi.ulonglong) + return c.pyapi.long_from_ulonglong(ullval) + +@unbox(types.Integer) +def unbox_integer(typ, obj, c): + ll_type = c.context.get_argument_type(typ) + val = cgutils.alloca_once(c.builder, ll_type) + longobj = c.pyapi.number_long(obj) + with c.pyapi.if_object_ok(longobj): + if typ.signed: + llval = c.pyapi.long_as_longlong(longobj) + else: + llval = c.pyapi.long_as_ulonglong(longobj) + c.pyapi.decref(longobj) + c.builder.store(c.builder.trunc(llval, ll_type), val) + return NativeValue(c.builder.load(val), + is_error=c.pyapi.c_api_error()) + + +@box(types.Float) +def box_float(typ, val, c): + if typ == types.float32: + dbval = c.builder.fpext(val, c.pyapi.double) + else: + assert typ == types.float64 + dbval = val + return c.pyapi.float_from_double(dbval) + +@unbox(types.Float) +def unbox_float(typ, obj, c): + fobj = c.pyapi.number_float(obj) + dbval = c.pyapi.float_as_double(fobj) + c.pyapi.decref(fobj) + if typ == types.float32: + val = c.builder.fptrunc(dbval, + c.context.get_argument_type(typ)) + else: + assert typ == types.float64 + val = dbval + return NativeValue(val, is_error=c.pyapi.c_api_error()) + + +@box(types.Complex) +def box_complex(typ, val, c): + cval = c.context.make_complex(c.builder, typ, value=val) + + if typ == types.complex64: + freal = c.builder.fpext(cval.real, c.pyapi.double) + fimag = c.builder.fpext(cval.imag, 
c.pyapi.double) + else: + assert typ == types.complex128 + freal, fimag = cval.real, cval.imag + return c.pyapi.complex_from_doubles(freal, fimag) + +@unbox(types.Complex) +def unbox_complex(typ, obj, c): + # First unbox to complex128, since that's what CPython gives us + c128 = c.context.make_complex(c.builder, types.complex128) + ok = c.pyapi.complex_adaptor(obj, c128._getpointer()) + failed = cgutils.is_false(c.builder, ok) + + with cgutils.if_unlikely(c.builder, failed): + c.pyapi.err_set_string("PyExc_TypeError", + "conversion to %s failed" % (typ,)) + + if typ == types.complex64: + # Downcast to complex64 if necessary + cplx = c.context.make_complex(c.builder, typ) + cplx.real = c.context.cast(c.builder, c128.real, + types.float64, types.float32) + cplx.imag = c.context.cast(c.builder, c128.imag, + types.float64, types.float32) + else: + assert typ == types.complex128 + cplx = c128 + return NativeValue(cplx._getvalue(), is_error=failed) + + +@box(types.NoneType) +def box_none(typ, val, c): + return c.pyapi.make_none() + +@unbox(types.NoneType) +@unbox(types.EllipsisType) +def unbox_none(typ, val, c): + return NativeValue(c.context.get_dummy_value()) + + +@box(types.NPDatetime) +def box_npdatetime(typ, val, c): + return c.pyapi.create_np_datetime(val, typ.unit_code) + +@unbox(types.NPDatetime) +def unbox_npdatetime(typ, obj, c): + val = c.pyapi.extract_np_datetime(obj) + return NativeValue(val, is_error=c.pyapi.c_api_error()) + + +@box(types.NPTimedelta) +def box_nptimedelta(typ, val, c): + return c.pyapi.create_np_timedelta(val, typ.unit_code) + +@unbox(types.NPTimedelta) +def unbox_nptimedelta(typ, obj, c): + val = c.pyapi.extract_np_timedelta(obj) + return NativeValue(val, is_error=c.pyapi.c_api_error()) + + +@box(types.RawPointer) +def box_raw_pointer(typ, val, c): + """ + Convert a raw pointer to a Python int. + """ + ll_intp = c.context.get_value_type(types.uintp) + addr = c.builder.ptrtoint(val, ll_intp) + return c.box(types.uintp, addr) + + +@box(types.EnumMember) +def box_enum(typ, val, c): + """ + Fetch an enum member given its native value. + """ + valobj = c.box(typ.dtype, val) + # Call the enum class with the value object + cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) + return c.pyapi.call_function_objargs(cls_obj, (valobj,)) + + +@unbox(types.EnumMember) +def unbox_enum(typ, obj, c): + """ + Convert an enum member's value to its native value. + """ + valobj = c.pyapi.object_getattr_string(obj, "value") + return c.unbox(typ.dtype, valobj) + +# +# Composite types +# + +@box(types.Record) +def box_record(typ, val, c): + # Note we will create a copy of the record + # This is the only safe way. + size = ir.Constant(ir.IntType(32), val.type.pointee.count) + ptr = c.builder.bitcast(val, ir.PointerType(ir.IntType(8))) + return c.pyapi.recreate_record(ptr, size, typ.dtype, c.env_manager) + + +@unbox(types.Record) +def unbox_record(typ, obj, c): + buf = c.pyapi.alloca_buffer() + ptr = c.pyapi.extract_record_data(obj, buf) + is_error = cgutils.is_null(c.builder, ptr) + + ltyp = c.context.get_value_type(typ) + val = c.builder.bitcast(ptr, ltyp) + + def cleanup(): + c.pyapi.release_buffer(buf) + return NativeValue(val, cleanup=cleanup, is_error=is_error) + + +@box(types.UnicodeCharSeq) +def box_unicodecharseq(typ, val, c): + # XXX could kind be determined from strptr? 
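+    # The CPython unicode "kind" encodes the per-character width in bytes;
+    # select the kind matching the width NumPy uses for unicode storage.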
+ unicode_kind = { + 1: c.pyapi.py_unicode_1byte_kind, + 2: c.pyapi.py_unicode_2byte_kind, + 4: c.pyapi.py_unicode_4byte_kind}[numpy_support.sizeof_unicode_char] + kind = c.context.get_constant(types.int32, unicode_kind) + rawptr = cgutils.alloca_once_value(c.builder, value=val) + strptr = c.builder.bitcast(rawptr, c.pyapi.cstring) + + fullsize = c.context.get_constant(types.intp, typ.count) + zero = fullsize.type(0) + one = fullsize.type(1) + step = fullsize.type(numpy_support.sizeof_unicode_char) + count = cgutils.alloca_once_value(c.builder, zero) + with cgutils.loop_nest(c.builder, [fullsize], fullsize.type) as [idx]: + # Get char at idx + ch = c.builder.load(c.builder.gep(strptr, [c.builder.mul(idx, step)])) + # If the char is a non-null-byte, store the next index as count + with c.builder.if_then(cgutils.is_not_null(c.builder, ch)): + c.builder.store(c.builder.add(idx, one), count) + strlen = c.builder.load(count) + return c.pyapi.string_from_kind_and_data(kind, strptr, strlen) + + +@unbox(types.UnicodeCharSeq) +def unbox_unicodecharseq(typ, obj, c): + lty = c.context.get_value_type(typ) + + ok, buffer, size, kind, is_ascii, hashv = \ + c.pyapi.string_as_string_size_and_kind(obj) + + # If conversion is ok, copy the buffer to the output storage. + with cgutils.if_likely(c.builder, ok): + # Check if the returned string size fits in the charseq + storage_size = ir.Constant(size.type, typ.count) + size_fits = c.builder.icmp_unsigned("<=", size, storage_size) + + # Allow truncation of string + size = c.builder.select(size_fits, size, storage_size) + + # Initialize output to zero bytes + null_string = ir.Constant(lty, None) + outspace = cgutils.alloca_once_value(c.builder, null_string) + + # We don't need to set the NULL-terminator because the storage + # is already zero-filled. + cgutils.memcpy(c.builder, + c.builder.bitcast(outspace, buffer.type), + buffer, size) + + ret = c.builder.load(outspace) + return NativeValue(ret, is_error=c.builder.not_(ok)) + + +@box(types.Bytes) +def box_bytes(typ, val, c): + obj = c.context.make_helper(c.builder, typ, val) + ret = c.pyapi.bytes_from_string_and_size(obj.data, obj.nitems) + c.context.nrt.decref(c.builder, typ, val) + return ret + + +@box(types.CharSeq) +def box_charseq(typ, val, c): + rawptr = cgutils.alloca_once_value(c.builder, value=val) + strptr = c.builder.bitcast(rawptr, c.pyapi.cstring) + fullsize = c.context.get_constant(types.intp, typ.count) + zero = fullsize.type(0) + one = fullsize.type(1) + count = cgutils.alloca_once_value(c.builder, zero) + + # Find the length of the string, mimicking Numpy's behaviour: + # search for the last non-null byte in the underlying storage + # (e.g. b'A\0\0B\0\0\0' will return the logical string b'A\0\0B') + with cgutils.loop_nest(c.builder, [fullsize], fullsize.type) as [idx]: + # Get char at idx + ch = c.builder.load(c.builder.gep(strptr, [idx])) + # If the char is a non-null-byte, store the next index as count + with c.builder.if_then(cgutils.is_not_null(c.builder, ch)): + c.builder.store(c.builder.add(idx, one), count) + + strlen = c.builder.load(count) + return c.pyapi.bytes_from_string_and_size(strptr, strlen) + + +@unbox(types.CharSeq) +def unbox_charseq(typ, obj, c): + lty = c.context.get_value_type(typ) + ok, buffer, size = c.pyapi.string_as_string_and_size(obj) + + # If conversion is ok, copy the buffer to the output storage. 
+ with cgutils.if_likely(c.builder, ok): + # Check if the returned string size fits in the charseq + storage_size = ir.Constant(size.type, typ.count) + size_fits = c.builder.icmp_unsigned("<=", size, storage_size) + + # Allow truncation of string + size = c.builder.select(size_fits, size, storage_size) + + # Initialize output to zero bytes + null_string = ir.Constant(lty, None) + outspace = cgutils.alloca_once_value(c.builder, null_string) + + # We don't need to set the NULL-terminator because the storage + # is already zero-filled. + cgutils.memcpy(c.builder, + c.builder.bitcast(outspace, buffer.type), + buffer, size) + + ret = c.builder.load(outspace) + return NativeValue(ret, is_error=c.builder.not_(ok)) + + +@box(types.Optional) +def box_optional(typ, val, c): + optval = c.context.make_helper(c.builder, typ, val) + ret = cgutils.alloca_once_value(c.builder, c.pyapi.borrow_none()) + with c.builder.if_else(optval.valid) as (then, otherwise): + with then: + validres = c.box(typ.type, optval.data) + c.builder.store(validres, ret) + with otherwise: + c.builder.store(c.pyapi.make_none(), ret) + return c.builder.load(ret) + + +@unbox(types.Optional) +def unbox_optional(typ, obj, c): + """ + Convert object *obj* to a native optional structure. + """ + noneval = c.context.make_optional_none(c.builder, typ.type) + is_not_none = c.builder.icmp_signed('!=', obj, c.pyapi.borrow_none()) + + retptr = cgutils.alloca_once(c.builder, noneval.type) + errptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + + with c.builder.if_else(is_not_none) as (then, orelse): + with then: + native = c.unbox(typ.type, obj) + just = c.context.make_optional_value(c.builder, + typ.type, native.value) + c.builder.store(just, retptr) + c.builder.store(native.is_error, errptr) + + with orelse: + c.builder.store(noneval, retptr) + + if native.cleanup is not None: + def cleanup(): + with c.builder.if_then(is_not_none): + native.cleanup() + else: + cleanup = None + + ret = c.builder.load(retptr) + return NativeValue(ret, is_error=c.builder.load(errptr), + cleanup=cleanup) + + +@unbox(types.SliceType) +def unbox_slice(typ, obj, c): + """ + Convert object *obj* to a native slice structure. + """ + from numba.cpython import slicing + ok, start, stop, step = c.pyapi.slice_as_ints(obj) + sli = c.context.make_helper(c.builder, typ) + sli.start = start + sli.stop = stop + sli.step = step + return NativeValue(sli._getvalue(), is_error=c.builder.not_(ok)) + +@box(types.SliceLiteral) +def box_slice_literal(typ, val, c): + # Check for integer overflows at compile time. + slice_lit = typ.literal_value + for field_name in ("start", "stop", "step"): + field_obj = getattr(slice_lit, field_name) + if isinstance(field_obj, int): + try: + typeof(field_obj, Purpose) + except ValueError as e: + raise ValueError(( + f"Unable to create literal slice. " + f"Error encountered with {field_name} " + f"attribute. {str(e)}") + ) + + py_ctor, py_args = typ.literal_value.__reduce__() + serialized_ctor = c.pyapi.serialize_object(py_ctor) + serialized_args = c.pyapi.serialize_object(py_args) + ctor = c.pyapi.unserialize(serialized_ctor) + args = c.pyapi.unserialize(serialized_args) + obj = c.pyapi.call(ctor, args) + c.pyapi.decref(ctor) + c.pyapi.decref(args) + return obj + +@unbox(types.StringLiteral) +def unbox_string_literal(typ, obj, c): + # A string literal is a dummy value + return NativeValue(c.context.get_dummy_value()) + +# +# Collections +# + +# NOTE: boxing functions are supposed to steal any NRT references in +# the given native value. 
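+#
+# For instance, box_array() below decrefs *val* once it has been adapted to
+# a Python object, so callers must treat their reference as consumed.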
+ +@box(types.Array) +def box_array(typ, val, c): + nativearycls = c.context.make_array(typ) + nativeary = nativearycls(c.context, c.builder, value=val) + if c.context.enable_nrt: + np_dtype = numpy_support.as_dtype(typ.dtype) + dtypeptr = c.env_manager.read_const(c.env_manager.add_const(np_dtype)) + newary = c.pyapi.nrt_adapt_ndarray_to_python(typ, val, dtypeptr) + # Steals NRT ref + c.context.nrt.decref(c.builder, typ, val) + return newary + else: + parent = nativeary.parent + c.pyapi.incref(parent) + return parent + + +@unbox(types.Buffer) +def unbox_buffer(typ, obj, c): + """ + Convert a Py_buffer-providing object to a native array structure. + """ + buf = c.pyapi.alloca_buffer() + res = c.pyapi.get_buffer(obj, buf) + is_error = cgutils.is_not_null(c.builder, res) + + nativearycls = c.context.make_array(typ) + nativeary = nativearycls(c.context, c.builder) + aryptr = nativeary._getpointer() + + with cgutils.if_likely(c.builder, c.builder.not_(is_error)): + ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) + if c.context.enable_nrt: + c.pyapi.nrt_adapt_buffer_from_python(buf, ptr) + else: + c.pyapi.numba_buffer_adaptor(buf, ptr) + + def cleanup(): + c.pyapi.release_buffer(buf) + + return NativeValue(c.builder.load(aryptr), is_error=is_error, + cleanup=cleanup) + +@unbox(types.Array) +def unbox_array(typ, obj, c): + """ + Convert a Numpy array object to a native array structure. + """ + # This is necessary because unbox_buffer() does not work on some + # dtypes, e.g. datetime64 and timedelta64. + # TODO check matching dtype. + # currently, mismatching dtype will still work and causes + # potential memory corruption + nativearycls = c.context.make_array(typ) + nativeary = nativearycls(c.context, c.builder) + aryptr = nativeary._getpointer() + + ptr = c.builder.bitcast(aryptr, c.pyapi.voidptr) + if c.context.enable_nrt: + errcode = c.pyapi.nrt_adapt_ndarray_from_python(obj, ptr) + else: + errcode = c.pyapi.numba_array_adaptor(obj, ptr) + + # TODO: here we have minimal typechecking by the itemsize. + # need to do better + try: + expected_itemsize = numpy_support.as_dtype(typ.dtype).itemsize + except NumbaNotImplementedError: + # Don't check types that can't be `as_dtype()`-ed + itemsize_mismatch = cgutils.false_bit + else: + expected_itemsize = nativeary.itemsize.type(expected_itemsize) + itemsize_mismatch = c.builder.icmp_unsigned( + '!=', + nativeary.itemsize, + expected_itemsize, + ) + + failed = c.builder.or_( + cgutils.is_not_null(c.builder, errcode), + itemsize_mismatch, + ) + # Handle error + with c.builder.if_then(failed, likely=False): + c.pyapi.err_set_string("PyExc_TypeError", + "can't unbox array from PyObject into " + "native value. The object maybe of a " + "different type") + return NativeValue(c.builder.load(aryptr), is_error=failed) + + +@box(types.Tuple) +@box(types.UniTuple) +def box_tuple(typ, val, c): + """ + Convert native array or structure *val* to a tuple object. + """ + tuple_val = c.pyapi.tuple_new(typ.count) + + for i, dtype in enumerate(typ): + item = c.builder.extract_value(val, i) + obj = c.box(dtype, item) + c.pyapi.tuple_setitem(tuple_val, i, obj) + + return tuple_val + +@box(types.NamedTuple) +@box(types.NamedUniTuple) +def box_namedtuple(typ, val, c): + """ + Convert native array or structure *val* to a namedtuple object. 
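+
+    Illustrative sketch (the namedtuple class is hypothetical)::
+
+        Point = collections.namedtuple('Point', ['x', 'y'])
+        # a native pair carrying Point as its instance_class is boxed back
+        # into a Point object via ``typ.instance_class``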
+ """ + cls_obj = c.pyapi.unserialize(c.pyapi.serialize_object(typ.instance_class)) + tuple_obj = box_tuple(typ, val, c) + obj = c.pyapi.call(cls_obj, tuple_obj) + c.pyapi.decref(cls_obj) + c.pyapi.decref(tuple_obj) + return obj + + +@unbox(types.BaseTuple) +def unbox_tuple(typ, obj, c): + """ + Convert tuple *obj* to a native array (if homogeneous) or structure. + """ + n = len(typ) + values = [] + cleanups = [] + lty = c.context.get_value_type(typ) + + is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + value_ptr = cgutils.alloca_once(c.builder, lty) + + # Issue #1638: need to check the tuple size + actual_size = c.pyapi.tuple_size(obj) + size_matches = c.builder.icmp_unsigned('==', actual_size, + ir.Constant(actual_size.type, n)) + with c.builder.if_then(c.builder.not_(size_matches), likely=False): + c.pyapi.err_format( + "PyExc_ValueError", + "size mismatch for tuple, expected %d element(s) but got %%zd" % (n,), + actual_size) + c.builder.store(cgutils.true_bit, is_error_ptr) + + # We unbox the items even if not `size_matches`, to avoid issues with + # the generated IR (instruction doesn't dominate all uses) + for i, eltype in enumerate(typ): + elem = c.pyapi.tuple_getitem(obj, i) + native = c.unbox(eltype, elem) + values.append(native.value) + with c.builder.if_then(native.is_error, likely=False): + c.builder.store(cgutils.true_bit, is_error_ptr) + if native.cleanup is not None: + cleanups.append(native.cleanup) + + value = c.context.make_tuple(c.builder, typ, values) + c.builder.store(value, value_ptr) + + if cleanups: + with c.builder.if_then(size_matches, likely=True): + def cleanup(): + for func in reversed(cleanups): + func() + else: + cleanup = None + + return NativeValue(c.builder.load(value_ptr), cleanup=cleanup, + is_error=c.builder.load(is_error_ptr)) + + +@box(types.List) +def box_list(typ, val, c): + """ + Convert native list *val* to a list object. + """ + list = listobj.ListInstance(c.context, c.builder, typ, val) + obj = list.parent + res = cgutils.alloca_once_value(c.builder, obj) + with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): + with has_parent: + # List is actually reflected => return the original object + # (note not all list instances whose *type* is reflected are + # actually reflected; see numba.tests.test_lists for an example) + c.pyapi.incref(obj) + + with otherwise: + # Build a new Python list + nitems = list.size + obj = c.pyapi.list_new(nitems) + with c.builder.if_then(cgutils.is_not_null(c.builder, obj), + likely=True): + with cgutils.for_range(c.builder, nitems) as loop: + item = list.getitem(loop.index) + list.incref_value(item) + itemobj = c.box(typ.dtype, item) + c.pyapi.list_setitem(obj, loop.index, itemobj) + + c.builder.store(obj, res) + + # Steal NRT ref + c.context.nrt.decref(c.builder, typ, val) + return c.builder.load(res) + + +class _NumbaTypeHelper(object): + """A helper for acquiring `numba.typeof` for type checking. + + Usage + ----- + + # `c` is the boxing context. + with _NumbaTypeHelper(c) as nth: + # This contextmanager maintains the lifetime of the `numba.typeof` + # function. + the_numba_type = nth.typeof(some_object) + # Do work on the type object + do_checks(the_numba_type) + # Cleanup + c.pyapi.decref(the_numba_type) + # At this point *nth* should not be used. 
+ """ + def __init__(self, c): + self.c = c + + def __enter__(self): + c = self.c + numba_name = c.context.insert_const_string(c.builder.module, 'numba') + numba_mod = c.pyapi.import_module_noblock(numba_name) + typeof_fn = c.pyapi.object_getattr_string(numba_mod, 'typeof') + self.typeof_fn = typeof_fn + c.pyapi.decref(numba_mod) + return self + + def __exit__(self, *args, **kwargs): + c = self.c + c.pyapi.decref(self.typeof_fn) + + def typeof(self, obj): + res = self.c.pyapi.call_function_objargs(self.typeof_fn, [obj]) + return res + + +def _python_list_to_native(typ, obj, c, size, listptr, errorptr): + """ + Construct a new native list from a Python list. + """ + def check_element_type(nth, itemobj, expected_typobj): + typobj = nth.typeof(itemobj) + # Check if *typobj* is NULL + with c.builder.if_then( + cgutils.is_null(c.builder, typobj), + likely=False, + ): + c.builder.store(cgutils.true_bit, errorptr) + loop.do_break() + # Mandate that objects all have the same exact type + type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj) + + with c.builder.if_then(type_mismatch, likely=False): + c.builder.store(cgutils.true_bit, errorptr) + c.pyapi.err_format( + "PyExc_TypeError", + "can't unbox heterogeneous list: %S != %S", + expected_typobj, typobj, + ) + c.pyapi.decref(typobj) + loop.do_break() + c.pyapi.decref(typobj) + + # Allocate a new native list + ok, list = listobj.ListInstance.allocate_ex(c.context, c.builder, typ, size) + with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok): + with if_ok: + list.size = size + zero = ir.Constant(size.type, 0) + with c.builder.if_then(c.builder.icmp_signed('>', size, zero), + likely=True): + # Traverse Python list and unbox objects into native list + with _NumbaTypeHelper(c) as nth: + # Note: *expected_typobj* can't be NULL + expected_typobj = nth.typeof(c.pyapi.list_getitem(obj, zero)) + with cgutils.for_range(c.builder, size) as loop: + itemobj = c.pyapi.list_getitem(obj, loop.index) + check_element_type(nth, itemobj, expected_typobj) + # XXX we don't call native cleanup for each + # list element, since that would require keeping + # of which unboxings have been successful. + native = c.unbox(typ.dtype, itemobj) + with c.builder.if_then(native.is_error, likely=False): + c.builder.store(cgutils.true_bit, errorptr) + loop.do_break() + # The reference is borrowed so incref=False + list.setitem(loop.index, native.value, incref=False) + c.pyapi.decref(expected_typobj) + if typ.reflected: + list.parent = obj + # Stuff meminfo pointer into the Python object for + # later reuse. + with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)), + likely=False): + c.pyapi.object_set_private_data(obj, list.meminfo) + list.set_dirty(False) + c.builder.store(list.value, listptr) + + with if_not_ok: + c.builder.store(cgutils.true_bit, errorptr) + + # If an error occurred, drop the whole native list + with c.builder.if_then(c.builder.load(errorptr)): + c.context.nrt.decref(c.builder, typ, list.value) + + +@unbox(types.List) +def unbox_list(typ, obj, c): + """ + Convert list *obj* to a native list. + + If list was previously unboxed, we reuse the existing native list + to ensure consistency. + """ + size = c.pyapi.list_size(obj) + + errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + listptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ)) + + # See if the list was previously unboxed, if so, re-use the meminfo. 
+ ptr = c.pyapi.object_get_private_data(obj) + + with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) \ + as (has_meminfo, otherwise): + + with has_meminfo: + # List was previously unboxed => reuse meminfo + list = listobj.ListInstance.from_meminfo(c.context, c.builder, typ, ptr) + list.size = size + if typ.reflected: + list.parent = obj + c.builder.store(list.value, listptr) + + with otherwise: + _python_list_to_native(typ, obj, c, size, listptr, errorptr) + + def cleanup(): + # Clean up the associated pointer, as the meminfo is now invalid. + c.pyapi.object_reset_private_data(obj) + + return NativeValue(c.builder.load(listptr), + is_error=c.builder.load(errorptr), + cleanup=cleanup) + + +@reflect(types.List) +def reflect_list(typ, val, c): + """ + Reflect the native list's contents into the Python object. + """ + if not typ.reflected: + return + if typ.dtype.reflected: + msg = "cannot reflect element of reflected container: {}\n".format(typ) + raise TypeError(msg) + + list = listobj.ListInstance(c.context, c.builder, typ, val) + with c.builder.if_then(list.dirty, likely=False): + obj = list.parent + size = c.pyapi.list_size(obj) + new_size = list.size + diff = c.builder.sub(new_size, size) + diff_gt_0 = c.builder.icmp_signed('>=', diff, + ir.Constant(diff.type, 0)) + with c.builder.if_else(diff_gt_0) as (if_grow, if_shrink): + # XXX no error checking below + with if_grow: + # First overwrite existing items + with cgutils.for_range(c.builder, size) as loop: + item = list.getitem(loop.index) + list.incref_value(item) + itemobj = c.box(typ.dtype, item) + c.pyapi.list_setitem(obj, loop.index, itemobj) + # Then add missing items + with cgutils.for_range(c.builder, diff) as loop: + idx = c.builder.add(size, loop.index) + item = list.getitem(idx) + list.incref_value(item) + itemobj = c.box(typ.dtype, item) + c.pyapi.list_append(obj, itemobj) + c.pyapi.decref(itemobj) + + with if_shrink: + # First delete list tail + c.pyapi.list_setslice(obj, new_size, size, None) + # Then overwrite remaining items + with cgutils.for_range(c.builder, new_size) as loop: + item = list.getitem(loop.index) + list.incref_value(item) + itemobj = c.box(typ.dtype, item) + c.pyapi.list_setitem(obj, loop.index, itemobj) + + # Mark the list clean, in case it is reflected twice + list.set_dirty(False) + + +def _python_set_to_native(typ, obj, c, size, setptr, errorptr): + """ + Construct a new native set from a Python set. 
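+
+    The new instance is written to *setptr*; failure is reported by setting
+    *errorptr*, in which case the native set is dropped.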
+ """ + # Allocate a new native set + ok, inst = setobj.SetInstance.allocate_ex(c.context, c.builder, typ, size) + with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok): + with if_ok: + # Traverse Python set and unbox objects into native set + typobjptr = cgutils.alloca_once_value(c.builder, + ir.Constant(c.pyapi.pyobj, None)) + + with c.pyapi.set_iterate(obj) as loop: + itemobj = loop.value + # Mandate that objects all have the same exact type + typobj = c.pyapi.get_type(itemobj) + expected_typobj = c.builder.load(typobjptr) + + with c.builder.if_else( + cgutils.is_null(c.builder, expected_typobj), + likely=False) as (if_first, if_not_first): + with if_first: + # First iteration => store item type + c.builder.store(typobj, typobjptr) + with if_not_first: + # Otherwise, check item type + type_mismatch = c.builder.icmp_signed('!=', typobj, + expected_typobj) + with c.builder.if_then(type_mismatch, likely=False): + c.builder.store(cgutils.true_bit, errorptr) + c.pyapi.err_set_string("PyExc_TypeError", + "can't unbox heterogeneous set") + loop.do_break() + + # XXX we don't call native cleanup for each set element, + # since that would require keeping track + # of which unboxings have been successful. + native = c.unbox(typ.dtype, itemobj) + with c.builder.if_then(native.is_error, likely=False): + c.builder.store(cgutils.true_bit, errorptr) + inst.add_pyapi(c.pyapi, native.value, do_resize=False) + + if typ.reflected: + inst.parent = obj + # Associate meminfo pointer with the Python object for later reuse. + with c.builder.if_then(c.builder.not_(c.builder.load(errorptr)), + likely=False): + c.pyapi.object_set_private_data(obj, inst.meminfo) + inst.set_dirty(False) + c.builder.store(inst.value, setptr) + + with if_not_ok: + c.builder.store(cgutils.true_bit, errorptr) + + # If an error occurred, drop the whole native set + with c.builder.if_then(c.builder.load(errorptr)): + c.context.nrt.decref(c.builder, typ, inst.value) + + +@unbox(types.Set) +def unbox_set(typ, obj, c): + """ + Convert set *obj* to a native set. + + If set was previously unboxed, we reuse the existing native set + to ensure consistency. + """ + size = c.pyapi.set_size(obj) + + errorptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + setptr = cgutils.alloca_once(c.builder, c.context.get_value_type(typ)) + + # See if the set was previously unboxed, if so, re-use the meminfo. + ptr = c.pyapi.object_get_private_data(obj) + + with c.builder.if_else(cgutils.is_not_null(c.builder, ptr)) \ + as (has_meminfo, otherwise): + + with has_meminfo: + # Set was previously unboxed => reuse meminfo + inst = setobj.SetInstance.from_meminfo(c.context, c.builder, typ, ptr) + if typ.reflected: + inst.parent = obj + c.builder.store(inst.value, setptr) + + with otherwise: + _python_set_to_native(typ, obj, c, size, setptr, errorptr) + + def cleanup(): + # Clean up the associated pointer, as the meminfo is now invalid. + c.pyapi.object_reset_private_data(obj) + + return NativeValue(c.builder.load(setptr), + is_error=c.builder.load(errorptr), + cleanup=cleanup) + + +def _native_set_to_python_list(typ, payload, c): + """ + Create a Python list from a native set's items. 
+ """ + nitems = payload.used + listobj = c.pyapi.list_new(nitems) + ok = cgutils.is_not_null(c.builder, listobj) + with c.builder.if_then(ok, likely=True): + index = cgutils.alloca_once_value(c.builder, + ir.Constant(nitems.type, 0)) + with payload._iterate() as loop: + i = c.builder.load(index) + item = loop.entry.key + c.context.nrt.incref(c.builder, typ.dtype, item) + itemobj = c.box(typ.dtype, item) + c.pyapi.list_setitem(listobj, i, itemobj) + i = c.builder.add(i, ir.Constant(i.type, 1)) + c.builder.store(i, index) + + return ok, listobj + + +@box(types.Set) +def box_set(typ, val, c): + """ + Convert native set *val* to a set object. + """ + inst = setobj.SetInstance(c.context, c.builder, typ, val) + obj = inst.parent + res = cgutils.alloca_once_value(c.builder, obj) + + with c.builder.if_else(cgutils.is_not_null(c.builder, obj)) as (has_parent, otherwise): + with has_parent: + # Set is actually reflected => return the original object + # (note not all set instances whose *type* is reflected are + # actually reflected; see numba.tests.test_sets for an example) + c.pyapi.incref(obj) + + with otherwise: + # Build a new Python list and then create a set from that + payload = inst.payload + ok, listobj = _native_set_to_python_list(typ, payload, c) + with c.builder.if_then(ok, likely=True): + obj = c.pyapi.set_new(listobj) + c.pyapi.decref(listobj) + c.builder.store(obj, res) + + # Steal NRT ref + c.context.nrt.decref(c.builder, typ, val) + return c.builder.load(res) + +@reflect(types.Set) +def reflect_set(typ, val, c): + """ + Reflect the native set's contents into the Python object. + """ + if not typ.reflected: + return + inst = setobj.SetInstance(c.context, c.builder, typ, val) + payload = inst.payload + + with c.builder.if_then(payload.dirty, likely=False): + obj = inst.parent + # XXX errors are not dealt with below + c.pyapi.set_clear(obj) + + # Build a new Python list and then update the set with that + ok, listobj = _native_set_to_python_list(typ, payload, c) + with c.builder.if_then(ok, likely=True): + c.pyapi.set_update(obj, listobj) + c.pyapi.decref(listobj) + + # Mark the set clean, in case it is reflected twice + inst.set_dirty(False) + + +# +# Other types +# + +@box(types.Generator) +def box_generator(typ, val, c): + return c.pyapi.from_native_generator(val, typ, c.env_manager.env_ptr) + +@unbox(types.Generator) +def unbox_generator(typ, obj, c): + return c.pyapi.to_native_generator(obj, typ) + + +@box(types.DType) +def box_dtype(typ, val, c): + np_dtype = numpy_support.as_dtype(typ.dtype) + return c.pyapi.unserialize(c.pyapi.serialize_object(np_dtype)) + +@unbox(types.DType) +def unbox_dtype(typ, val, c): + return NativeValue(c.context.get_dummy_value()) + + +@box(types.NumberClass) +def box_number_class(typ, val, c): + np_dtype = numpy_support.as_dtype(typ.dtype) + return c.pyapi.unserialize(c.pyapi.serialize_object(np_dtype)) + +@unbox(types.NumberClass) +def unbox_number_class(typ, val, c): + return NativeValue(c.context.get_dummy_value()) + + +@box(types.PyObject) +@box(types.Object) +def box_pyobject(typ, val, c): + return val + +@unbox(types.PyObject) +@unbox(types.Object) +def unbox_pyobject(typ, obj, c): + return NativeValue(obj) + + +@unbox(types.ExternalFunctionPointer) +def unbox_funcptr(typ, obj, c): + if typ.get_pointer is None: + raise NotImplementedError(typ) + + # Call get_pointer() on the object to get the raw pointer value + ptrty = c.context.get_function_pointer_type(typ) + ret = cgutils.alloca_once_value(c.builder, + ir.Constant(ptrty, None), + 
+                                    name='fnptr')
+    ser = c.pyapi.serialize_object(typ.get_pointer)
+    get_pointer = c.pyapi.unserialize(ser)
+    with cgutils.if_likely(c.builder,
+                           cgutils.is_not_null(c.builder, get_pointer)):
+        intobj = c.pyapi.call_function_objargs(get_pointer, (obj,))
+        c.pyapi.decref(get_pointer)
+        with cgutils.if_likely(c.builder,
+                               cgutils.is_not_null(c.builder, intobj)):
+            ptr = c.pyapi.long_as_voidptr(intobj)
+            c.pyapi.decref(intobj)
+            c.builder.store(c.builder.bitcast(ptr, ptrty), ret)
+    return NativeValue(c.builder.load(ret), is_error=c.pyapi.c_api_error())
+
+@box(types.DeferredType)
+def box_deferred(typ, val, c):
+    out = c.pyapi.from_native_value(typ.get(),
+                                    c.builder.extract_value(val, [0]),
+                                    env_manager=c.env_manager)
+    return out
+
+
+@unbox(types.DeferredType)
+def unbox_deferred(typ, obj, c):
+    native_value = c.pyapi.to_native_value(typ.get(), obj)
+    model = c.context.data_model_manager[typ]
+    res = model.set(c.builder, model.make_uninitialized(), native_value.value)
+    return NativeValue(res, is_error=native_value.is_error,
+                       cleanup=native_value.cleanup)
+
+
+@unbox(types.Dispatcher)
+def unbox_dispatcher(typ, obj, c):
+    # In native code, Dispatcher types can be cast to FunctionType.
+    return NativeValue(obj)
+
+
+@box(types.Dispatcher)
+def box_pyobject(typ, val, c):
+    c.pyapi.incref(val)
+    return val
+
+
+def unbox_unsupported(typ, obj, c):
+    c.pyapi.err_set_string("PyExc_TypeError",
+                           "can't unbox {!r} type".format(typ))
+    res = c.context.get_constant_null(typ)
+    return NativeValue(res, is_error=cgutils.true_bit)
+
+
+def box_unsupported(typ, val, c):
+    msg = "cannot convert native %s to Python object" % (typ,)
+    c.pyapi.err_set_string("PyExc_TypeError", msg)
+    res = c.pyapi.get_null_object()
+    return res
+
+
+@box(types.Literal)
+def box_literal(typ, val, c):
+    # Const type contains the python object of the constant value,
+    # which we can directly return.
+ retval = typ.literal_value + # Serialize the value into the IR + return c.pyapi.unserialize(c.pyapi.serialize_object(retval)) + + +@box(types.MemInfoPointer) +def box_meminfo_pointer(typ, val, c): + return c.pyapi.nrt_meminfo_as_pyobject(val) + + +@unbox(types.MemInfoPointer) +def unbox_meminfo_pointer(typ, obj, c): + res = c.pyapi.nrt_meminfo_from_pyobject(obj) + errored = cgutils.is_null(c.builder, res) + return NativeValue(res, is_error=errored) + +@unbox(types.TypeRef) +def unbox_typeref(typ, val, c): + return NativeValue(c.context.get_dummy_value(), is_error=cgutils.false_bit) + + +@box(types.LiteralStrKeyDict) +def box_LiteralStrKeyDict(typ, val, c): + return box_unsupported(typ, val, c) + + +@contextmanager +def early_exit_if(builder, stack: ExitStack, cond): + then, otherwise = stack.enter_context(builder.if_else(cond, likely=False)) + with then: + yield + stack.enter_context(otherwise) + + +def early_exit_if_null(builder, stack, obj): + return early_exit_if(builder, stack, cgutils.is_null(builder, obj)) + + +# Original implementation at: https://github.com/numba/numba/issues/4499#issuecomment-1063138477 +@unbox(types.NumPyRandomBitGeneratorType) +def unbox_numpy_random_bitgenerator(typ, obj, c): + """ + The bit_generator instance has a `.ctypes` attr which is a namedtuple + with the following members (types): + * state_address (Python int) + * state (ctypes.c_void_p) + * next_uint64 (ctypes.CFunctionType instance) + * next_uint32 (ctypes.CFunctionType instance) + * next_double (ctypes.CFunctionType instance) + * bit_generator (ctypes.c_void_p) + """ + + is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + extra_refs = [] + + def clear_extra_refs(): + for _ref in extra_refs: + c.pyapi.decref(_ref) + + def handle_failure(): + c.builder.store(cgutils.true_bit, is_error_ptr) + clear_extra_refs() + + with ExitStack() as stack: + + def object_getattr_safely(obj, attr): + attr_obj = c.pyapi.object_getattr_string(obj, attr) + extra_refs.append(attr_obj) + return attr_obj + + struct_ptr = cgutils.create_struct_proxy(typ)(c.context, c.builder) + struct_ptr.parent = obj + + # Get the .ctypes attr + ctypes_binding = object_getattr_safely(obj, 'ctypes') + with early_exit_if_null(c.builder, stack, ctypes_binding): + handle_failure() + + # Look up the "state_address" member and wire it into the struct + interface_state_address = object_getattr_safely( + ctypes_binding, 'state_address') + with early_exit_if_null(c.builder, stack, interface_state_address): + handle_failure() + + setattr(struct_ptr, 'state_address', + c.unbox(types.uintp, interface_state_address).value) + + # Look up the "state" member and wire it into the struct + interface_state = object_getattr_safely(ctypes_binding, 'state') + with early_exit_if_null(c.builder, stack, interface_state): + handle_failure() + + interface_state_value = object_getattr_safely( + interface_state, 'value') + with early_exit_if_null(c.builder, stack, interface_state_value): + handle_failure() + setattr( + struct_ptr, + 'state', + c.unbox( + types.uintp, + interface_state_value).value) + + # Want to store callable function pointers to these CFunctionTypes, so + # import ctypes and use it to cast the CFunctionTypes to c_void_p and + # store the results. 
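+        # A rough pure-Python picture of what the IR built below computes
+        # (sketch only; `bit_gen` stands in for the BitGenerator instance
+        # being unboxed):
+        #
+        #   import ctypes
+        #   fnptr = ctypes.cast(bit_gen.ctypes.next_uint64,
+        #                       ctypes.c_void_p).value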
+        # First find ctypes.cast, and ctypes.c_void_p
+        ctypes_name = c.context.insert_const_string(c.builder.module, 'ctypes')
+        ctypes_module = c.pyapi.import_module_noblock(ctypes_name)
+        extra_refs.append(ctypes_module)
+        with early_exit_if_null(c.builder, stack, ctypes_module):
+            handle_failure()
+
+        ct_cast = object_getattr_safely(ctypes_module, 'cast')
+        with early_exit_if_null(c.builder, stack, ct_cast):
+            handle_failure()
+
+        ct_voidptr_ty = object_getattr_safely(ctypes_module, 'c_void_p')
+        with early_exit_if_null(c.builder, stack, ct_voidptr_ty):
+            handle_failure()
+
+        # This wires in the fnptrs referred to by name
+        def wire_in_fnptrs(name):
+            # Find the CFunctionType function
+            interface_next_fn = c.pyapi.object_getattr_string(
+                ctypes_binding, name)
+
+            extra_refs.append(interface_next_fn)
+            with early_exit_if_null(c.builder, stack, interface_next_fn):
+                handle_failure()
+
+            # Want to do ctypes.cast(CFunctionType, ctypes.c_void_p), create an
+            # args tuple for that.
+            extra_refs.append(ct_voidptr_ty)
+            args = c.pyapi.tuple_pack([interface_next_fn, ct_voidptr_ty])
+            with early_exit_if_null(c.builder, stack, args):
+                handle_failure()
+
+            # Call ctypes.cast()
+            interface_next_fn_casted = c.pyapi.call(ct_cast, args)
+
+            # Fetch the .value attr on the resulting ctypes.c_void_p for storage
+            # in the function pointer slot.
+            interface_next_fn_casted_value = object_getattr_safely(
+                interface_next_fn_casted, 'value')
+            with early_exit_if_null(c.builder, stack, interface_next_fn_casted_value):
+                handle_failure()
+
+            # Wire up
+            setattr(struct_ptr, f'fnptr_{name}',
+                    c.unbox(types.uintp, interface_next_fn_casted_value).value)
+
+
+        wire_in_fnptrs('next_double')
+        wire_in_fnptrs('next_uint64')
+        wire_in_fnptrs('next_uint32')
+
+        clear_extra_refs()
+
+    return NativeValue(struct_ptr._getvalue(), is_error=c.builder.load(is_error_ptr))
+
+_bit_gen_type = types.NumPyRandomBitGeneratorType('bit_generator')
+
+@unbox(types.NumPyRandomGeneratorType)
+def unbox_numpy_random_generator(typ, obj, c):
+    """
+    Here we're creating a NumPyRandomGeneratorType StructModel with the following fields:
+    * ('bit_generator', _bit_gen_type): The unboxed BitGenerator associated with
+        this Generator object instance.
+    * ('parent', types.pyobject): Pointer to the original Generator PyObject.
+    * ('meminfo', types.MemInfoPointer(types.voidptr)): The information about the memory
+        stored at the pointer (to the original Generator PyObject). This is useful for
+        keeping track of reference counts within the Python runtime. Helps prevent cases
+        where deletion happens in the Python runtime without NRT being aware of it.
+ """ + is_error_ptr = cgutils.alloca_once_value(c.builder, cgutils.false_bit) + + with ExitStack() as stack: + struct_ptr = cgutils.create_struct_proxy(typ)(c.context, c.builder) + bit_gen_inst = c.pyapi.object_getattr_string(obj, 'bit_generator') + with early_exit_if_null(c.builder, stack, bit_gen_inst): + c.builder.store(cgutils.true_bit, is_error_ptr) + unboxed = c.unbox(_bit_gen_type, bit_gen_inst).value + struct_ptr.bit_generator = unboxed + struct_ptr.parent = obj + NULL = cgutils.voidptr_t(None) + struct_ptr.meminfo = c.pyapi.nrt_meminfo_new_from_pyobject( + NULL, # there's no data + obj, # the python object, the call to nrt_meminfo_new_from_pyobject + # will py_incref + ) + c.pyapi.decref(bit_gen_inst) + + return NativeValue(struct_ptr._getvalue(), is_error=c.builder.load(is_error_ptr)) + + +@box(types.NumPyRandomGeneratorType) +def box_numpy_random_generator(typ, val, c): + inst = c.context.make_helper(c.builder, typ, val) + obj = inst.parent + res = cgutils.alloca_once_value(c.builder, obj) + c.pyapi.incref(obj) + # Steal NRT ref + c.context.nrt.decref(c.builder, typ, val) + return c.builder.load(res) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/bytecode.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/bytecode.py new file mode 100644 index 000000000..13516cd15 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/bytecode.py @@ -0,0 +1,369 @@ +from collections import namedtuple, OrderedDict +import dis +import inspect +import itertools +from types import CodeType, ModuleType + +from numba.core import errors, utils, serialize +from numba.core.utils import PYVERSION + +opcode_info = namedtuple('opcode_info', ['argsize']) + +# The following offset is used as a hack to inject a NOP at the start of the +# bytecode. So that function starting with `while True` will not have block-0 +# as a jump target. The Lowerer puts argument initialization at block-0. +_FIXED_OFFSET = 2 + + +def get_function_object(obj): + """ + Objects that wraps function should provide a "__numba__" magic attribute + that contains a name of an attribute that contains the actual python + function object. 
+ """ + attr = getattr(obj, "__numba__", None) + if attr: + return getattr(obj, attr) + return obj + + +def get_code_object(obj): + "Shamelessly borrowed from llpython" + return getattr(obj, '__code__', getattr(obj, 'func_code', None)) + + +def _as_opcodes(seq): + lst = [] + for s in seq: + c = dis.opmap.get(s) + if c is not None: + lst.append(c) + return lst + + +JREL_OPS = frozenset(dis.hasjrel) +JABS_OPS = frozenset(dis.hasjabs) +JUMP_OPS = JREL_OPS | JABS_OPS +TERM_OPS = frozenset(_as_opcodes(['RETURN_VALUE', 'RAISE_VARARGS'])) +EXTENDED_ARG = dis.EXTENDED_ARG +HAVE_ARGUMENT = dis.HAVE_ARGUMENT + + +class ByteCodeInst(object): + ''' + Attributes + ---------- + - offset: + byte offset of opcode + - opcode: + opcode integer value + - arg: + instruction arg + - lineno: + -1 means unknown + ''' + __slots__ = 'offset', 'next', 'opcode', 'opname', 'arg', 'lineno' + + def __init__(self, offset, opcode, arg, nextoffset): + self.offset = offset + self.next = nextoffset + self.opcode = opcode + self.opname = dis.opname[opcode] + self.arg = arg + self.lineno = -1 # unknown line number + + @property + def is_jump(self): + return self.opcode in JUMP_OPS + + @property + def is_terminator(self): + return self.opcode in TERM_OPS + + def get_jump_target(self): + # With Python 3.10 the addressing of "bytecode" instructions has + # changed from using bytes to using 16-bit words instead. As a + # consequence the code to determine where a jump will lead had to be + # adapted. + # See also: + # https://bugs.python.org/issue26647 + # https://bugs.python.org/issue27129 + # https://github.com/python/cpython/pull/25069 + assert self.is_jump + if PYVERSION >= (3, 10): + if self.opcode in JREL_OPS: + return self.next + self.arg * 2 + else: + assert self.opcode in JABS_OPS + return self.arg * 2 - 2 + else: + if self.opcode in JREL_OPS: + return self.next + self.arg + else: + assert self.opcode in JABS_OPS + return self.arg + + def __repr__(self): + return '%s(arg=%s, lineno=%d)' % (self.opname, self.arg, self.lineno) + + @property + def block_effect(self): + """Effect of the block stack + Returns +1 (push), 0 (none) or -1 (pop) + """ + if self.opname.startswith('SETUP_'): + return 1 + elif self.opname == 'POP_BLOCK': + return -1 + else: + return 0 + + +CODE_LEN = 1 +ARG_LEN = 1 +NO_ARG_LEN = 1 + +OPCODE_NOP = dis.opname.index('NOP') + + +# Adapted from Lib/dis.py +def _unpack_opargs(code): + """ + Returns a 4-int-tuple of + (bytecode offset, opcode, argument, offset of next bytecode). + """ + extended_arg = 0 + n = len(code) + offset = i = 0 + while i < n: + op = code[i] + i += CODE_LEN + if op >= HAVE_ARGUMENT: + arg = code[i] | extended_arg + for j in range(ARG_LEN): + arg |= code[i + j] << (8 * j) + i += ARG_LEN + if op == EXTENDED_ARG: + extended_arg = arg << 8 * ARG_LEN + continue + else: + arg = None + i += NO_ARG_LEN + + extended_arg = 0 + yield (offset, op, arg, i) + offset = i # Mark inst offset at first extended + + +def _patched_opargs(bc_stream): + """Patch the bytecode stream. + + - Adds a NOP bytecode at the start to avoid jump target being at the entry. + """ + # Injected NOP + yield (0, OPCODE_NOP, None, _FIXED_OFFSET) + # Adjust bytecode offset for the rest of the stream + for offset, opcode, arg, nextoffset in bc_stream: + # If the opcode has an absolute jump target, adjust it. 
+ if opcode in JABS_OPS: + arg += _FIXED_OFFSET + yield offset + _FIXED_OFFSET, opcode, arg, nextoffset + _FIXED_OFFSET + + +class ByteCodeIter(object): + def __init__(self, code): + self.code = code + self.iter = iter(_patched_opargs(_unpack_opargs(self.code.co_code))) + + def __iter__(self): + return self + + def _fetch_opcode(self): + return next(self.iter) + + def next(self): + offset, opcode, arg, nextoffset = self._fetch_opcode() + return offset, ByteCodeInst(offset=offset, opcode=opcode, arg=arg, + nextoffset=nextoffset) + + __next__ = next + + def read_arg(self, size): + buf = 0 + for i in range(size): + _offset, byte = next(self.iter) + buf |= byte << (8 * i) + return buf + + +class ByteCode(object): + """ + The decoded bytecode of a function, and related information. + """ + __slots__ = ('func_id', 'co_names', 'co_varnames', 'co_consts', + 'co_cellvars', 'co_freevars', 'table', 'labels') + + def __init__(self, func_id): + code = func_id.code + + labels = set(x + _FIXED_OFFSET for x in dis.findlabels(code.co_code)) + labels.add(0) + + # A map of {offset: ByteCodeInst} + table = OrderedDict(ByteCodeIter(code)) + self._compute_lineno(table, code) + + self.func_id = func_id + self.co_names = code.co_names + self.co_varnames = code.co_varnames + self.co_consts = code.co_consts + self.co_cellvars = code.co_cellvars + self.co_freevars = code.co_freevars + self.table = table + self.labels = sorted(labels) + + @classmethod + def _compute_lineno(cls, table, code): + """ + Compute the line numbers for all bytecode instructions. + """ + for offset, lineno in dis.findlinestarts(code): + adj_offset = offset + _FIXED_OFFSET + if adj_offset in table: + table[adj_offset].lineno = lineno + # Assign unfilled lineno + # Start with first bytecode's lineno + known = table[_FIXED_OFFSET].lineno + for inst in table.values(): + if inst.lineno >= 0: + known = inst.lineno + else: + inst.lineno = known + return table + + def __iter__(self): + return iter(self.table.values()) + + def __getitem__(self, offset): + return self.table[offset] + + def __contains__(self, offset): + return offset in self.table + + def dump(self): + def label_marker(i): + if i[1].offset in self.labels: + return '>' + else: + return ' ' + + return '\n'.join('%s %10s\t%s' % ((label_marker(i),) + i) + for i in self.table.items()) + + @classmethod + def _compute_used_globals(cls, func, table, co_consts, co_names): + """ + Compute the globals used by the function with the given + bytecode table. + """ + d = {} + globs = func.__globals__ + builtins = globs.get('__builtins__', utils.builtins) + if isinstance(builtins, ModuleType): + builtins = builtins.__dict__ + # Look for LOAD_GLOBALs in the bytecode + for inst in table.values(): + if inst.opname == 'LOAD_GLOBAL': + name = co_names[inst.arg] + if name not in d: + try: + value = globs[name] + except KeyError: + value = builtins[name] + d[name] = value + # Add globals used by any nested code object + for co in co_consts: + if isinstance(co, CodeType): + subtable = OrderedDict(ByteCodeIter(co)) + d.update(cls._compute_used_globals(func, subtable, + co.co_consts, co.co_names)) + return d + + def get_used_globals(self): + """ + Get a {name: value} map of the globals used by this code + object and any nested code objects. + """ + return self._compute_used_globals(self.func_id.func, self.table, + self.co_consts, self.co_names) + + +class FunctionIdentity(serialize.ReduceMixin): + """ + A function's identity and metadata. 
+ + Note this typically represents a function whose bytecode is + being compiled, not necessarily the top-level user function + (the two might be distinct, e.g. in the `@generated_jit` case). + """ + _unique_ids = itertools.count(1) + + @classmethod + def from_function(cls, pyfunc): + """ + Create the FunctionIdentity of the given function. + """ + func = get_function_object(pyfunc) + code = get_code_object(func) + pysig = utils.pysignature(func) + if not code: + raise errors.ByteCodeSupportError( + "%s does not provide its bytecode" % func) + + try: + func_qualname = func.__qualname__ + except AttributeError: + func_qualname = func.__name__ + + self = cls() + self.func = func + self.func_qualname = func_qualname + self.func_name = func_qualname.split('.')[-1] + self.code = code + self.module = inspect.getmodule(func) + self.modname = (utils._dynamic_modname + if self.module is None + else self.module.__name__) + self.is_generator = inspect.isgeneratorfunction(func) + self.pysig = pysig + self.filename = code.co_filename + self.firstlineno = code.co_firstlineno + self.arg_count = len(pysig.parameters) + self.arg_names = list(pysig.parameters) + + # Even the same function definition can be compiled into + # several different function objects with distinct closure + # variables, so we make sure to disambiguate using an unique id. + uid = next(cls._unique_ids) + self.unique_name = '{}${}'.format(self.func_qualname, uid) + self.unique_id = uid + + return self + + def derive(self): + """Copy the object and increment the unique counter. + """ + return self.from_function(self.func) + + def _reduce_states(self): + """ + NOTE: part of ReduceMixin protocol + """ + return dict(pyfunc=self.func) + + @classmethod + def _rebuild(cls, pyfunc): + """ + NOTE: part of ReduceMixin protocol + """ + return cls.from_function(pyfunc) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/byteflow.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/byteflow.py new file mode 100644 index 000000000..19b595967 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/byteflow.py @@ -0,0 +1,1555 @@ +""" +Implement python 3.8+ bytecode analysis +""" + +from pprint import pformat +import logging +from collections import namedtuple, defaultdict, deque +from functools import total_ordering + +from numba.core.utils import UniqueDict, PYVERSION +from numba.core.controlflow import NEW_BLOCKERS, CFGraph +from numba.core.ir import Loc +from numba.core.errors import UnsupportedError + + +_logger = logging.getLogger(__name__) + + +_EXCEPT_STACK_OFFSET = 6 +_FINALLY_POP = _EXCEPT_STACK_OFFSET if PYVERSION >= (3, 8) else 1 +_NO_RAISE_OPS = frozenset({ + 'LOAD_CONST', +}) + + +@total_ordering +class BlockKind(object): + """Kinds of block to make related code safer than just `str`. 
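+
+    Instances only compare against other BlockKind instances; comparing
+    against anything else (e.g. a plain string) raises TypeError instead of
+    silently returning False.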
+ """ + _members = frozenset({ + 'LOOP', + 'TRY', 'EXCEPT', 'FINALLY', + 'WITH', 'WITH_FINALLY', + }) + + def __init__(self, value): + assert value in self._members + self._value = value + + def __hash__(self): + return hash((type(self), self._value)) + + def __lt__(self, other): + if isinstance(other, BlockKind): + return self._value < other._value + else: + raise TypeError('cannot compare to {!r}'.format(type(other))) + + def __eq__(self, other): + if isinstance(other, BlockKind): + return self._value == other._value + else: + raise TypeError('cannot compare to {!r}'.format(type(other))) + + def __repr__(self): + return "BlockKind({})".format(self._value) + + +class _lazy_pformat(object): + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def __str__(self): + return pformat(*self.args, **self.kwargs) + + +class Flow(object): + """Data+Control Flow analysis. + + Simulate execution to recover dataflow and controlflow information. + """ + def __init__(self, bytecode): + _logger.debug("bytecode dump:\n%s", bytecode.dump()) + self._bytecode = bytecode + self.block_infos = UniqueDict() + + def run(self): + """Run a trace over the bytecode over all reachable path. + + The trace starts at bytecode offset 0 and gathers stack and control- + flow information by partially interpreting each bytecode. + Each ``State`` instance in the trace corresponds to a basic-block. + The State instances forks when a jump instruction is encountered. + A newly forked state is then added to the list of pending states. + The trace ends when there are no more pending states. + """ + firststate = State(bytecode=self._bytecode, pc=0, nstack=0, + blockstack=()) + runner = TraceRunner(debug_filename=self._bytecode.func_id.filename) + runner.pending.append(firststate) + + # Enforce unique-ness on initial PC to avoid re-entering the PC with + # a different stack-depth. We don't know if such a case is ever + # possible, but no such case has been encountered in our tests. + first_encounter = UniqueDict() + # Loop over each pending state at a initial PC. + # Each state is tracing a basic block + while runner.pending: + _logger.debug("pending: %s", runner.pending) + state = runner.pending.popleft() + if state not in runner.finished: + _logger.debug("stack: %s", state._stack) + first_encounter[state.pc_initial] = state + # Loop over the state until it is terminated. + while True: + runner.dispatch(state) + # Terminated? + if state.has_terminated(): + break + elif (state.has_active_try() and + state.get_inst().opname not in _NO_RAISE_OPS): + # Is in a *try* block + state.fork(pc=state.get_inst().next) + tryblk = state.get_top_block('TRY') + state.pop_block_and_above(tryblk) + nstack = state.stack_depth + kwargs = {} + if nstack > tryblk['entry_stack']: + kwargs['npop'] = nstack - tryblk['entry_stack'] + handler = tryblk['handler'] + kwargs['npush'] = { + BlockKind('EXCEPT'): _EXCEPT_STACK_OFFSET, + BlockKind('FINALLY'): _FINALLY_POP + }[handler['kind']] + kwargs['extra_block'] = handler + state.fork(pc=tryblk['end'], **kwargs) + break + else: + state.advance_pc() + # Must the new PC be a new block? + if self._is_implicit_new_block(state): + # check if this is a with...as, abort if so + self._guard_with_as(state) + # else split + state.split_new_block() + break + _logger.debug("end state. 
edges=%s", state.outgoing_edges) + runner.finished.add(state) + out_states = state.get_outgoing_states() + runner.pending.extend(out_states) + + # Complete controlflow + self._build_cfg(runner.finished) + # Prune redundant PHI-nodes + self._prune_phis(runner) + # Post process + for state in sorted(runner.finished, key=lambda x: x.pc_initial): + self.block_infos[state.pc_initial] = si = adapt_state_infos(state) + _logger.debug("block_infos %s:\n%s", state, si) + + def _build_cfg(self, all_states): + graph = CFGraph() + for state in all_states: + b = state.pc_initial + graph.add_node(b) + for state in all_states: + for edge in state.outgoing_edges: + graph.add_edge(state.pc_initial, edge.pc, 0) + graph.set_entry_point(0) + graph.process() + self.cfgraph = graph + + def _prune_phis(self, runner): + # Find phis that are unused in the local block + _logger.debug("Prune PHIs".center(60, '-')) + + # Compute dataflow for used phis and propagate + + # 1. Get used-phis for each block + # Map block to used_phis + def get_used_phis_per_state(): + used_phis = defaultdict(set) + phi_set = set() + for state in runner.finished: + used = set(state._used_regs) + phis = set(state._phis) + used_phis[state] |= phis & used + phi_set |= phis + return used_phis, phi_set + + # Find use-defs + def find_use_defs(): + defmap = {} + phismap = defaultdict(set) + for state in runner.finished: + for phi, rhs in state._outgoing_phis.items(): + if rhs not in phi_set: + # Is a definition + defmap[phi] = state + phismap[phi].add((rhs, state)) + _logger.debug("defmap: %s", _lazy_pformat(defmap)) + _logger.debug("phismap: %s", _lazy_pformat(phismap)) + return defmap, phismap + + def propagate_phi_map(phismap): + """An iterative dataflow algorithm to find the definition + (the source) of each PHI node. 
+ """ + blacklist = defaultdict(set) + + while True: + changing = False + for phi, defsites in sorted(list(phismap.items())): + for rhs, state in sorted(list(defsites)): + if rhs in phi_set: + defsites |= phismap[rhs] + blacklist[phi].add((rhs, state)) + to_remove = blacklist[phi] + if to_remove & defsites: + defsites -= to_remove + changing = True + + _logger.debug("changing phismap: %s", _lazy_pformat(phismap)) + if not changing: + break + + def apply_changes(used_phis, phismap): + keep = {} + for state, used_set in used_phis.items(): + for phi in used_set: + keep[phi] = phismap[phi] + _logger.debug("keep phismap: %s", _lazy_pformat(keep)) + new_out = defaultdict(dict) + for phi in keep: + for rhs, state in keep[phi]: + new_out[state][phi] = rhs + + _logger.debug("new_out: %s", _lazy_pformat(new_out)) + for state in runner.finished: + state._outgoing_phis.clear() + state._outgoing_phis.update(new_out[state]) + + used_phis, phi_set = get_used_phis_per_state() + _logger.debug("Used_phis: %s", _lazy_pformat(used_phis)) + defmap, phismap = find_use_defs() + propagate_phi_map(phismap) + apply_changes(used_phis, phismap) + _logger.debug("DONE Prune PHIs".center(60, '-')) + + def _is_implicit_new_block(self, state): + inst = state.get_inst() + + if inst.offset in self._bytecode.labels: + return True + elif inst.opname in NEW_BLOCKERS: + return True + else: + return False + + def _guard_with_as(self, state): + """Checks if the next instruction after a SETUP_WITH is something other + than a POP_TOP, if it is something else it'll be some sort of store + which is not supported (this corresponds to `with CTXMGR as VAR(S)`).""" + current_inst = state.get_inst() + if current_inst.opname == "SETUP_WITH": + next_op = self._bytecode[current_inst.next].opname + if next_op != "POP_TOP": + msg = ("The 'with (context manager) as " + "(variable):' construct is not " + "supported.") + raise UnsupportedError(msg) + + +class TraceRunner(object): + """Trace runner contains the states for the trace and the opcode dispatch. + """ + def __init__(self, debug_filename): + self.debug_filename = debug_filename + self.pending = deque() + self.finished = set() + + def get_debug_loc(self, lineno): + return Loc(self.debug_filename, lineno) + + def dispatch(self, state): + inst = state.get_inst() + _logger.debug("dispatch pc=%s, inst=%s", state._pc, inst) + _logger.debug("stack %s", state._stack) + fn = getattr(self, "op_{}".format(inst.opname), None) + if fn is not None: + fn(state, inst) + else: + msg = "Use of unsupported opcode (%s) found" % inst.opname + raise UnsupportedError(msg, loc=self.get_debug_loc(inst.lineno)) + + def op_NOP(self, state, inst): + state.append(inst) + + def op_FORMAT_VALUE(self, state, inst): + """ + FORMAT_VALUE(flags): flags argument specifies format spec which is + not supported yet. Currently, we just call str() on the value. + Pops a value from stack and pushes results back. + Required for supporting f-strings. + https://docs.python.org/3/library/dis.html#opcode-FORMAT_VALUE + """ + if inst.arg != 0: + msg = "format spec in f-strings not supported yet" + raise UnsupportedError(msg, loc=self.get_debug_loc(inst.lineno)) + value = state.pop() + strvar = state.make_temp() + res = state.make_temp() + state.append(inst, value=value, res=res, strvar=strvar) + state.push(res) + + def op_BUILD_STRING(self, state, inst): + """ + BUILD_STRING(count): Concatenates count strings from the stack and + pushes the resulting string onto the stack. + Required for supporting f-strings. 
+        https://docs.python.org/3/library/dis.html#opcode-BUILD_STRING
+        """
+        count = inst.arg
+        strings = list(reversed([state.pop() for _ in range(count)]))
+        # corner case: f""
+        if count == 0:
+            tmps = [state.make_temp()]
+        else:
+            tmps = [state.make_temp() for _ in range(count - 1)]
+        state.append(inst, strings=strings, tmps=tmps)
+        state.push(tmps[-1])
+
+    def op_POP_TOP(self, state, inst):
+        state.pop()
+
+    def op_LOAD_GLOBAL(self, state, inst):
+        res = state.make_temp()
+        state.append(inst, res=res)
+        state.push(res)
+
+    def op_LOAD_DEREF(self, state, inst):
+        res = state.make_temp()
+        state.append(inst, res=res)
+        state.push(res)
+
+    def op_LOAD_CONST(self, state, inst):
+        res = state.make_temp("const")
+        state.push(res)
+        state.append(inst, res=res)
+
+    def op_LOAD_ATTR(self, state, inst):
+        item = state.pop()
+        res = state.make_temp()
+        state.append(inst, item=item, res=res)
+        state.push(res)
+
+    def op_LOAD_FAST(self, state, inst):
+        name = state.get_varname(inst)
+        res = state.make_temp(name)
+        state.append(inst, res=res)
+        state.push(res)
+
+    def op_DELETE_FAST(self, state, inst):
+        state.append(inst)
+
+    def op_DELETE_ATTR(self, state, inst):
+        target = state.pop()
+        state.append(inst, target=target)
+
+    def op_STORE_ATTR(self, state, inst):
+        target = state.pop()
+        value = state.pop()
+        state.append(inst, target=target, value=value)
+
+    def op_STORE_DEREF(self, state, inst):
+        value = state.pop()
+        state.append(inst, value=value)
+
+    def op_STORE_FAST(self, state, inst):
+        value = state.pop()
+        state.append(inst, value=value)
+
+    def op_SLICE_1(self, state, inst):
+        """
+        TOS = TOS1[TOS:]
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        res = state.make_temp()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos1,
+            start=tos,
+            res=res,
+            slicevar=slicevar,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+        state.push(res)
+
+    def op_SLICE_2(self, state, inst):
+        """
+        TOS = TOS1[:TOS]
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        res = state.make_temp()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos1,
+            stop=tos,
+            res=res,
+            slicevar=slicevar,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+        state.push(res)
+
+    def op_SLICE_3(self, state, inst):
+        """
+        TOS = TOS2[TOS1:TOS]
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        tos2 = state.pop()
+        res = state.make_temp()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        state.append(
+            inst,
+            base=tos2,
+            start=tos1,
+            stop=tos,
+            res=res,
+            slicevar=slicevar,
+            indexvar=indexvar,
+        )
+        state.push(res)
+
+    def op_STORE_SLICE_0(self, state, inst):
+        """
+        TOS[:] = TOS1
+        """
+        tos = state.pop()
+        value = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos,
+            value=value,
+            slicevar=slicevar,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+
+    def op_STORE_SLICE_1(self, state, inst):
+        """
+        TOS1[TOS:] = TOS2
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        value = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos1,
+            start=tos,
+            slicevar=slicevar,
+            value=value,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+
+    def op_STORE_SLICE_2(self, state, inst):
+        """
+        TOS1[:TOS] = TOS2
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        value = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos1,
+            stop=tos,
+            value=value,
+            slicevar=slicevar,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+
+    def op_STORE_SLICE_3(self, state, inst):
+        """
+        TOS2[TOS1:TOS] = TOS3
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        tos2 = state.pop()
+        value = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        state.append(
+            inst,
+            base=tos2,
+            start=tos1,
+            stop=tos,
+            value=value,
+            slicevar=slicevar,
+            indexvar=indexvar,
+        )
+
+    def op_DELETE_SLICE_0(self, state, inst):
+        """
+        del TOS[:]
+        """
+        tos = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst, base=tos, slicevar=slicevar, indexvar=indexvar,
+            nonevar=nonevar,
+        )
+
+    def op_DELETE_SLICE_1(self, state, inst):
+        """
+        del TOS1[TOS:]
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos1,
+            start=tos,
+            slicevar=slicevar,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+
+    def op_DELETE_SLICE_2(self, state, inst):
+        """
+        del TOS1[:TOS]
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        nonevar = state.make_temp()
+        state.append(
+            inst,
+            base=tos1,
+            stop=tos,
+            slicevar=slicevar,
+            indexvar=indexvar,
+            nonevar=nonevar,
+        )
+
+    def op_DELETE_SLICE_3(self, state, inst):
+        """
+        del TOS2[TOS1:TOS]
+        """
+        tos = state.pop()
+        tos1 = state.pop()
+        tos2 = state.pop()
+        slicevar = state.make_temp()
+        indexvar = state.make_temp()
+        state.append(
+            inst, base=tos2, start=tos1, stop=tos, slicevar=slicevar,
+            indexvar=indexvar
+        )
+
+    def op_BUILD_SLICE(self, state, inst):
+        """
+        slice(TOS1, TOS) or slice(TOS2, TOS1, TOS)
+        """
+        argc = inst.arg
+        if argc == 2:
+            tos = state.pop()
+            tos1 = state.pop()
+            start = tos1
+            stop = tos
+            step = None
+        elif argc == 3:
+            tos = state.pop()
+            tos1 = state.pop()
+            tos2 = state.pop()
+            start = tos2
+            stop = tos1
+            step = tos
+        else:
+            raise Exception("unreachable")
+        slicevar = state.make_temp()
+        res = state.make_temp()
+        state.append(
+            inst, start=start, stop=stop, step=step, res=res, slicevar=slicevar
+        )
+        state.push(res)
+
+    def _op_POP_JUMP_IF(self, state, inst):
+        pred = state.pop()
+        state.append(inst, pred=pred)
+
+        target_inst = inst.get_jump_target()
+        next_inst = inst.next
+        # If the next instruction and the jump target are the same location,
+        # issue a single fork; otherwise fork once for the next instruction
+        # and once for the target.
+ state.fork(pc=next_inst) + if target_inst != next_inst: + state.fork(pc=target_inst) + + op_POP_JUMP_IF_TRUE = _op_POP_JUMP_IF + op_POP_JUMP_IF_FALSE = _op_POP_JUMP_IF + + def _op_JUMP_IF_OR_POP(self, state, inst): + pred = state.get_tos() + state.append(inst, pred=pred) + state.fork(pc=inst.next, npop=1) + state.fork(pc=inst.get_jump_target()) + + op_JUMP_IF_FALSE_OR_POP = _op_JUMP_IF_OR_POP + op_JUMP_IF_TRUE_OR_POP = _op_JUMP_IF_OR_POP + + def op_JUMP_FORWARD(self, state, inst): + state.append(inst) + state.fork(pc=inst.get_jump_target()) + + def op_JUMP_ABSOLUTE(self, state, inst): + state.append(inst) + state.fork(pc=inst.get_jump_target()) + + def op_BREAK_LOOP(self, state, inst): + # NOTE: bytecode removed since py3.8 + end = state.get_top_block('LOOP')['end'] + state.append(inst, end=end) + state.pop_block() + state.fork(pc=end) + + def op_RETURN_VALUE(self, state, inst): + state.append(inst, retval=state.pop(), castval=state.make_temp()) + state.terminate() + + def op_YIELD_VALUE(self, state, inst): + val = state.pop() + res = state.make_temp() + state.append(inst, value=val, res=res) + state.push(res) + + def op_RAISE_VARARGS(self, state, inst): + in_exc_block = any([ + state.get_top_block("EXCEPT") is not None, + state.get_top_block("FINALLY") is not None + ]) + if inst.arg == 0: + exc = None + if in_exc_block: + raise UnsupportedError( + "The re-raising of an exception is not yet supported.", + loc=self.get_debug_loc(inst.lineno), + ) + elif inst.arg == 1: + exc = state.pop() + else: + raise ValueError("Multiple argument raise is not supported.") + state.append(inst, exc=exc) + state.terminate() + + def op_BEGIN_FINALLY(self, state, inst): + temps = [] + for i in range(_EXCEPT_STACK_OFFSET): + tmp = state.make_temp() + temps.append(tmp) + state.push(tmp) + state.append(inst, temps=temps) + + def op_END_FINALLY(self, state, inst): + blk = state.pop_block() + state.reset_stack(blk['entry_stack']) + + def op_POP_FINALLY(self, state, inst): + # we don't emulate the exact stack behavior + if inst.arg != 0: + msg = ('Unsupported use of a bytecode related to try..finally' + ' or a with-context') + raise UnsupportedError(msg, loc=self.get_debug_loc(inst.lineno)) + + def op_CALL_FINALLY(self, state, inst): + pass + + def op_WITH_CLEANUP_START(self, state, inst): + # we don't emulate the exact stack behavior + state.append(inst) + + def op_WITH_CLEANUP_FINISH(self, state, inst): + # we don't emulate the exact stack behavior + state.append(inst) + + def op_SETUP_LOOP(self, state, inst): + # NOTE: bytecode removed since py3.8 + state.push_block( + state.make_block( + kind='LOOP', + end=inst.get_jump_target(), + ) + ) + + def op_SETUP_WITH(self, state, inst): + cm = state.pop() # the context-manager + + yielded = state.make_temp() + exitfn = state.make_temp(prefix='setup_with_exitfn') + state.append(inst, contextmanager=cm, exitfn=exitfn) + + # py39 doesn't have with-finally + if PYVERSION < (3, 9): + state.push_block( + state.make_block( + kind='WITH_FINALLY', + end=inst.get_jump_target(), + ) + ) + + state.push(exitfn) + state.push(yielded) + + state.push_block( + state.make_block( + kind='WITH', + end=inst.get_jump_target(), + ) + ) + # Forces a new block + state.fork(pc=inst.next) + + def _setup_try(self, kind, state, next, end): + handler_block = state.make_block( + kind=kind, + end=None, + reset_stack=False, + ) + # Forces a new block + # Fork to the body of the finally + state.fork( + pc=next, + extra_block=state.make_block( + kind='TRY', + end=end, + reset_stack=False, + 
handler=handler_block, + ) + ) + + def op_SETUP_EXCEPT(self, state, inst): + # Opcode removed since py3.8 + state.append(inst) + self._setup_try( + 'EXCEPT', state, next=inst.next, end=inst.get_jump_target(), + ) + + def op_SETUP_FINALLY(self, state, inst): + state.append(inst) + self._setup_try( + 'FINALLY', state, next=inst.next, end=inst.get_jump_target(), + ) + + def op_POP_EXCEPT(self, state, inst): + blk = state.pop_block() + if blk['kind'] not in {BlockKind('EXCEPT'), BlockKind('FINALLY')}: + raise UnsupportedError( + "POP_EXCEPT got an unexpected block: {}".format(blk['kind']), + loc=self.get_debug_loc(inst.lineno), + ) + state.pop() + state.pop() + state.pop() + # Forces a new block + state.fork(pc=inst.next) + + def op_POP_BLOCK(self, state, inst): + blk = state.pop_block() + if blk['kind'] == BlockKind('TRY'): + state.append(inst, kind='try') + elif blk['kind'] == BlockKind('WITH'): + state.append(inst, kind='with') + state.fork(pc=inst.next) + + def op_BINARY_SUBSCR(self, state, inst): + index = state.pop() + target = state.pop() + res = state.make_temp() + state.append(inst, index=index, target=target, res=res) + state.push(res) + + def op_STORE_SUBSCR(self, state, inst): + index = state.pop() + target = state.pop() + value = state.pop() + state.append(inst, target=target, index=index, value=value) + + def op_DELETE_SUBSCR(self, state, inst): + index = state.pop() + target = state.pop() + state.append(inst, target=target, index=index) + + def op_CALL_FUNCTION(self, state, inst): + narg = inst.arg + args = list(reversed([state.pop() for _ in range(narg)])) + func = state.pop() + + res = state.make_temp() + state.append(inst, func=func, args=args, res=res) + state.push(res) + + def op_CALL_FUNCTION_KW(self, state, inst): + narg = inst.arg + names = state.pop() # tuple of names + args = list(reversed([state.pop() for _ in range(narg)])) + func = state.pop() + + res = state.make_temp() + state.append(inst, func=func, args=args, names=names, res=res) + state.push(res) + + def op_CALL_FUNCTION_EX(self, state, inst): + if inst.arg & 1 and PYVERSION != (3, 10): + errmsg = "CALL_FUNCTION_EX with **kwargs not supported" + raise UnsupportedError(errmsg) + if inst.arg & 1: + varkwarg = state.pop() + else: + varkwarg = None + vararg = state.pop() + func = state.pop() + res = state.make_temp() + state.append(inst, func=func, vararg=vararg, varkwarg=varkwarg, res=res) + state.push(res) + + def _dup_topx(self, state, inst, count): + orig = [state.pop() for _ in range(count)] + orig.reverse() + # We need to actually create new temporaries if we want the + # IR optimization pass to work correctly (see issue #580) + duped = [state.make_temp() for _ in range(count)] + state.append(inst, orig=orig, duped=duped) + for val in orig: + state.push(val) + for val in duped: + state.push(val) + + def op_DUP_TOPX(self, state, inst): + count = inst.arg + assert 1 <= count <= 5, "Invalid DUP_TOPX count" + self._dup_topx(state, inst, count) + + def op_DUP_TOP(self, state, inst): + self._dup_topx(state, inst, count=1) + + def op_DUP_TOP_TWO(self, state, inst): + self._dup_topx(state, inst, count=2) + + def op_ROT_TWO(self, state, inst): + first = state.pop() + second = state.pop() + state.push(first) + state.push(second) + + def op_ROT_THREE(self, state, inst): + first = state.pop() + second = state.pop() + third = state.pop() + state.push(first) + state.push(third) + state.push(second) + + def op_ROT_FOUR(self, state, inst): + first = state.pop() + second = state.pop() + third = state.pop() + forth = 
state.pop() + state.push(first) + state.push(forth) + state.push(third) + state.push(second) + + def op_UNPACK_SEQUENCE(self, state, inst): + count = inst.arg + iterable = state.pop() + stores = [state.make_temp() for _ in range(count)] + tupleobj = state.make_temp() + state.append(inst, iterable=iterable, stores=stores, tupleobj=tupleobj) + for st in reversed(stores): + state.push(st) + + def op_BUILD_TUPLE(self, state, inst): + count = inst.arg + items = list(reversed([state.pop() for _ in range(count)])) + tup = state.make_temp() + state.append(inst, items=items, res=tup) + state.push(tup) + + def _build_tuple_unpack(self, state, inst): + # Builds tuple from other tuples on the stack + tuples = list(reversed([state.pop() for _ in range(inst.arg)])) + temps = [state.make_temp() for _ in range(len(tuples) - 1)] + + # if the unpack is assign-like, e.g. x = (*y,), it needs handling + # differently. + is_assign = len(tuples) == 1 + if is_assign: + temps = [state.make_temp(),] + + state.append(inst, tuples=tuples, temps=temps, is_assign=is_assign) + # The result is in the last temp var + state.push(temps[-1]) + + def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, state, inst): + # just unpack the input tuple, call inst will be handled afterwards + self._build_tuple_unpack(state, inst) + + def op_BUILD_TUPLE_UNPACK(self, state, inst): + self._build_tuple_unpack(state, inst) + + def op_LIST_TO_TUPLE(self, state, inst): + # "Pops a list from the stack and pushes a tuple containing the same + # values." + tos = state.pop() + res = state.make_temp() # new tuple var + state.append(inst, const_list=tos, res=res) + state.push(res) + + def op_BUILD_CONST_KEY_MAP(self, state, inst): + keys = state.pop() + vals = list(reversed([state.pop() for _ in range(inst.arg)])) + keytmps = [state.make_temp() for _ in range(inst.arg)] + res = state.make_temp() + state.append(inst, keys=keys, keytmps=keytmps, values=vals, res=res) + state.push(res) + + def op_BUILD_LIST(self, state, inst): + count = inst.arg + items = list(reversed([state.pop() for _ in range(count)])) + lst = state.make_temp() + state.append(inst, items=items, res=lst) + state.push(lst) + + def op_LIST_APPEND(self, state, inst): + value = state.pop() + index = inst.arg + target = state.peek(index) + appendvar = state.make_temp() + res = state.make_temp() + state.append(inst, target=target, value=value, appendvar=appendvar, + res=res) + + def op_LIST_EXTEND(self, state, inst): + value = state.pop() + index = inst.arg + target = state.peek(index) + extendvar = state.make_temp() + res = state.make_temp() + state.append(inst, target=target, value=value, extendvar=extendvar, + res=res) + + def op_BUILD_MAP(self, state, inst): + dct = state.make_temp() + count = inst.arg + items = [] + # In 3.5+, BUILD_MAP takes pairs from the stack + for i in range(count): + v, k = state.pop(), state.pop() + items.append((k, v)) + state.append(inst, items=items[::-1], size=count, res=dct) + state.push(dct) + + def op_MAP_ADD(self, state, inst): + # NOTE: https://docs.python.org/3/library/dis.html#opcode-MAP_ADD + # Python >= 3.8: TOS and TOS1 are value and key respectively + # Python < 3.8: TOS and TOS1 are key and value respectively + TOS = state.pop() + TOS1 = state.pop() + key, value = (TOS, TOS1) if PYVERSION < (3, 8) else (TOS1, TOS) + index = inst.arg + target = state.peek(index) + setitemvar = state.make_temp() + res = state.make_temp() + state.append(inst, target=target, key=key, value=value, + setitemvar=setitemvar, res=res) + + def op_BUILD_SET(self, state, inst): + 
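+        # For example, the literal ``{1, 2, 3}`` compiles to three
+        # LOAD_CONST instructions followed by ``BUILD_SET(3)``: this
+        # handler pops ``inst.arg`` items and pushes one temporary
+        # holding the new set.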
count = inst.arg + # Note: related python bug http://bugs.python.org/issue26020 + items = list(reversed([state.pop() for _ in range(count)])) + res = state.make_temp() + state.append(inst, items=items, res=res) + state.push(res) + + def op_SET_UPDATE(self, state, inst): + value = state.pop() + index = inst.arg + target = state.peek(index) + updatevar = state.make_temp() + res = state.make_temp() + state.append(inst, target=target, value=value, updatevar=updatevar, + res=res) + + def op_DICT_UPDATE(self, state, inst): + value = state.pop() + index = inst.arg + target = state.peek(index) + updatevar = state.make_temp() + res = state.make_temp() + state.append(inst, target=target, value=value, updatevar=updatevar, + res=res) + + def op_GET_ITER(self, state, inst): + value = state.pop() + res = state.make_temp() + state.append(inst, value=value, res=res) + state.push(res) + + def op_FOR_ITER(self, state, inst): + iterator = state.get_tos() + pair = state.make_temp() + indval = state.make_temp() + pred = state.make_temp() + state.append(inst, iterator=iterator, pair=pair, indval=indval, + pred=pred) + state.push(indval) + end = inst.get_jump_target() + state.fork(pc=end, npop=2) + state.fork(pc=inst.next) + + def op_GEN_START(self, state, inst): + """Pops TOS. If TOS was not None, raises an exception. The kind + operand corresponds to the type of generator or coroutine and + determines the error message. The legal kinds are 0 for generator, + 1 for coroutine, and 2 for async generator. + + New in version 3.10. + """ + # no-op in Numba + pass + + def _unaryop(self, state, inst): + val = state.pop() + res = state.make_temp() + state.append(inst, value=val, res=res) + state.push(res) + + op_UNARY_NEGATIVE = _unaryop + op_UNARY_POSITIVE = _unaryop + op_UNARY_NOT = _unaryop + op_UNARY_INVERT = _unaryop + + def _binaryop(self, state, inst): + rhs = state.pop() + lhs = state.pop() + res = state.make_temp() + state.append(inst, lhs=lhs, rhs=rhs, res=res) + state.push(res) + + op_COMPARE_OP = _binaryop + op_IS_OP = _binaryop + op_CONTAINS_OP = _binaryop + + op_INPLACE_ADD = _binaryop + op_INPLACE_SUBTRACT = _binaryop + op_INPLACE_MULTIPLY = _binaryop + op_INPLACE_DIVIDE = _binaryop + op_INPLACE_TRUE_DIVIDE = _binaryop + op_INPLACE_FLOOR_DIVIDE = _binaryop + op_INPLACE_MODULO = _binaryop + op_INPLACE_POWER = _binaryop + op_INPLACE_MATRIX_MULTIPLY = _binaryop + + op_INPLACE_LSHIFT = _binaryop + op_INPLACE_RSHIFT = _binaryop + op_INPLACE_AND = _binaryop + op_INPLACE_OR = _binaryop + op_INPLACE_XOR = _binaryop + + op_BINARY_ADD = _binaryop + op_BINARY_SUBTRACT = _binaryop + op_BINARY_MULTIPLY = _binaryop + op_BINARY_DIVIDE = _binaryop + op_BINARY_TRUE_DIVIDE = _binaryop + op_BINARY_FLOOR_DIVIDE = _binaryop + op_BINARY_MODULO = _binaryop + op_BINARY_POWER = _binaryop + op_BINARY_MATRIX_MULTIPLY = _binaryop + + op_BINARY_LSHIFT = _binaryop + op_BINARY_RSHIFT = _binaryop + op_BINARY_AND = _binaryop + op_BINARY_OR = _binaryop + op_BINARY_XOR = _binaryop + + def op_MAKE_FUNCTION(self, state, inst, MAKE_CLOSURE=False): + name = state.pop() + code = state.pop() + closure = annotations = kwdefaults = defaults = None + if PYVERSION < (3, 6): + num_posdefaults = inst.arg & 0xFF + num_kwdefaults = (inst.arg >> 8) & 0xFF + num_annotations = (inst.arg >> 16) & 0x7FFF + if MAKE_CLOSURE: + closure = state.pop() + if num_annotations > 0: + annotations = state.pop() + if num_kwdefaults > 0: + kwdefaults = [] + for i in range(num_kwdefaults): + v = state.pop() + k = state.pop() + kwdefaults.append((k, v)) + kwdefaults = 
tuple(kwdefaults) + if num_posdefaults: + defaults = [] + for i in range(num_posdefaults): + defaults.append(state.pop()) + defaults = tuple(defaults) + else: + if inst.arg & 0x8: + closure = state.pop() + if inst.arg & 0x4: + annotations = state.pop() + if inst.arg & 0x2: + kwdefaults = state.pop() + if inst.arg & 0x1: + defaults = state.pop() + res = state.make_temp() + state.append( + inst, + name=name, + code=code, + closure=closure, + annotations=annotations, + kwdefaults=kwdefaults, + defaults=defaults, + res=res, + ) + state.push(res) + + def op_MAKE_CLOSURE(self, state, inst): + self.op_MAKE_FUNCTION(state, inst, MAKE_CLOSURE=True) + + def op_LOAD_CLOSURE(self, state, inst): + res = state.make_temp() + state.append(inst, res=res) + state.push(res) + + def op_LOAD_ASSERTION_ERROR(self, state, inst): + res = state.make_temp("assertion_error") + state.append(inst, res=res) + state.push(res) + + def op_JUMP_IF_NOT_EXC_MATCH(self, state, inst): + # Tests whether the second value on the stack is an exception matching + # TOS, and jumps if it is not. Pops two values from the stack. + pred = state.make_temp("predicate") + tos = state.pop() + tos1 = state.pop() + state.append(inst, pred=pred, tos=tos, tos1=tos1) + state.fork(pc=inst.next) + state.fork(pc=inst.get_jump_target()) + + def op_RERAISE(self, state, inst): + # This isn't handled, but the state is set up anyway + exc = state.pop() + state.append(inst, exc=exc) + state.terminate() + + # NOTE: Please see notes in `interpreter.py` surrounding the implementation + # of LOAD_METHOD and CALL_METHOD. + + def op_LOAD_METHOD(self, state, inst): + self.op_LOAD_ATTR(state, inst) + + def op_CALL_METHOD(self, state, inst): + self.op_CALL_FUNCTION(state, inst) + + +@total_ordering +class State(object): + """State of the trace + """ + def __init__(self, bytecode, pc, nstack, blockstack): + """ + Parameters + ---------- + bytecode : numba.bytecode.ByteCode + function bytecode + pc : int + program counter + nstack : int + stackdepth at entry + blockstack : Sequence[Dict] + A sequence of dictionary denoting entries on the blockstack. + """ + self._bytecode = bytecode + self._pc_initial = pc + self._pc = pc + self._nstack_initial = nstack + self._stack = [] + self._blockstack_initial = tuple(blockstack) + self._blockstack = list(blockstack) + self._temp_registers = [] + self._insts = [] + self._outedges = [] + self._terminated = False + self._phis = {} + self._outgoing_phis = UniqueDict() + self._used_regs = set() + for i in range(nstack): + phi = self.make_temp("phi") + self._phis[phi] = i + self.push(phi) + + def __repr__(self): + return "State(pc_initial={} nstack_initial={})".format( + self._pc_initial, self._nstack_initial + ) + + def get_identity(self): + return (self._pc_initial, self._nstack_initial) + + def __hash__(self): + return hash(self.get_identity()) + + def __lt__(self, other): + return self.get_identity() < other.get_identity() + + def __eq__(self, other): + return self.get_identity() == other.get_identity() + + @property + def pc_initial(self): + """The starting bytecode offset of this State. + The PC given to the constructor. + """ + return self._pc_initial + + @property + def instructions(self): + """The list of instructions information as a 2-tuple of + ``(pc : int, register_map : Dict)`` + """ + return self._insts + + @property + def outgoing_edges(self): + """The list of outgoing edges. 
+ + Returns + ------- + edges : List[State] + """ + return self._outedges + + @property + def outgoing_phis(self): + """The dictionary of outgoing phi nodes. + + The keys are the name of the PHI nodes. + The values are the outgoing states. + """ + return self._outgoing_phis + + @property + def blockstack_initial(self): + """A copy of the initial state of the blockstack + """ + return self._blockstack_initial + + @property + def stack_depth(self): + """The current size of the stack + + Returns + ------- + res : int + """ + return len(self._stack) + + def find_initial_try_block(self): + """Find the initial *try* block. + """ + for blk in reversed(self._blockstack_initial): + if blk['kind'] == BlockKind('TRY'): + return blk + + def has_terminated(self): + return self._terminated + + def get_inst(self): + return self._bytecode[self._pc] + + def advance_pc(self): + inst = self.get_inst() + self._pc = inst.next + + def make_temp(self, prefix=""): + if not prefix: + name = "${prefix}{offset}{opname}.{tempct}".format( + prefix=prefix, + offset=self._pc, + opname=self.get_inst().opname.lower(), + tempct=len(self._temp_registers), + ) + else: + name = "${prefix}{offset}.{tempct}".format( + prefix=prefix, + offset=self._pc, + tempct=len(self._temp_registers), + ) + + self._temp_registers.append(name) + return name + + def append(self, inst, **kwargs): + """Append new inst""" + self._insts.append((inst.offset, kwargs)) + self._used_regs |= set(_flatten_inst_regs(kwargs.values())) + + def get_tos(self): + return self.peek(1) + + def peek(self, k): + """Return the k'th element on the stack + """ + return self._stack[-k] + + def push(self, item): + """Push to stack""" + self._stack.append(item) + + def pop(self): + """Pop the stack""" + return self._stack.pop() + + def push_block(self, synblk): + """Push a block to blockstack + """ + assert 'stack_depth' in synblk + self._blockstack.append(synblk) + + def reset_stack(self, depth): + """Reset the stack to the given stack depth. + Returning the popped items. + """ + self._stack, popped = self._stack[:depth], self._stack[depth:] + return popped + + def make_block(self, kind, end, reset_stack=True, handler=None): + """Make a new block + """ + d = { + 'kind': BlockKind(kind), + 'end': end, + 'entry_stack': len(self._stack), + } + if reset_stack: + d['stack_depth'] = len(self._stack) + else: + d['stack_depth'] = None + d['handler'] = handler + return d + + def pop_block(self): + """Pop a block and unwind the stack + """ + b = self._blockstack.pop() + self.reset_stack(b['stack_depth']) + return b + + def pop_block_and_above(self, blk): + """Find *blk* in the blockstack and remove it and all blocks above it + from the stack. 
+        """
+        idx = self._blockstack.index(blk)
+        assert 0 <= idx < len(self._blockstack)
+        self._blockstack = self._blockstack[:idx]
+
+    def get_top_block(self, kind):
+        """Find the first block that matches *kind*
+        """
+        kind = BlockKind(kind)
+        for bs in reversed(self._blockstack):
+            if bs['kind'] == kind:
+                return bs
+
+    def has_active_try(self):
+        """Returns a boolean indicating if the top-block is a *try* block
+        """
+        return self.get_top_block('TRY') is not None
+
+    def get_varname(self, inst):
+        """Get the referenced variable name from the oparg
+        """
+        return self._bytecode.co_varnames[inst.arg]
+
+    def terminate(self):
+        """Mark the block as terminated
+        """
+        self._terminated = True
+
+    def fork(self, pc, npop=0, npush=0, extra_block=None):
+        """Fork the state
+        """
+        # Handle changes on the stack
+        stack = list(self._stack)
+        if npop:
+            assert 0 <= npop <= len(self._stack)
+            nstack = len(self._stack) - npop
+            stack = stack[:nstack]
+        if npush:
+            assert 0 <= npush
+            for i in range(npush):
+                stack.append(self.make_temp())
+        # Handle changes on the blockstack
+        blockstack = list(self._blockstack)
+        if extra_block:
+            blockstack.append(extra_block)
+        self._outedges.append(Edge(
+            pc=pc, stack=tuple(stack), npush=npush,
+            blockstack=tuple(blockstack),
+        ))
+        self.terminate()
+
+    def split_new_block(self):
+        """Split the state
+        """
+        self.fork(pc=self._pc)
+
+    def get_outgoing_states(self):
+        """Get a state for each outgoing edge
+        """
+        # Should only be called once
+        assert not self._outgoing_phis
+        ret = []
+        for edge in self._outedges:
+            state = State(bytecode=self._bytecode, pc=edge.pc,
+                          nstack=len(edge.stack), blockstack=edge.blockstack)
+            ret.append(state)
+            # Map outgoing_phis
+            for phi, i in state._phis.items():
+                self._outgoing_phis[phi] = edge.stack[i]
+        return ret
+
+    def get_outgoing_edgepushed(self):
+        """
+        Returns
+        -------
+        Dict[int, tuple]
+            where keys are the PC and values are the edge-pushed stack
+            values
+        """
+        return {edge.pc: tuple(edge.stack[-edge.npush:])
+                for edge in self._outedges}
+
+
+Edge = namedtuple("Edge", ["pc", "stack", "blockstack", "npush"])
+
+
+class AdaptDFA(object):
+    """Adapt Flow to the old DFA class expected by Interpreter
+    """
+    def __init__(self, flow):
+        self._flow = flow
+
+    @property
+    def infos(self):
+        return self._flow.block_infos
+
+
+AdaptBlockInfo = namedtuple(
+    "AdaptBlockInfo",
+    ["insts", "outgoing_phis", "blockstack", "active_try_block",
+     "outgoing_edgepushed"],
+)
+
+
+def adapt_state_infos(state):
+    return AdaptBlockInfo(
+        insts=tuple(state.instructions),
+        outgoing_phis=state.outgoing_phis,
+        blockstack=state.blockstack_initial,
+        active_try_block=state.find_initial_try_block(),
+        outgoing_edgepushed=state.get_outgoing_edgepushed(),
+    )
+
+
+def _flatten_inst_regs(iterable):
+    """Flatten an iterable of registers used in an instruction
+    """
+    for item in iterable:
+        if isinstance(item, str):
+            yield item
+        elif isinstance(item, (tuple, list)):
+            for x in _flatten_inst_regs(item):
+                yield x
+
+
+class AdaptCFA(object):
+    """Adapt Flow to the old CFA class expected by Interpreter
+    """
+    def __init__(self, flow):
+        self._flow = flow
+        self._blocks = {}
+        for offset, blockinfo in flow.block_infos.items():
+            self._blocks[offset] = AdaptCFBlock(blockinfo, offset)
+        graph = flow.cfgraph
+        # Find backbone
+        backbone = graph.backbone()
+        # Filter out in-loop blocks (assuming no other cyclic control blocks)
+        # This is to avoid variables defined in loops being considered as
+        # function scope.
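+        # For example, a variable first assigned inside a ``for`` body
+        # belongs to a block inside a loop; such blocks are subtracted
+        # from the backbone below.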
+ inloopblocks = set() + for b in self.blocks.keys(): + if graph.in_loops(b): + inloopblocks.add(b) + self._backbone = backbone - inloopblocks + + @property + def graph(self): + return self._flow.cfgraph + + @property + def backbone(self): + return self._backbone + + @property + def blocks(self): + return self._blocks + + def iterliveblocks(self): + for b in sorted(self.blocks): + yield self.blocks[b] + + def dump(self): + self._flow.cfgraph.dump() + + +class AdaptCFBlock(object): + def __init__(self, blockinfo, offset): + self.offset = offset + self.body = tuple(i for i, _ in blockinfo.insts) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/caching.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/caching.py new file mode 100644 index 000000000..4339f5e30 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/caching.py @@ -0,0 +1,731 @@ +""" +Caching mechanism for compiled functions. +""" + + +from abc import ABCMeta, abstractmethod, abstractproperty +import contextlib +import errno +import hashlib +import inspect +import itertools +import os +import pickle +import sys +import tempfile +import uuid +import warnings + +from numba.misc.appdirs import AppDirs + +import numba +from numba.core.errors import NumbaWarning +from numba.core.base import BaseContext +from numba.core.codegen import CodeLibrary +from numba.core.compiler import CompileResult +from numba.core import config, compiler +from numba.core.serialize import dumps + + +def _cache_log(msg, *args): + if config.DEBUG_CACHE: + msg = msg % args + print(msg) + + +class _Cache(metaclass=ABCMeta): + + @abstractproperty + def cache_path(self): + """ + The base filesystem path of this cache (for example its root folder). + """ + + @abstractmethod + def load_overload(self, sig, target_context): + """ + Load an overload for the given signature using the target context. + The saved object must be returned if successful, None if not found + in the cache. + """ + + @abstractmethod + def save_overload(self, sig, data): + """ + Save the overload for the given signature. + """ + + @abstractmethod + def enable(self): + """ + Enable the cache. + """ + + @abstractmethod + def disable(self): + """ + Disable the cache. + """ + + @abstractmethod + def flush(self): + """ + Flush the cache. + """ + + +class NullCache(_Cache): + @property + def cache_path(self): + return None + + def load_overload(self, sig, target_context): + pass + + def save_overload(self, sig, cres): + pass + + def enable(self): + pass + + def disable(self): + pass + + def flush(self): + pass + + +class _CacheLocator(metaclass=ABCMeta): + """ + A filesystem locator for caching a given function. + """ + + def ensure_cache_path(self): + path = self.get_cache_path() + os.makedirs(path, exist_ok=True) + # Ensure the directory is writable by trying to write a temporary file + tempfile.TemporaryFile(dir=path).close() + + @abstractmethod + def get_cache_path(self): + """ + Return the directory the function is cached in. + """ + + @abstractmethod + def get_source_stamp(self): + """ + Get a timestamp representing the source code's freshness. + Can return any picklable Python object. + """ + + @abstractmethod + def get_disambiguator(self): + """ + Get a string disambiguator for this locator's function. + It should allow disambiguating different but similarly-named functions. + """ + + @classmethod + def from_function(cls, py_func, py_file): + """ + Create a locator instance for the given function located in the + given file. 
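+
+        For example, for a function defined in a hypothetical
+        ``/home/user/mod.py``, the in-tree locator (see
+        ``_InTreeCacheLocator`` below) resolves the cache directory to
+        ``/home/user/__pycache__``.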
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def get_suitable_cache_subpath(cls, py_file):
+        """Given the Python file path, compute a suitable path inside the
+        cache directory.
+
+        This will reduce a file path that is too long, which can be a
+        problem on some operating systems (e.g. Windows 7).
+        """
+        path = os.path.abspath(py_file)
+        subpath = os.path.dirname(path)
+        parentdir = os.path.split(subpath)[-1]
+        # Use SHA1 to reduce path length.
+        # Note: windows doesn't like long paths.
+        hashed = hashlib.sha1(subpath.encode()).hexdigest()
+        # Retain parent directory name for easier debugging
+        return '_'.join([parentdir, hashed])
+
+
+class _SourceFileBackedLocatorMixin(object):
+    """
+    A cache locator mixin for functions which are backed by a well-known
+    Python source file.
+    """
+
+    def get_source_stamp(self):
+        if getattr(sys, 'frozen', False):
+            st = os.stat(sys.executable)
+        else:
+            st = os.stat(self._py_file)
+        # We use both timestamp and size as some filesystems only have
+        # second granularity.
+        return st.st_mtime, st.st_size
+
+    def get_disambiguator(self):
+        return str(self._lineno)
+
+    @classmethod
+    def from_function(cls, py_func, py_file):
+        if not os.path.exists(py_file):
+            # Perhaps a placeholder (e.g. "<string>")
+            return
+        self = cls(py_func, py_file)
+        try:
+            self.ensure_cache_path()
+        except OSError:
+            # Cannot ensure the cache directory exists or is writable
+            return
+        return self
+
+
+class _UserProvidedCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator):
+    """
+    A locator that always points to the user-provided directory in
+    `numba.config.CACHE_DIR`
+    """
+    def __init__(self, py_func, py_file):
+        self._py_file = py_file
+        self._lineno = py_func.__code__.co_firstlineno
+        cache_subpath = self.get_suitable_cache_subpath(py_file)
+        self._cache_path = os.path.join(config.CACHE_DIR, cache_subpath)
+
+    def get_cache_path(self):
+        return self._cache_path
+
+    @classmethod
+    def from_function(cls, py_func, py_file):
+        if not config.CACHE_DIR:
+            return
+        parent = super(_UserProvidedCacheLocator, cls)
+        return parent.from_function(py_func, py_file)
+
+
+class _InTreeCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator):
+    """
+    A locator for functions backed by a regular Python module with a
+    writable __pycache__ directory.
+    """
+
+    def __init__(self, py_func, py_file):
+        self._py_file = py_file
+        self._lineno = py_func.__code__.co_firstlineno
+        self._cache_path = os.path.join(os.path.dirname(self._py_file),
+                                        '__pycache__')
+
+    def get_cache_path(self):
+        return self._cache_path
+
+
+class _UserWideCacheLocator(_SourceFileBackedLocatorMixin, _CacheLocator):
+    """
+    A locator for functions backed by a regular Python module or a
+    frozen executable, cached into a user-wide cache directory.
+    """
+
+    def __init__(self, py_func, py_file):
+        self._py_file = py_file
+        self._lineno = py_func.__code__.co_firstlineno
+        appdirs = AppDirs(appname="numba", appauthor=False)
+        cache_dir = appdirs.user_cache_dir
+        cache_subpath = self.get_suitable_cache_subpath(py_file)
+        self._cache_path = os.path.join(cache_dir, cache_subpath)
+
+    def get_cache_path(self):
+        return self._cache_path
+
+    @classmethod
+    def from_function(cls, py_func, py_file):
+        if not (os.path.exists(py_file) or getattr(sys, 'frozen', False)):
+            # Perhaps a placeholder (e.g. "<string>")
+            # stop function exit if frozen, since it uses a temp placeholder
+            return
+        self = cls(py_func, py_file)
+        try:
+            self.ensure_cache_path()
+        except OSError:
+            # Cannot ensure the cache directory exists or is writable
+            return
+        return self
+
+
+class _IPythonCacheLocator(_CacheLocator):
+    """
+    A locator for functions entered at the IPython prompt (notebook or
+    other).
+    """
+
+    def __init__(self, py_func, py_file):
+        self._py_file = py_file
+        # Note: IPython enhances the linecache module to be able to
+        # inspect the source code of functions defined at the interactive
+        # prompt.
+        source = inspect.getsource(py_func)
+        if isinstance(source, bytes):
+            self._bytes_source = source
+        else:
+            self._bytes_source = source.encode('utf-8')
+
+    def get_cache_path(self):
+        # We could also use jupyter_core.paths.jupyter_runtime_dir()
+        # In both cases this is a user-wide directory, so we need to
+        # be careful when disambiguating if we don't want too many
+        # conflicts (see below).
+        try:
+            from IPython.paths import get_ipython_cache_dir
+        except ImportError:
+            # older IPython version
+            from IPython.utils.path import get_ipython_cache_dir
+        return os.path.join(get_ipython_cache_dir(), 'numba_cache')
+
+    def get_source_stamp(self):
+        return hashlib.sha256(self._bytes_source).hexdigest()
+
+    def get_disambiguator(self):
+        # Heuristic: we don't want too many variants being saved, but
+        # we don't want similarly named functions (e.g. "f") to compete
+        # for the cache, so we hash the first two lines of the function
+        # source (usually this will be the @jit decorator + the function
+        # signature).
+        firstlines = b''.join(self._bytes_source.splitlines(True)[:2])
+        return hashlib.sha256(firstlines).hexdigest()[:10]
+
+    @classmethod
+    def from_function(cls, py_func, py_file):
+        if not (
+            py_file.startswith("<ipython-")
+            or os.path.basename(os.path.dirname(py_file)).startswith(
+                "ipykernel_")
+        ):
+            return
+        self = cls(py_func, py_file)
+        try:
+            self.ensure_cache_path()
+        except OSError:
+            # Cannot ensure the cache directory exists
+            return
+        return self
+
+
+class CacheImpl(metaclass=ABCMeta):
+    """
+    Provides the core machinery for caching.
+    - implement how to serialize and deserialize the data in the cache
+    - control the filename of the cache
+    - provide the cache locator
+    """
+    _locator_classes = [_UserProvidedCacheLocator,
+                        _InTreeCacheLocator,
+                        _UserWideCacheLocator,
+                        _IPythonCacheLocator]
+
+    def __init__(self, py_func):
+        self._lineno = py_func.__code__.co_firstlineno
+        # Get qualname
+        try:
+            qualname = py_func.__qualname__
+        except AttributeError:
+            qualname = py_func.__name__
+        # Find a locator
+        source_path = inspect.getfile(py_func)
+        for cls in self._locator_classes:
+            locator = cls.from_function(py_func, source_path)
+            if locator is not None:
+                break
+        else:
+            raise RuntimeError("cannot cache function %r: no locator "
+                               "available for file %r"
+                               % (qualname, source_path))
+        self._locator = locator
+        # Use the filename base name as the module name to avoid conflict
+        # between foo/__init__.py and foo/foo.py
+        filename = inspect.getfile(py_func)
+        modname = os.path.splitext(os.path.basename(filename))[0]
+        fullname = "%s.%s" % (modname, qualname)
+        abiflags = getattr(sys, 'abiflags', '')
+        self._filename_base = self.get_filename_base(fullname, abiflags)
+
+    def get_filename_base(self, fullname, abiflags):
+        # '<' and '>' can appear in the qualname (e.g. '<locals>') but
+        # are forbidden in Windows filenames
+        fixed_fullname = fullname.replace('<', '').replace('>', '')
+        fmt = '%s-%s.py%d%d%s'
+        return fmt % (fixed_fullname, self.locator.get_disambiguator(),
+                      sys.version_info[0], sys.version_info[1], abiflags)
+
+    @property
+    def filename_base(self):
+        return self._filename_base
+
+    @property
+    def locator(self):
+        return self._locator
+
+    @abstractmethod
+    def reduce(self, data):
+        "Returns the serialized form of the data"
+        pass
+
+    @abstractmethod
+    def rebuild(self, target_context, reduced_data):
+        "Returns the de-serialized form of the *reduced_data*"
+        pass
+
+    @abstractmethod
+    def check_cachable(self, data):
+        "Returns True if the given data is cachable; otherwise, returns False."
+        pass
+
+
+class CompileResultCacheImpl(CacheImpl):
+    """
+    Implements the logic to cache CompileResult objects.
+    """
+
+    def reduce(self, cres):
+        """
+        Returns a serialized CompileResult
+        """
+        return cres._reduce()
+
+    def rebuild(self, target_context, payload):
+        """
+        Returns the unserialized CompileResult
+        """
+        return compiler.CompileResult._rebuild(target_context, *payload)
+
+    def check_cachable(self, cres):
+        """
+        Check cachability of the given compile result.
+ """ + cannot_cache = None + if any(not x.can_cache for x in cres.lifted): + cannot_cache = "as it uses lifted code" + elif cres.library.has_dynamic_globals: + cannot_cache = ("as it uses dynamic globals " + "(such as ctypes pointers and large global arrays)") + if cannot_cache: + msg = ('Cannot cache compiled function "%s" %s' + % (cres.fndesc.qualname.split('.')[-1], cannot_cache)) + warnings.warn_explicit(msg, NumbaWarning, + self._locator._py_file, self._lineno) + return False + return True + + +class CodeLibraryCacheImpl(CacheImpl): + """ + Implements the logic to cache CodeLibrary objects. + """ + + _filename_prefix = None # must be overridden + + def reduce(self, codelib): + """ + Returns a serialized CodeLibrary + """ + return codelib.serialize_using_object_code() + + def rebuild(self, target_context, payload): + """ + Returns the unserialized CodeLibrary + """ + return target_context.codegen().unserialize_library(payload) + + def check_cachable(self, codelib): + """ + Check cachability of the given CodeLibrary. + """ + return not codelib.has_dynamic_globals + + def get_filename_base(self, fullname, abiflags): + parent = super(CodeLibraryCacheImpl, self) + res = parent.get_filename_base(fullname, abiflags) + return '-'.join([self._filename_prefix, res]) + + +class IndexDataCacheFile(object): + """ + Implements the logic for the index file and data file used by a cache. + """ + def __init__(self, cache_path, filename_base, source_stamp): + self._cache_path = cache_path + self._index_name = '%s.nbi' % (filename_base,) + self._index_path = os.path.join(self._cache_path, self._index_name) + self._data_name_pattern = '%s.{number:d}.nbc' % (filename_base,) + self._source_stamp = source_stamp + self._version = numba.__version__ + + def flush(self): + self._save_index({}) + + def save(self, key, data): + """ + Save a new cache entry with *key* and *data*. + """ + overloads = self._load_index() + try: + # If key already exists, we will overwrite the file + data_name = overloads[key] + except KeyError: + # Find an available name for the data file + existing = set(overloads.values()) + for i in itertools.count(1): + data_name = self._data_name(i) + if data_name not in existing: + break + overloads[key] = data_name + self._save_index(overloads) + self._save_data(data_name, data) + + def load(self, key): + """ + Load a cache entry with *key*. + """ + overloads = self._load_index() + data_name = overloads.get(key) + if data_name is None: + return + try: + return self._load_data(data_name) + except OSError: + # File could have been removed while the index still refers it. + return + + def _load_index(self): + """ + Load the cache index and return it as a dictionary (possibly + empty if cache is empty or obsolete). + """ + try: + with open(self._index_path, "rb") as f: + version = pickle.load(f) + data = f.read() + except FileNotFoundError: + # Index doesn't exist yet? + return {} + if version != self._version: + # This is another version. Avoid trying to unpickling the + # rest of the stream, as that may fail. + return {} + stamp, overloads = pickle.loads(data) + _cache_log("[cache] index loaded from %r", self._index_path) + if stamp != self._source_stamp: + # Cache is not fresh. Stale data files will be eventually + # overwritten, since they are numbered in incrementing order. 
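+            # For example, editing the source file changes the
+            # (st_mtime, st_size) pair used as the stamp by the
+            # source-file-backed locators, so the comparison above fails
+            # and the index is treated as empty.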
+            return {}
+        else:
+            return overloads
+
+    def _save_index(self, overloads):
+        data = self._source_stamp, overloads
+        data = self._dump(data)
+        with self._open_for_write(self._index_path) as f:
+            pickle.dump(self._version, f, protocol=-1)
+            f.write(data)
+        _cache_log("[cache] index saved to %r", self._index_path)
+
+    def _load_data(self, name):
+        path = self._data_path(name)
+        with open(path, "rb") as f:
+            data = f.read()
+        tup = pickle.loads(data)
+        _cache_log("[cache] data loaded from %r", path)
+        return tup
+
+    def _save_data(self, name, data):
+        data = self._dump(data)
+        path = self._data_path(name)
+        with self._open_for_write(path) as f:
+            f.write(data)
+        _cache_log("[cache] data saved to %r", path)
+
+    def _data_name(self, number):
+        return self._data_name_pattern.format(number=number)
+
+    def _data_path(self, name):
+        return os.path.join(self._cache_path, name)
+
+    def _dump(self, obj):
+        return dumps(obj)
+
+    @contextlib.contextmanager
+    def _open_for_write(self, filepath):
+        """
+        Open *filepath* for writing in a race condition-free way
+        (hopefully). uuid4 is used to try and avoid name collisions on a
+        shared filesystem.
+        """
+        uid = uuid.uuid4().hex[:16]  # avoid long paths
+        tmpname = '%s.tmp.%s' % (filepath, uid)
+        try:
+            with open(tmpname, "wb") as f:
+                yield f
+            os.replace(tmpname, filepath)
+        except Exception:
+            # In case of error, remove dangling tmp file
+            try:
+                os.unlink(tmpname)
+            except OSError:
+                pass
+            raise
+
+
+class Cache(_Cache):
+    """
+    A per-function compilation cache. The cache saves data in separate
+    data files and maintains information in an index file.
+
+    There is one index file per function and Python version
+    ("function_name-<lineno>.pyXY.nbi") which contains a mapping of
+    signatures and architectures to data files.
+    It is prefixed by a versioning key and a timestamp of the Python
+    source file containing the function.
+
+    There is one data file ("function_name-<lineno>.pyXY.<number>.nbc")
+    per function, function signature, target architecture and Python
+    version.
+
+    Separate index and data files per Python version avoid pickle
+    compatibility problems.
+
+    Note:
+    This contains the driver logic only. The core logic is provided
+    by a subclass of ``CacheImpl`` specified as *_impl_class* in the
+    subclass.
+    """
+
+    # The following class variable must be overridden by subclasses.
+    _impl_class = None
+
+    def __init__(self, py_func):
+        self._name = repr(py_func)
+        self._py_func = py_func
+        self._impl = self._impl_class(py_func)
+        self._cache_path = self._impl.locator.get_cache_path()
+        # This may be a bit strict but avoids us maintaining a magic number
+        source_stamp = self._impl.locator.get_source_stamp()
+        filename_base = self._impl.filename_base
+        self._cache_file = IndexDataCacheFile(cache_path=self._cache_path,
+                                              filename_base=filename_base,
+                                              source_stamp=source_stamp)
+        self.enable()
+
+    def __repr__(self):
+        return "<%s py_func=%r>" % (self.__class__.__name__, self._name)
+
+    @property
+    def cache_path(self):
+        return self._cache_path
+
+    def enable(self):
+        self._enabled = True
+
+    def disable(self):
+        self._enabled = False
+
+    def flush(self):
+        self._cache_file.flush()
+
+    def load_overload(self, sig, target_context):
+        """
+        Load and recreate the cached object for the given signature,
+        using the *target_context*.
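+
+        Illustrative usage (a sketch of what the dispatcher does
+        internally for a function compiled with ``cache=True``)::
+
+            cache = FunctionCache(py_func)
+            cres = cache.load_overload(sig, target_context)
+            if cres is None:
+                ...  # compile, then call cache.save_overload(sig, cres)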
+ """ + # Refresh the context to ensure it is initialized + target_context.refresh() + with self._guard_against_spurious_io_errors(): + return self._load_overload(sig, target_context) + # None returned if the `with` block swallows an exception + + def _load_overload(self, sig, target_context): + if not self._enabled: + return + key = self._index_key(sig, target_context.codegen()) + data = self._cache_file.load(key) + if data is not None: + data = self._impl.rebuild(target_context, data) + return data + + def save_overload(self, sig, data): + """ + Save the data for the given signature in the cache. + """ + with self._guard_against_spurious_io_errors(): + self._save_overload(sig, data) + + def _save_overload(self, sig, data): + if not self._enabled: + return + if not self._impl.check_cachable(data): + return + self._impl.locator.ensure_cache_path() + key = self._index_key(sig, data.codegen) + data = self._impl.reduce(data) + self._cache_file.save(key, data) + + @contextlib.contextmanager + def _guard_against_spurious_io_errors(self): + if os.name == 'nt': + # Guard against permission errors due to accessing the file + # from several processes (see #2028) + try: + yield + except OSError as e: + if e.errno != errno.EACCES: + raise + else: + # No such conditions under non-Windows OSes + yield + + def _index_key(self, sig, codegen): + """ + Compute index key for the given signature and codegen. + It includes a description of the OS, target architecture and hashes of + the bytecode for the function and, if the function has a __closure__, + a hash of the cell_contents. + """ + codebytes = self._py_func.__code__.co_code + if self._py_func.__closure__ is not None: + cvars = tuple([x.cell_contents for x in self._py_func.__closure__]) + # Note: cloudpickle serializes a function differently depending + # on how the process is launched; e.g. multiprocessing.Process + cvarbytes = dumps(cvars) + else: + cvarbytes = b'' + + hasher = lambda x: hashlib.sha256(x).hexdigest() + return (sig, codegen.magic_tuple(), (hasher(codebytes), + hasher(cvarbytes),)) + + +class FunctionCache(Cache): + """ + Implements Cache that saves and loads CompileResult objects. + """ + _impl_class = CompileResultCacheImpl + + +# Remember used cache filename prefixes. +_lib_cache_prefixes = set(['']) + + +def make_library_cache(prefix): + """ + Create a Cache class for additional compilation features to cache their + result for reuse. The cache is saved in filename pattern like + in ``FunctionCache`` but with additional *prefix* as specified. + """ + # avoid cache prefix reuse + assert prefix not in _lib_cache_prefixes + _lib_cache_prefixes.add(prefix) + + class CustomCodeLibraryCacheImpl(CodeLibraryCacheImpl): + _filename_prefix = prefix + + class LibraryCache(Cache): + """ + Implements Cache that saves and loads CodeLibrary objects for additional + feature for the specified python function. + """ + _impl_class = CustomCodeLibraryCacheImpl + + return LibraryCache + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callconv.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callconv.py new file mode 100644 index 000000000..9347b92b1 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callconv.py @@ -0,0 +1,650 @@ +""" +Calling conventions for Numba-compiled functions. 
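+
+For reference, the raw return codes defined below are::
+
+    RETCODE_OK     =  0    # ok: the value (or None) is in the return slot
+    RETCODE_EXC    = -1    # a Python exception is already set
+    RETCODE_NONE   = -2    # the function returned None
+    RETCODE_STOPIT = -3    # a generator raised StopIteration
+    code >= 1              # function-specific user-exception id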
+""" + +from collections import namedtuple +from collections.abc import Iterable +import itertools + +from llvmlite import ir + +from numba.core import types, cgutils +from numba.core.base import PYOBJECT, GENERIC_POINTER + + +TryStatus = namedtuple('TryStatus', ['in_try', 'excinfo']) + + +Status = namedtuple("Status", + ("code", + # If the function returned ok (a value or None) + "is_ok", + # If the function returned None + "is_none", + # If the function errored out (== not is_ok) + "is_error", + # If the generator exited with StopIteration + "is_stop_iteration", + # If the function errored with an already set exception + "is_python_exc", + # If the function errored with a user exception + "is_user_exc", + # The pointer to the exception info structure (for user exceptions) + "excinfoptr", + )) + +int32_t = ir.IntType(32) +errcode_t = int32_t + +def _const_int(code): + return ir.Constant(errcode_t, code) + +RETCODE_OK = _const_int(0) +RETCODE_EXC = _const_int(-1) +RETCODE_NONE = _const_int(-2) +# StopIteration +RETCODE_STOPIT = _const_int(-3) + +FIRST_USEREXC = 1 + +RETCODE_USEREXC = _const_int(FIRST_USEREXC) + + + + +class BaseCallConv(object): + + def __init__(self, context): + self.context = context + + def return_optional_value(self, builder, retty, valty, value): + if valty == types.none: + # Value is none + self.return_native_none(builder) + + elif retty == valty: + # Value is an optional, need a runtime switch + optval = self.context.make_helper(builder, retty, value=value) + + validbit = cgutils.as_bool_bit(builder, optval.valid) + with builder.if_then(validbit): + retval = self.context.get_return_value(builder, retty.type, + optval.data) + self.return_value(builder, retval) + + self.return_native_none(builder) + + elif not isinstance(valty, types.Optional): + # Value is not an optional, need a cast + if valty != retty.type: + value = self.context.cast(builder, value, fromty=valty, + toty=retty.type) + retval = self.context.get_return_value(builder, retty.type, value) + self.return_value(builder, retval) + + else: + raise NotImplementedError("returning {0} for {1}".format(valty, + retty)) + + def return_native_none(self, builder): + self._return_errcode_raw(builder, RETCODE_NONE) + + def return_exc(self, builder): + self._return_errcode_raw(builder, RETCODE_EXC, mark_exc=True) + + def return_stop_iteration(self, builder): + self._return_errcode_raw(builder, RETCODE_STOPIT) + + def get_return_type(self, ty): + """ + Get the actual type of the return argument for Numba type *ty*. + """ + restype = self.context.data_model_manager[ty].get_return_type() + return restype.as_pointer() + + def init_call_helper(self, builder): + """ + Initialize and return a call helper object for the given builder. + """ + ch = self._make_call_helper(builder) + builder.__call_helper = ch + return ch + + def _get_call_helper(self, builder): + return builder.__call_helper + + def raise_error(self, builder, api, status): + """ + Given a non-ok *status*, raise the corresponding Python exception. + """ + bbend = builder.function.append_basic_block() + + with builder.if_then(status.is_user_exc): + # Unserialize user exception. + # Make sure another error may not interfere. 
+            api.err_clear()
+            exc = api.unserialize(status.excinfoptr)
+            with cgutils.if_likely(builder,
+                                   cgutils.is_not_null(builder, exc)):
+                api.raise_object(exc)  # steals ref
+            builder.branch(bbend)
+
+        with builder.if_then(status.is_stop_iteration):
+            api.err_set_none("PyExc_StopIteration")
+            builder.branch(bbend)
+
+        with builder.if_then(status.is_python_exc):
+            # Error already raised => nothing to do
+            builder.branch(bbend)
+
+        api.err_set_string("PyExc_SystemError",
+                           "unknown error when calling native function")
+        builder.branch(bbend)
+
+        builder.position_at_end(bbend)
+
+    def decode_arguments(self, builder, argtypes, func):
+        """
+        Get the decoded (unpacked) Python arguments with *argtypes*
+        from LLVM function *func*. A tuple of LLVM values is returned.
+        """
+        raw_args = self.get_arguments(func)
+        arginfo = self._get_arg_packer(argtypes)
+        return arginfo.from_arguments(builder, raw_args)
+
+    def _get_arg_packer(self, argtypes):
+        """
+        Get an argument packer for the given argument types.
+        """
+        return self.context.get_arg_packer(argtypes)
+
+
+class MinimalCallConv(BaseCallConv):
+    """
+    A minimal calling convention, suitable for e.g. GPU targets.
+    The implemented function signature is:
+
+        retcode_t (<Python return type>*, ... <Python arguments>)
+
+    The return code will be one of the RETCODE_* constants or a
+    function-specific user exception id (>= RETCODE_USEREXC).
+
+    Caller is responsible for allocating a slot for the return value
+    (passed as a pointer in the first argument).
+    """
+
+    def _make_call_helper(self, builder):
+        return _MinimalCallHelper()
+
+    def return_value(self, builder, retval):
+        retptr = builder.function.args[0]
+        assert retval.type == retptr.type.pointee, \
+            (str(retval.type), str(retptr.type.pointee))
+        builder.store(retval, retptr)
+        self._return_errcode_raw(builder, RETCODE_OK)
+
+    def return_user_exc(self, builder, exc, exc_args=None, loc=None,
+                        func_name=None):
+        if exc is not None and not issubclass(exc, BaseException):
+            raise TypeError("exc should be None or exception class, got %r"
+                            % (exc,))
+        if exc_args is not None and not isinstance(exc_args, tuple):
+            raise TypeError("exc_args should be None or tuple, got %r"
+                            % (exc_args,))
+
+        # Build excinfo struct
+        if loc is not None:
+            fname = loc._raw_function_name()
+            if fname is None:
+                # could be exec(<string>) or REPL, try func_name
+                fname = func_name
+
+            locinfo = (fname, loc.filename, loc.line)
+            if None in locinfo:
+                locinfo = None
+        else:
+            locinfo = None
+
+        call_helper = self._get_call_helper(builder)
+        exc_id = call_helper._add_exception(exc, exc_args, locinfo)
+        self._return_errcode_raw(builder, _const_int(exc_id), mark_exc=True)
+
+    def return_status_propagate(self, builder, status):
+        self._return_errcode_raw(builder, status.code)
+
+    def _return_errcode_raw(self, builder, code, mark_exc=False):
+        if isinstance(code, int):
+            code = _const_int(code)
+        builder.ret(code)
+
+    def _get_return_status(self, builder, code):
+        """
+        Given a return *code*, get a Status instance.
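+
+        For example, ``code == RETCODE_NONE`` yields a Status with
+        ``is_ok=True``, ``is_none=True`` and ``is_error=False``, since
+        the "ok" flag is the disjunction of the OK and NONE comparisons
+        below.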
+        """
+        norm = builder.icmp_signed('==', code, RETCODE_OK)
+        none = builder.icmp_signed('==', code, RETCODE_NONE)
+        ok = builder.or_(norm, none)
+        err = builder.not_(ok)
+        exc = builder.icmp_signed('==', code, RETCODE_EXC)
+        is_stop_iteration = builder.icmp_signed('==', code, RETCODE_STOPIT)
+        is_user_exc = builder.icmp_signed('>=', code, RETCODE_USEREXC)
+
+        status = Status(code=code,
+                        is_ok=ok,
+                        is_error=err,
+                        is_python_exc=exc,
+                        is_none=none,
+                        is_user_exc=is_user_exc,
+                        is_stop_iteration=is_stop_iteration,
+                        excinfoptr=None)
+        return status
+
+    def get_function_type(self, restype, argtypes):
+        """
+        Get the implemented Function type for *restype* and *argtypes*.
+        """
+        arginfo = self._get_arg_packer(argtypes)
+        argtypes = list(arginfo.argument_types)
+        resptr = self.get_return_type(restype)
+        fnty = ir.FunctionType(errcode_t, [resptr] + argtypes)
+        return fnty
+
+    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
+        """
+        Set names and attributes of function arguments.
+        """
+        assert not noalias
+        arginfo = self._get_arg_packer(fe_argtypes)
+        arginfo.assign_names(self.get_arguments(fn),
+                             ['arg.' + a for a in args])
+        fn.args[0].name = ".ret"
+        return fn
+
+    def get_arguments(self, func):
+        """
+        Get the Python-level arguments of LLVM *func*.
+        """
+        return func.args[1:]
+
+    def call_function(self, builder, callee, resty, argtys, args):
+        """
+        Call the Numba-compiled *callee*.
+        """
+        retty = callee.args[0].type.pointee
+        retvaltmp = cgutils.alloca_once(builder, retty)
+        # initialize return value
+        builder.store(cgutils.get_null_value(retty), retvaltmp)
+
+        arginfo = self._get_arg_packer(argtys)
+        args = arginfo.as_arguments(builder, args)
+        realargs = [retvaltmp] + list(args)
+        code = builder.call(callee, realargs)
+        status = self._get_return_status(builder, code)
+        retval = builder.load(retvaltmp)
+        out = self.context.get_returned_value(builder, resty, retval)
+        return status, out
+
+
+class _MinimalCallHelper(object):
+    """
+    A call helper object for the "minimal" calling convention.
+    User exceptions are represented as integer codes and stored in
+    a mapping for retrieval from the caller.
+    """
+
+    def __init__(self):
+        self.exceptions = {}
+
+    def _add_exception(self, exc, exc_args, locinfo):
+        """
+        Add a new user exception to this helper. Returns an integer that
+        can be used to refer to the added exception in the future.
+
+        Parameters
+        ----------
+        exc :
+            exception type
+        exc_args : None or tuple
+            exception args
+        locinfo : tuple
+            location information
+        """
+        exc_id = len(self.exceptions) + FIRST_USEREXC
+        self.exceptions[exc_id] = exc, exc_args, locinfo
+        return exc_id
+
+    def get_exception(self, exc_id):
+        """
+        Get information about a user exception. Returns a tuple of
+        (exception type, exception args, location information).
+
+        Parameters
+        ----------
+        exc_id : integer
+            The ID of the exception to look up
+        """
+        try:
+            return self.exceptions[exc_id]
+        except KeyError:
+            msg = "unknown error %d in native function" % exc_id
+            exc = SystemError
+            exc_args = (msg,)
+            locinfo = None
+            return exc, exc_args, locinfo
+
+
+# The structure type constructed by PythonAPI.serialize_uncached()
+# i.e. a {i8* pickle_buf, i32 pickle_bufsz, i8* hash_buf}
+excinfo_t = ir.LiteralStructType([GENERIC_POINTER, int32_t, GENERIC_POINTER])
+excinfo_ptr_t = ir.PointerType(excinfo_t)
+
+
+class CPUCallConv(BaseCallConv):
+    """
+    The calling convention for CPU targets.
+    The implemented function signature is:
+
+        retcode_t (<Python return type>*, excinfo **, ... <Python arguments>
) + + The return code will be one of the RETCODE_* constants. + If RETCODE_USEREXC, the exception info pointer will be filled with + a pointer to a constant struct describing the raised exception. + + Caller is responsible for allocating slots for the return value + and the exception info pointer (passed as first and second arguments, + respectively). + """ + _status_ids = itertools.count(1) + + def _make_call_helper(self, builder): + return None + + def return_value(self, builder, retval): + retptr = self._get_return_argument(builder.function) + assert retval.type == retptr.type.pointee, \ + (str(retval.type), str(retptr.type.pointee)) + builder.store(retval, retptr) + self._return_errcode_raw(builder, RETCODE_OK) + + def set_static_user_exc(self, builder, exc, exc_args=None, loc=None, + func_name=None): + if exc is not None and not issubclass(exc, BaseException): + raise TypeError("exc should be None or exception class, got %r" + % (exc,)) + if exc_args is not None and not isinstance(exc_args, tuple): + raise TypeError("exc_args should be None or tuple, got %r" + % (exc_args,)) + # None is indicative of no args, set the exc_args to an empty tuple + # as PyObject_CallObject(exc, exc_args) requires the second argument to + # be a tuple (or nullptr, but doing this makes it consistent) + if exc_args is None: + exc_args = tuple() + + pyapi = self.context.get_python_api(builder) + # Build excinfo struct + if loc is not None: + fname = loc._raw_function_name() + if fname is None: + # could be exec() or REPL, try func_name + fname = func_name + + locinfo = (fname, loc.filename, loc.line) + if None in locinfo: + locinfo = None + else: + locinfo = None + exc = (exc, exc_args, locinfo) + struct_gv = pyapi.serialize_object(exc) + excptr = self._get_excinfo_argument(builder.function) + builder.store(struct_gv, excptr) + + def return_user_exc(self, builder, exc, exc_args=None, loc=None, + func_name=None): + try_info = getattr(builder, '_in_try_block', False) + self.set_static_user_exc(builder, exc, exc_args=exc_args, + loc=loc, func_name=func_name) + trystatus = self.check_try_status(builder) + if try_info: + # This is a hack for old-style impl. + # We will branch directly to the exception handler. + builder.branch(try_info['target']) + else: + # Return from the current function + self._return_errcode_raw(builder, RETCODE_USEREXC, mark_exc=True) + + def _get_try_state(self, builder): + try: + return builder.__eh_try_state + except AttributeError: + ptr = cgutils.alloca_once( + builder, cgutils.intp_t, name='try_state', zfill=True, + ) + builder.__eh_try_state = ptr + return ptr + + def check_try_status(self, builder): + try_state_ptr = self._get_try_state(builder) + try_depth = builder.load(try_state_ptr) + # try_depth > 0 + in_try = builder.icmp_unsigned('>', try_depth, try_depth.type(0)) + + excinfoptr = self._get_excinfo_argument(builder.function) + excinfo = builder.load(excinfoptr) + + return TryStatus(in_try=in_try, excinfo=excinfo) + + def set_try_status(self, builder): + try_state_ptr = self._get_try_state(builder) + # Increment try depth + old = builder.load(try_state_ptr) + new = builder.add(old, old.type(1)) + builder.store(new, try_state_ptr) + + def unset_try_status(self, builder): + try_state_ptr = self._get_try_state(builder) + # Decrement try depth + old = builder.load(try_state_ptr) + new = builder.sub(old, old.type(1)) + builder.store(new, try_state_ptr) + + # Needs to reset the exception state so that the exception handler + # will run normally. 
+ excinfoptr = self._get_excinfo_argument(builder.function) + null = cgutils.get_null_value(excinfoptr.type.pointee) + builder.store(null, excinfoptr) + + def return_status_propagate(self, builder, status): + trystatus = self.check_try_status(builder) + excptr = self._get_excinfo_argument(builder.function) + builder.store(status.excinfoptr, excptr) + with builder.if_then(builder.not_(trystatus.in_try)): + self._return_errcode_raw(builder, status.code, mark_exc=True) + + def _return_errcode_raw(self, builder, code, mark_exc=False): + ret = builder.ret(code) + + if mark_exc: + md = builder.module.add_metadata([ir.IntType(1)(1)]) + ret.set_metadata("ret_is_raise", md) + + def _get_return_status(self, builder, code, excinfoptr): + """ + Given a return *code* and *excinfoptr*, get a Status instance. + """ + norm = builder.icmp_signed('==', code, RETCODE_OK) + none = builder.icmp_signed('==', code, RETCODE_NONE) + exc = builder.icmp_signed('==', code, RETCODE_EXC) + is_stop_iteration = builder.icmp_signed('==', code, RETCODE_STOPIT) + ok = builder.or_(norm, none) + err = builder.not_(ok) + is_user_exc = builder.icmp_signed('>=', code, RETCODE_USEREXC) + excinfoptr = builder.select(is_user_exc, excinfoptr, + ir.Constant(excinfo_ptr_t, ir.Undefined)) + + status = Status(code=code, + is_ok=ok, + is_error=err, + is_python_exc=exc, + is_none=none, + is_user_exc=is_user_exc, + is_stop_iteration=is_stop_iteration, + excinfoptr=excinfoptr) + return status + + def get_function_type(self, restype, argtypes): + """ + Get the implemented Function type for *restype* and *argtypes*. + """ + arginfo = self._get_arg_packer(argtypes) + argtypes = list(arginfo.argument_types) + resptr = self.get_return_type(restype) + fnty = ir.FunctionType(errcode_t, + [resptr, ir.PointerType(excinfo_ptr_t)] + + argtypes) + return fnty + + def decorate_function(self, fn, args, fe_argtypes, noalias=False): + """ + Set names of function arguments, and add useful attributes to them. + """ + arginfo = self._get_arg_packer(fe_argtypes) + arginfo.assign_names(self.get_arguments(fn), + ['arg.' + a for a in args]) + retarg = self._get_return_argument(fn) + retarg.name = "retptr" + retarg.add_attribute("nocapture") + retarg.add_attribute("noalias") + excarg = self._get_excinfo_argument(fn) + excarg.name = "excinfo" + excarg.add_attribute("nocapture") + excarg.add_attribute("noalias") + + if noalias: + args = self.get_arguments(fn) + for a in args: + if isinstance(a.type, ir.PointerType): + a.add_attribute("nocapture") + a.add_attribute("noalias") + + # Add metadata to mark functions that may need NRT + # thus disabling aggressive refct pruning in removerefctpass.py + def type_may_always_need_nrt(ty): + # Returns True if it's a non-Array type that is contains MemInfo + if not isinstance(ty, types.Array): + dmm = self.context.data_model_manager + if dmm[ty].contains_nrt_meminfo(): + return True + return False + + args_may_always_need_nrt = any( + map(type_may_always_need_nrt, fe_argtypes) + ) + + if args_may_always_need_nrt: + nmd = fn.module.add_named_metadata( + 'numba_args_may_always_need_nrt', + ) + nmd.add(fn.module.add_metadata([fn])) + + return fn + + def get_arguments(self, func): + """ + Get the Python-level arguments of LLVM *func*. + """ + return func.args[2:] + + def _get_return_argument(self, func): + return func.args[0] + + def _get_excinfo_argument(self, func): + return func.args[1] + + def call_function(self, builder, callee, resty, argtys, args, + attrs=None): + """ + Call the Numba-compiled *callee*. 
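+
+        For example, the CPython call wrapper (``callwrapper.py``, later
+        in this patch) invokes the compiled user function with
+        ``attrs=('noinline',)`` to keep it from being inlined into the
+        wrapper.
+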
+ Parameters: + ----------- + attrs: LLVM style string or iterable of individual attributes, default + is None which specifies no attributes. Examples: + LLVM style string: "noinline fast" + Equivalent iterable: ("noinline", "fast") + """ + # XXX better fix for callees that are not function values + # (pointers to function; thus have no `.args` attribute) + retty = self._get_return_argument(callee.function_type).pointee + + retvaltmp = cgutils.alloca_once(builder, retty) + # initialize return value to zeros + builder.store(cgutils.get_null_value(retty), retvaltmp) + + excinfoptr = cgutils.alloca_once(builder, ir.PointerType(excinfo_t), + name="excinfo") + + arginfo = self._get_arg_packer(argtys) + args = list(arginfo.as_arguments(builder, args)) + realargs = [retvaltmp, excinfoptr] + args + # deal with attrs, it's fine to specify a load in a string like + # "noinline fast" as per LLVM or equally as an iterable of individual + # attributes. + if attrs is None: + _attrs = () + elif isinstance(attrs, Iterable) and not isinstance(attrs, str): + _attrs = tuple(attrs) + else: + raise TypeError("attrs must be an iterable of strings or None") + code = builder.call(callee, realargs, attrs=_attrs) + status = self._get_return_status(builder, code, + builder.load(excinfoptr)) + retval = builder.load(retvaltmp) + out = self.context.get_returned_value(builder, resty, retval) + return status, out + + +class ErrorModel(object): + + def __init__(self, call_conv): + self.call_conv = call_conv + + def fp_zero_division(self, builder, exc_args=None, loc=None): + if self.raise_on_fp_zero_division: + self.call_conv.return_user_exc(builder, ZeroDivisionError, exc_args, + loc) + return True + else: + return False + + +class PythonErrorModel(ErrorModel): + """ + The Python error model. Any invalid FP input raises an exception. + """ + raise_on_fp_zero_division = True + + +class NumpyErrorModel(ErrorModel): + """ + In the Numpy error model, floating-point errors don't raise an + exception. The FPU exception state is inspected by Numpy at the + end of a ufunc's execution and a warning is raised if appropriate. + + Note there's no easy way to set the FPU exception state from LLVM. + Instructions known to set an FP exception can be optimized away: + https://llvm.org/bugs/show_bug.cgi?id=6050 + http://lists.llvm.org/pipermail/llvm-dev/2014-September/076918.html + http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20140929/237997.html + """ + raise_on_fp_zero_division = False + + +error_models = { + 'python': PythonErrorModel, + 'numpy': NumpyErrorModel, + } + + +def create_error_model(model_name, context): + """ + Create an error model instance for the given target context. 
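+
+    Illustrative usage::
+
+        error_model = create_error_model('numpy', context)
+        # Under the numpy model, fp_zero_division() returns False and
+        # emits no exception; the 'python' model emits a
+        # ZeroDivisionError and returns True.
+        error_model.fp_zero_division(builder)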
+ """ + return error_models[model_name](context.call_conv) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callwrapper.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callwrapper.py new file mode 100644 index 000000000..5508aa38e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/callwrapper.py @@ -0,0 +1,226 @@ +from llvmlite.ir import Constant, IRBuilder +import llvmlite.ir + +from numba.core import types, config, cgutils + + +class _ArgManager(object): + """ + A utility class to handle argument unboxing and cleanup + """ + def __init__(self, context, builder, api, env_manager, endblk, nargs): + self.context = context + self.builder = builder + self.api = api + self.env_manager = env_manager + self.arg_count = 0 # how many function arguments have been processed + self.cleanups = [] + self.nextblk = endblk + + def add_arg(self, obj, ty): + """ + Unbox argument and emit code that handles any error during unboxing. + Args are cleaned up in reverse order of the parameter list, and + cleanup begins as soon as unboxing of any argument fails. E.g. failure + on arg2 will result in control flow going through: + + arg2.err -> arg1.err -> arg0.err -> arg.end (returns) + """ + # Unbox argument + native = self.api.to_native_value(ty, obj) + + # If an error occurred, go to the cleanup block for + # the previous argument + with cgutils.if_unlikely(self.builder, native.is_error): + self.builder.branch(self.nextblk) + + # Define the cleanup function for the argument + def cleanup_arg(): + # Native value reflection + self.api.reflect_native_value(ty, native.value, self.env_manager) + + # Native value cleanup + if native.cleanup is not None: + native.cleanup() + + # NRT cleanup + # (happens after the native value cleanup as the latter + # may need the native value) + if self.context.enable_nrt: + self.context.nrt.decref(self.builder, ty, native.value) + + self.cleanups.append(cleanup_arg) + + # Write the on-error cleanup block for this argument + cleanupblk = self.builder.append_basic_block( + "arg%d.err" % self.arg_count) + with self.builder.goto_block(cleanupblk): + cleanup_arg() + # Go to next cleanup block + self.builder.branch(self.nextblk) + + self.nextblk = cleanupblk + self.arg_count += 1 + return native.value + + def emit_cleanup(self): + """ + Emit the cleanup code after returning from the wrapped function. + """ + for dtor in self.cleanups: + dtor() + + +class _GilManager(object): + """ + A utility class to handle releasing the GIL and then re-acquiring it + again. 
+ """ + + def __init__(self, builder, api, argman): + self.builder = builder + self.api = api + self.argman = argman + self.thread_state = api.save_thread() + + def emit_cleanup(self): + self.api.restore_thread(self.thread_state) + self.argman.emit_cleanup() + + +class PyCallWrapper(object): + def __init__(self, context, module, func, fndesc, env, call_helper, + release_gil): + self.context = context + self.module = module + self.func = func + self.fndesc = fndesc + self.env = env + self.release_gil = release_gil + + def build(self): + wrapname = self.fndesc.llvm_cpython_wrapper_name + + # This is the signature of PyCFunctionWithKeywords + # (see CPython's methodobject.h) + pyobj = self.context.get_argument_type(types.pyobject) + wrapty = llvmlite.ir.FunctionType(pyobj, [pyobj, pyobj, pyobj]) + wrapper = llvmlite.ir.Function(self.module, wrapty, name=wrapname) + + builder = IRBuilder(wrapper.append_basic_block('entry')) + + # - `closure` will receive the `self` pointer stored in the + # PyCFunction object (see _dynfunc.c) + # - `args` and `kws` will receive the tuple and dict objects + # of positional and keyword arguments, respectively. + closure, args, kws = wrapper.args + closure.name = 'py_closure' + args.name = 'py_args' + kws.name = 'py_kws' + + api = self.context.get_python_api(builder) + self.build_wrapper(api, builder, closure, args, kws) + + return wrapper, api + + def build_wrapper(self, api, builder, closure, args, kws): + nargs = len(self.fndesc.argtypes) + + objs = [api.alloca_obj() for _ in range(nargs)] + parseok = api.unpack_tuple(args, self.fndesc.qualname, + nargs, nargs, *objs) + + pred = builder.icmp_unsigned( + '==', + parseok, + Constant(parseok.type, None)) + with cgutils.if_unlikely(builder, pred): + builder.ret(api.get_null_object()) + + # Block that returns after erroneous argument unboxing/cleanup + endblk = builder.append_basic_block("arg.end") + with builder.goto_block(endblk): + builder.ret(api.get_null_object()) + + # Get the Environment object + env_manager = self.get_env(api, builder) + + cleanup_manager = _ArgManager(self.context, builder, api, + env_manager, endblk, nargs) + + # Compute the arguments to the compiled Numba function. + innerargs = [] + for obj, ty in zip(objs, self.fndesc.argtypes): + if isinstance(ty, types.Omitted): + # It's an omitted value => ignore dummy Python object + innerargs.append(None) + else: + val = cleanup_manager.add_arg(builder.load(obj), ty) + innerargs.append(val) + + if self.release_gil: + cleanup_manager = _GilManager(builder, api, cleanup_manager) + + # We elect to not inline the top level user function into the call + # wrapper, this incurs an overhead of a function call, however, it + # increases optimisation stability in that the optimised user function + # is what will actually be run and it is this function that all the + # inspection tools "see". Further, this makes optimisation "stable" in + # that calling the user function from e.g. C or from this wrapper will + # result in the same code executing, were inlining permitted this may + # not be the case as the inline could trigger additional optimisation + # as the function goes into the wrapper, this resulting in the executing + # instruction stream being different from that of the instruction stream + # present in the user function. 
+ status, retval = self.context.call_conv.call_function( + builder, self.func, self.fndesc.restype, self.fndesc.argtypes, + innerargs, attrs=('noinline',)) + # Do clean up + self.debug_print(builder, "# callwrapper: emit_cleanup") + cleanup_manager.emit_cleanup() + self.debug_print(builder, "# callwrapper: emit_cleanup end") + + # Determine return status + with builder.if_then(status.is_ok, likely=True): + # Ok => return boxed Python value + with builder.if_then(status.is_none): + api.return_none() + + retty = self._simplified_return_type() + obj = api.from_native_return(retty, retval, env_manager) + builder.ret(obj) + + # Error out + self.context.call_conv.raise_error(builder, api, status) + builder.ret(api.get_null_object()) + + def get_env(self, api, builder): + """Get the Environment object which is declared as a global + in the module of the wrapped function. + """ + envname = self.context.get_env_name(self.fndesc) + gvptr = self.context.declare_env_global(builder.module, envname) + envptr = builder.load(gvptr) + + env_body = self.context.get_env_body(builder, envptr) + + api.emit_environment_sentry(envptr, return_pyobject=True, + debug_msg=self.fndesc.env_name) + env_manager = api.get_env_manager(self.env, env_body, envptr) + return env_manager + + def _simplified_return_type(self): + """ + The NPM callconv has already converted simplified optional types. + We can simply use the value type from it. + """ + restype = self.fndesc.restype + # Optional type + if isinstance(restype, types.Optional): + return restype.type + else: + return restype + + def debug_print(self, builder, msg): + if config.DEBUG_JIT: + self.context.debug_print(builder, "DEBUGJIT: {0}".format(msg)) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ccallback.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ccallback.py new file mode 100644 index 000000000..2fd222db9 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ccallback.py @@ -0,0 +1,134 @@ +""" +Implementation of compiled C callbacks (@cfunc). +""" + + +import ctypes + +from numba.core import utils, compiler, registry +from numba.core.caching import NullCache, FunctionCache +from numba.core.dispatcher import _FunctionCompiler +from numba.core.typing import signature +from numba.core.typing.ctypes_utils import to_ctypes +from numba.core.compiler_lock import global_compiler_lock + + +class _CFuncCompiler(_FunctionCompiler): + + def _customize_flags(self, flags): + flags.no_cpython_wrapper = True + flags.no_cfunc_wrapper = False + # Disable compilation of the IR module, because we first want to + # add the cfunc wrapper. + flags.no_compile = True + # Object mode is not currently supported in C callbacks + # (no reliable way to get the environment) + flags.enable_pyobject = False + if flags.force_pyobject: + raise NotImplementedError("object mode not allowed in C callbacks") + return flags + + +class CFunc(object): + """ + A compiled C callback, as created by the @cfunc decorator. 
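+
+    For example (illustrative)::
+
+        from numba import cfunc, types
+
+        @cfunc(types.float64(types.float64, types.float64))
+        def add(a, b):
+            return a + b
+
+        add.ctypes(1.0, 2.0)   # call through the compiled C entry point
+        add.address            # raw function pointer, usable from C code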
+ """ + _targetdescr = registry.cpu_target + + def __init__(self, pyfunc, sig, locals, options, + pipeline_class=compiler.Compiler): + args, return_type = sig + if return_type is None: + raise TypeError("C callback needs an explicit return type") + self.__name__ = pyfunc.__name__ + self.__qualname__ = getattr(pyfunc, '__qualname__', self.__name__) + self.__wrapped__ = pyfunc + + self._pyfunc = pyfunc + self._sig = signature(return_type, *args) + self._compiler = _CFuncCompiler(pyfunc, self._targetdescr, + options, locals, + pipeline_class=pipeline_class) + + self._wrapper_name = None + self._wrapper_address = None + self._cache = NullCache() + self._cache_hits = 0 + + def enable_caching(self): + self._cache = FunctionCache(self._pyfunc) + + @global_compiler_lock + def compile(self): + # Try to load from cache + cres = self._cache.load_overload(self._sig, + self._targetdescr.target_context) + if cres is None: + cres = self._compile_uncached() + self._cache.save_overload(self._sig, cres) + else: + self._cache_hits += 1 + + self._library = cres.library + self._wrapper_name = cres.fndesc.llvm_cfunc_wrapper_name + self._wrapper_address = self._library.get_pointer_to_function( + self._wrapper_name) + + def _compile_uncached(self): + sig = self._sig + + # Compile native function as well as cfunc wrapper + return self._compiler.compile(sig.args, sig.return_type) + + @property + def native_name(self): + """ + The process-wide symbol the C callback is exposed as. + """ + # Note from our point of view, the C callback is the wrapper around + # the native function. + return self._wrapper_name + + @property + def address(self): + """ + The address of the C callback. + """ + return self._wrapper_address + + @utils.cached_property + def cffi(self): + """ + A cffi function pointer representing the C callback. + """ + import cffi + ffi = cffi.FFI() + # cffi compares types by name, so using precise types would risk + # spurious mismatches (such as "int32_t" vs. "int"). + return ffi.cast("void *", self.address) + + @utils.cached_property + def ctypes(self): + """ + A ctypes function object representing the C callback. + """ + ctypes_args = [to_ctypes(ty) for ty in self._sig.args] + ctypes_restype = to_ctypes(self._sig.return_type) + functype = ctypes.CFUNCTYPE(ctypes_restype, *ctypes_args) + return functype(self.address) + + def inspect_llvm(self): + """ + Return the LLVM IR of the C callback definition. + """ + return self._library.get_llvm_str() + + @property + def cache_hits(self): + return self._cache_hits + + def __repr__(self): + return "" % (self.__qualname__,) + + def __call__(self, *args, **kwargs): + return self._pyfunc(*args, **kwargs) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cgutils.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cgutils.py new file mode 100644 index 000000000..3efe47f84 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cgutils.py @@ -0,0 +1,1194 @@ +""" +Generic helpers for LLVM code generation. 
+""" + + +import collections +from contextlib import contextmanager +import functools + +from llvmlite import ir + +from numba.core import utils, types, config, debuginfo +import numba.core.datamodel + + +bool_t = ir.IntType(1) +int8_t = ir.IntType(8) +int32_t = ir.IntType(32) +intp_t = ir.IntType(utils.MACHINE_BITS) +voidptr_t = int8_t.as_pointer() + +true_bit = bool_t(1) +false_bit = bool_t(0) +true_byte = int8_t(1) +false_byte = int8_t(0) + + +def as_bool_bit(builder, value): + return builder.icmp_unsigned('!=', value, value.type(0)) + + +def make_anonymous_struct(builder, values, struct_type=None): + """ + Create an anonymous struct containing the given LLVM *values*. + """ + if struct_type is None: + struct_type = ir.LiteralStructType([v.type for v in values]) + struct_val = struct_type(ir.Undefined) + for i, v in enumerate(values): + struct_val = builder.insert_value(struct_val, v, i) + return struct_val + + +def make_bytearray(buf): + """ + Make a byte array constant from *buf*. + """ + b = bytearray(buf) + n = len(b) + return ir.Constant(ir.ArrayType(ir.IntType(8), n), b) + + +_struct_proxy_cache = {} + + +def create_struct_proxy(fe_type, kind='value'): + """ + Returns a specialized StructProxy subclass for the given fe_type. + """ + cache_key = (fe_type, kind) + res = _struct_proxy_cache.get(cache_key) + if res is None: + base = {'value': ValueStructProxy, + 'data': DataStructProxy, + }[kind] + clsname = base.__name__ + '_' + str(fe_type) + bases = (base,) + clsmembers = dict(_fe_type=fe_type) + res = type(clsname, bases, clsmembers) + + _struct_proxy_cache[cache_key] = res + return res + + +def copy_struct(dst, src, repl={}): + """ + Copy structure from *src* to *dst* with replacement from *repl*. + """ + repl = repl.copy() + # copy data from src or use those in repl + for k in src._datamodel._fields: + v = repl.pop(k, getattr(src, k)) + setattr(dst, k, v) + # use remaining key-values in repl + for k, v in repl.items(): + setattr(dst, k, v) + return dst + + +class _StructProxy(object): + """ + Creates a `Structure` like interface that is constructed with information + from DataModel instance. FE type must have a data model that is a + subclass of StructModel. + """ + # The following class members must be overridden by subclass + _fe_type = None + + def __init__(self, context, builder, value=None, ref=None): + self._context = context + self._datamodel = self._context.data_model_manager[self._fe_type] + if not isinstance(self._datamodel, numba.core.datamodel.StructModel): + raise TypeError( + "Not a structure model: {0}".format(self._datamodel)) + self._builder = builder + + self._be_type = self._get_be_type(self._datamodel) + assert not is_pointer(self._be_type) + + outer_ref, ref = self._make_refs(ref) + if ref.type.pointee != self._be_type: + raise AssertionError("bad ref type: expected %s, got %s" + % (self._be_type.as_pointer(), ref.type)) + + if value is not None: + if value.type != outer_ref.type.pointee: + raise AssertionError("bad value type: expected %s, got %s" + % (outer_ref.type.pointee, value.type)) + self._builder.store(value, outer_ref) + + self._value = ref + self._outer_ref = outer_ref + + def _make_refs(self, ref): + """ + Return an (outer ref, value ref) pair. By default, these are + the same pointers, but a derived class may override this. 
+ """ + if ref is None: + ref = alloca_once(self._builder, self._be_type, zfill=True) + return ref, ref + + def _get_be_type(self, datamodel): + raise NotImplementedError + + def _cast_member_to_value(self, index, val): + raise NotImplementedError + + def _cast_member_from_value(self, index, val): + raise NotImplementedError + + def _get_ptr_by_index(self, index): + return gep_inbounds(self._builder, self._value, 0, index) + + def _get_ptr_by_name(self, attrname): + index = self._datamodel.get_field_position(attrname) + return self._get_ptr_by_index(index) + + def __getattr__(self, field): + """ + Load the LLVM value of the named *field*. + """ + if not field.startswith('_'): + return self[self._datamodel.get_field_position(field)] + else: + raise AttributeError(field) + + def __setattr__(self, field, value): + """ + Store the LLVM *value* into the named *field*. + """ + if field.startswith('_'): + return super(_StructProxy, self).__setattr__(field, value) + self[self._datamodel.get_field_position(field)] = value + + def __getitem__(self, index): + """ + Load the LLVM value of the field at *index*. + """ + member_val = self._builder.load(self._get_ptr_by_index(index)) + return self._cast_member_to_value(index, member_val) + + def __setitem__(self, index, value): + """ + Store the LLVM *value* into the field at *index*. + """ + ptr = self._get_ptr_by_index(index) + value = self._cast_member_from_value(index, value) + if value.type != ptr.type.pointee: + if (is_pointer(value.type) and is_pointer(ptr.type.pointee) + and value.type.pointee == ptr.type.pointee.pointee): + # Differ by address-space only + # Auto coerce it + value = self._context.addrspacecast(self._builder, + value, + ptr.type.pointee.addrspace) + else: + raise TypeError("Invalid store of {value.type} to " + "{ptr.type.pointee} in " + "{self._datamodel} " + "(trying to write member #{index})" + .format(value=value, ptr=ptr, self=self, + index=index)) + self._builder.store(value, ptr) + + def __len__(self): + """ + Return the number of fields. + """ + return self._datamodel.field_count + + def _getpointer(self): + """ + Return the LLVM pointer to the underlying structure. + """ + return self._outer_ref + + def _getvalue(self): + """ + Load and return the value of the underlying LLVM structure. + """ + return self._builder.load(self._outer_ref) + + def _setvalue(self, value): + """ + Store the value in this structure. + """ + assert not is_pointer(value.type) + assert value.type == self._be_type, (value.type, self._be_type) + self._builder.store(value, self._value) + + +class ValueStructProxy(_StructProxy): + """ + Create a StructProxy suitable for accessing regular values + (e.g. LLVM values or alloca slots). + """ + def _get_be_type(self, datamodel): + return datamodel.get_value_type() + + def _cast_member_to_value(self, index, val): + return val + + def _cast_member_from_value(self, index, val): + return val + + +class DataStructProxy(_StructProxy): + """ + Create a StructProxy suitable for accessing data persisted in memory. + """ + def _get_be_type(self, datamodel): + return datamodel.get_data_type() + + def _cast_member_to_value(self, index, val): + model = self._datamodel.get_model(index) + return model.from_data(self._builder, val) + + def _cast_member_from_value(self, index, val): + model = self._datamodel.get_model(index) + return model.as_data(self._builder, val) + + +class Structure(object): + """ + A high-level object wrapping a alloca'ed LLVM structure, including + named fields and attribute access. 
+ """ + + # XXX Should this warrant several separate constructors? + def __init__(self, context, builder, value=None, ref=None, cast_ref=False): + self._type = context.get_struct_type(self) + self._context = context + self._builder = builder + if ref is None: + self._value = alloca_once(builder, self._type, zfill=True) + if value is not None: + assert not is_pointer(value.type) + assert value.type == self._type, (value.type, self._type) + builder.store(value, self._value) + else: + assert value is None + assert is_pointer(ref.type) + if self._type != ref.type.pointee: + if cast_ref: + ref = builder.bitcast(ref, self._type.as_pointer()) + else: + raise TypeError( + "mismatching pointer type: got %s, expected %s" + % (ref.type.pointee, self._type)) + self._value = ref + + self._namemap = {} + self._fdmap = [] + self._typemap = [] + base = int32_t(0) + for i, (k, tp) in enumerate(self._fields): + self._namemap[k] = i + self._fdmap.append((base, int32_t(i))) + self._typemap.append(tp) + + def _get_ptr_by_index(self, index): + ptr = self._builder.gep(self._value, self._fdmap[index], inbounds=True) + return ptr + + def _get_ptr_by_name(self, attrname): + return self._get_ptr_by_index(self._namemap[attrname]) + + def __getattr__(self, field): + """ + Load the LLVM value of the named *field*. + """ + if not field.startswith('_'): + return self[self._namemap[field]] + else: + raise AttributeError(field) + + def __setattr__(self, field, value): + """ + Store the LLVM *value* into the named *field*. + """ + if field.startswith('_'): + return super(Structure, self).__setattr__(field, value) + self[self._namemap[field]] = value + + def __getitem__(self, index): + """ + Load the LLVM value of the field at *index*. + """ + + return self._builder.load(self._get_ptr_by_index(index)) + + def __setitem__(self, index, value): + """ + Store the LLVM *value* into the field at *index*. + """ + ptr = self._get_ptr_by_index(index) + if ptr.type.pointee != value.type: + fmt = "Type mismatch: __setitem__(%d, ...) expected %r but got %r" + raise AssertionError(fmt % (index, + str(ptr.type.pointee), + str(value.type))) + self._builder.store(value, ptr) + + def __len__(self): + """ + Return the number of fields. + """ + return len(self._namemap) + + def _getpointer(self): + """ + Return the LLVM pointer to the underlying structure. + """ + return self._value + + def _getvalue(self): + """ + Load and return the value of the underlying LLVM structure. + """ + return self._builder.load(self._value) + + def _setvalue(self, value): + """Store the value in this structure""" + assert not is_pointer(value.type) + assert value.type == self._type, (value.type, self._type) + self._builder.store(value, self._value) + + # __iter__ is derived by Python from __len__ and __getitem__ + + +def alloca_once(builder, ty, size=None, name='', zfill=False): + """Allocate stack memory at the entry block of the current function + pointed by ``builder`` with llvm type ``ty``. The optional ``size`` arg + set the number of element to allocate. The default is 1. The optional + ``name`` arg set the symbol name inside the llvm IR for debugging. + If ``zfill`` is set, fill the memory with zeros at the current + use-site location. Note that the memory is always zero-filled after the + ``alloca`` at init-site (the entry block). 
+ """ + if isinstance(size, int): + size = ir.Constant(intp_t, size) + # suspend debug metadata emission else it links up python source lines with + # alloca in the entry block as well as their actual location and it makes + # the debug info "jump about". + with debuginfo.suspend_emission(builder): + with builder.goto_entry_block(): + ptr = builder.alloca(ty, size=size, name=name) + # Always zero-fill at init-site. This is safe. + builder.store(ty(None), ptr) + # Also zero-fill at the use-site + if zfill: + builder.store(ptr.type.pointee(None), ptr) + return ptr + + +def sizeof(builder, ptr_type): + """Compute sizeof using GEP + """ + null = ptr_type(None) + offset = null.gep([int32_t(1)]) + return builder.ptrtoint(offset, intp_t) + + +def alloca_once_value(builder, value, name='', zfill=False): + """ + Like alloca_once(), but passing a *value* instead of a type. The + type is inferred and the allocated slot is also initialized with the + given value. + """ + storage = alloca_once(builder, value.type, zfill=zfill) + builder.store(value, storage) + return storage + + +def insert_pure_function(module, fnty, name): + """ + Insert a pure function (in the functional programming sense) in the + given module. + """ + fn = get_or_insert_function(module, fnty, name) + fn.attributes.add("readonly") + fn.attributes.add("nounwind") + return fn + + +def get_or_insert_function(module, fnty, name): + """ + Get the function named *name* with type *fnty* from *module*, or insert it + if it doesn't exist. + """ + fn = module.globals.get(name, None) + if fn is None: + fn = ir.Function(module, fnty, name) + return fn + + +def get_or_insert_named_metadata(module, name): + try: + return module.get_named_metadata(name) + except KeyError: + return module.add_named_metadata(name) + + +def add_global_variable(module, ty, name, addrspace=0): + unique_name = module.get_unique_name(name) + return ir.GlobalVariable(module, ty, unique_name, addrspace) + + +def terminate(builder, bbend): + bb = builder.basic_block + if bb.terminator is None: + builder.branch(bbend) + + +def get_null_value(ltype): + return ltype(None) + + +def is_null(builder, val): + null = get_null_value(val.type) + return builder.icmp_unsigned('==', null, val) + + +def is_not_null(builder, val): + null = get_null_value(val.type) + return builder.icmp_unsigned('!=', null, val) + + +def if_unlikely(builder, pred): + return builder.if_then(pred, likely=False) + + +def if_likely(builder, pred): + return builder.if_then(pred, likely=True) + + +def ifnot(builder, pred): + return builder.if_then(builder.not_(pred)) + + +def increment_index(builder, val): + """ + Increment an index *val*. + """ + one = val.type(1) + # We pass the "nsw" flag in the hope that LLVM understands the index + # never changes sign. Unfortunately this doesn't always work + # (e.g. ndindex()). + return builder.add(val, one, flags=['nsw']) + + +Loop = collections.namedtuple('Loop', ('index', 'do_break')) + + +@contextmanager +def for_range(builder, count, start=None, intp=None): + """ + Generate LLVM IR for a for-loop in [start, count). + *start* is equal to 0 by default. 
+ + Yields a Loop namedtuple with the following members: + - `index` is the loop index's value + - `do_break` is a no-argument callable to break out of the loop + """ + if intp is None: + intp = count.type + if start is None: + start = intp(0) + stop = count + + bbcond = builder.append_basic_block("for.cond") + bbbody = builder.append_basic_block("for.body") + bbend = builder.append_basic_block("for.end") + + def do_break(): + builder.branch(bbend) + + bbstart = builder.basic_block + builder.branch(bbcond) + + with builder.goto_block(bbcond): + index = builder.phi(intp, name="loop.index") + pred = builder.icmp_signed('<', index, stop) + builder.cbranch(pred, bbbody, bbend) + + with builder.goto_block(bbbody): + yield Loop(index, do_break) + # Update bbbody as a new basic block may have been activated + bbbody = builder.basic_block + incr = increment_index(builder, index) + terminate(builder, bbcond) + + index.add_incoming(start, bbstart) + index.add_incoming(incr, bbbody) + + builder.position_at_end(bbend) + + +@contextmanager +def for_range_slice(builder, start, stop, step, intp=None, inc=True): + """ + Generate LLVM IR for a for-loop based on a slice. Yields a + (index, count) tuple where `index` is the slice index's value + inside the loop, and `count` the iteration count. + + Parameters + ------------- + builder : object + IRBuilder object + start : int + The beginning value of the slice + stop : int + The end value of the slice + step : int + The step value of the slice + intp : + The data type + inc : boolean, optional + Signals whether the step is positive (True) or negative (False). + + Returns + ----------- + None + """ + if intp is None: + intp = start.type + + bbcond = builder.append_basic_block("for.cond") + bbbody = builder.append_basic_block("for.body") + bbend = builder.append_basic_block("for.end") + bbstart = builder.basic_block + builder.branch(bbcond) + + with builder.goto_block(bbcond): + index = builder.phi(intp, name="loop.index") + count = builder.phi(intp, name="loop.count") + if (inc): + pred = builder.icmp_signed('<', index, stop) + else: + pred = builder.icmp_signed('>', index, stop) + builder.cbranch(pred, bbbody, bbend) + + with builder.goto_block(bbbody): + yield index, count + bbbody = builder.basic_block + incr = builder.add(index, step) + next_count = increment_index(builder, count) + terminate(builder, bbcond) + + index.add_incoming(start, bbstart) + index.add_incoming(incr, bbbody) + count.add_incoming(ir.Constant(intp, 0), bbstart) + count.add_incoming(next_count, bbbody) + builder.position_at_end(bbend) + + +@contextmanager +def for_range_slice_generic(builder, start, stop, step): + """ + A helper wrapper for for_range_slice(). This is a context manager which + yields two for_range_slice()-alike context managers, the first for + the positive step case, the second for the negative step case. + + Use: + with for_range_slice_generic(...) as (pos_range, neg_range): + with pos_range as (idx, count): + ... + with neg_range as (idx, count): + ... 
+ """ + intp = start.type + is_pos_step = builder.icmp_signed('>=', step, ir.Constant(intp, 0)) + + pos_for_range = for_range_slice(builder, start, stop, step, intp, inc=True) + neg_for_range = for_range_slice(builder, start, stop, step, intp, inc=False) + + @contextmanager + def cm_cond(cond, inner_cm): + with cond: + with inner_cm as value: + yield value + + with builder.if_else(is_pos_step, likely=True) as (then, otherwise): + yield cm_cond(then, pos_for_range), cm_cond(otherwise, neg_for_range) + + +@contextmanager +def loop_nest(builder, shape, intp, order='C'): + """ + Generate a loop nest walking a N-dimensional array. + Yields a tuple of N indices for use in the inner loop body, + iterating over the *shape* space. + + If *order* is 'C' (the default), indices are incremented inside-out + (i.e. (0,0), (0,1), (0,2), (1,0) etc.). + If *order* is 'F', they are incremented outside-in + (i.e. (0,0), (1,0), (2,0), (0,1) etc.). + This has performance implications when walking an array as it impacts + the spatial locality of memory accesses. + """ + assert order in 'CF' + if not shape: + # 0-d array + yield () + else: + if order == 'F': + _swap = lambda x: x[::-1] + else: + _swap = lambda x: x + with _loop_nest(builder, _swap(shape), intp) as indices: + assert len(indices) == len(shape) + yield _swap(indices) + + +@contextmanager +def _loop_nest(builder, shape, intp): + with for_range(builder, shape[0], intp=intp) as loop: + if len(shape) > 1: + with _loop_nest(builder, shape[1:], intp) as indices: + yield (loop.index,) + indices + else: + yield (loop.index,) + + +def pack_array(builder, values, ty=None): + """ + Pack a sequence of values in a LLVM array. *ty* should be given + if the array may be empty, in which case the type can't be inferred + from the values. + """ + n = len(values) + if ty is None: + ty = values[0].type + ary = ir.ArrayType(ty, n)(ir.Undefined) + for i, v in enumerate(values): + ary = builder.insert_value(ary, v, i) + return ary + + +def pack_struct(builder, values): + """ + Pack a sequence of values into a LLVM struct. + """ + structty = ir.LiteralStructType([v.type for v in values]) + st = structty(ir.Undefined) + for i, v in enumerate(values): + st = builder.insert_value(st, v, i) + return st + + +def unpack_tuple(builder, tup, count=None): + """ + Unpack an array or structure of values, return a Python tuple. + """ + if count is None: + # Assuming *tup* is an aggregate + count = len(tup.type.elements) + vals = [builder.extract_value(tup, i) + for i in range(count)] + return vals + + +def get_item_pointer(context, builder, aryty, ary, inds, wraparound=False, + boundscheck=False): + # Set boundscheck=True for any pointer access that should be + # boundschecked. do_boundscheck() will handle enabling or disabling the + # actual boundschecking based on the user config. + shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim) + strides = unpack_tuple(builder, ary.strides, count=aryty.ndim) + return get_item_pointer2(context, builder, data=ary.data, shape=shapes, + strides=strides, layout=aryty.layout, inds=inds, + wraparound=wraparound, boundscheck=boundscheck) + + +def do_boundscheck(context, builder, ind, dimlen, axis=None): + def _dbg(): + # Remove this when we figure out how to include this information + # in the error message. 
+ if axis is not None: + if isinstance(axis, int): + printf(builder, "debug: IndexError: index %d is out of bounds " + "for axis {} with size %d\n".format(axis), ind, dimlen) + else: + printf(builder, "debug: IndexError: index %d is out of bounds " + "for axis %d with size %d\n", ind, axis, + dimlen) + else: + printf(builder, + "debug: IndexError: index %d is out of bounds for size %d\n", + ind, dimlen) + + msg = "index is out of bounds" + out_of_bounds_upper = builder.icmp_signed('>=', ind, dimlen) + with if_unlikely(builder, out_of_bounds_upper): + if config.FULL_TRACEBACKS: + _dbg() + context.call_conv.return_user_exc(builder, IndexError, (msg,)) + out_of_bounds_lower = builder.icmp_signed('<', ind, ind.type(0)) + with if_unlikely(builder, out_of_bounds_lower): + if config.FULL_TRACEBACKS: + _dbg() + context.call_conv.return_user_exc(builder, IndexError, (msg,)) + + +def get_item_pointer2(context, builder, data, shape, strides, layout, inds, + wraparound=False, boundscheck=False): + # Set boundscheck=True for any pointer access that should be + # boundschecked. do_boundscheck() will handle enabling or disabling the + # actual boundschecking based on the user config. + if wraparound: + # Wraparound + indices = [] + for ind, dimlen in zip(inds, shape): + negative = builder.icmp_signed('<', ind, ind.type(0)) + wrapped = builder.add(dimlen, ind) + selected = builder.select(negative, wrapped, ind) + indices.append(selected) + else: + indices = inds + if boundscheck: + for axis, (ind, dimlen) in enumerate(zip(indices, shape)): + do_boundscheck(context, builder, ind, dimlen, axis) + + if not indices: + # Indexing with empty tuple + return builder.gep(data, [int32_t(0)]) + intp = indices[0].type + # Indexing code + if layout in 'CF': + steps = [] + # Compute steps for each dimension + if layout == 'C': + # C contiguous + for i in range(len(shape)): + last = intp(1) + for j in shape[i + 1:]: + last = builder.mul(last, j) + steps.append(last) + elif layout == 'F': + # F contiguous + for i in range(len(shape)): + last = intp(1) + for j in shape[:i]: + last = builder.mul(last, j) + steps.append(last) + else: + raise Exception("unreachable") + + # Compute index + loc = intp(0) + for i, s in zip(indices, steps): + tmp = builder.mul(i, s) + loc = builder.add(loc, tmp) + ptr = builder.gep(data, [loc]) + return ptr + else: + # Any layout + dimoffs = [builder.mul(s, i) for s, i in zip(strides, indices)] + offset = functools.reduce(builder.add, dimoffs) + return pointer_add(builder, data, offset) + + +def _scalar_pred_against_zero(builder, value, fpred, icond): + nullval = value.type(0) + if isinstance(value.type, (ir.FloatType, ir.DoubleType)): + isnull = fpred(value, nullval) + elif isinstance(value.type, ir.IntType): + isnull = builder.icmp_signed(icond, value, nullval) + else: + raise TypeError("unexpected value type %s" % (value.type,)) + return isnull + + +def is_scalar_zero(builder, value): + """ + Return a predicate representing whether *value* is equal to zero. + """ + return _scalar_pred_against_zero( + builder, value, functools.partial(builder.fcmp_ordered, '=='), '==') + + +def is_not_scalar_zero(builder, value): + """ + Return a predicate representing whether a *value* is not equal to zero. + (not exactly "not is_scalar_zero" because of nans) + """ + return _scalar_pred_against_zero( + builder, value, functools.partial(builder.fcmp_unordered, '!='), '!=') + + +def is_scalar_zero_or_nan(builder, value): + """ + Return a predicate representing whether *value* is equal to either zero + or NaN. 
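+
+    (An *unordered* ``==`` comparison is used: unordered predicates are true
+    whenever either operand is NaN, so a single fcmp covers both the zero
+    and the NaN case.)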
+ """ + return _scalar_pred_against_zero( + builder, value, functools.partial(builder.fcmp_unordered, '=='), '==') + + +is_true = is_not_scalar_zero +is_false = is_scalar_zero + + +def is_scalar_neg(builder, value): + """ + Is *value* negative? Assumes *value* is signed. + """ + return _scalar_pred_against_zero( + builder, value, functools.partial(builder.fcmp_ordered, '<'), '<') + + +def guard_null(context, builder, value, exc_tuple): + """ + Guard against *value* being null or zero. + *exc_tuple* should be a (exception type, arguments...) tuple. + """ + with builder.if_then(is_scalar_zero(builder, value), likely=False): + exc = exc_tuple[0] + exc_args = exc_tuple[1:] or None + context.call_conv.return_user_exc(builder, exc, exc_args) + + +def guard_memory_error(context, builder, pointer, msg=None): + """ + Guard against *pointer* being NULL (and raise a MemoryError). + """ + assert isinstance(pointer.type, ir.PointerType), pointer.type + exc_args = (msg,) if msg else () + with builder.if_then(is_null(builder, pointer), likely=False): + context.call_conv.return_user_exc(builder, MemoryError, exc_args) + + +@contextmanager +def if_zero(builder, value, likely=False): + """ + Execute the given block if the scalar value is zero. + """ + with builder.if_then(is_scalar_zero(builder, value), likely=likely): + yield + + +guard_zero = guard_null + + +def is_pointer(ltyp): + """ + Whether the LLVM type *typ* is a struct type. + """ + return isinstance(ltyp, ir.PointerType) + + +def get_record_member(builder, record, offset, typ): + pval = gep_inbounds(builder, record, 0, offset) + assert not is_pointer(pval.type.pointee) + return builder.bitcast(pval, typ.as_pointer()) + + +def is_neg_int(builder, val): + return builder.icmp_signed('<', val, val.type(0)) + + +def gep_inbounds(builder, ptr, *inds, **kws): + """ + Same as *gep*, but add the `inbounds` keyword. + """ + return gep(builder, ptr, *inds, inbounds=True, **kws) + + +def gep(builder, ptr, *inds, **kws): + """ + Emit a getelementptr instruction for the given pointer and indices. + The indices can be LLVM values or Python int constants. + """ + name = kws.pop('name', '') + inbounds = kws.pop('inbounds', False) + assert not kws + idx = [] + for i in inds: + if isinstance(i, int): + # NOTE: llvm only accepts int32 inside structs, not int64 + ind = int32_t(i) + else: + ind = i + idx.append(ind) + return builder.gep(ptr, idx, name=name, inbounds=inbounds) + + +def pointer_add(builder, ptr, offset, return_type=None): + """ + Add an integral *offset* to pointer *ptr*, and return a pointer + of *return_type* (or, if omitted, the same type as *ptr*). + + Note the computation is done in bytes, and ignores the width of + the pointed item type. + """ + intptr = builder.ptrtoint(ptr, intp_t) + if isinstance(offset, int): + offset = intp_t(offset) + intptr = builder.add(intptr, offset) + return builder.inttoptr(intptr, return_type or ptr.type) + + +def memset(builder, ptr, size, value): + """ + Fill *size* bytes starting from *ptr* with *value*. + """ + fn = builder.module.declare_intrinsic('llvm.memset', (voidptr_t, size.type)) + ptr = builder.bitcast(ptr, voidptr_t) + if isinstance(value, int): + value = int8_t(value) + builder.call(fn, [ptr, value, size, bool_t(0)]) + + +def memset_padding(builder, ptr): + """ + Fill padding bytes of the pointee with zeros. 
+ """ + # Load existing value + val = builder.load(ptr) + # Fill pointee with zeros + memset(builder, ptr, sizeof(builder, ptr.type), 0) + # Store value back + builder.store(val, ptr) + + +def global_constant(builder_or_module, name, value, linkage='internal'): + """ + Get or create a (LLVM module-)global constant with *name* or *value*. + """ + if isinstance(builder_or_module, ir.Module): + module = builder_or_module + else: + module = builder_or_module.module + data = add_global_variable(module, value.type, name) + data.linkage = linkage + data.global_constant = True + data.initializer = value + return data + + +def divmod_by_constant(builder, val, divisor): + """ + Compute the (quotient, remainder) of *val* divided by the constant + positive *divisor*. The semantics reflects those of Python integer + floor division, rather than C's / LLVM's signed division and modulo. + The difference lies with a negative *val*. + """ + assert divisor > 0 + divisor = val.type(divisor) + one = val.type(1) + + quot = alloca_once(builder, val.type) + + with builder.if_else(is_neg_int(builder, val)) as (if_neg, if_pos): + with if_pos: + # quot = val / divisor + quot_val = builder.sdiv(val, divisor) + builder.store(quot_val, quot) + with if_neg: + # quot = -1 + (val + 1) / divisor + val_plus_one = builder.add(val, one) + quot_val = builder.sdiv(val_plus_one, divisor) + builder.store(builder.sub(quot_val, one), quot) + + # rem = val - quot * divisor + # (should be slightly faster than a separate modulo operation) + quot_val = builder.load(quot) + rem_val = builder.sub(val, builder.mul(quot_val, divisor)) + return quot_val, rem_val + + +def cbranch_or_continue(builder, cond, bbtrue): + """ + Branch conditionally or continue. + + Note: a new block is created and builder is moved to the end of the new + block. + """ + bbcont = builder.append_basic_block('.continue') + builder.cbranch(cond, bbtrue, bbcont) + builder.position_at_end(bbcont) + return bbcont + + +def memcpy(builder, dst, src, count): + """ + Emit a memcpy to the builder. + + Copies each element of dst to src. Unlike the C equivalent, each element + can be any LLVM type. + + Assumes + ------- + * dst.type == src.type + * count is positive + """ + # Note this does seem to be optimized as a raw memcpy() by LLVM + # whenever possible... + assert dst.type == src.type + with for_range(builder, count, intp=count.type) as loop: + out_ptr = builder.gep(dst, [loop.index]) + in_ptr = builder.gep(src, [loop.index]) + builder.store(builder.load(in_ptr), out_ptr) + + +def _raw_memcpy(builder, func_name, dst, src, count, itemsize, align): + size_t = count.type + if isinstance(itemsize, int): + itemsize = ir.Constant(size_t, itemsize) + + memcpy = builder.module.declare_intrinsic(func_name, + [voidptr_t, voidptr_t, size_t]) + is_volatile = false_bit + builder.call(memcpy, [builder.bitcast(dst, voidptr_t), + builder.bitcast(src, voidptr_t), + builder.mul(count, itemsize), + is_volatile]) + + +def raw_memcpy(builder, dst, src, count, itemsize, align=1): + """ + Emit a raw memcpy() call for `count` items of size `itemsize` + from `src` to `dest`. + """ + return _raw_memcpy(builder, 'llvm.memcpy', dst, src, count, itemsize, align) + + +def raw_memmove(builder, dst, src, count, itemsize, align=1): + """ + Emit a raw memmove() call for `count` items of size `itemsize` + from `src` to `dest`. 
+ """ + return _raw_memcpy(builder, 'llvm.memmove', dst, src, count, + itemsize, align) + + +def muladd_with_overflow(builder, a, b, c): + """ + Compute (a * b + c) and return a (result, overflow bit) pair. + The operands must be signed integers. + """ + p = builder.smul_with_overflow(a, b) + prod = builder.extract_value(p, 0) + prod_ovf = builder.extract_value(p, 1) + s = builder.sadd_with_overflow(prod, c) + res = builder.extract_value(s, 0) + ovf = builder.or_(prod_ovf, builder.extract_value(s, 1)) + return res, ovf + + +def printf(builder, format, *args): + """ + Calls printf(). + Argument `format` is expected to be a Python string. + Values to be printed are listed in `args`. + + Note: There is no checking to ensure there is correct number of values + in `args` and there type matches the declaration in the format string. + """ + assert isinstance(format, str) + mod = builder.module + # Make global constant for format string + cstring = voidptr_t + fmt_bytes = make_bytearray((format + '\00').encode('ascii')) + global_fmt = global_constant(mod, "printf_format", fmt_bytes) + fnty = ir.FunctionType(int32_t, [cstring], var_arg=True) + # Insert printf() + try: + fn = mod.get_global('printf') + except KeyError: + fn = ir.Function(mod, fnty, name="printf") + # Call + ptr_fmt = builder.bitcast(global_fmt, cstring) + return builder.call(fn, [ptr_fmt] + list(args)) + + +def snprintf(builder, buffer, bufsz, format, *args): + """Calls libc snprintf(buffer, bufsz, format, ...args) + """ + assert isinstance(format, str) + mod = builder.module + # Make global constant for format string + cstring = voidptr_t + fmt_bytes = make_bytearray((format + '\00').encode('ascii')) + global_fmt = global_constant(mod, "snprintf_format", fmt_bytes) + fnty = ir.FunctionType( + int32_t, [cstring, intp_t, cstring], var_arg=True, + ) + # Actual symbol name of snprintf is different on win32. + symbol = 'snprintf' + if config.IS_WIN32: + symbol = '_' + symbol + # Insert snprintf() + try: + fn = mod.get_global(symbol) + except KeyError: + fn = ir.Function(mod, fnty, name=symbol) + # Call + ptr_fmt = builder.bitcast(global_fmt, cstring) + return builder.call(fn, [buffer, bufsz, ptr_fmt] + list(args)) + + +def snprintf_stackbuffer(builder, bufsz, format, *args): + """Similar to `snprintf()` but the buffer is stack allocated to size *bufsz*. + + Returns the buffer pointer as i8*. + """ + assert isinstance(bufsz, int) + spacety = ir.ArrayType(ir.IntType(8), bufsz) + space = alloca_once(builder, spacety, zfill=True) + buffer = builder.bitcast(space, voidptr_t) + snprintf(builder, buffer, intp_t(bufsz), format, *args) + return buffer + + +def normalize_ir_text(text): + """ + Normalize the given string to latin1 compatible encoding that is + suitable for use in LLVM IR. + """ + # Just re-encoding to latin1 is enough + return text.encode('utf8').decode('latin1') + + +def hexdump(builder, ptr, nbytes): + """Debug print the memory region in *ptr* to *ptr + nbytes* + as hex. 
+ """ + bytes_per_line = 16 + nbytes = builder.zext(nbytes, intp_t) + printf(builder, "hexdump p=%p n=%zu", + ptr, nbytes) + byte_t = ir.IntType(8) + ptr = builder.bitcast(ptr, byte_t.as_pointer()) + # Loop to print the bytes in *ptr* as hex + with for_range(builder, nbytes) as idx: + div_by = builder.urem(idx.index, intp_t(bytes_per_line)) + do_new_line = builder.icmp_unsigned("==", div_by, intp_t(0)) + with builder.if_then(do_new_line): + printf(builder, "\n") + + offset = builder.gep(ptr, [idx.index]) + val = builder.load(offset) + printf(builder, " %02x", val) + printf(builder, "\n") + + +def is_nonelike(ty): + """ returns if 'ty' is none """ + return ( + ty is None or + isinstance(ty, types.NoneType) or + isinstance(ty, types.Omitted) + ) + + +def create_constant_array(ty, val): + """ + Create an LLVM-constant of a fixed-length array from Python values. + + The type provided is the type of the elements. + """ + return ir.Constant(ir.ArrayType(ty, len(val)), val) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/codegen.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/codegen.py new file mode 100644 index 000000000..e988fab12 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/codegen.py @@ -0,0 +1,1437 @@ +import warnings +import functools +import locale +import weakref +import ctypes +import html +import textwrap + +import llvmlite.binding as ll +import llvmlite.ir as llvmir + +from abc import abstractmethod, ABCMeta +from numba.core import utils, config, cgutils +from numba.core.llvm_bindings import create_pass_manager_builder +from numba.core.runtime.nrtopt import remove_redundant_nrt_refct +from numba.core.runtime import rtsys +from numba.core.compiler_lock import require_global_compiler_lock +from numba.core.errors import NumbaInvalidConfigWarning +from numba.misc.inspection import disassemble_elf_to_cfg +from numba.misc.llvm_pass_timings import PassTimingsCollection + + +_x86arch = frozenset(['x86', 'i386', 'i486', 'i586', 'i686', 'i786', + 'i886', 'i986']) + + +def _is_x86(triple): + arch = triple.split('-')[0] + return arch in _x86arch + + +def _parse_refprune_flags(): + """Parse refprune flags from the `config`. + + Invalid values are ignored an warn via a `NumbaInvalidConfigWarning` + category. + + Returns + ------- + flags : llvmlite.binding.RefPruneSubpasses + """ + flags = config.LLVM_REFPRUNE_FLAGS.split(',') + if not flags: + return 0 + val = 0 + for item in flags: + item = item.strip() + try: + val |= getattr(ll.RefPruneSubpasses, item.upper()) + except AttributeError: + warnings.warn(f"invalid refprune flags {item!r}", + NumbaInvalidConfigWarning) + return val + + +def dump(header, body, lang): + if config.HIGHLIGHT_DUMPS: + try: + import pygments + except ImportError: + msg = "Please install pygments to see highlighted dumps" + raise ValueError(msg) + else: + from pygments import highlight + from pygments.lexers import GasLexer as gas_lexer + from pygments.lexers import LlvmLexer as llvm_lexer + from pygments.formatters import Terminal256Formatter + from numba.misc.dump_style import by_colorscheme + + lexer_map = {'llvm': llvm_lexer, 'asm': gas_lexer} + lexer = lexer_map[lang] + def printer(arg): + print(highlight(arg, lexer(), + Terminal256Formatter(style=by_colorscheme()))) + else: + printer = print + print('=' * 80) + print(header.center(80, '-')) + printer(body) + print('=' * 80) + + +class _CFG(object): + """ + Wraps the CFG graph for different display method. 
+ + Instance of the class can be stringified (``__repr__`` is defined) to get + the graph in DOT format. The ``.display()`` method plots the graph in + PDF. If in IPython notebook, the returned image can be inlined. + """ + def __init__(self, cres, name, py_func, **kwargs): + self.cres = cres + self.name = name + self.py_func = py_func + fn = cres.get_function(name) + self.dot = ll.get_function_cfg(fn) + self.kwargs = kwargs + + def pretty_printer(self, filename=None, view=None, render_format=None, + highlight=True, + interleave=False, strip_ir=False, show_key=True, + fontsize=10): + """ + "Pretty" prints the DOT graph of the CFG. + For explanation of the parameters see the docstring for + numba.core.dispatcher::inspect_cfg. + """ + import graphviz as gv + import re + import json + import inspect + from llvmlite import binding as ll + from numba.typed import List + from types import SimpleNamespace + from collections import defaultdict + + _default = False + _highlight = SimpleNamespace(incref=_default, + decref=_default, + returns=_default, + raises=_default, + meminfo=_default, + branches=_default, + llvm_intrin_calls=_default, + function_calls=_default,) + _interleave = SimpleNamespace(python=_default, lineinfo=_default) + + def parse_config(_config, kwarg): + """ Parses the kwarg into a consistent format for use in configuring + the Digraph rendering. _config is the configuration instance to + update, kwarg is the kwarg on which to base the updates. + """ + if isinstance(kwarg, bool): + for attr in _config.__dict__: + setattr(_config, attr, kwarg) + elif isinstance(kwarg, dict): + for k, v in kwarg.items(): + if k not in _config.__dict__: + raise ValueError("Unexpected key in kwarg: %s" % k) + if isinstance(v, bool): + setattr(_config, k, v) + else: + msg = "Unexpected value for key: %s, got:%s" + raise ValueError(msg % (k, v)) + elif isinstance(kwarg, set): + for item in kwarg: + if item not in _config.__dict__: + raise ValueError("Unexpected key in kwarg: %s" % item) + else: + setattr(_config, item, True) + else: + msg = "Unhandled configuration type for kwarg %s" + raise ValueError(msg % type(kwarg)) + + parse_config(_highlight, highlight) + parse_config(_interleave, interleave) + + # This is the colour scheme. The graphviz HTML label renderer only takes + # names for colours: https://www.graphviz.org/doc/info/shapes.html#html + cs = defaultdict(lambda: 'white') # default bg colour is white + cs['marker'] = 'orange' + cs['python'] = 'yellow' + cs['truebr'] = 'green' + cs['falsebr'] = 'red' + cs['incref'] = 'cyan' + cs['decref'] = 'turquoise' + cs['raise'] = 'lightpink' + cs['meminfo'] = 'lightseagreen' + cs['return'] = 'purple' + cs['llvm_intrin_calls'] = 'rosybrown' + cs['function_calls'] = 'tomato' + + # Get the raw dot format information from LLVM and the LLVM IR + fn = self.cres.get_function(self.name) + #raw_dot = ll.get_function_cfg(fn).replace('\\l...', '') + llvm_str = self.cres.get_llvm_str() + + def get_metadata(llvm_str): + """ Gets the metadata entries from the LLVM IR, these look something + like '!123 = INFORMATION'. Returns a map of metadata key to metadata + value, i.e. 
+        from the example {'!123': INFORMATION}"""
+            md = {}
+            metadata_entry = re.compile(r'(^[!][0-9]+)(\s+=\s+.*)')
+            for x in llvm_str.splitlines():
+                match = metadata_entry.match(x)
+                if match is not None:
+                    g = match.groups()
+                    if g is not None:
+                        assert len(g) == 2
+                        md[g[0]] = g[1]
+            return md
+
+        md = get_metadata(llvm_str)
+
+        # setup digraph with initial properties
+        def init_digraph(name, fname, fontsize):
+            # name and fname are arbitrary graph and file names, they appear
+            # in some rendering formats; the fontsize determines the output
+            # fontsize.
+
+            # truncate massive mangled names used as file names as they cause
+            # OSError when trying to render to pdf
+            cmax = 200
+            if len(fname) > cmax:
+                wstr = (f'CFG output filename "{fname}" exceeds maximum '
+                        f'supported length, it will be truncated.')
+                warnings.warn(wstr, NumbaInvalidConfigWarning)
+                fname = fname[:cmax]
+            f = gv.Digraph(name, filename=fname)
+            f.attr(rankdir='TB')
+            f.attr('node', shape='none', fontsize='%s' % str(fontsize))
+            return f
+
+        f = init_digraph(self.name, self.name, fontsize)
+
+        # A lot of regex is needed to parse the raw dot output. This output
+        # contains a mix of LLVM IR in the labels, and also DOT markup.
+
+        # DOT syntax, matches a "port" (where the tail of an edge starts)
+        port_match = re.compile('.*{(.*)}.*')
+        # DOT syntax, matches the "port" value from a found "port_match"
+        port_jmp_match = re.compile('.*<(.*)>(.*)')
+        # LLVM syntax, matches a LLVM debug marker
+        metadata_marker = re.compile(r'.*!dbg\s+(![0-9]+).*')
+        # LLVM syntax, matches a location entry
+        location_expr = (r'.*!DILocation\(line:\s+([0-9]+),'
+                         r'\s+column:\s+([0-9]),.*')
+        location_entry = re.compile(location_expr)
+        # LLVM syntax, matches LLVMs internal debug value calls
+        dbg_value = re.compile(r'.*call void @llvm.dbg.value.*')
+        # LLVM syntax, matches tokens for highlighting
+        nrt_incref = re.compile(r"@NRT_incref\b")
+        nrt_decref = re.compile(r"@NRT_decref\b")
+        nrt_meminfo = re.compile("@NRT_MemInfo")
+        ll_intrin_calls = re.compile(r".*call.*@llvm\..*")
+        ll_function_call = re.compile(r".*call.*@.*")
+        ll_raise = re.compile(r"ret i32.*\!ret_is_raise.*")
+        ll_return = re.compile("ret i32 [^1],?.*")
+
+        # wrapper function for line wrapping LLVM lines
+        def wrap(s):
+            return textwrap.wrap(s, width=120, subsequent_indent='... ')
+
+        # function to fix up LLVM IR etc (sometimes escaped for DOT!) that
+        # needs to be HTML escaped
+        def clean(s):
+            # Grab the first 300 chars only: 1. this should be enough to
+            # identify the token and it keeps names short. 2. graphviz/dot
+            # has a maximum buffer size near 585?!, with additional
+            # transforms it's hard to know if this would be exceeded.
+            # 3. a hash of the token string is written into the rendering to
+            # permit exact identification against e.g. an LLVM IR dump if
+            # necessary.
+            n = 300
+            if len(s) > n:
+                hs = str(hash(s))
+                s = '{}...<hash={}>'.format(s[:n], hs)
+            s = html.escape(s)  # deals with &, < and >
+            s = s.replace('\\{', "&#123;")
+            s = s.replace('\\}', "&#125;")
+            s = s.replace('\\', "&#92;")
+            s = s.replace('%', "&#37;")
+            s = s.replace('!', "&#33;")
+            return s
+
+        # These hold the node and edge ids from the raw dot information. They
+        # are used later to wire up a new DiGraph that has the same structure
+        # as the raw dot but with new nodes.
+        node_ids = {}
+        edge_ids = {}
+
+        # Python source lines, used if python source interleave is requested
+        if _interleave.python:
+            src_code, firstlineno = inspect.getsourcelines(self.py_func)
+
+        # This is the dot info from LLVM; it's in DOT form and has
+        # continuation lines.  Strip them and then re-parse into `dot_json`
+        # form for use in producing a formatted output.
+        raw_dot = ll.get_function_cfg(fn).replace('\\l...', '')
+        json_bytes = gv.Source(raw_dot).pipe(format='dot_json')
+        jzon = json.loads(json_bytes.decode('utf-8'))
+
+        idc = 0
+        # Walk the "objects" (nodes) in the DOT output
+        for obj in jzon['objects']:
+            # These are used to keep tabs on the current line and column
+            # numbers as per the markers. They are tracked so as to make sure
+            # a marker is only emitted if there's a change in the marker.
+            cur_line, cur_col = -1, -1
+            label = obj['label']
+            name = obj['name']
+            gvid = obj['_gvid']
+            node_ids[gvid] = name
+            # Label is DOT format; it needs the head and tail removing and
+            # then splitting for walking.
+            label = label[1:-1]
+            lines = label.split('\\l')
+
+            # Holds the new lines
+            new_lines = []
+
+            # Aim is to produce an HTML table a bit like this:
+            #
+            # |------------|
+            # |   HEADER   |  <-- this is the block header
+            # |------------|
+            # |  LLVM SRC  |  <--
+            # |  Marker?   |   <  this is the label/block body
+            # | Python src?|  <--
+            # |------------|
+            # |  T  |  F   |  <-- this is the "ports", also determines col_span
+            # --------------
+            #
+
+            # This is HTML syntax, it's the column span. If there's a switch
+            # or a branch at the bottom of the node this is rendered as
+            # multiple columns in a table. The first job is to go and render
+            # that and work out how many columns are needed, as that dictates
+            # how many columns the rest of the source lines must span. In DOT
+            # syntax the places that edges join nodes are referred to as
+            # "ports". Syntax in DOT is like `node:port`.
+            col_span = 1
+
+            # First see if there is a port entry for this node
+            port_line = ''
+            matched = port_match.match(lines[-1])
+            sliced_lines = lines
+            if matched is not None:
+                # There is a port
+                ports = matched.groups()[0]
+                ports_tokens = ports.split('|')
+                col_span = len(ports_tokens)
+                # Generate HTML table data cells, one for each port. If the
+                # ports correspond to a branch then they can optionally be
+                # highlighted based on T/F.
+                tdfmt = ('<td BGCOLOR="{}" PORT="{}" BORDER="1">{}</td>')
+                tbl_data = []
+                if _highlight.branches:
+                    colors = {'T': cs['truebr'], 'F': cs['falsebr']}
+                else:
+                    colors = {}
+                for tok in ports_tokens:
+                    target, value = port_jmp_match.match(tok).groups()
+                    color = colors.get(value, 'white')
+                    tbl_data.append(tdfmt.format(color, target, value))
+                port_line = ''.join(tbl_data)
+                # Drop the last line from the rest of the parse as it's the
+                # port and has just been dealt with.
+                sliced_lines = lines[:-1]
+
+            # loop peel the block header, it needs a HTML border
+            fmtheader = ('<tr><td BGCOLOR="{}" COLSPAN="{}" BORDER="1">{}'
+                         '</td></tr>')
+            new_lines.append(fmtheader.format(cs['default'], col_span,
+                                              clean(sliced_lines[0].strip())))
+
+            # process the rest of the block, creating the table one row at a
+            # time
+            fmt = ('<tr><td BGCOLOR="{}" COLSPAN="{}" BORDER="0" '
+                   'ALIGN="left">{}</td></tr>')
+
+            def metadata_interleave(l, new_lines):
+                """
+                Search line `l` for metadata associated with python or line
+                info and inject it into `new_lines` if requested.
+ """ + matched = metadata_marker.match(l) + if matched is not None: + # there's a metadata marker + g = matched.groups() + if g is not None: + assert len(g) == 1, g + marker = g[0] + debug_data = md.get(marker, None) + if debug_data is not None: + # and the metadata marker has a corresponding piece + # of metadata + ld = location_entry.match(debug_data) + if ld is not None: + # and the metadata is line info... proceed + assert len(ld.groups()) == 2, ld + line, col = ld.groups() + # only emit a new marker if the line number in + # the metadata is "new". + if line != cur_line or col != cur_col: + if _interleave.lineinfo: + mfmt = 'Marker %s, Line %s, column %s' + mark_line = mfmt % (marker, line, col) + ln = fmt.format(cs['marker'], col_span, + clean(mark_line)) + new_lines.append(ln) + if _interleave.python: + # TODO: + # +1 for decorator, this probably needs + # the same thing doing as for the + # error messages where the decorator + # is scanned for, its not always +1! + lidx = int(line) - (firstlineno + 1) + source_line = src_code[lidx + 1] + ln = fmt.format(cs['python'], col_span, + clean(source_line)) + new_lines.append(ln) + return line, col + + for l in sliced_lines[1:]: + + # Drop LLVM debug call entries + if dbg_value.match(l): + continue + + # if requested generate interleaving of markers or python from + # metadata + if _interleave.lineinfo or _interleave.python: + updated_lineinfo = metadata_interleave(l, new_lines) + if updated_lineinfo is not None: + cur_line, cur_col = updated_lineinfo + + # Highlight other LLVM features if requested, HTML BGCOLOR + # property is set by this. + if _highlight.incref and nrt_incref.search(l): + colour = cs['incref'] + elif _highlight.decref and nrt_decref.search(l): + colour = cs['decref'] + elif _highlight.meminfo and nrt_meminfo.search(l): + colour = cs['meminfo'] + elif _highlight.raises and ll_raise.search(l): + # search for raise as its more specific than exit + colour = cs['raise'] + elif _highlight.returns and ll_return.search(l): + colour = cs['return'] + elif _highlight.llvm_intrin_calls and ll_intrin_calls.search(l): + colour = cs['llvm_intrin_calls'] + elif _highlight.function_calls and ll_function_call.search(l): + colour = cs['function_calls'] + else: + colour = cs['default'] + + # Use the default coloring as a flag to force printing if a + # special token print was requested AND LLVM ir stripping is + # required + if colour is not cs['default'] or not strip_ir: + for x in wrap(clean(l)): + new_lines.append(fmt.format(colour, col_span, x)) + + # add in the port line at the end of the block if it was present + # (this was built right at the top of the parse) + if port_line: + new_lines.append('{}'.format(port_line)) + + # If there was data, create a table, else don't! + dat = ''.join(new_lines) + if dat: + tab = (('%s
+                        'CELLSPACING="0">%s</table>') % (idc, dat))
+                label = '<{}>'.format(tab)
+            else:
+                label = ''
+
+            # Finally, add a replacement node for the original with a new
+            # marked up label.
+            f.node(name, label=label)
+
+        # Parse the edge data
+        if 'edges' in jzon:  # might be a single block, no edges
+            for edge in jzon['edges']:
+                gvid = edge['_gvid']
+                tp = edge.get('tailport', None)
+                edge_ids[gvid] = (edge['head'], edge['tail'], tp)
+
+        # Write in the edge wiring with respect to the new nodes:ports.
+        for gvid, edge in edge_ids.items():
+            tail = node_ids[edge[1]]
+            head = node_ids[edge[0]]
+            port = edge[2]
+            if port is not None:
+                tail += ':%s' % port
+            f.edge(tail, head)
+
+        # Add a key to the graph if requested.
+        if show_key:
+            key_tab = []
+            for k, v in cs.items():
+                key_tab.append('<tr><td BGCOLOR="{}">{}</td></tr>'.format(v, k))
+            # The first < and last > are DOT syntax, the rest is DOT HTML.
+            f.node("Key", label=('<<table BORDER="1" CELLPADDING="2">'
+                                 '<tr><td>Key:</td></tr>{}'
+                                 '</table>>').format(''.join(key_tab)))
+
+        # Render if required
+        if filename is not None or view is not None:
+            f.render(filename=filename, view=view, format=render_format)
+
+        # Else pipe out an SVG
+        return f.pipe(format='svg')
+
+    def display(self, filename=None, format='pdf', view=False):
+        """
+        Plot the CFG. In an IPython notebook, the returned image object can
+        be inlined.
+
+        The *filename* option can be set to a specific path for the rendered
+        output to write to. If the *view* option is True, the plot is opened
+        by the system default application for the image format (PDF).
+        *format* can be any valid format string accepted by graphviz; the
+        default is 'pdf'.
+        """
+        rawbyt = self.pretty_printer(filename=filename, view=view,
+                                     render_format=format, **self.kwargs)
+        return rawbyt.decode('utf-8')
+
+    def _repr_svg_(self):
+        return self.pretty_printer(**self.kwargs).decode('utf-8')
+
+    def __repr__(self):
+        return self.dot
+
+
+class CodeLibrary(metaclass=ABCMeta):
+    """
+    An interface for bundling LLVM code together and compiling it.
+    It is tied to a *codegen* instance (e.g. JITCPUCodegen) that will
+    determine how the LLVM code is transformed and linked together.
+    """
+
+    _finalized = False
+    _object_caching_enabled = False
+    _disable_inspection = False
+
+    def __init__(self, codegen: "CPUCodegen", name: str):
+        self._codegen = codegen
+        self._name = name
+        ptc_name = f"{self.__class__.__name__}({self._name!r})"
+        self._recorded_timings = PassTimingsCollection(ptc_name)
+        # Track names of the dynamic globals
+        self._dynamic_globals = []
+
+    @property
+    def has_dynamic_globals(self):
+        self._ensure_finalized()
+        return len(self._dynamic_globals) > 0
+
+    @property
+    def recorded_timings(self):
+        return self._recorded_timings
+
+    @property
+    def codegen(self):
+        """
+        The codegen object owning this library.
+        """
+        return self._codegen
+
+    @property
+    def name(self):
+        return self._name
+
+    def __repr__(self):
+        return "<Library %r at 0x%x>" % (self.name, id(self))
+
+    def _raise_if_finalized(self):
+        if self._finalized:
+            raise RuntimeError("operation impossible on finalized object %r"
+                               % (self,))
+
+    def _ensure_finalized(self):
+        if not self._finalized:
+            self.finalize()
+
+    def create_ir_module(self, name):
+        """
+        Create an LLVM IR module for use by this library.
+        """
+        self._raise_if_finalized()
+        ir_module = self._codegen._create_empty_module(name)
+        return ir_module
+
+    @abstractmethod
+    def add_linking_library(self, library):
+        """
+        Add a library for linking into this library, without losing
+        the original library.
+        """
+
+    @abstractmethod
+    def add_ir_module(self, ir_module):
+        """
+        Add an LLVM IR module's contents to this library.
+        """
+
+    @abstractmethod
+    def finalize(self):
+        """
+        Finalize the library. After this call, nothing can be added anymore.
+        Finalization involves various stages of code optimization and
+        linking.
+        """
+
+    @abstractmethod
+    def get_function(self, name):
+        """
+        Return the function named ``name``.
+        """
+
+    @abstractmethod
+    def get_llvm_str(self):
+        """
+        Get the human-readable form of the LLVM module.
+        """
+
+    @abstractmethod
+    def get_asm_str(self):
+        """
+        Get the human-readable assembly.
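+
+        Illustrative usage (a sketch, assuming a finalized library ``lib``):
+
+            asm = lib.get_asm_str()
+            print(asm.splitlines()[0])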
+ """ + + # + # Object cache hooks and serialization + # + + def enable_object_caching(self): + self._object_caching_enabled = True + self._compiled_object = None + self._compiled = False + + def _get_compiled_object(self): + if not self._object_caching_enabled: + raise ValueError("object caching not enabled in %s" % (self,)) + if self._compiled_object is None: + raise RuntimeError("no compiled object yet for %s" % (self,)) + return self._compiled_object + + def _set_compiled_object(self, value): + if not self._object_caching_enabled: + raise ValueError("object caching not enabled in %s" % (self,)) + if self._compiled: + raise ValueError("library already compiled: %s" % (self,)) + self._compiled_object = value + self._disable_inspection = True + + +class CPUCodeLibrary(CodeLibrary): + + def __init__(self, codegen, name): + super().__init__(codegen, name) + self._linking_libraries = [] # maintain insertion order + self._final_module = ll.parse_assembly( + str(self._codegen._create_empty_module(self.name))) + self._final_module.name = cgutils.normalize_ir_text(self.name) + self._shared_module = None + + def _optimize_functions(self, ll_module): + """ + Internal: run function-level optimizations inside *ll_module*. + """ + # Enforce data layout to enable layout-specific optimizations + ll_module.data_layout = self._codegen._data_layout + with self._codegen._function_pass_manager(ll_module) as fpm: + # Run function-level optimizations to reduce memory usage and improve + # module-level optimization. + for func in ll_module.functions: + k = f"Function passes on {func.name!r}" + with self._recorded_timings.record(k): + fpm.initialize() + fpm.run(func) + fpm.finalize() + + def _optimize_final_module(self): + """ + Internal: optimize this library's final module. + """ + cheap_name = "Module passes (cheap optimization for refprune)" + with self._recorded_timings.record(cheap_name): + # A cheaper optimisation pass is run first to try and get as many + # refops into the same function as possible via inlining + self._codegen._mpm_cheap.run(self._final_module) + # Refop pruning is then run on the heavily inlined function + if not config.LLVM_REFPRUNE_PASS: + self._final_module = remove_redundant_nrt_refct(self._final_module) + full_name = "Module passes (full optimization)" + with self._recorded_timings.record(full_name): + # The full optimisation suite is then run on the refop pruned IR + self._codegen._mpm_full.run(self._final_module) + + def _get_module_for_linking(self): + """ + Internal: get a LLVM module suitable for linking multiple times + into another library. Exported functions are made "linkonce_odr" + to allow for multiple definitions, inlining, and removal of + unused exports. + + See discussion in https://github.com/numba/numba/pull/890 + """ + self._ensure_finalized() + if self._shared_module is not None: + return self._shared_module + mod = self._final_module + to_fix = [] + nfuncs = 0 + for fn in mod.functions: + nfuncs += 1 + if not fn.is_declaration and fn.linkage == ll.Linkage.external: + to_fix.append(fn.name) + if nfuncs == 0: + # This is an issue which can occur if loading a module + # from an object file and trying to link with it, so detect it + # here to make debugging easier. 
+ raise RuntimeError("library unfit for linking: " + "no available functions in %s" + % (self,)) + if to_fix: + mod = mod.clone() + for name in to_fix: + # NOTE: this will mark the symbol WEAK if serialized + # to an ELF file + mod.get_function(name).linkage = 'linkonce_odr' + self._shared_module = mod + return mod + + def add_linking_library(self, library): + library._ensure_finalized() + self._linking_libraries.append(library) + + def add_ir_module(self, ir_module): + self._raise_if_finalized() + assert isinstance(ir_module, llvmir.Module) + ir = cgutils.normalize_ir_text(str(ir_module)) + ll_module = ll.parse_assembly(ir) + ll_module.name = ir_module.name + ll_module.verify() + self.add_llvm_module(ll_module) + + def add_llvm_module(self, ll_module): + self._optimize_functions(ll_module) + # TODO: we shouldn't need to recreate the LLVM module object + if not config.LLVM_REFPRUNE_PASS: + ll_module = remove_redundant_nrt_refct(ll_module) + self._final_module.link_in(ll_module) + + def finalize(self): + require_global_compiler_lock() + + # Report any LLVM-related problems to the user + self._codegen._check_llvm_bugs() + + self._raise_if_finalized() + + if config.DUMP_FUNC_OPT: + dump("FUNCTION OPTIMIZED DUMP %s" % self.name, + self.get_llvm_str(), 'llvm') + + # Link libraries for shared code + seen = set() + for library in self._linking_libraries: + if library not in seen: + seen.add(library) + self._final_module.link_in( + library._get_module_for_linking(), preserve=True, + ) + + # Optimize the module after all dependences are linked in above, + # to allow for inlining. + self._optimize_final_module() + + self._final_module.verify() + self._finalize_final_module() + + def _finalize_dynamic_globals(self): + # Scan for dynamic globals + for gv in self._final_module.global_variables: + if gv.name.startswith('numba.dynamic.globals'): + self._dynamic_globals.append(gv.name) + + def _verify_declare_only_symbols(self): + # Verify that no declare-only function compiled by numba. + for fn in self._final_module.functions: + # We will only check for symbol name starting with '_ZN5numba' + if fn.is_declaration and fn.name.startswith('_ZN5numba'): + msg = 'Symbol {} not linked properly' + raise AssertionError(msg.format(fn.name)) + + def _finalize_final_module(self): + """ + Make the underlying LLVM module ready to use. + """ + self._finalize_dynamic_globals() + self._verify_declare_only_symbols() + + # Remember this on the module, for the object cache hooks + self._final_module.__library = weakref.proxy(self) + + # It seems add_module() must be done only here and not before + # linking in other modules, otherwise get_pointer_to_function() + # could fail. + cleanup = self._codegen._add_module(self._final_module) + if cleanup: + weakref.finalize(self, cleanup) + self._finalize_specific() + + self._finalized = True + + if config.DUMP_OPTIMIZED: + dump("OPTIMIZED DUMP %s" % self.name, self.get_llvm_str(), 'llvm') + + if config.DUMP_ASSEMBLY: + dump("ASSEMBLY %s" % self.name, self.get_asm_str(), 'asm') + + def get_defined_functions(self): + """ + Get all functions defined in the library. The library must have + been finalized. + """ + mod = self._final_module + for fn in mod.functions: + if not fn.is_declaration: + yield fn + + def get_function(self, name): + return self._final_module.get_function(name) + + def _sentry_cache_disable_inspection(self): + if self._disable_inspection: + warnings.warn('Inspection disabled for cached code. 
' + 'Invalid result is returned.') + + def get_llvm_str(self): + self._sentry_cache_disable_inspection() + return str(self._final_module) + + def get_asm_str(self): + self._sentry_cache_disable_inspection() + return str(self._codegen._tm.emit_assembly(self._final_module)) + + def get_function_cfg(self, name, py_func=None, **kwargs): + """ + Get control-flow graph of the LLVM function + """ + self._sentry_cache_disable_inspection() + return _CFG(self, name, py_func, **kwargs) + + def get_disasm_cfg(self, mangled_name): + """ + Get the CFG of the disassembly of the ELF object at symbol mangled_name. + + Requires python package: r2pipe + Requires radare2 binary on $PATH. + Notebook rendering requires python package: graphviz + Optionally requires a compiler toolchain (via pycc) to link the ELF to + get better disassembly results. + """ + elf = self._get_compiled_object() + return disassemble_elf_to_cfg(elf, mangled_name) + + @classmethod + def _dump_elf(cls, buf): + """ + Dump the symbol table of an ELF file. + Needs pyelftools (https://github.com/eliben/pyelftools) + """ + from elftools.elf.elffile import ELFFile + from elftools.elf import descriptions + from io import BytesIO + f = ELFFile(BytesIO(buf)) + print("ELF file:") + for sec in f.iter_sections(): + if sec['sh_type'] == 'SHT_SYMTAB': + symbols = sorted(sec.iter_symbols(), key=lambda sym: sym.name) + print(" symbols:") + for sym in symbols: + if not sym.name: + continue + print(" - %r: size=%d, value=0x%x, type=%s, bind=%s" + % (sym.name.decode(), + sym['st_size'], + sym['st_value'], + descriptions.describe_symbol_type(sym['st_info']['type']), + descriptions.describe_symbol_bind(sym['st_info']['bind']), + )) + print() + + @classmethod + def _object_compiled_hook(cls, ll_module, buf): + """ + `ll_module` was compiled into object code `buf`. + """ + try: + self = ll_module.__library + except AttributeError: + return + if self._object_caching_enabled: + self._compiled = True + self._compiled_object = buf + + @classmethod + def _object_getbuffer_hook(cls, ll_module): + """ + Return a cached object code for `ll_module`. + """ + try: + self = ll_module.__library + except AttributeError: + return + if self._object_caching_enabled and self._compiled_object: + buf = self._compiled_object + self._compiled_object = None + return buf + + def serialize_using_bitcode(self): + """ + Serialize this library using its bitcode as the cached representation. + """ + self._ensure_finalized() + return (self.name, 'bitcode', self._final_module.as_bitcode()) + + def serialize_using_object_code(self): + """ + Serialize this library using its object code as the cached + representation. We also include its bitcode for further inlining + with other libraries. 
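+
+        The returned value mirrors the ``return`` statement below; roughly:
+
+            (self.name, 'object', (compiled_object_bytes, shared_bitcode))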
+ """ + self._ensure_finalized() + data = (self._get_compiled_object(), + self._get_module_for_linking().as_bitcode()) + return (self.name, 'object', data) + + @classmethod + def _unserialize(cls, codegen, state): + name, kind, data = state + self = codegen.create_library(name) + assert isinstance(self, cls) + if kind == 'bitcode': + # No need to re-run optimizations, just make the module ready + self._final_module = ll.parse_bitcode(data) + self._finalize_final_module() + return self + elif kind == 'object': + object_code, shared_bitcode = data + self.enable_object_caching() + self._set_compiled_object(object_code) + self._shared_module = ll.parse_bitcode(shared_bitcode) + self._finalize_final_module() + # Load symbols from cache + self._codegen._engine._load_defined_symbols(self._shared_module) + return self + else: + raise ValueError("unsupported serialization kind %r" % (kind,)) + + +class AOTCodeLibrary(CPUCodeLibrary): + + def emit_native_object(self): + """ + Return this library as a native object (a bytestring) -- for example + ELF under Linux. + + This function implicitly calls .finalize(). + """ + self._ensure_finalized() + return self._codegen._tm.emit_object(self._final_module) + + def emit_bitcode(self): + """ + Return this library as LLVM bitcode (a bytestring). + + This function implicitly calls .finalize(). + """ + self._ensure_finalized() + return self._final_module.as_bitcode() + + def _finalize_specific(self): + pass + + +class JITCodeLibrary(CPUCodeLibrary): + + def get_pointer_to_function(self, name): + """ + Generate native code for function named *name* and return a pointer + to the start of the function (as an integer). + + This function implicitly calls .finalize(). + + Returns + ------- + pointer : int + - zero (null) if no symbol of *name* is defined by this code + library. + - non-zero if the symbol is defined. + """ + self._ensure_finalized() + ee = self._codegen._engine + if not ee.is_symbol_defined(name): + return 0 + else: + return self._codegen._engine.get_function_address(name) + + def _finalize_specific(self): + self._codegen._scan_and_fix_unresolved_refs(self._final_module) + with self._recorded_timings.record("Finalize object"): + self._codegen._engine.finalize_object() + + +class RuntimeLinker(object): + """ + For tracking unresolved symbols generated at runtime due to recursion. + """ + PREFIX = '.numba.unresolved$' + + def __init__(self): + self._unresolved = utils.UniqueDict() + self._defined = set() + self._resolved = [] + + def scan_unresolved_symbols(self, module, engine): + """ + Scan and track all unresolved external symbols in the module and + allocate memory for it. + """ + prefix = self.PREFIX + + for gv in module.global_variables: + if gv.name.startswith(prefix): + sym = gv.name[len(prefix):] + # Avoid remapping to existing GV + if engine.is_symbol_defined(gv.name): + continue + # Allocate a memory space for the pointer + abortfn = rtsys.library.get_pointer_to_function("nrt_unresolved_abort") + ptr = ctypes.c_void_p(abortfn) + engine.add_global_mapping(gv, ctypes.addressof(ptr)) + self._unresolved[sym] = ptr + + def scan_defined_symbols(self, module): + """ + Scan and track all defined symbols. + """ + for fn in module.functions: + if not fn.is_declaration: + self._defined.add(fn.name) + + def resolve(self, engine): + """ + Fix unresolved symbols if they are defined. 
+ """ + # An iterator to get all unresolved but available symbols + pending = [name for name in self._unresolved if name in self._defined] + # Resolve pending symbols + for name in pending: + # Get runtime address + fnptr = engine.get_function_address(name) + # Fix all usage + ptr = self._unresolved[name] + ptr.value = fnptr + self._resolved.append((name, ptr)) # keep ptr alive + # Delete resolved + del self._unresolved[name] + +def _proxy(old): + @functools.wraps(old) + def wrapper(self, *args, **kwargs): + return old(self._ee, *args, **kwargs) + return wrapper + + +class JitEngine(object): + """Wraps an ExecutionEngine to provide custom symbol tracking. + Since the symbol tracking is incomplete (doesn't consider + loaded code object), we are not putting it in llvmlite. + """ + def __init__(self, ee): + self._ee = ee + # Track symbol defined via codegen'd Module + # but not any cached object. + # NOTE: `llvm::ExecutionEngine` will catch duplicated symbols and + # we are not going to protect against that. A proper duplicated + # symbol detection will need a more logic to check for the linkage + # (e.g. like `weak` linkage symbol can override). This + # `_defined_symbols` set will be just enough to tell if a symbol + # exists and will not cause the `EE` symbol lookup to `exit(1)` + # when symbol-not-found. + self._defined_symbols = set() + + def is_symbol_defined(self, name): + """Is the symbol defined in this session? + """ + return name in self._defined_symbols + + def _load_defined_symbols(self, mod): + """Extract symbols from the module + """ + for gsets in (mod.functions, mod.global_variables): + self._defined_symbols |= {gv.name for gv in gsets + if not gv.is_declaration} + + def add_module(self, module): + """Override ExecutionEngine.add_module + to keep info about defined symbols. + """ + self._load_defined_symbols(module) + return self._ee.add_module(module) + + def add_global_mapping(self, gv, addr): + """Override ExecutionEngine.add_global_mapping + to keep info about defined symbols. + """ + self._defined_symbols.add(gv.name) + return self._ee.add_global_mapping(gv, addr) + + # + # The remaining methods are re-export of the ExecutionEngine APIs + # + set_object_cache = _proxy(ll.ExecutionEngine.set_object_cache) + finalize_object = _proxy(ll.ExecutionEngine.finalize_object) + get_function_address = _proxy(ll.ExecutionEngine.get_function_address) + get_global_value_address = _proxy( + ll.ExecutionEngine.get_global_value_address + ) + + +class Codegen(metaclass=ABCMeta): + """ + Base Codegen class. It is expected that subclasses set the class attribute + ``_library_class``, indicating the CodeLibrary class for the target. + + Subclasses should also initialize: + + ``self._data_layout``: the data layout for the target. + ``self._target_data``: the binding layer ``TargetData`` for the target. + """ + + @abstractmethod + def _create_empty_module(self, name): + """ + Create a new empty module suitable for the target. + """ + + @abstractmethod + def _add_module(self, module): + """ + Add a module to the execution engine. Ownership of the module is + transferred to the engine. + """ + + @property + def target_data(self): + """ + The LLVM "target data" object for this codegen instance. + """ + return self._target_data + + def create_library(self, name, **kwargs): + """ + Create a :class:`CodeLibrary` object for use with this codegen + instance. 
+ """ + return self._library_class(self, name, **kwargs) + + def unserialize_library(self, serialized): + return self._library_class._unserialize(self, serialized) + + +class CPUCodegen(Codegen): + + def __init__(self, module_name): + initialize_llvm() + + self._data_layout = None + self._llvm_module = ll.parse_assembly( + str(self._create_empty_module(module_name))) + self._llvm_module.name = "global_codegen_module" + self._rtlinker = RuntimeLinker() + self._init(self._llvm_module) + + def _init(self, llvm_module): + assert list(llvm_module.global_variables) == [], "Module isn't empty" + + target = ll.Target.from_triple(ll.get_process_triple()) + tm_options = dict(opt=config.OPT) + self._tm_features = self._customize_tm_features() + self._customize_tm_options(tm_options) + tm = target.create_target_machine(**tm_options) + engine = ll.create_mcjit_compiler(llvm_module, tm) + + if config.ENABLE_PROFILING: + engine.enable_jit_events() + + self._tm = tm + self._engine = JitEngine(engine) + self._target_data = engine.target_data + self._data_layout = str(self._target_data) + self._mpm_cheap = self._module_pass_manager(loop_vectorize=False, + slp_vectorize=False, + opt=0, + cost="cheap") + self._mpm_full = self._module_pass_manager() + + self._engine.set_object_cache(self._library_class._object_compiled_hook, + self._library_class._object_getbuffer_hook) + + def _create_empty_module(self, name): + ir_module = llvmir.Module(cgutils.normalize_ir_text(name)) + ir_module.triple = ll.get_process_triple() + if self._data_layout: + ir_module.data_layout = self._data_layout + return ir_module + + def _module_pass_manager(self, **kwargs): + pm = ll.create_module_pass_manager() + self._tm.add_analysis_passes(pm) + cost = kwargs.pop("cost", None) + with self._pass_manager_builder(**kwargs) as pmb: + pmb.populate(pm) + # If config.OPT==0 do not include these extra passes to help with + # vectorization. + if cost is not None and cost == "cheap" and config.OPT != 0: + # This knocks loops into rotated form early to reduce the likelihood + # of vectorization failing due to unknown PHI nodes. + pm.add_loop_rotate_pass() + # LLVM 11 added LFTR to the IV Simplification pass, this interacted + # badly with the existing use of the InstructionCombiner here and + # ended up with PHI nodes that prevented vectorization from + # working. The desired vectorization effects can be achieved + # with this in LLVM 11 (and also < 11) but at a potentially + # slightly higher cost: + pm.add_licm_pass() + pm.add_cfg_simplification_pass() + if config.LLVM_REFPRUNE_PASS: + pm.add_refprune_pass(_parse_refprune_flags()) + return pm + + def _function_pass_manager(self, llvm_module, **kwargs): + pm = ll.create_function_pass_manager(llvm_module) + self._tm.add_analysis_passes(pm) + with self._pass_manager_builder(**kwargs) as pmb: + pmb.populate(pm) + if config.LLVM_REFPRUNE_PASS: + pm.add_refprune_pass(_parse_refprune_flags()) + return pm + + def _pass_manager_builder(self, **kwargs): + """ + Create a PassManagerBuilder. + + Note: a PassManagerBuilder seems good only for one use, so you + should call this method each time you want to populate a module + or function pass manager. Otherwise some optimizations will be + missed... 
+ """ + opt_level = kwargs.pop('opt', config.OPT) + loop_vectorize = kwargs.pop('loop_vectorize', config.LOOP_VECTORIZE) + slp_vectorize = kwargs.pop('slp_vectorize', config.SLP_VECTORIZE) + + pmb = create_pass_manager_builder(opt=opt_level, + loop_vectorize=loop_vectorize, + slp_vectorize=slp_vectorize, + **kwargs) + + return pmb + + def _check_llvm_bugs(self): + """ + Guard against some well-known LLVM bug(s). + """ + # Check the locale bug at https://github.com/numba/numba/issues/1569 + # Note we can't cache the result as locale settings can change + # across a process's lifetime. Also, for this same reason, + # the check here is a mere heuristic (there may be a race condition + # between now and actually compiling IR). + ir = """ + define double @func() + { + ret double 1.23e+01 + } + """ + mod = ll.parse_assembly(ir) + ir_out = str(mod) + if "12.3" in ir_out or "1.23" in ir_out: + # Everything ok + return + if "1.0" in ir_out: + loc = locale.getlocale() + raise RuntimeError( + "LLVM will produce incorrect floating-point code " + "in the current locale %s.\nPlease read " + "https://numba.readthedocs.io/en/stable/user/faq.html#llvm-locale-bug " + "for more information." + % (loc,)) + raise AssertionError("Unexpected IR:\n%s\n" % (ir_out,)) + + def magic_tuple(self): + """ + Return a tuple unambiguously describing the codegen behaviour. + """ + return (self._llvm_module.triple, self._get_host_cpu_name(), + self._tm_features) + + def _scan_and_fix_unresolved_refs(self, module): + self._rtlinker.scan_unresolved_symbols(module, self._engine) + self._rtlinker.scan_defined_symbols(module) + self._rtlinker.resolve(self._engine) + + def insert_unresolved_ref(self, builder, fnty, name): + voidptr = llvmir.IntType(8).as_pointer() + ptrname = self._rtlinker.PREFIX + name + llvm_mod = builder.module + try: + fnptr = llvm_mod.get_global(ptrname) + except KeyError: + # Not defined? + fnptr = llvmir.GlobalVariable(llvm_mod, voidptr, name=ptrname) + fnptr.linkage = 'external' + return builder.bitcast(builder.load(fnptr), fnty.as_pointer()) + + def _get_host_cpu_name(self): + return (ll.get_host_cpu_name() + if config.CPU_NAME is None + else config.CPU_NAME) + + def _get_host_cpu_features(self): + if config.CPU_FEATURES is not None: + return config.CPU_FEATURES + return get_host_cpu_features() + + +class AOTCPUCodegen(CPUCodegen): + """ + A codegen implementation suitable for Ahead-Of-Time compilation + (e.g. generation of object files). + """ + + _library_class = AOTCodeLibrary + + def __init__(self, module_name, cpu_name=None): + # By default, use generic cpu model for the arch + self._cpu_name = cpu_name or '' + CPUCodegen.__init__(self, module_name) + + def _customize_tm_options(self, options): + cpu_name = self._cpu_name + if cpu_name == 'host': + cpu_name = self._get_host_cpu_name() + options['cpu'] = cpu_name + options['reloc'] = 'pic' + options['codemodel'] = 'default' + options['features'] = self._tm_features + + def _customize_tm_features(self): + # ISA features are selected according to the requested CPU model + # in _customize_tm_options() + return '' + + def _add_module(self, module): + pass + + +class JITCPUCodegen(CPUCodegen): + """ + A codegen implementation suitable for Just-In-Time compilation. + """ + + _library_class = JITCodeLibrary + + def _customize_tm_options(self, options): + # As long as we don't want to ship the code to another machine, + # we can specialize for this CPU. 
+ options['cpu'] = self._get_host_cpu_name() + # LLVM 7 change: # https://reviews.llvm.org/D47211#inline-425406 + # JIT needs static relocation on x86* + # native target is already initialized from base class __init__ + arch = ll.Target.from_default_triple().name + if arch.startswith('x86'): # one of x86 or x86_64 + reloc_model = 'static' + elif arch.startswith('ppc'): + reloc_model = 'pic' + else: + reloc_model = 'default' + options['reloc'] = reloc_model + options['codemodel'] = 'jitdefault' + + # Set feature attributes (such as ISA extensions) + # This overrides default feature selection by CPU model above + options['features'] = self._tm_features + + # Deal with optional argument to ll.Target.create_target_machine + sig = utils.pysignature(ll.Target.create_target_machine) + if 'jit' in sig.parameters: + # Mark that this is making a JIT engine + options['jit'] = True + + def _customize_tm_features(self): + # For JIT target, we will use LLVM to get the feature map + return self._get_host_cpu_features() + + def _add_module(self, module): + self._engine.add_module(module) + # XXX: disabling remove module due to MCJIT engine leakage in + # removeModule. The removeModule causes consistent access + # violation with certain test combinations. + # # Early bind the engine method to avoid keeping a reference to self. + # return functools.partial(self._engine.remove_module, module) + + def set_env(self, env_name, env): + """Set the environment address. + + Update the GlobalVariable named *env_name* to the address of *env*. + """ + gvaddr = self._engine.get_global_value_address(env_name) + envptr = (ctypes.c_void_p * 1).from_address(gvaddr) + envptr[0] = ctypes.c_void_p(id(env)) + + +def initialize_llvm(): + """Safe to use multiple times. + """ + ll.initialize() + ll.initialize_native_target() + ll.initialize_native_asmprinter() + + +def get_host_cpu_features(): + """Get host CPU features using LLVM. + + The features may be modified due to user setting. + See numba.config.ENABLE_AVX. 
+ """ + try: + features = ll.get_host_cpu_features() + except RuntimeError: + return '' + else: + if not config.ENABLE_AVX: + # Disable all features with name starting with 'avx' + for k in features: + if k.startswith('avx'): + features[k] = False + + # Set feature attributes + return features.flatten() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler.py new file mode 100644 index 000000000..1ff376280 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler.py @@ -0,0 +1,790 @@ +from collections import namedtuple +import copy +import warnings +from numba.core.tracing import event + +from numba.core import (utils, errors, typing, interpreter, bytecode, postproc, + config, callconv, cpu) +from numba.parfors.parfor import ParforDiagnostics +from numba.core.errors import CompilerError +from numba.core.environment import lookup_environment + +from numba.core.compiler_machinery import PassManager + +from numba.core.untyped_passes import (ExtractByteCode, TranslateByteCode, + FixupArgs, IRProcessing, DeadBranchPrune, + RewriteSemanticConstants, + InlineClosureLikes, GenericRewrites, + WithLifting, InlineInlinables, + FindLiterallyCalls, + MakeFunctionToJitFunction, + CanonicalizeLoopExit, + CanonicalizeLoopEntry, LiteralUnroll, + ReconstructSSA, + LiteralPropagationSubPipelinePass, + ) + +from numba.core.typed_passes import (NopythonTypeInference, AnnotateTypes, + NopythonRewrites, PreParforPass, + ParforPass, DumpParforDiagnostics, + IRLegalization, NoPythonBackend, + InlineOverloads, PreLowerStripPhis, + NativeLowering, + NoPythonSupportedFeatureValidation, + ) + +from numba.core.object_mode_passes import (ObjectModeFrontEnd, + ObjectModeBackEnd) +from numba.core.targetconfig import TargetConfig, Option, ConfigStack + + +class Flags(TargetConfig): + enable_looplift = Option( + type=bool, + default=False, + doc="Enable loop-lifting", + ) + enable_pyobject = Option( + type=bool, + default=False, + doc="Enable pyobject mode (in general)", + ) + enable_pyobject_looplift = Option( + type=bool, + default=False, + doc="Enable pyobject mode inside lifted loops", + ) + enable_ssa = Option( + type=bool, + default=True, + doc="Enable SSA", + ) + force_pyobject = Option( + type=bool, + default=False, + doc="Force pyobject mode inside the whole function", + ) + release_gil = Option( + type=bool, + default=False, + doc="Release GIL inside the native function", + ) + no_compile = Option( + type=bool, + default=False, + doc="TODO", + ) + debuginfo = Option( + type=bool, + default=False, + doc="TODO", + ) + boundscheck = Option( + type=bool, + default=False, + doc="TODO", + ) + forceinline = Option( + type=bool, + default=False, + doc="Force inlining of the function. 
Overrides _dbg_optnone.", + ) + no_cpython_wrapper = Option( + type=bool, + default=False, + doc="TODO", + ) + no_cfunc_wrapper = Option( + type=bool, + default=False, + doc="TODO", + ) + auto_parallel = Option( + type=cpu.ParallelOptions, + default=cpu.ParallelOptions(False), + doc="""Enable automatic parallel optimization, can be fine-tuned by +taking a dictionary of sub-options instead of a boolean, see parfor.py for +detail""", + ) + nrt = Option( + type=bool, + default=False, + doc="TODO", + ) + no_rewrites = Option( + type=bool, + default=False, + doc="TODO", + ) + error_model = Option( + type=str, + default="python", + doc="TODO", + ) + fastmath = Option( + type=cpu.FastMathOptions, + default=cpu.FastMathOptions(False), + doc="TODO", + ) + noalias = Option( + type=bool, + default=False, + doc="TODO", + ) + inline = Option( + type=cpu.InlineOptions, + default=cpu.InlineOptions("never"), + doc="TODO", + ) + # Defines a new target option for tracking the "target backend". + # This will be the XYZ in @jit(_target=XYZ). + target_backend = Option( + type=str, + default="cpu", # if not set, default to CPU + doc="backend" + ) + + dbg_extend_lifetimes = Option( + type=bool, + default=False, + doc=("Extend variable lifetime for debugging. " + "This automatically turns on with debug=True."), + ) + + dbg_optnone = Option( + type=bool, + default=False, + doc=("Disable optimization for debug. " + "Equivalent to adding optnone attribute in the LLVM Function.") + ) + + +DEFAULT_FLAGS = Flags() +DEFAULT_FLAGS.nrt = True + + +CR_FIELDS = ["typing_context", + "target_context", + "entry_point", + "typing_error", + "type_annotation", + "signature", + "objectmode", + "lifted", + "fndesc", + "library", + "call_helper", + "environment", + "metadata", + # List of functions to call to initialize on unserialization + # (i.e cache load). + "reload_init", + "referenced_envs", + ] + + +class CompileResult(namedtuple("_CompileResult", CR_FIELDS)): + """ + A structure holding results from the compilation of a function. + """ + + __slots__ = () + + def _reduce(self): + """ + Reduce a CompileResult to picklable components. 
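+
+        The returned tuple mirrors the ``return`` statement below:
+        ``(libdata, fndesc, environment, signature, objectmode, lifted,
+        typeann, reload_init, referenced_envs)``.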
+ """ + libdata = self.library.serialize_using_object_code() + # Make it (un)picklable efficiently + typeann = str(self.type_annotation) + fndesc = self.fndesc + # Those don't need to be pickled and may fail + fndesc.typemap = fndesc.calltypes = None + # Include all referenced environments + referenced_envs = self._find_referenced_environments() + return (libdata, self.fndesc, self.environment, self.signature, + self.objectmode, self.lifted, typeann, self.reload_init, + tuple(referenced_envs)) + + def _find_referenced_environments(self): + """Returns a list of referenced environments + """ + mod = self.library._final_module + # Find environments + referenced_envs = [] + for gv in mod.global_variables: + gvn = gv.name + if gvn.startswith("_ZN08NumbaEnv"): + env = lookup_environment(gvn) + if env is not None: + if env.can_cache(): + referenced_envs.append(env) + return referenced_envs + + @classmethod + def _rebuild(cls, target_context, libdata, fndesc, env, + signature, objectmode, lifted, typeann, + reload_init, referenced_envs): + if reload_init: + # Re-run all + for fn in reload_init: + fn() + + library = target_context.codegen().unserialize_library(libdata) + cfunc = target_context.get_executable(library, fndesc, env) + cr = cls(target_context=target_context, + typing_context=target_context.typing_context, + library=library, + environment=env, + entry_point=cfunc, + fndesc=fndesc, + type_annotation=typeann, + signature=signature, + objectmode=objectmode, + lifted=lifted, + typing_error=None, + call_helper=None, + metadata=None, # Do not store, arbitrary & potentially large! + reload_init=reload_init, + referenced_envs=referenced_envs, + ) + + # Load Environments + for env in referenced_envs: + library.codegen.set_env(env.env_name, env) + + return cr + + @property + def codegen(self): + return self.target_context.codegen() + + def dump(self, tab=''): + print(f'{tab}DUMP {type(self).__name__} {self.entry_point}') + self.signature.dump(tab=tab + ' ') + print(f'{tab}END DUMP') + + +_LowerResult = namedtuple("_LowerResult", [ + "fndesc", + "call_helper", + "cfunc", + "env", +]) + + +def sanitize_compile_result_entries(entries): + keys = set(entries.keys()) + fieldset = set(CR_FIELDS) + badnames = keys - fieldset + if badnames: + raise NameError(*badnames) + missing = fieldset - keys + for k in missing: + entries[k] = None + # Avoid keeping alive traceback variables + err = entries['typing_error'] + if err is not None: + entries['typing_error'] = err.with_traceback(None) + return entries + + +def compile_result(**entries): + entries = sanitize_compile_result_entries(entries) + return CompileResult(**entries) + + +def compile_isolated(func, args, return_type=None, flags=DEFAULT_FLAGS, + locals={}): + """ + Compile the function in an isolated environment (typing and target + context). + Good for testing. + """ + from numba.core.registry import cpu_target + typingctx = typing.Context() + targetctx = cpu.CPUContext(typingctx, target='cpu') + # Register the contexts in case for nested @jit or @overload calls + with cpu_target.nested_context(typingctx, targetctx): + return compile_extra(typingctx, targetctx, func, args, return_type, + flags, locals) + + +def run_frontend(func, inline_closures=False, emit_dels=False): + """ + Run the compiler frontend over the given Python function, and return + the function's canonical Numba IR. 
+ + If inline_closures is Truthy then closure inlining will be run + If emit_dels is Truthy the ir.Del nodes will be emitted appropriately + """ + # XXX make this a dedicated Pipeline? + func_id = bytecode.FunctionIdentity.from_function(func) + interp = interpreter.Interpreter(func_id) + bc = bytecode.ByteCode(func_id=func_id) + func_ir = interp.interpret(bc) + if inline_closures: + from numba.core.inline_closurecall import InlineClosureCallPass + inline_pass = InlineClosureCallPass(func_ir, cpu.ParallelOptions(False), + {}, False) + inline_pass.run() + post_proc = postproc.PostProcessor(func_ir) + post_proc.run(emit_dels) + return func_ir + + +class _CompileStatus(object): + """ + Describes the state of compilation. Used like a C record. + """ + __slots__ = ['fail_reason', 'can_fallback'] + + def __init__(self, can_fallback): + self.fail_reason = None + self.can_fallback = can_fallback + + def __repr__(self): + vals = [] + for k in self.__slots__: + vals.append("{k}={v}".format(k=k, v=getattr(self, k))) + return ', '.join(vals) + + +class _EarlyPipelineCompletion(Exception): + """ + Raised to indicate that a pipeline has completed early + """ + + def __init__(self, result): + self.result = result + + +class StateDict(dict): + """ + A dictionary that has an overloaded getattr and setattr to permit getting + and setting key/values through the use of attributes. + """ + + def __getattr__(self, attr): + try: + return self[attr] + except KeyError: + raise AttributeError(attr) + + def __setattr__(self, attr, value): + self[attr] = value + + +def _make_subtarget(targetctx, flags): + """ + Make a new target context from the given target context and flags. + """ + subtargetoptions = {} + if flags.debuginfo: + subtargetoptions['enable_debuginfo'] = True + if flags.boundscheck: + subtargetoptions['enable_boundscheck'] = True + if flags.nrt: + subtargetoptions['enable_nrt'] = True + if flags.auto_parallel: + subtargetoptions['auto_parallel'] = flags.auto_parallel + if flags.fastmath: + subtargetoptions['fastmath'] = flags.fastmath + error_model = callconv.create_error_model(flags.error_model, targetctx) + subtargetoptions['error_model'] = error_model + + return targetctx.subtarget(**subtargetoptions) + + +class CompilerBase(object): + """ + Stores and manages states for the compiler + """ + + def __init__(self, typingctx, targetctx, library, args, return_type, flags, + locals): + # Make sure the environment is reloaded + config.reload_config() + typingctx.refresh() + targetctx.refresh() + + self.state = StateDict() + + self.state.typingctx = typingctx + self.state.targetctx = _make_subtarget(targetctx, flags) + self.state.library = library + self.state.args = args + self.state.return_type = return_type + self.state.flags = flags + self.state.locals = locals + + # Results of various steps of the compilation pipeline + self.state.bc = None + self.state.func_id = None + self.state.func_ir = None + self.state.lifted = None + self.state.lifted_from = None + self.state.typemap = None + self.state.calltypes = None + self.state.type_annotation = None + # holds arbitrary inter-pipeline stage meta data + self.state.metadata = {} + self.state.reload_init = [] + # hold this for e.g. 
with_lifting, null out on exit + self.state.pipeline = self + + # parfor diagnostics info, add to metadata + self.state.parfor_diagnostics = ParforDiagnostics() + self.state.metadata['parfor_diagnostics'] = \ + self.state.parfor_diagnostics + self.state.metadata['parfors'] = {} + + self.state.status = _CompileStatus( + can_fallback=self.state.flags.enable_pyobject + ) + + def compile_extra(self, func): + self.state.func_id = bytecode.FunctionIdentity.from_function(func) + ExtractByteCode().run_pass(self.state) + + self.state.lifted = () + self.state.lifted_from = None + return self._compile_bytecode() + + def compile_ir(self, func_ir, lifted=(), lifted_from=None): + self.state.func_id = func_ir.func_id + self.state.lifted = lifted + self.state.lifted_from = lifted_from + self.state.func_ir = func_ir + self.state.nargs = self.state.func_ir.arg_count + + FixupArgs().run_pass(self.state) + return self._compile_ir() + + def define_pipelines(self): + """Child classes override this to customize the pipelines in use. + """ + raise NotImplementedError() + + def _compile_core(self): + """ + Populate and run compiler pipeline + """ + with ConfigStack().enter(self.state.flags.copy()): + pms = self.define_pipelines() + for pm in pms: + pipeline_name = pm.pipeline_name + func_name = "%s.%s" % (self.state.func_id.modname, + self.state.func_id.func_qualname) + + event("Pipeline: %s for %s" % (pipeline_name, func_name)) + self.state.metadata['pipeline_times'] = {pipeline_name: + pm.exec_times} + is_final_pipeline = pm == pms[-1] + res = None + try: + pm.run(self.state) + if self.state.cr is not None: + break + except _EarlyPipelineCompletion as e: + res = e.result + break + except Exception as e: + if (utils.use_new_style_errors() and not + isinstance(e, errors.NumbaError)): + raise e + + self.state.status.fail_reason = e + if is_final_pipeline: + raise e + else: + raise CompilerError("All available pipelines exhausted") + + # Pipeline is done, remove self reference to release refs to user + # code + self.state.pipeline = None + + # organise a return + if res is not None: + # Early pipeline completion + return res + else: + assert self.state.cr is not None + return self.state.cr + + def _compile_bytecode(self): + """ + Populate and run pipeline for bytecode input + """ + assert self.state.func_ir is None + return self._compile_core() + + def _compile_ir(self): + """ + Populate and run pipeline for IR input + """ + assert self.state.func_ir is not None + return self._compile_core() + + +class Compiler(CompilerBase): + """The default compiler + """ + + def define_pipelines(self): + # this maintains the objmode fallback behaviour + pms = [] + if not self.state.flags.force_pyobject: + pms.append(DefaultPassBuilder.define_nopython_pipeline(self.state)) + if self.state.status.can_fallback or self.state.flags.force_pyobject: + pms.append( + DefaultPassBuilder.define_objectmode_pipeline(self.state) + ) + return pms + + +class DefaultPassBuilder(object): + """ + This is the default pass builder, it contains the "classic" default + pipelines as pre-canned PassManager instances: + - nopython + - objectmode + - interpreted + - typed + - untyped + - nopython lowering + """ + @staticmethod + def define_nopython_pipeline(state, name='nopython'): + """Returns an nopython mode pipeline based PassManager + """ + # compose pipeline from untyped, typed and lowering parts + dpb = DefaultPassBuilder + pm = PassManager(name) + untyped_passes = dpb.define_untyped_pipeline(state) + pm.passes.extend(untyped_passes.passes) + + 
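+        # The nopython pipeline is thus the concatenation, in order, of the
+        # untyped passes (bytecode -> Numba IR), the typed passes (type
+        # inference and typed optimisations) and the lowering passes
+        # (Numba IR -> LLVM -> executable), composed below.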
typed_passes = dpb.define_typed_pipeline(state) + pm.passes.extend(typed_passes.passes) + + lowering_passes = dpb.define_nopython_lowering_pipeline(state) + pm.passes.extend(lowering_passes.passes) + + pm.finalize() + return pm + + @staticmethod + def define_nopython_lowering_pipeline(state, name='nopython_lowering'): + pm = PassManager(name) + # legalise + pm.add_pass(NoPythonSupportedFeatureValidation, + "ensure features that are in use are in a valid form") + pm.add_pass(IRLegalization, + "ensure IR is legal prior to lowering") + # Annotate only once legalized + pm.add_pass(AnnotateTypes, "annotate types") + # lower + pm.add_pass(NativeLowering, "native lowering") + pm.add_pass(NoPythonBackend, "nopython mode backend") + pm.add_pass(DumpParforDiagnostics, "dump parfor diagnostics") + pm.finalize() + return pm + + @staticmethod + def define_typed_pipeline(state, name="typed"): + """Returns the typed part of the nopython pipeline""" + pm = PassManager(name) + # typing + pm.add_pass(NopythonTypeInference, "nopython frontend") + + # strip phis + pm.add_pass(PreLowerStripPhis, "remove phis nodes") + + # optimisation + pm.add_pass(InlineOverloads, "inline overloaded functions") + if state.flags.auto_parallel.enabled: + pm.add_pass(PreParforPass, "Preprocessing for parfors") + if not state.flags.no_rewrites: + pm.add_pass(NopythonRewrites, "nopython rewrites") + if state.flags.auto_parallel.enabled: + pm.add_pass(ParforPass, "convert to parfors") + + pm.finalize() + return pm + + @staticmethod + def define_untyped_pipeline(state, name='untyped'): + """Returns an untyped part of the nopython pipeline""" + pm = PassManager(name) + if state.func_ir is None: + pm.add_pass(TranslateByteCode, "analyzing bytecode") + pm.add_pass(FixupArgs, "fix up args") + pm.add_pass(IRProcessing, "processing IR") + pm.add_pass(WithLifting, "Handle with contexts") + + # inline closures early in case they are using nonlocal's + # see issue #6585. + pm.add_pass(InlineClosureLikes, + "inline calls to locally defined closures") + + # pre typing + if not state.flags.no_rewrites: + pm.add_pass(RewriteSemanticConstants, "rewrite semantic constants") + pm.add_pass(DeadBranchPrune, "dead branch pruning") + pm.add_pass(GenericRewrites, "nopython rewrites") + + # convert any remaining closures into functions + pm.add_pass(MakeFunctionToJitFunction, + "convert make_function into JIT functions") + # inline functions that have been determined as inlinable and rerun + # branch pruning, this needs to be run after closures are inlined as + # the IR repr of a closure masks call sites if an inlinable is called + # inside a closure + pm.add_pass(InlineInlinables, "inline inlinable functions") + if not state.flags.no_rewrites: + pm.add_pass(DeadBranchPrune, "dead branch pruning") + + pm.add_pass(FindLiterallyCalls, "find literally calls") + pm.add_pass(LiteralUnroll, "handles literal_unroll") + + if state.flags.enable_ssa: + pm.add_pass(ReconstructSSA, "ssa") + + pm.add_pass(LiteralPropagationSubPipelinePass, "Literal propagation") + + pm.finalize() + return pm + + @staticmethod + def define_objectmode_pipeline(state, name='object'): + """Returns an object-mode pipeline based PassManager + """ + pm = PassManager(name) + if state.func_ir is None: + pm.add_pass(TranslateByteCode, "analyzing bytecode") + pm.add_pass(FixupArgs, "fix up args") + else: + # Reaches here if it's a fallback from nopython mode. + # Strip the phi nodes. 
+ pm.add_pass(PreLowerStripPhis, "remove phis nodes") + pm.add_pass(IRProcessing, "processing IR") + + if utils.PYVERSION >= (3, 7): + # The following passes are needed to adjust for looplifting + pm.add_pass(CanonicalizeLoopEntry, "canonicalize loop entry") + pm.add_pass(CanonicalizeLoopExit, "canonicalize loop exit") + + pm.add_pass(ObjectModeFrontEnd, "object mode frontend") + pm.add_pass(InlineClosureLikes, + "inline calls to locally defined closures") + # convert any remaining closures into functions + pm.add_pass(MakeFunctionToJitFunction, + "convert make_function into JIT functions") + pm.add_pass(IRLegalization, "ensure IR is legal prior to lowering") + pm.add_pass(AnnotateTypes, "annotate types") + pm.add_pass(ObjectModeBackEnd, "object mode backend") + pm.finalize() + return pm + + +def compile_extra(typingctx, targetctx, func, args, return_type, flags, + locals, library=None, pipeline_class=Compiler): + """Compiler entry point + + Parameter + --------- + typingctx : + typing context + targetctx : + target context + func : function + the python function to be compiled + args : tuple, list + argument types + return_type : + Use ``None`` to indicate void return + flags : numba.compiler.Flags + compiler flags + library : numba.codegen.CodeLibrary + Used to store the compiled code. + If it is ``None``, a new CodeLibrary is used. + pipeline_class : type like numba.compiler.CompilerBase + compiler pipeline + """ + pipeline = pipeline_class(typingctx, targetctx, library, + args, return_type, flags, locals) + return pipeline.compile_extra(func) + + +def compile_ir(typingctx, targetctx, func_ir, args, return_type, flags, + locals, lifted=(), lifted_from=None, is_lifted_loop=False, + library=None, pipeline_class=Compiler): + """ + Compile a function with the given IR. + + For internal use only. + """ + + # This is a special branch that should only run on IR from a lifted loop + if is_lifted_loop: + # This code is pessimistic and costly, but it is a not often trodden + # path and it will go away once IR is made immutable. The problem is + # that the rewrite passes can mutate the IR into a state that makes + # it possible for invalid tokens to be transmitted to lowering which + # then trickle through into LLVM IR and causes RuntimeErrors as LLVM + # cannot compile it. As a result the following approach is taken: + # 1. Create some new flags that copy the original ones but switch + # off rewrites. + # 2. Compile with 1. to get a compile result + # 3. Try and compile another compile result but this time with the + # original flags (and IR being rewritten). + # 4. If 3 was successful, use the result, else use 2. + + # create flags with no rewrites + norw_flags = copy.deepcopy(flags) + norw_flags.no_rewrites = True + + def compile_local(the_ir, the_flags): + pipeline = pipeline_class(typingctx, targetctx, library, + args, return_type, the_flags, locals) + return pipeline.compile_ir(func_ir=the_ir, lifted=lifted, + lifted_from=lifted_from) + + # compile with rewrites off, IR shouldn't be mutated irreparably + norw_cres = compile_local(func_ir.copy(), norw_flags) + + # try and compile with rewrites on if no_rewrites was not set in the + # original flags, IR might get broken but we've got a CompileResult + # that's usable from above. 
+ rw_cres = None + if not flags.no_rewrites: + # Suppress warnings in compilation retry + with warnings.catch_warnings(): + warnings.simplefilter("ignore", errors.NumbaWarning) + try: + rw_cres = compile_local(func_ir.copy(), flags) + except Exception: + pass + # if the rewrite variant of compilation worked, use it, else use + # the norewrites backup + if rw_cres is not None: + cres = rw_cres + else: + cres = norw_cres + return cres + + else: + pipeline = pipeline_class(typingctx, targetctx, library, + args, return_type, flags, locals) + return pipeline.compile_ir(func_ir=func_ir, lifted=lifted, + lifted_from=lifted_from) + + +def compile_internal(typingctx, targetctx, library, + func, args, return_type, flags, locals): + """ + For internal use only. + """ + pipeline = Compiler(typingctx, targetctx, library, + args, return_type, flags, locals) + return pipeline.compile_extra(func) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_lock.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_lock.py new file mode 100644 index 000000000..874fced9f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_lock.py @@ -0,0 +1,56 @@ +import threading +import functools +import numba.core.event as ev + + +# Lock for the preventing multiple compiler execution +class _CompilerLock(object): + def __init__(self): + self._lock = threading.RLock() + + def acquire(self): + ev.start_event("numba:compiler_lock") + self._lock.acquire() + + def release(self): + self._lock.release() + ev.end_event("numba:compiler_lock") + + def __enter__(self): + self.acquire() + + def __exit__(self, exc_val, exc_type, traceback): + self.release() + + def is_locked(self): + is_owned = getattr(self._lock, '_is_owned') + if not callable(is_owned): + is_owned = self._is_owned + return is_owned() + + def __call__(self, func): + @functools.wraps(func) + def _acquire_compile_lock(*args, **kwargs): + with self: + return func(*args, **kwargs) + return _acquire_compile_lock + + def _is_owned(self): + # This method is borrowed from threading.Condition. + # Return True if lock is owned by current_thread. + # This method is called only if _lock doesn't have _is_owned(). + if self._lock.acquire(0): + self._lock.release() + return False + else: + return True + + +global_compiler_lock = _CompilerLock() + + +def require_global_compiler_lock(): + """Sentry that checks the global_compiler_lock is acquired. 
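+
+    Illustrative usage (a sketch):
+
+        with global_compiler_lock:
+            require_global_compiler_lock()  # passes, the lock is held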
+ """ + # Use assert to allow turning off this check + assert global_compiler_lock.is_locked() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_machinery.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_machinery.py new file mode 100644 index 000000000..2573888cd --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/compiler_machinery.py @@ -0,0 +1,463 @@ +import timeit +from abc import abstractmethod, ABCMeta +from collections import namedtuple, OrderedDict +import inspect +from pprint import pformat + + +from numba.core.compiler_lock import global_compiler_lock +from numba.core import errors, config, transforms, utils +from numba.core.tracing import event +from numba.core.postproc import PostProcessor +from numba.core.ir_utils import enforce_no_dels, legalize_single_scope +import numba.core.event as ev + +# terminal color markup +_termcolor = errors.termcolor() + + +class SimpleTimer(object): + """ + A simple context managed timer + """ + + def __enter__(self): + self.ts = timeit.default_timer() + return self + + def __exit__(self, *exc): + self.elapsed = timeit.default_timer() - self.ts + + +class CompilerPass(metaclass=ABCMeta): + """ The base class for all compiler passes. + """ + + @abstractmethod + def __init__(self, *args, **kwargs): + self._analysis = None + self._pass_id = None + + @classmethod + def name(cls): + """ + Returns the name of the pass + """ + return cls._name + + @property + def pass_id(self): + """ + The ID of the pass + """ + return self._pass_id + + @pass_id.setter + def pass_id(self, val): + """ + Sets the ID of the pass + """ + self._pass_id = val + + @property + def analysis(self): + """ + Analysis data for the pass + """ + return self._analysis + + @analysis.setter + def analysis(self, val): + """ + Set the analysis data for the pass + """ + self._analysis = val + + def run_initialization(self, *args, **kwargs): + """ + Runs the initialization sequence for the pass, will run before + `run_pass`. + """ + return False + + @abstractmethod + def run_pass(self, *args, **kwargs): + """ + Runs the pass itself. Must return True/False depending on whether + statement level modification took place. + """ + pass + + def run_finalizer(self, *args, **kwargs): + """ + Runs the initialization sequence for the pass, will run before + `run_pass`. + """ + return False + + def get_analysis_usage(self, AU): + """ Override to set analysis usage + """ + pass + + def get_analysis(self, pass_name): + """ + Gets the analysis from a given pass + """ + return self._analysis[pass_name] + + +class SSACompliantMixin(object): + """ Mixin to indicate a pass is SSA form compliant. Nothing is asserted + about this condition at present. + """ + pass + + +class FunctionPass(CompilerPass): + """ Base class for function passes + """ + pass + + +class AnalysisPass(CompilerPass): + """ Base class for analysis passes (no modification made to state) + """ + pass + + +class LoweringPass(CompilerPass): + """ Base class for lowering passes + """ + pass + + +class AnalysisUsage(object): + """This looks and behaves like LLVM's AnalysisUsage because its like that. 
+ """ + + def __init__(self): + self._required = set() + self._preserved = set() + + def get_required_set(self): + return self._required + + def get_preserved_set(self): + return self._preserved + + def add_required(self, pss): + self._required.add(pss) + + def add_preserved(self, pss): + self._preserved.add(pss) + + def __str__(self): + return "required: %s\n" % self._required + + +_DEBUG = False + + +def debug_print(*args, **kwargs): + if _DEBUG: + print(*args, **kwargs) + + +pass_timings = namedtuple('pass_timings', 'init run finalize') + + +class PassManager(object): + """ + The PassManager is a named instance of a particular compilation pipeline + """ + # TODO: Eventually enable this, it enforces self consistency after each pass + _ENFORCING = False + + def __init__(self, pipeline_name): + """ + Create a new pipeline with name "pipeline_name" + """ + self.passes = [] + self.exec_times = OrderedDict() + self._finalized = False + self._analysis = None + self._print_after = None + self.pipeline_name = pipeline_name + + def _validate_pass(self, pass_cls): + if (not (isinstance(pass_cls, str) or + (inspect.isclass(pass_cls) and + issubclass(pass_cls, CompilerPass)))): + msg = ("Pass must be referenced by name or be a subclass of a " + "CompilerPass. Have %s" % pass_cls) + raise TypeError(msg) + if isinstance(pass_cls, str): + pass_cls = _pass_registry.find_by_name(pass_cls) + else: + if not _pass_registry.is_registered(pass_cls): + raise ValueError("Pass %s is not registered" % pass_cls) + + def add_pass(self, pss, description=""): + """ + Append a pass to the PassManager's compilation pipeline + """ + self._validate_pass(pss) + func_desc_tuple = (pss, description) + self.passes.append(func_desc_tuple) + self._finalized = False + + def add_pass_after(self, pass_cls, location): + """ + Add a pass `pass_cls` to the PassManager's compilation pipeline after + the pass `location`. + """ + assert self.passes + self._validate_pass(pass_cls) + self._validate_pass(location) + for idx, (x, _) in enumerate(self.passes): + if x == location: + break + else: + raise ValueError("Could not find pass %s" % location) + self.passes.insert(idx + 1, (pass_cls, str(pass_cls))) + # if a pass has been added, it's not finalized + self._finalized = False + + def _debug_init(self): + # determine after which passes IR dumps should take place + def parse(conf_item): + print_passes = [] + if conf_item != "none": + if conf_item == "all": + print_passes = [x.name() for (x, _) in self.passes] + else: + # we don't validate whether the named passes exist in this + # pipeline the compiler may be used reentrantly and + # different pipelines may contain different passes + splitted = conf_item.split(',') + print_passes = [x.strip() for x in splitted] + return print_passes + ret = (parse(config.DEBUG_PRINT_AFTER), + parse(config.DEBUG_PRINT_BEFORE), + parse(config.DEBUG_PRINT_WRAP),) + return ret + + def finalize(self): + """ + Finalize the PassManager, after which no more passes may be added + without re-finalization. + """ + self._analysis = self.dependency_analysis() + self._print_after, self._print_before, self._print_wrap = \ + self._debug_init() + self._finalized = True + + @property + def finalized(self): + return self._finalized + + def _patch_error(self, desc, exc): + """ + Patches the error to show the stage that it arose in. 
+ """ + newmsg = "{desc}\n{exc}".format(desc=desc, exc=exc) + exc.args = (newmsg,) + return exc + + @global_compiler_lock # this need a lock, likely calls LLVM + def _runPass(self, index, pss, internal_state): + mutated = False + + def check(func, compiler_state): + mangled = func(compiler_state) + if mangled not in (True, False): + msg = ("CompilerPass implementations should return True/False. " + "CompilerPass with name '%s' did not.") + raise ValueError(msg % pss.name()) + return mangled + + def debug_print(pass_name, print_condition, printable_condition): + if pass_name in print_condition: + fid = internal_state.func_id + args = (fid.modname, fid.func_qualname, self.pipeline_name, + printable_condition, pass_name) + print(("%s.%s: %s: %s %s" % args).center(120, '-')) + if internal_state.func_ir is not None: + internal_state.func_ir.dump() + else: + print("func_ir is None") + + # debug print before this pass? + debug_print(pss.name(), self._print_before + self._print_wrap, "BEFORE") + + # wire in the analysis info so it's accessible + pss.analysis = self._analysis + + qualname = internal_state.func_id.func_qualname + + ev_details = dict( + name=f"{pss.name()} [{qualname}]", + qualname=qualname, + module=internal_state.func_id.modname, + flags=pformat(internal_state.flags.values()), + args=str(internal_state.args), + return_type=str(internal_state.return_type), + ) + with ev.trigger_event("numba:run_pass", data=ev_details): + with SimpleTimer() as init_time: + mutated |= check(pss.run_initialization, internal_state) + with SimpleTimer() as pass_time: + mutated |= check(pss.run_pass, internal_state) + with SimpleTimer() as finalize_time: + mutated |= check(pss.run_finalizer, internal_state) + + # Check that if the pass is an instance of a FunctionPass that it hasn't + # emitted ir.Dels. + if isinstance(pss, FunctionPass): + enforce_no_dels(internal_state.func_ir) + + if self._ENFORCING: + # TODO: Add in self consistency enforcement for + # `func_ir._definitions` etc + if _pass_registry.get(pss.__class__).mutates_CFG: + if mutated: # block level changes, rebuild all + PostProcessor(internal_state.func_ir).run() + else: # CFG level changes rebuild CFG + internal_state.func_ir.blocks = transforms.canonicalize_cfg( + internal_state.func_ir.blocks) + # Check the func_ir has exactly one Scope instance + if not legalize_single_scope(internal_state.func_ir.blocks): + raise errors.CompilerError( + f"multiple scope in func_ir detected in {pss}", + ) + # inject runtimes + pt = pass_timings(init_time.elapsed, pass_time.elapsed, + finalize_time.elapsed) + self.exec_times["%s_%s" % (index, pss.name())] = pt + + # debug print after this pass? + debug_print(pss.name(), self._print_after + self._print_wrap, "AFTER") + + def run(self, state): + """ + Run the defined pipelines on the state. 
+ """ + from numba.core.compiler import _EarlyPipelineCompletion + if not self.finalized: + raise RuntimeError("Cannot run non-finalised pipeline") + + # walk the passes and run them + for idx, (pss, pass_desc) in enumerate(self.passes): + try: + event("-- %s" % pass_desc) + pass_inst = _pass_registry.get(pss).pass_inst + if isinstance(pass_inst, CompilerPass): + self._runPass(idx, pass_inst, state) + else: + raise BaseException("Legacy pass in use") + except _EarlyPipelineCompletion as e: + raise e + except Exception as e: + if (utils.use_new_style_errors() and not + isinstance(e, errors.NumbaError)): + raise e + msg = "Failed in %s mode pipeline (step: %s)" % \ + (self.pipeline_name, pass_desc) + patched_exception = self._patch_error(msg, e) + raise patched_exception + + def dependency_analysis(self): + """ + Computes dependency analysis + """ + deps = dict() + for (pss, _) in self.passes: + x = _pass_registry.get(pss).pass_inst + au = AnalysisUsage() + x.get_analysis_usage(au) + deps[type(x)] = au + + requires_map = dict() + for k, v in deps.items(): + requires_map[k] = v.get_required_set() + + def resolve_requires(key, rmap): + def walk(lkey, rmap): + dep_set = rmap[lkey] if lkey in rmap else set() + if dep_set: + for x in dep_set: + dep_set |= (walk(x, rmap)) + return dep_set + else: + return set() + ret = set() + for k in key: + ret |= walk(k, rmap) + return ret + + dep_chain = dict() + for k, v in requires_map.items(): + dep_chain[k] = set(v) | (resolve_requires(v, requires_map)) + + return dep_chain + + +pass_info = namedtuple('pass_info', 'pass_inst mutates_CFG analysis_only') + + +class PassRegistry(object): + """ + Pass registry singleton class. + """ + + _id = 0 + + _registry = dict() + + def register(self, mutates_CFG, analysis_only): + def make_festive(pass_class): + assert not self.is_registered(pass_class) + assert not self._does_pass_name_alias(pass_class.name()) + pass_class.pass_id = self._id + self._id += 1 + self._registry[pass_class] = pass_info(pass_class(), mutates_CFG, + analysis_only) + return pass_class + return make_festive + + def is_registered(self, clazz): + return clazz in self._registry.keys() + + def get(self, clazz): + assert self.is_registered(clazz) + return self._registry[clazz] + + def _does_pass_name_alias(self, check): + for k, v in self._registry.items(): + if v.pass_inst.name == check: + return True + return False + + def find_by_name(self, class_name): + assert isinstance(class_name, str) + for k, v in self._registry.items(): + if v.pass_inst.name == class_name: + return v + else: + raise ValueError("No pass with name %s is registered" % class_name) + + def dump(self): + for k, v in self._registry.items(): + print("%s: %s" % (k, v)) + + +_pass_registry = PassRegistry() +del PassRegistry + + +""" +register_pass is used to register a compiler pass class for use with PassManager +instances. 
+""" +register_pass = _pass_registry.register diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/config.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/config.py new file mode 100644 index 000000000..da5bed21b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/config.py @@ -0,0 +1,527 @@ +import platform +import sys +import os +import re +import shutil +import warnings + +# YAML needed to use file based Numba config +try: + import yaml + _HAVE_YAML = True +except ImportError: + _HAVE_YAML = False + + +import llvmlite.binding as ll + + +IS_WIN32 = sys.platform.startswith('win32') +IS_OSX = sys.platform.startswith('darwin') +MACHINE_BITS = tuple.__itemsize__ * 8 +IS_32BITS = MACHINE_BITS == 32 +# Python version in (major, minor) tuple +PYVERSION = sys.version_info[:2] + +# this is the name of the user supplied configuration file +_config_fname = '.numba_config.yaml' + + +def _parse_cc(text): + """ + Parse CUDA compute capability version string. + """ + if not text: + return None + else: + m = re.match(r'(\d+)\.(\d+)', text) + if not m: + raise ValueError("Compute capability must be specified as a " + "string of \"major.minor\" where major " + "and minor are decimals") + grp = m.groups() + return int(grp[0]), int(grp[1]) + + +def _os_supports_avx(): + """ + Whether the current OS supports AVX, regardless of the CPU. + + This is necessary because the user may be running a very old Linux + kernel (e.g. CentOS 5) on a recent CPU. + """ + if (not sys.platform.startswith('linux') + or platform.machine() not in ('i386', 'i586', 'i686', 'x86_64')): + return True + # Executing the CPUID instruction may report AVX available even though + # the kernel doesn't support it, so parse /proc/cpuinfo instead. + try: + f = open('/proc/cpuinfo', 'r') + except OSError: + # If /proc isn't available, assume yes + return True + with f: + for line in f: + head, _, body = line.partition(':') + if head.strip() == 'flags' and 'avx' in body.split(): + return True + else: + return False + + +# Choose how to handle captured errors +def _validate_captured_errors_style(style_str): + rendered_style = str(style_str) + if rendered_style not in ('new_style', 'old_style'): + msg = ("Invalid style in NUMBA_CAPTURED_ERRORS: " + f"{rendered_style}") + raise ValueError(msg) + else: + return rendered_style + + +class _EnvReloader(object): + + def __init__(self): + self.reset() + + def reset(self): + self.old_environ = {} + self.update(force=True) + + def update(self, force=False): + new_environ = {} + + # first check if there's a .numba_config.yaml and use values from that + if os.path.exists(_config_fname) and os.path.isfile(_config_fname): + if not _HAVE_YAML: + msg = ("A Numba config file is found but YAML parsing " + "capabilities appear to be missing. " + "To use this feature please install `pyyaml`. e.g. " + "`conda install pyyaml`.") + warnings.warn(msg) + else: + with open(_config_fname, 'rt') as f: + y_conf = yaml.safe_load(f) + if y_conf is not None: + for k, v in y_conf.items(): + new_environ['NUMBA_' + k.upper()] = v + + # clobber file based config with any locally defined env vars + for name, value in os.environ.items(): + if name.startswith('NUMBA_'): + new_environ[name] = value + # We update the config variables if at least one NUMBA environment + # variable was modified. This lets the user modify values + # directly in the config module without having them when + # reload_config() is called by the compiler. 
+        if force or self.old_environ != new_environ:
+            self.process_environ(new_environ)
+            # Store a copy
+            self.old_environ = dict(new_environ)
+
+        self.validate()
+
+    def validate(self):
+        global CUDA_USE_NVIDIA_BINDING
+
+        if CUDA_USE_NVIDIA_BINDING:  # noqa: F821
+            try:
+                import cuda  # noqa: F401
+            except ImportError as ie:
+                msg = ("CUDA Python bindings requested (the environment "
+                       "variable NUMBA_CUDA_USE_NVIDIA_BINDING is set), "
+                       f"but they are not importable: {ie.msg}.")
+                warnings.warn(msg)
+
+                CUDA_USE_NVIDIA_BINDING = False
+
+            if CUDA_PER_THREAD_DEFAULT_STREAM:  # noqa: F821
+                warnings.warn("PTDS support is handled by CUDA Python when "
+                              "using the NVIDIA binding. Please set the "
+                              "environment variable "
+                              "CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM to 1 "
+                              "instead.")
+
+    def process_environ(self, environ):
+        def _readenv(name, ctor, default):
+            value = environ.get(name)
+            if value is None:
+                return default() if callable(default) else default
+            try:
+                return ctor(value)
+            except Exception:
+                warnings.warn("environ %s defined but failed to parse '%s'" %
+                              (name, value), RuntimeWarning)
+                return default
+
+        def optional_str(x):
+            return str(x) if x is not None else None
+
+        # developer mode produces full tracebacks, disables help instructions
+        DEVELOPER_MODE = _readenv("NUMBA_DEVELOPER_MODE", int, 0)
+
+        # disable performance warnings; this will switch off the generation
+        # of warnings of the class NumbaPerformanceWarning
+        DISABLE_PERFORMANCE_WARNINGS = _readenv(
+            "NUMBA_DISABLE_PERFORMANCE_WARNINGS", int, 0)
+
+        # Flag to enable full exception reporting
+        FULL_TRACEBACKS = _readenv(
+            "NUMBA_FULL_TRACEBACKS", int, DEVELOPER_MODE)
+
+        # Show help text when an error occurs
+        SHOW_HELP = _readenv("NUMBA_SHOW_HELP", int, 0)
+
+        # The color scheme to use for error messages; the default is no
+        # color, just bold fonts in use.
+        COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color")
+
+        # Whether to globally enable bounds checking. The default None means
+        # to use the value of the flag to @njit. 0 or 1 overrides the flag
+        # globally.
+        BOUNDSCHECK = _readenv("NUMBA_BOUNDSCHECK", int, None)
+
+        # Whether to always warn about potential uninitialized variables
+        # because static controlflow analysis cannot find a definition
+        # in one or more of the incoming paths.
+        ALWAYS_WARN_UNINIT_VAR = _readenv(
+            "NUMBA_ALWAYS_WARN_UNINIT_VAR", int, 0,
+        )
+
+        # Whether to warn about kernel launches where the grid size will
+        # under-utilize the GPU due to low occupancy. On by default.
+ CUDA_LOW_OCCUPANCY_WARNINGS = _readenv( + "NUMBA_CUDA_LOW_OCCUPANCY_WARNINGS", int, 1) + + # Whether to use the official CUDA Python API Bindings + CUDA_USE_NVIDIA_BINDING = _readenv( + "NUMBA_CUDA_USE_NVIDIA_BINDING", int, 0) + + # Debug flag to control compiler debug print + DEBUG = _readenv("NUMBA_DEBUG", int, 0) + + # DEBUG print IR after pass names + DEBUG_PRINT_AFTER = _readenv("NUMBA_DEBUG_PRINT_AFTER", str, "none") + + # DEBUG print IR before pass names + DEBUG_PRINT_BEFORE = _readenv("NUMBA_DEBUG_PRINT_BEFORE", str, "none") + + # DEBUG print IR before and after pass names + DEBUG_PRINT_WRAP = _readenv("NUMBA_DEBUG_PRINT_WRAP", str, "none") + + # Highlighting in intermediate dumps + HIGHLIGHT_DUMPS = _readenv("NUMBA_HIGHLIGHT_DUMPS", int, 0) + + # JIT Debug flag to trigger IR instruction print + DEBUG_JIT = _readenv("NUMBA_DEBUG_JIT", int, 0) + + # Enable debugging of front-end operation + # (up to and including IR generation) + DEBUG_FRONTEND = _readenv("NUMBA_DEBUG_FRONTEND", int, 0) + + # Enable debug prints in nrtdynmod + DEBUG_NRT = _readenv("NUMBA_DEBUG_NRT", int, 0) + + # How many recently deserialized functions to retain regardless + # of external references + FUNCTION_CACHE_SIZE = _readenv("NUMBA_FUNCTION_CACHE_SIZE", int, 128) + + # Maximum tuple size that parfors will unpack and pass to + # internal gufunc. + PARFOR_MAX_TUPLE_SIZE = _readenv("NUMBA_PARFOR_MAX_TUPLE_SIZE", + int, 100) + + # Enable logging of cache operation + DEBUG_CACHE = _readenv("NUMBA_DEBUG_CACHE", int, DEBUG) + + # Redirect cache directory + # Contains path to the directory + CACHE_DIR = _readenv("NUMBA_CACHE_DIR", str, "") + + # Enable tracing support + TRACE = _readenv("NUMBA_TRACE", int, 0) + + # Enable chrome tracing support + CHROME_TRACE = _readenv("NUMBA_CHROME_TRACE", str, "") + + # Enable debugging of type inference + DEBUG_TYPEINFER = _readenv("NUMBA_DEBUG_TYPEINFER", int, 0) + + # Configure compilation target to use the specified CPU name + # and CPU feature as the host information. + # Note: this overrides "host" option for AOT compilation. 
+        CPU_NAME = _readenv("NUMBA_CPU_NAME", optional_str, None)
+        CPU_FEATURES = _readenv("NUMBA_CPU_FEATURES", optional_str,
+                                ("" if str(CPU_NAME).lower() == 'generic'
+                                 else None))
+        # Optimization level
+        OPT = _readenv("NUMBA_OPT", int, 3)
+
+        # Force dump of Python bytecode
+        DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND)
+
+        # Force dump of control flow graph
+        DUMP_CFG = _readenv("NUMBA_DUMP_CFG", int, DEBUG_FRONTEND)
+
+        # Force dump of Numba IR
+        DUMP_IR = _readenv("NUMBA_DUMP_IR", int,
+                           DEBUG_FRONTEND)
+
+        # Force dump of Numba IR in SSA form
+        DUMP_SSA = _readenv("NUMBA_DUMP_SSA", int,
+                            DEBUG_FRONTEND or DEBUG_TYPEINFER)
+
+        # print debug info of analysis and optimization on array operations
+        DEBUG_ARRAY_OPT = _readenv("NUMBA_DEBUG_ARRAY_OPT", int, 0)
+
+        # insert debug stmts to print information at runtime
+        DEBUG_ARRAY_OPT_RUNTIME = _readenv(
+            "NUMBA_DEBUG_ARRAY_OPT_RUNTIME", int, 0)
+
+        # print stats about parallel for-loops
+        DEBUG_ARRAY_OPT_STATS = _readenv("NUMBA_DEBUG_ARRAY_OPT_STATS", int, 0)
+
+        # prints user-friendly information about automatic parallelization
+        PARALLEL_DIAGNOSTICS = _readenv("NUMBA_PARALLEL_DIAGNOSTICS", int, 0)
+
+        # print debug info of inline closure pass
+        DEBUG_INLINE_CLOSURE = _readenv("NUMBA_DEBUG_INLINE_CLOSURE", int, 0)
+
+        # Force dump of LLVM IR
+        DUMP_LLVM = _readenv("NUMBA_DUMP_LLVM", int, DEBUG)
+
+        # Force dump of function-optimized LLVM IR
+        DUMP_FUNC_OPT = _readenv("NUMBA_DUMP_FUNC_OPT", int, DEBUG)
+
+        # Force dump of optimized LLVM IR
+        DUMP_OPTIMIZED = _readenv("NUMBA_DUMP_OPTIMIZED", int, DEBUG)
+
+        # Enable or disable the loop vectorizer.
+        # The loop vectorizer is disabled on 32-bit win32 due to a bug (#649)
+        LOOP_VECTORIZE = _readenv("NUMBA_LOOP_VECTORIZE", int,
+                                  not (IS_WIN32 and IS_32BITS))
+
+        # Switch on superword-level parallelism vectorization; default is on.
+        SLP_VECTORIZE = _readenv("NUMBA_SLP_VECTORIZE", int, 1)
+
+        # Force dump of generated assembly
+        DUMP_ASSEMBLY = _readenv("NUMBA_DUMP_ASSEMBLY", int, DEBUG)
+
+        # Force dump of type annotation
+        ANNOTATE = _readenv("NUMBA_DUMP_ANNOTATION", int, 0)
+
+        # Dump IR in such a way as to aid in "diff"ing.
+        DIFF_IR = _readenv("NUMBA_DIFF_IR", int, 0)
+
+        # Dump type annotation in html format
+        def fmt_html_path(path):
+            if path is None:
+                return path
+            else:
+                return os.path.abspath(path)
+
+        HTML = _readenv("NUMBA_DUMP_HTML", fmt_html_path, None)
+
+        # x86-64 specific
+        # Enable AVX on supported platforms where it won't degrade performance.
+        def avx_default():
+            if not _os_supports_avx():
+                return False
+            else:
+                # There are various performance issues with AVX and LLVM
+                # on some CPUs (list at
+                # http://llvm.org/bugs/buglist.cgi?quicksearch=avx).
+                # For now we'd rather disable it, since it can pessimize code
+                cpu_name = ll.get_host_cpu_name()
+                return cpu_name not in ('corei7-avx', 'core-avx-i',
+                                        'sandybridge', 'ivybridge')
+
+        ENABLE_AVX = _readenv("NUMBA_ENABLE_AVX", int, avx_default)
+
+        # If set and SVML is available, SVML will be disabled.
+        # By default, it's disabled on 32-bit platforms.
+ DISABLE_INTEL_SVML = _readenv( + "NUMBA_DISABLE_INTEL_SVML", int, IS_32BITS) + + # Disable jit for debugging + DISABLE_JIT = _readenv("NUMBA_DISABLE_JIT", int, 0) + + # choose parallel backend to use + THREADING_LAYER_PRIORITY = _readenv( + "NUMBA_THREADING_LAYER_PRIORITY", + lambda string: string.split(), + ['tbb', 'omp', 'workqueue'], + ) + THREADING_LAYER = _readenv("NUMBA_THREADING_LAYER", str, 'default') + + CAPTURED_ERRORS = _readenv("NUMBA_CAPTURED_ERRORS", + _validate_captured_errors_style, + 'old_style') + + # CUDA Configs + + # Whether to warn about kernel launches where a host array + # is used as a parameter, forcing a copy to and from the device. + # On by default. + CUDA_WARN_ON_IMPLICIT_COPY = _readenv( + "NUMBA_CUDA_WARN_ON_IMPLICIT_COPY", int, 1) + + # Force CUDA compute capability to a specific version + FORCE_CUDA_CC = _readenv("NUMBA_FORCE_CUDA_CC", _parse_cc, None) + + # The default compute capability to target when compiling to PTX. + CUDA_DEFAULT_PTX_CC = _readenv("NUMBA_CUDA_DEFAULT_PTX_CC", _parse_cc, + (5, 3)) + + # Disable CUDA support + DISABLE_CUDA = _readenv("NUMBA_DISABLE_CUDA", + int, int(MACHINE_BITS == 32)) + + # Enable CUDA simulator + ENABLE_CUDASIM = _readenv("NUMBA_ENABLE_CUDASIM", int, 0) + + # CUDA logging level + # Any level name from the *logging* module. Case insensitive. + # Defaults to CRITICAL if not set or invalid. + # Note: This setting only applies when logging is not configured. + # Any existing logging configuration is preserved. + CUDA_LOG_LEVEL = _readenv("NUMBA_CUDA_LOG_LEVEL", str, '') + + # Include argument values in the CUDA Driver API logs + CUDA_LOG_API_ARGS = _readenv("NUMBA_CUDA_LOG_API_ARGS", int, 0) + + # Maximum number of pending CUDA deallocations (default: 10) + CUDA_DEALLOCS_COUNT = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_COUNT", + int, 10) + + # Maximum ratio of pending CUDA deallocations to capacity (default: 0.2) + CUDA_DEALLOCS_RATIO = _readenv("NUMBA_CUDA_MAX_PENDING_DEALLOCS_RATIO", + float, 0.2) + + CUDA_ARRAY_INTERFACE_SYNC = _readenv("NUMBA_CUDA_ARRAY_INTERFACE_SYNC", + int, 1) + + # Path of the directory that the CUDA driver libraries are located + CUDA_DRIVER = _readenv("NUMBA_CUDA_DRIVER", str, '') + + # Buffer size for logs produced by CUDA driver operations (e.g. + # linking) + CUDA_LOG_SIZE = _readenv("NUMBA_CUDA_LOG_SIZE", int, 1024) + + # Whether to generate verbose log messages when JIT linking + CUDA_VERBOSE_JIT_LOG = _readenv("NUMBA_CUDA_VERBOSE_JIT_LOG", int, 1) + + # Whether the default stream is the per-thread default stream + CUDA_PER_THREAD_DEFAULT_STREAM = _readenv( + "NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", int, 0) + + # Location of the CUDA include files + if IS_WIN32: + cuda_path = os.environ.get('CUDA_PATH') + if cuda_path: + default_cuda_include_path = os.path.join(cuda_path, "include") + else: + default_cuda_include_path = "cuda_include_not_found" + else: + default_cuda_include_path = os.path.join(os.sep, 'usr', 'local', + 'cuda', 'include') + CUDA_INCLUDE_PATH = _readenv("NUMBA_CUDA_INCLUDE_PATH", str, + default_cuda_include_path) + + # Threading settings + + # The default number of threads to use. + def num_threads_default(): + try: + sched_getaffinity = os.sched_getaffinity + except AttributeError: + pass + else: + return max(1, len(sched_getaffinity(0))) + + cpu_count = os.cpu_count() + if cpu_count is not None: + return max(1, cpu_count) + + return 1 + + NUMBA_DEFAULT_NUM_THREADS = num_threads_default() + + # Numba thread pool size (defaults to number of CPUs on the system). 
+ _NUMBA_NUM_THREADS = _readenv("NUMBA_NUM_THREADS", int, + NUMBA_DEFAULT_NUM_THREADS) + if ('NUMBA_NUM_THREADS' in globals() + and globals()['NUMBA_NUM_THREADS'] != _NUMBA_NUM_THREADS): + + from numba.np.ufunc import parallel + if parallel._is_initialized: + raise RuntimeError("Cannot set NUMBA_NUM_THREADS to a " + "different value once the threads have been " + "launched (currently have %s, " + "trying to set %s)" % + (_NUMBA_NUM_THREADS, + globals()['NUMBA_NUM_THREADS'])) + + NUMBA_NUM_THREADS = _NUMBA_NUM_THREADS + del _NUMBA_NUM_THREADS + + # Profiling support + + # Indicates if a profiler detected. Only VTune can be detected for now + RUNNING_UNDER_PROFILER = 'VS_PROFILER' in os.environ + + # Enables jit events in LLVM to support profiling of dynamic code + ENABLE_PROFILING = _readenv( + "NUMBA_ENABLE_PROFILING", int, int(RUNNING_UNDER_PROFILER)) + + # Debug Info + + # The default value for the `debug` flag + DEBUGINFO_DEFAULT = _readenv("NUMBA_DEBUGINFO", int, ENABLE_PROFILING) + CUDA_DEBUGINFO_DEFAULT = _readenv("NUMBA_CUDA_DEBUGINFO", int, 0) + + EXTEND_VARIABLE_LIFETIMES = _readenv("NUMBA_EXTEND_VARIABLE_LIFETIMES", + int, 0) + + # gdb binary location + def which_gdb(path_or_bin): + gdb = shutil.which(path_or_bin) + return gdb if gdb is not None else path_or_bin + + GDB_BINARY = _readenv("NUMBA_GDB_BINARY", which_gdb, 'gdb') + + # CUDA Memory management + CUDA_MEMORY_MANAGER = _readenv("NUMBA_CUDA_MEMORY_MANAGER", str, + 'default') + + # Experimental refprune pass + LLVM_REFPRUNE_PASS = _readenv( + "NUMBA_LLVM_REFPRUNE_PASS", int, 1, + ) + LLVM_REFPRUNE_FLAGS = _readenv( + "NUMBA_LLVM_REFPRUNE_FLAGS", str, + "all" if LLVM_REFPRUNE_PASS else "", + ) + + # Timing support. + + # LLVM_PASS_TIMINGS enables LLVM recording of pass timings. + LLVM_PASS_TIMINGS = _readenv( + "NUMBA_LLVM_PASS_TIMINGS", int, 0, + ) + + # Inject the configuration values into the module globals + for name, value in locals().copy().items(): + if name.isupper(): + globals()[name] = value + + +_env_reloader = _EnvReloader() + + +def reload_config(): + """ + Reload the configuration from environment variables, if necessary. + """ + _env_reloader.update() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/consts.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/consts.py new file mode 100644 index 000000000..d062b3208 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/consts.py @@ -0,0 +1,118 @@ +from types import ModuleType + +import weakref + +from numba.core.errors import ConstantInferenceError, NumbaError +from numba.core import ir + + +class ConstantInference(object): + """ + A constant inference engine for a given interpreter. + Inference inspects the IR to try and compute a compile-time constant for + a variable. + + This shouldn't be used directly, instead call Interpreter.infer_constant(). + """ + + def __init__(self, func_ir): + # Avoid cyclic references as some user-visible objects may be + # held alive in the cache + self._func_ir = weakref.proxy(func_ir) + self._cache = {} + + def infer_constant(self, name, loc=None): + """ + Infer a constant value for the given variable *name*. + If no value can be inferred, numba.errors.ConstantInferenceError + is raised. + """ + if name not in self._cache: + try: + self._cache[name] = (True, self._do_infer(name)) + except ConstantInferenceError as exc: + # Store the exception args only, to avoid keeping + # a whole traceback alive. 
+ self._cache[name] = (False, (exc.__class__, exc.args)) + success, val = self._cache[name] + if success: + return val + else: + exc, args = val + if issubclass(exc, NumbaError): + raise exc(*args, loc=loc) + else: + raise exc(*args) + + def _fail(self, val): + # The location here is set to None because `val` is the ir.Var name + # and not the actual offending use of the var. When this is raised it is + # caught in the flow control of `infer_constant` and the class and args + # (the message) are captured and then raised again but with the location + # set to the expression that caused the constant inference error. + raise ConstantInferenceError( + "Constant inference not possible for: %s" % (val,), loc=None) + + def _do_infer(self, name): + if not isinstance(name, str): + raise TypeError("infer_constant() called with non-str %r" + % (name,)) + try: + defn = self._func_ir.get_definition(name) + except KeyError: + raise ConstantInferenceError( + "no single definition for %r" % (name,)) + try: + const = defn.infer_constant() + except ConstantInferenceError: + if isinstance(defn, ir.Expr): + return self._infer_expr(defn) + self._fail(defn) + return const + + def _infer_expr(self, expr): + # Infer an expression: handle supported cases + if expr.op == 'call': + func = self.infer_constant(expr.func.name, loc=expr.loc) + return self._infer_call(func, expr) + elif expr.op == 'getattr': + value = self.infer_constant(expr.value.name, loc=expr.loc) + return self._infer_getattr(value, expr) + elif expr.op == 'build_list': + return [self.infer_constant(i.name, loc=expr.loc) for i in + expr.items] + elif expr.op == 'build_tuple': + return tuple(self.infer_constant(i.name, loc=expr.loc) for i in + expr.items) + self._fail(expr) + + def _infer_call(self, func, expr): + if expr.kws or expr.vararg: + self._fail(expr) + # Check supported callables + _slice = func in (slice,) + _exc = isinstance(func, type) and issubclass(func, BaseException) + if _slice or _exc: + args = [self.infer_constant(a.name, loc=expr.loc) for a in + expr.args] + if _slice: + return func(*args) + elif _exc: + # If the exception class is user defined it may implement a ctor + # that does not pass the args to the super. Therefore return the + # raw class and the args so this can be instantiated at the call + # site in the way the user source expects it to be. + return func, args + else: + assert 0, 'Unreachable' + + self._fail(expr) + + def _infer_getattr(self, value, expr): + if isinstance(value, (ModuleType, type)): + # Allow looking up a constant on a class or module + try: + return getattr(value, expr.attr) + except AttributeError: + pass + self._fail(expr) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/controlflow.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/controlflow.py new file mode 100644 index 000000000..344f6e2ce --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/controlflow.py @@ -0,0 +1,954 @@ +import collections +import functools +import sys + +from numba.core import utils +from numba.core.ir import Loc +from numba.core.errors import UnsupportedError + +# List of bytecodes creating a new block in the control flow graph +# (in addition to explicit jump labels). 
+NEW_BLOCKERS = frozenset(['SETUP_LOOP', 'FOR_ITER', 'SETUP_WITH']) + + +class CFBlock(object): + + def __init__(self, offset): + self.offset = offset + self.body = [] + # A map of jumps to outgoing blocks (successors): + # { offset of outgoing block -> number of stack pops } + self.outgoing_jumps = {} + # A map of jumps to incoming blocks (predecessors): + # { offset of incoming block -> number of stack pops } + self.incoming_jumps = {} + self.terminating = False + + def __repr__(self): + args = (self.offset, + sorted(self.outgoing_jumps), + sorted(self.incoming_jumps)) + return "block(offset:%d, outgoing: %s, incoming: %s)" % args + + def __iter__(self): + return iter(self.body) + + +class Loop(collections.namedtuple("Loop", + ("entries", "exits", "header", "body"))): + """ + A control flow loop, as detected by a CFGraph object. + """ + + __slots__ = () + + # The loop header is enough to detect that two loops are really + # the same, assuming they belong to the same graph. + # (note: in practice, only one loop instance is created per graph + # loop, so identity would be fine) + + def __eq__(self, other): + return isinstance(other, Loop) and other.header == self.header + + def __hash__(self): + return hash(self.header) + + +class _DictOfContainers(collections.defaultdict): + """A defaultdict with customized equality checks that ignore empty values. + + Non-empty value is checked by: `bool(value_item) == True`. + """ + + def __eq__(self, other): + if isinstance(other, _DictOfContainers): + mine = self._non_empty_items() + theirs = other._non_empty_items() + return mine == theirs + + return NotImplemented + + def __ne__(self, other): + ret = self.__eq__(other) + if ret is NotImplemented: + return ret + else: + return not ret + + def _non_empty_items(self): + return [(k, vs) for k, vs in sorted(self.items()) if vs] + + +class CFGraph(object): + """ + Generic (almost) implementation of a Control Flow Graph. + """ + + def __init__(self): + self._nodes = set() + self._preds = _DictOfContainers(set) + self._succs = _DictOfContainers(set) + self._edge_data = {} + self._entry_point = None + + def add_node(self, node): + """ + Add *node* to the graph. This is necessary before adding any + edges from/to the node. *node* can be any hashable object. + """ + self._nodes.add(node) + + def add_edge(self, src, dest, data=None): + """ + Add an edge from node *src* to node *dest*, with optional + per-edge *data*. + If such an edge already exists, it is replaced (duplicate edges + are not possible). + """ + if src not in self._nodes: + raise ValueError("Cannot add edge as src node %s not in nodes %s" % + (src, self._nodes)) + if dest not in self._nodes: + raise ValueError("Cannot add edge as dest node %s not in nodes %s" % + (dest, self._nodes)) + self._add_edge(src, dest, data) + + def successors(self, src): + """ + Yield (node, data) pairs representing the successors of node *src*. + (*data* will be None if no data was specified when adding the edge) + """ + for dest in self._succs[src]: + yield dest, self._edge_data[src, dest] + + def predecessors(self, dest): + """ + Yield (node, data) pairs representing the predecessors of node *dest*. + (*data* will be None if no data was specified when adding the edge) + """ + for src in self._preds[dest]: + yield src, self._edge_data[src, dest] + + def set_entry_point(self, node): + """ + Set the entry point of the graph to *node*. 
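+
+        A small usage sketch for this class (nodes are arbitrary hashable
+        objects, plain ints here)::
+
+            g = CFGraph()
+            for n in (0, 1, 2, 3):
+                g.add_node(n)
+            g.add_edge(0, 1)
+            g.add_edge(0, 2)
+            g.add_edge(1, 3)
+            g.add_edge(2, 3)
+            g.set_entry_point(0)
+            g.process()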
+ """ + assert node in self._nodes + self._entry_point = node + + def process(self): + """ + Compute essential properties of the control flow graph. The graph + must have been fully populated, and its entry point specified. Other + graph properties are computed on-demand. + """ + if self._entry_point is None: + raise RuntimeError("no entry point defined!") + self._eliminate_dead_blocks() + + def dominators(self): + """ + Return a dictionary of {node -> set(nodes)} mapping each node to + the nodes dominating it. + + A node D dominates a node N when any path leading to N must go through D + """ + return self._doms + + def post_dominators(self): + """ + Return a dictionary of {node -> set(nodes)} mapping each node to + the nodes post-dominating it. + + A node P post-dominates a node N when any path starting from N must go + through P. + """ + return self._post_doms + + def immediate_dominators(self): + """ + Return a dictionary of {node -> node} mapping each node to its + immediate dominator (idom). + + The idom(B) is the closest strict dominator of V + """ + return self._idom + + def dominance_frontier(self): + """ + Return a dictionary of {node -> set(nodes)} mapping each node to + the nodes in its dominance frontier. + + The dominance frontier _df(N) is the set of all nodes that are + immediate successors to blocks dominated by N but which aren't + strictly dominated by N + """ + return self._df + + def dominator_tree(self): + """ + return a dictionary of {node -> set(nodes)} mapping each node to + the set of nodes it immediately dominates + + The domtree(B) is the closest strict set of nodes that B dominates + """ + return self._domtree + + @utils.cached_property + def _exit_points(self): + return self._find_exit_points() + + @utils.cached_property + def _doms(self): + return self._find_dominators() + + @utils.cached_property + def _back_edges(self): + return self._find_back_edges() + + @utils.cached_property + def _topo_order(self): + return self._find_topo_order() + + @utils.cached_property + def _descs(self): + return self._find_descendents() + + @utils.cached_property + def _loops(self): + return self._find_loops() + + @utils.cached_property + def _in_loops(self): + return self._find_in_loops() + + @utils.cached_property + def _post_doms(self): + return self._find_post_dominators() + + @utils.cached_property + def _idom(self): + return self._find_immediate_dominators() + + @utils.cached_property + def _df(self): + return self._find_dominance_frontier() + + @utils.cached_property + def _domtree(self): + return self._find_dominator_tree() + + def descendents(self, node): + """ + Return the set of descendents of the given *node*, in topological + order (ignoring back edges). + """ + return self._descs[node] + + def entry_point(self): + """ + Return the entry point node. + """ + assert self._entry_point is not None + return self._entry_point + + def exit_points(self): + """ + Return the computed set of exit nodes (may be empty). + """ + return self._exit_points + + def backbone(self): + """ + Return the set of nodes constituting the graph's backbone. + (i.e. the nodes that every path starting from the entry point + must go through). By construction, it is non-empty: it contains + at least the entry point. + """ + return self._post_doms[self._entry_point] + + def loops(self): + """ + Return a dictionary of {node -> loop} mapping each loop header + to the loop (a Loop instance) starting with it. 
+ """ + return self._loops + + def in_loops(self, node): + """ + Return the list of Loop objects the *node* belongs to, + from innermost to outermost. + """ + return [self._loops[x] for x in self._in_loops.get(node, ())] + + def dead_nodes(self): + """ + Return the set of dead nodes (eliminated from the graph). + """ + return self._dead_nodes + + def nodes(self): + """ + Return the set of live nodes. + """ + return self._nodes + + def topo_order(self): + """ + Return the sequence of nodes in topological order (ignoring back + edges). + """ + return self._topo_order + + def topo_sort(self, nodes, reverse=False): + """ + Iterate over the *nodes* in topological order (ignoring back edges). + The sort isn't guaranteed to be stable. + """ + nodes = set(nodes) + it = self._topo_order + if reverse: + it = reversed(it) + for n in it: + if n in nodes: + yield n + + def dump(self, file=None): + """ + Dump extensive debug information. + """ + import pprint + file = file or sys.stdout + if 1: + print("CFG adjacency lists:", file=file) + self._dump_adj_lists(file) + print("CFG dominators:", file=file) + pprint.pprint(self._doms, stream=file) + print("CFG post-dominators:", file=file) + pprint.pprint(self._post_doms, stream=file) + print("CFG back edges:", sorted(self._back_edges), file=file) + print("CFG loops:", file=file) + pprint.pprint(self._loops, stream=file) + print("CFG node-to-loops:", file=file) + pprint.pprint(self._in_loops, stream=file) + print("CFG backbone:", file=file) + pprint.pprint(self.backbone(), stream=file) + + def render_dot(self, filename="numba_cfg.dot"): + """Render the controlflow graph with GraphViz DOT via the + ``graphviz`` python binding. + + Returns + ------- + g : graphviz.Digraph + Use `g.view()` to open the graph in the default PDF application. + """ + + try: + import graphviz as gv + except ImportError: + raise ImportError( + "The feature requires `graphviz` but it is not available. " + "Please install with `pip install graphviz`" + ) + g = gv.Digraph(filename=filename) + # Populate the nodes + for n in self._nodes: + g.node(str(n)) + # Populate the edges + for n in self._nodes: + for edge in self._succs[n]: + g.edge(str(n), str(edge)) + return g + + # Internal APIs + + def _add_edge(self, from_, to, data=None): + # This internal version allows adding edges to/from unregistered + # (ghost) nodes. + self._preds[to].add(from_) + self._succs[from_].add(to) + self._edge_data[from_, to] = data + + def _remove_node_edges(self, node): + for succ in self._succs.pop(node, ()): + self._preds[succ].remove(node) + del self._edge_data[node, succ] + for pred in self._preds.pop(node, ()): + self._succs[pred].remove(node) + del self._edge_data[pred, node] + + def _dfs(self, entries=None): + if entries is None: + entries = (self._entry_point,) + seen = set() + stack = list(entries) + while stack: + node = stack.pop() + if node not in seen: + yield node + seen.add(node) + for succ in self._succs[node]: + stack.append(succ) + + def _eliminate_dead_blocks(self): + """ + Eliminate all blocks not reachable from the entry point, and + stash them into self._dead_nodes. + """ + live = set() + for node in self._dfs(): + live.add(node) + self._dead_nodes = self._nodes - live + self._nodes = live + # Remove all edges leading from dead nodes + for dead in self._dead_nodes: + self._remove_node_edges(dead) + + def _find_exit_points(self): + """ + Compute the graph's exit points. 
+ """ + exit_points = set() + for n in self._nodes: + if not self._succs.get(n): + exit_points.add(n) + return exit_points + + def _find_postorder(self): + succs = self._succs + back_edges = self._back_edges + post_order = [] + seen = set() + + post_order = [] + + # DFS + def dfs_rec(node): + if node not in seen: + seen.add(node) + stack.append((post_order.append, node)) + for dest in succs[node]: + if (node, dest) not in back_edges: + stack.append((dfs_rec, dest)) + + stack = [(dfs_rec, self._entry_point)] + while stack: + cb, data = stack.pop() + cb(data) + + return post_order + + def _find_immediate_dominators(self): + # The algorithm implemented computes the immediate dominator + # for each node in the CFG which is equivalent to build a dominator tree + # Based on the implementation from NetworkX + # library - nx.immediate_dominators + # https://github.com/networkx/networkx/blob/858e7cb183541a78969fed0cbcd02346f5866c02/networkx/algorithms/dominance.py # noqa: E501 + # References: + # Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy + # A Simple, Fast Dominance Algorithm + # https://www.cs.rice.edu/~keith/EMBED/dom.pdf + def intersect(u, v): + while u != v: + while idx[u] < idx[v]: + u = idom[u] + while idx[u] > idx[v]: + v = idom[v] + return u + + entry = self._entry_point + preds_table = self._preds + + order = self._find_postorder() + idx = {e: i for i, e in enumerate(order)} # index of each node + idom = {entry : entry} + order.pop() + order.reverse() + + changed = True + while changed: + changed = False + for u in order: + new_idom = functools.reduce(intersect, + (v for v in preds_table[u] + if v in idom)) + if u not in idom or idom[u] != new_idom: + idom[u] = new_idom + changed = True + + return idom + + def _find_dominator_tree(self): + idom = self._idom + domtree = _DictOfContainers(set) + + for u, v in idom.items(): + # v dominates u + if u not in domtree: + domtree[u] = set() + if u != v: + domtree[v].add(u) + + return domtree + + def _find_dominance_frontier(self): + idom = self._idom + preds_table = self._preds + df = {u: set() for u in idom} + + for u in idom: + if len(preds_table[u]) < 2: + continue + for v in preds_table[u]: + while v != idom[u]: + df[v].add(u) + v = idom[v] + + return df + + def _find_dominators_internal(self, post=False): + # See theoretical description in + # http://en.wikipedia.org/wiki/Dominator_%28graph_theory%29 + # The algorithm implemented here uses a todo-list as described + # in http://pages.cs.wisc.edu/~fischer/cs701.f08/finding.loops.html + if post: + entries = set(self._exit_points) + preds_table = self._succs + succs_table = self._preds + else: + entries = set([self._entry_point]) + preds_table = self._preds + succs_table = self._succs + + if not entries: + raise RuntimeError("no entry points: dominator algorithm " + "cannot be seeded") + + doms = {} + for e in entries: + doms[e] = set([e]) + + todo = [] + for n in self._nodes: + if n not in entries: + doms[n] = set(self._nodes) + todo.append(n) + + while todo: + n = todo.pop() + if n in entries: + continue + new_doms = set([n]) + preds = preds_table[n] + if preds: + new_doms |= functools.reduce(set.intersection, + [doms[p] for p in preds]) + if new_doms != doms[n]: + assert len(new_doms) < len(doms[n]) + doms[n] = new_doms + todo.extend(succs_table[n]) + return doms + + def _find_dominators(self): + return self._find_dominators_internal(post=False) + + def _find_post_dominators(self): + # To handle infinite loops correctly, we need to add a dummy + # exit point, and link members of 
infinite loops to it. + dummy_exit = object() + self._exit_points.add(dummy_exit) + for loop in self._loops.values(): + if not loop.exits: + for b in loop.body: + self._add_edge(b, dummy_exit) + pdoms = self._find_dominators_internal(post=True) + # Fix the _post_doms table to make no reference to the dummy exit + del pdoms[dummy_exit] + for doms in pdoms.values(): + doms.discard(dummy_exit) + self._remove_node_edges(dummy_exit) + self._exit_points.remove(dummy_exit) + return pdoms + + # Finding loops and back edges: see + # http://pages.cs.wisc.edu/~fischer/cs701.f08/finding.loops.html + + def _find_back_edges(self, stats=None): + """ + Find back edges. An edge (src, dest) is a back edge if and + only if *dest* dominates *src*. + """ + # Prepare stats to capture execution information + if stats is not None: + if not isinstance(stats, dict): + raise TypeError(f"*stats* must be a dict; got {type(stats)}") + stats.setdefault('iteration_count', 0) + + # Uses a simple DFS to find back-edges. + # The new algorithm is faster than the the previous dominator based + # algorithm. + back_edges = set() + # stack: keeps track of the traversal path + stack = [] + # succs_state: keep track of unvisited successors of a node + succs_state = {} + entry_point = self.entry_point() + + checked = set() + + def push_state(node): + stack.append(node) + succs_state[node] = [dest for dest in self._succs[node]] + + push_state(entry_point) + + # Keep track for iteration count for debugging + iter_ct = 0 + while stack: + iter_ct += 1 + tos = stack[-1] + tos_succs = succs_state[tos] + # Are there successors not checked? + if tos_succs: + # Check the next successor + cur_node = tos_succs.pop() + # Is it in our traversal path? + if cur_node in stack: + # Yes, it's a backedge + back_edges.add((tos, cur_node)) + elif cur_node not in checked: + # Push + push_state(cur_node) + else: + # Checked all successors. Pop + stack.pop() + checked.add(tos) + + if stats is not None: + stats['iteration_count'] += iter_ct + return back_edges + + def _find_topo_order(self): + succs = self._succs + back_edges = self._back_edges + post_order = [] + seen = set() + + def _dfs_rec(node): + if node not in seen: + seen.add(node) + for dest in succs[node]: + if (node, dest) not in back_edges: + _dfs_rec(dest) + post_order.append(node) + + _dfs_rec(self._entry_point) + post_order.reverse() + return post_order + + def _find_descendents(self): + descs = {} + for node in reversed(self._topo_order): + descs[node] = node_descs = set() + for succ in self._succs[node]: + if (node, succ) not in self._back_edges: + node_descs.add(succ) + node_descs.update(descs[succ]) + return descs + + def _find_loops(self): + """ + Find the loops defined by the graph's back edges. + """ + bodies = {} + for src, dest in self._back_edges: + # The destination of the back edge is the loop header + header = dest + # Build up the loop body from the back edge's source node, + # up to the source header. + body = set([header]) + queue = [src] + while queue: + n = queue.pop() + if n not in body: + body.add(n) + queue.extend(self._preds[n]) + # There can be several back edges to a given loop header; + # if so, merge the resulting body fragments. + if header in bodies: + bodies[header].update(body) + else: + bodies[header] = body + + # Create a Loop object for each header. 
+ loops = {} + for header, body in bodies.items(): + entries = set() + exits = set() + for n in body: + entries.update(self._preds[n] - body) + exits.update(self._succs[n] - body) + loop = Loop(header=header, body=body, entries=entries, exits=exits) + loops[header] = loop + return loops + + def _find_in_loops(self): + loops = self._loops + # Compute the loops to which each node belongs. + in_loops = dict((n, []) for n in self._nodes) + # Sort loops from longest to shortest + # This ensures that outer loops will come before inner loops + for loop in sorted(loops.values(), key=lambda loop: len(loop.body)): + for n in loop.body: + in_loops[n].append(loop.header) + return in_loops + + def _dump_adj_lists(self, file): + adj_lists = dict((src, sorted(list(dests))) + for src, dests in self._succs.items()) + import pprint + pprint.pprint(adj_lists, stream=file) + + def __eq__(self, other): + if not isinstance(other, CFGraph): + raise NotImplementedError + + for x in ['_nodes', '_edge_data', '_entry_point', '_preds', '_succs']: + this = getattr(self, x, None) + that = getattr(other, x, None) + if this != that: + return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + + +class ControlFlowAnalysis(object): + """ + Attributes + ---------- + - bytecode + + - blocks + + - blockseq + + - doms: dict of set + Dominators + + - backbone: set of block offsets + The set of block that is common to all possible code path. + + """ + def __init__(self, bytecode): + self.bytecode = bytecode + self.blocks = {} + self.liveblocks = {} + self.blockseq = [] + self.doms = None + self.backbone = None + # Internal temp states + self._force_new_block = True + self._curblock = None + self._blockstack = [] + self._loops = [] + self._withs = [] + + def iterblocks(self): + """ + Return all blocks in sequence of occurrence + """ + for i in self.blockseq: + yield self.blocks[i] + + def iterliveblocks(self): + """ + Return all live blocks in sequence of occurrence + """ + for i in self.blockseq: + if i in self.liveblocks: + yield self.blocks[i] + + def incoming_blocks(self, block): + """ + Yield (incoming block, number of stack pops) pairs for *block*. + """ + for i, pops in block.incoming_jumps.items(): + if i in self.liveblocks: + yield self.blocks[i], pops + + def dump(self, file=None): + self.graph.dump(file=None) + + def run(self): + for inst in self._iter_inst(): + fname = "op_%s" % inst.opname + fn = getattr(self, fname, None) + if fn is not None: + fn(inst) + elif inst.is_jump: + # this catches e.g. try... 
except + l = Loc(self.bytecode.func_id.filename, inst.lineno) + if inst.opname in {"SETUP_EXCEPT", "SETUP_FINALLY"}: + msg = "'try' block not supported until python3.7 or later" + else: + msg = "Use of unsupported opcode (%s) found" % inst.opname + raise UnsupportedError(msg, loc=l) + else: + # Non-jump instructions are ignored + pass # intentionally + + # Close all blocks + for cur, nxt in zip(self.blockseq, self.blockseq[1:]): + blk = self.blocks[cur] + if not blk.outgoing_jumps and not blk.terminating: + blk.outgoing_jumps[nxt] = 0 + + graph = CFGraph() + for b in self.blocks: + graph.add_node(b) + for b in self.blocks.values(): + for out, pops in b.outgoing_jumps.items(): + graph.add_edge(b.offset, out, pops) + graph.set_entry_point(min(self.blocks)) + graph.process() + self.graph = graph + + # Fill incoming + for b in self.blocks.values(): + for out, pops in b.outgoing_jumps.items(): + self.blocks[out].incoming_jumps[b.offset] = pops + + # Find liveblocks + self.liveblocks = dict((i, self.blocks[i]) + for i in self.graph.nodes()) + + for lastblk in reversed(self.blockseq): + if lastblk in self.liveblocks: + break + else: + raise AssertionError("No live block that exits!?") + + # Find backbone + backbone = self.graph.backbone() + # Filter out in loop blocks (Assuming no other cyclic control blocks) + # This is to unavoid variable defined in loops to be considered as + # function scope. + inloopblocks = set() + + for b in self.blocks.keys(): + if self.graph.in_loops(b): + inloopblocks.add(b) + + self.backbone = backbone - inloopblocks + + def jump(self, target, pops=0): + """ + Register a jump (conditional or not) to *target* offset. + *pops* is the number of stack pops implied by the jump (default 0). + """ + self._curblock.outgoing_jumps[target] = pops + + def _iter_inst(self): + for inst in self.bytecode: + if self._use_new_block(inst): + self._guard_with_as(inst) + self._start_new_block(inst) + self._curblock.body.append(inst.offset) + yield inst + + def _use_new_block(self, inst): + if inst.offset in self.bytecode.labels: + res = True + elif inst.opname in NEW_BLOCKERS: + res = True + else: + res = self._force_new_block + + self._force_new_block = False + return res + + def _start_new_block(self, inst): + self._curblock = CFBlock(inst.offset) + self.blocks[inst.offset] = self._curblock + self.blockseq.append(inst.offset) + + def _guard_with_as(self, current_inst): + """Checks if the next instruction after a SETUP_WITH is something other + than a POP_TOP, if it is something else it'll be some sort of store + which is not supported (this corresponds to `with CTXMGR as VAR(S)`).""" + if current_inst.opname == "SETUP_WITH": + next_op = self.bytecode[current_inst.next].opname + if next_op != "POP_TOP": + msg = ("The 'with (context manager) as " + "(variable):' construct is not " + "supported.") + raise UnsupportedError(msg) + + def op_SETUP_LOOP(self, inst): + end = inst.get_jump_target() + self._blockstack.append(end) + self._loops.append((inst.offset, end)) + # TODO: Looplifting requires the loop entry be its own block. + # Forcing a new block here is the simplest solution for now. + # But, we should consider other less ad-hoc ways. + self.jump(inst.next) + self._force_new_block = True + + def op_SETUP_WITH(self, inst): + end = inst.get_jump_target() + self._blockstack.append(end) + self._withs.append((inst.offset, end)) + # TODO: WithLifting requires the loop entry be its own block. + # Forcing a new block here is the simplest solution for now. 
+ # But, we should consider other less ad-hoc ways. + self.jump(inst.next) + self._force_new_block = True + + def op_POP_BLOCK(self, inst): + self._blockstack.pop() + + def op_FOR_ITER(self, inst): + self.jump(inst.get_jump_target()) + self.jump(inst.next) + self._force_new_block = True + + def _op_ABSOLUTE_JUMP_IF(self, inst): + self.jump(inst.get_jump_target()) + self.jump(inst.next) + self._force_new_block = True + + op_POP_JUMP_IF_FALSE = _op_ABSOLUTE_JUMP_IF + op_POP_JUMP_IF_TRUE = _op_ABSOLUTE_JUMP_IF + op_JUMP_IF_FALSE = _op_ABSOLUTE_JUMP_IF + op_JUMP_IF_TRUE = _op_ABSOLUTE_JUMP_IF + + def _op_ABSOLUTE_JUMP_OR_POP(self, inst): + self.jump(inst.get_jump_target()) + self.jump(inst.next, pops=1) + self._force_new_block = True + + op_JUMP_IF_FALSE_OR_POP = _op_ABSOLUTE_JUMP_OR_POP + op_JUMP_IF_TRUE_OR_POP = _op_ABSOLUTE_JUMP_OR_POP + + def op_JUMP_ABSOLUTE(self, inst): + self.jump(inst.get_jump_target()) + self._force_new_block = True + + def op_JUMP_FORWARD(self, inst): + self.jump(inst.get_jump_target()) + self._force_new_block = True + + def op_RETURN_VALUE(self, inst): + self._curblock.terminating = True + self._force_new_block = True + + def op_RAISE_VARARGS(self, inst): + self._curblock.terminating = True + self._force_new_block = True + + def op_BREAK_LOOP(self, inst): + self.jump(self._blockstack[-1]) + self._force_new_block = True diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu.py new file mode 100644 index 000000000..a27deb136 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu.py @@ -0,0 +1,379 @@ +import sys +import platform + +import llvmlite.binding as ll +from llvmlite import ir + +from numba import _dynfunc +from numba.core.callwrapper import PyCallWrapper +from numba.core.base import BaseContext, PYOBJECT +from numba.core import utils, types, config, cgutils, callconv, codegen, externals, fastmathpass, intrinsics +from numba.core.utils import cached_property +from numba.core.options import TargetOptions, include_default_options +from numba.core.runtime import rtsys +from numba.core.compiler_lock import global_compiler_lock +import numba.core.entrypoints +from numba.core.cpu_options import (ParallelOptions, FastMathOptions, + InlineOptions) +from numba.np import ufunc_db + +# Keep those structures in sync with _dynfunc.c. + +class ClosureBody(cgutils.Structure): + _fields = [('env', types.pyobject)] + + +class EnvBody(cgutils.Structure): + _fields = [ + ('globals', types.pyobject), + ('consts', types.pyobject), + ] + + +class CPUContext(BaseContext): + """ + Changes BaseContext calling convention + """ + allow_dynamic_globals = True + + def __init__(self, typingctx, target='cpu'): + super().__init__(typingctx, target) + + # Overrides + def create_module(self, name): + return self._internal_codegen._create_empty_module(name) + + @global_compiler_lock + def init(self): + self.is32bit = (utils.MACHINE_BITS == 32) + self._internal_codegen = codegen.JITCPUCodegen("numba.exec") + + # Add ARM ABI functions from libgcc_s + if platform.machine() == 'armv7l': + ll.load_library_permanently('libgcc_s.so.1') + + # Map external C functions. 
+ externals.c_math_functions.install(self) + + # Initialize NRT runtime + rtsys.initialize(self) + + # Add lower_extension attribute + self.lower_extensions = {} + from numba.parfors.parfor_lowering import _lower_parfor_parallel + from numba.parfors.parfor import Parfor + # Specify how to lower Parfor nodes using the lower_extensions + self.lower_extensions[Parfor] = _lower_parfor_parallel + + def load_additional_registries(self): + # Add implementations that work via import + from numba.cpython import (builtins, charseq, enumimpl, hashing, heapq, + iterators, listobj, numbers, rangeobj, + setobj, slicing, tupleobj, unicode,) + from numba.core import optional + from numba.misc import gdb_hook, literal + from numba.np import linalg, polynomial, arraymath, arrayobj + from numba.np.random import generator_core, generator_methods + from numba.typed import typeddict, dictimpl + from numba.typed import typedlist, listobject + from numba.experimental import jitclass, function_type + from numba.np import npdatetime + + # Add target specific implementations + from numba.np import npyimpl + from numba.cpython import cmathimpl, mathimpl, printimpl, randomimpl + from numba.misc import cffiimpl + from numba.experimental.jitclass.base import ClassBuilder as \ + jitclassimpl + self.install_registry(cmathimpl.registry) + self.install_registry(cffiimpl.registry) + self.install_registry(mathimpl.registry) + self.install_registry(npyimpl.registry) + self.install_registry(printimpl.registry) + self.install_registry(randomimpl.registry) + self.install_registry(jitclassimpl.class_impl_registry) + + # load 3rd party extensions + numba.core.entrypoints.init_all() + + @property + def target_data(self): + return self._internal_codegen.target_data + + def with_aot_codegen(self, name, **aot_options): + aot_codegen = codegen.AOTCPUCodegen(name, **aot_options) + return self.subtarget(_internal_codegen=aot_codegen, + aot_mode=True) + + def codegen(self): + return self._internal_codegen + + @cached_property + def call_conv(self): + return callconv.CPUCallConv(self) + + def get_env_body(self, builder, envptr): + """ + From the given *envptr* (a pointer to a _dynfunc.Environment object), + get a EnvBody allowing structured access to environment fields. + """ + body_ptr = cgutils.pointer_add( + builder, envptr, _dynfunc._impl_info['offsetof_env_body']) + return EnvBody(self, builder, ref=body_ptr, cast_ref=True) + + def get_env_manager(self, builder): + envgv = self.declare_env_global(builder.module, + self.get_env_name(self.fndesc)) + envarg = builder.load(envgv) + pyapi = self.get_python_api(builder) + pyapi.emit_environment_sentry( + envarg, debug_msg=self.fndesc.env_name, + ) + env_body = self.get_env_body(builder, envarg) + return pyapi.get_env_manager(self.environment, env_body, envarg) + + def get_generator_state(self, builder, genptr, return_type): + """ + From the given *genptr* (a pointer to a _dynfunc.Generator object), + get a pointer to its state area. + """ + return cgutils.pointer_add( + builder, genptr, _dynfunc._impl_info['offsetof_generator_state'], + return_type=return_type) + + def build_list(self, builder, list_type, items): + """ + Build a list from the Numba *list_type* and its initial *items*. + """ + from numba.cpython import listobj + return listobj.build_list(self, builder, list_type, items) + + def build_set(self, builder, set_type, items): + """ + Build a set from the Numba *set_type* and its initial *items*. 
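+
+        A hedged sketch of a call site during lowering (``builder`` and the
+        item values come from the enclosing lowering context; the names are
+        illustrative)::
+
+            set_type = types.Set(types.int64)
+            s = ctx.build_set(builder, set_type, [item0, item1])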
+ """ + from numba.cpython import setobj + return setobj.build_set(self, builder, set_type, items) + + def build_map(self, builder, dict_type, item_types, items): + from numba.typed import dictobject + + return dictobject.build_map(self, builder, dict_type, item_types, items) + + + def post_lowering(self, mod, library): + if self.fastmath: + fastmathpass.rewrite_module(mod, self.fastmath) + + if self.is32bit: + # 32-bit machine needs to replace all 64-bit div/rem to avoid + # calls to compiler-rt + intrinsics.fix_divmod(mod) + + library.add_linking_library(rtsys.library) + + def create_cpython_wrapper(self, library, fndesc, env, call_helper, + release_gil=False): + wrapper_module = self.create_module("wrapper") + fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) + wrapper_callee = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) + builder = PyCallWrapper(self, wrapper_module, wrapper_callee, + fndesc, env, call_helper=call_helper, + release_gil=release_gil) + builder.build() + library.add_ir_module(wrapper_module) + + def create_cfunc_wrapper(self, library, fndesc, env, call_helper): + wrapper_module = self.create_module("cfunc_wrapper") + fnty = self.call_conv.get_function_type(fndesc.restype, fndesc.argtypes) + wrapper_callee = ir.Function(wrapper_module, fnty, fndesc.llvm_func_name) + + ll_argtypes = [self.get_value_type(ty) for ty in fndesc.argtypes] + ll_return_type = self.get_value_type(fndesc.restype) + wrapty = ir.FunctionType(ll_return_type, ll_argtypes) + wrapfn = ir.Function(wrapper_module, wrapty, fndesc.llvm_cfunc_wrapper_name) + builder = ir.IRBuilder(wrapfn.append_basic_block('entry')) + + status, out = self.call_conv.call_function( + builder, wrapper_callee, fndesc.restype, fndesc.argtypes, + wrapfn.args, attrs=('noinline',)) + + with builder.if_then(status.is_error, likely=False): + # If (and only if) an error occurred, acquire the GIL + # and use the interpreter to write out the exception. 
+ pyapi = self.get_python_api(builder) + gil_state = pyapi.gil_ensure() + self.call_conv.raise_error(builder, pyapi, status) + cstr = self.insert_const_string(builder.module, repr(self)) + strobj = pyapi.string_from_string(cstr) + pyapi.err_write_unraisable(strobj) + pyapi.decref(strobj) + pyapi.gil_release(gil_state) + + builder.ret(out) + library.add_ir_module(wrapper_module) + + def get_executable(self, library, fndesc, env): + """ + Returns + ------- + (cfunc, fnptr) + + - cfunc + callable function (Can be None) + - fnptr + callable function address + - env + an execution environment (from _dynfunc) + """ + # Code generation + baseptr = library.get_pointer_to_function(fndesc.llvm_func_name) + fnptr = library.get_pointer_to_function(fndesc.llvm_cpython_wrapper_name) + + # Note: we avoid reusing the original docstring to avoid encoding + # issues on Python 2, see issue #1908 + doc = "compiled wrapper for %r" % (fndesc.qualname,) + cfunc = _dynfunc.make_function(fndesc.lookup_module(), + fndesc.qualname.split('.')[-1], + doc, fnptr, env, + # objects to keepalive with the function + (library,) + ) + library.codegen.set_env(self.get_env_name(fndesc), env) + return cfunc + + def calc_array_sizeof(self, ndim): + ''' + Calculate the size of an array struct on the CPU target + ''' + aryty = types.Array(types.int32, ndim, 'A') + return self.get_abi_sizeof(self.get_value_type(aryty)) + + # Overrides + def get_ufunc_info(self, ufunc_key): + return ufunc_db.get_ufunc_info(ufunc_key) + + +# ---------------------------------------------------------------------------- +# TargetOptions + +_options_mixin = include_default_options( + "nopython", + "forceobj", + "looplift", + "_nrt", + "debug", + "boundscheck", + "nogil", + "no_rewrites", + "no_cpython_wrapper", + "no_cfunc_wrapper", + "parallel", + "fastmath", + "error_model", + "inline", + "forceinline", + # Add "target_backend" as a accepted option for the CPU in @jit(...) + "target_backend", + "_dbg_extend_lifetimes", + "_dbg_optnone", +) + +class CPUTargetOptions(_options_mixin, TargetOptions): + def finalize(self, flags, options): + if not flags.is_set("enable_pyobject"): + flags.enable_pyobject = True + + if not flags.is_set("enable_looplift"): + flags.enable_looplift = True + + flags.inherit_if_not_set("nrt", default=True) + + if not flags.is_set("debuginfo"): + flags.debuginfo = config.DEBUGINFO_DEFAULT + + if not flags.is_set("dbg_extend_lifetimes"): + if flags.debuginfo: + # auto turn on extend-lifetimes if debuginfo is on and + # dbg_extend_lifetimes is not set + flags.dbg_extend_lifetimes = True + else: + # set flag using env-var config + flags.dbg_extend_lifetimes = config.EXTEND_VARIABLE_LIFETIMES + + if not flags.is_set("boundscheck"): + flags.boundscheck = flags.debuginfo + + flags.enable_pyobject_looplift = True + + flags.inherit_if_not_set("fastmath") + + flags.inherit_if_not_set("error_model", default="python") + + # Add "target_backend" as a option that inherits from the caller + flags.inherit_if_not_set("target_backend") + + flags.inherit_if_not_set("forceinline") + + if flags.forceinline: + # forceinline turns off optnone, just like clang. 
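+            # (LLVM rejects a function carrying both the alwaysinline and
+            # optnone attributes, so the two settings cannot coexist)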
+ flags.optnone = False + +# ---------------------------------------------------------------------------- +# Internal + +def remove_refct_calls(func): + """ + Remove redundant incref/decref within on a per block basis + """ + for bb in func.basic_blocks: + remove_null_refct_call(bb) + remove_refct_pairs(bb) + + +def remove_null_refct_call(bb): + """ + Remove refct api calls to NULL pointer + """ + pass + ## Skipped for now + # for inst in bb.instructions: + # if isinstance(inst, ir.CallInstr): + # fname = inst.called_function.name + # if fname == "Py_IncRef" or fname == "Py_DecRef": + # arg = inst.args[0] + # print(type(arg)) + # if isinstance(arg, lc.ConstantPointerNull): + # inst.erase_from_parent() + + +def remove_refct_pairs(bb): + """ + Remove incref decref pairs on the same variable + """ + + didsomething = True + + while didsomething: + didsomething = False + + increfs = {} + decrefs = {} + + # Mark + for inst in bb.instructions: + if isinstance(inst, ir.CallInstr): + fname = inst.called_function.name + if fname == "Py_IncRef": + arg = inst.operands[0] + increfs[arg] = inst + elif fname == "Py_DecRef": + arg = inst.operands[0] + decrefs[arg] = inst + + # Sweep + for val in increfs.keys(): + if val in decrefs: + increfs[val].erase_from_parent() + decrefs[val].erase_from_parent() + didsomething = True diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu_options.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu_options.py new file mode 100644 index 000000000..e2136c319 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/cpu_options.py @@ -0,0 +1,181 @@ +""" +Defines CPU Options for use in the CPU target +""" +from abc import ABCMeta, abstractmethod + + +class AbstractOptionValue(metaclass=ABCMeta): + """Abstract base class for custom option values. + """ + @abstractmethod + def encode(self) -> str: + """Returns an encoding of the values + """ + ... + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.encode()})" + + +class FastMathOptions(AbstractOptionValue): + """ + Options for controlling fast math optimization. + """ + + def __init__(self, value): + # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags + valid_flags = { + 'fast', + 'nnan', 'ninf', 'nsz', 'arcp', + 'contract', 'afn', 'reassoc', + } + + if isinstance(value, FastMathOptions): + self.flags = value.flags.copy() + elif value is True: + self.flags = {'fast'} + elif value is False: + self.flags = set() + elif isinstance(value, set): + invalid = value - valid_flags + if invalid: + raise ValueError("Unrecognized fastmath flags: %s" % invalid) + self.flags = value + elif isinstance(value, dict): + invalid = set(value.keys()) - valid_flags + if invalid: + raise ValueError("Unrecognized fastmath flags: %s" % invalid) + self.flags = {v for v, enable in value.items() if enable} + else: + msg = "Expected fastmath option(s) to be either a bool, dict or set" + raise ValueError(msg) + + def __bool__(self): + return bool(self.flags) + + __nonzero__ = __bool__ + + def encode(self) -> str: + return str(self.flags) + + def __eq__(self, other): + if type(other) is type(self): + return self.flags == other.flags + return NotImplemented + + +class ParallelOptions(AbstractOptionValue): + """ + Options for controlling auto parallelization. 
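+
+    ``ParallelOptions(True)`` enables every sub-option below, while a dict
+    such as ``ParallelOptions({'fusion': False})`` keeps auto-parallelization
+    enabled but switches off loop fusion only.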
+ """ + __slots__ = ("enabled", "comprehension", "reduction", "inplace_binop", + "setitem", "numpy", "stencil", "fusion", "prange") + + def __init__(self, value): + if isinstance(value, bool): + self.enabled = value + self.comprehension = value + self.reduction = value + self.inplace_binop = value + self.setitem = value + self.numpy = value + self.stencil = value + self.fusion = value + self.prange = value + elif isinstance(value, dict): + self.enabled = True + self.comprehension = value.pop('comprehension', True) + self.reduction = value.pop('reduction', True) + self.inplace_binop = value.pop('inplace_binop', True) + self.setitem = value.pop('setitem', True) + self.numpy = value.pop('numpy', True) + self.stencil = value.pop('stencil', True) + self.fusion = value.pop('fusion', True) + self.prange = value.pop('prange', True) + if value: + msg = "Unrecognized parallel options: %s" % value.keys() + raise NameError(msg) + elif isinstance(value, ParallelOptions): + self.enabled = value.enabled + self.comprehension = value.comprehension + self.reduction = value.reduction + self.inplace_binop = value.inplace_binop + self.setitem = value.setitem + self.numpy = value.numpy + self.stencil = value.stencil + self.fusion = value.fusion + self.prange = value.prange + else: + msg = "Expect parallel option to be either a bool or a dict" + raise ValueError(msg) + + def _get_values(self): + """Get values as dictionary. + """ + return {k: getattr(self, k) for k in self.__slots__} + + def __eq__(self, other): + if type(other) is type(self): + return self._get_values() == other._get_values() + return NotImplemented + + def encode(self) -> str: + return ", ".join(f"{k}={v}" for k, v in self._get_values().items()) + + +class InlineOptions(AbstractOptionValue): + """ + Options for controlling inlining + """ + + def __init__(self, value): + ok = False + if isinstance(value, str): + if value in ('always', 'never'): + ok = True + else: + ok = hasattr(value, '__call__') + + if ok: + self._inline = value + else: + msg = ("kwarg 'inline' must be one of the strings 'always' or " + "'never', or it can be a callable that returns True/False. " + "Found value %s" % value) + raise ValueError(msg) + + @property + def is_never_inline(self): + """ + True if never inline + """ + return self._inline == 'never' + + @property + def is_always_inline(self): + """ + True if always inline + """ + return self._inline == 'always' + + @property + def has_cost_model(self): + """ + True if a cost model is provided + """ + return not (self.is_always_inline or self.is_never_inline) + + @property + def value(self): + """ + The raw value + """ + return self._inline + + def __eq__(self, other): + if type(other) is type(self): + return self.value == other.value + return NotImplemented + + def encode(self) -> str: + return repr(self._inline) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dataflow.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dataflow.py new file mode 100644 index 000000000..5415bd5a1 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dataflow.py @@ -0,0 +1,914 @@ +import collections +from pprint import pprint +import sys +import warnings + +from numba.core.errors import UnsupportedError +from numba.core.ir import Loc + + +class DataFlowAnalysis(object): + """ + Perform stack2reg + + This is necessary to resolve blocks that propagates stack value. + This would allow the use of `and` and `or` and python2.6 jumps. 
+ """ + + def __init__(self, cfa): + self.cfa = cfa + self.bytecode = cfa.bytecode + # { block offset -> BlockInfo } + self.infos = {} + self.edge_process = {} + + def run(self): + for blk in self.cfa.iterliveblocks(): + self.infos[blk.offset] = self.run_on_block(blk) + + def run_on_block(self, blk): + incoming_blocks = [] + info = BlockInfo(blk, blk.offset, incoming_blocks) + edge_callbacks = [] + + for ib, pops in self.cfa.incoming_blocks(blk): + # By nature of Python bytecode, there will be no incoming + # variables from subsequent blocks. This is an easy way + # of breaking the potential circularity of the problem. + if ib.offset >= blk.offset: + continue + ib = self.infos[ib.offset] + incoming_blocks.append(ib) + if (ib.offset, blk.offset) in self.edge_process: + edge_callbacks.append(self.edge_process[(ib.offset, blk.offset)]) + + # Compute stack offset at block entry + # The stack effect of our predecessors should be known + assert ib.stack_offset is not None, ib + new_offset = ib.stack_offset + ib.stack_effect - pops + if new_offset < 0: + raise RuntimeError("computed negative stack offset for %s" + % blk) + if info.stack_offset is None: + info.stack_offset = new_offset + elif info.stack_offset != new_offset: + warnings.warn("inconsistent stack offset for %s" % blk, + RuntimeWarning) + + # Compute syntax blocks at block entry + assert ib.syntax_blocks is not None, ib + if info.syntax_blocks is None: + info.syntax_blocks = ib.syntax_blocks[:] + elif info.syntax_blocks != ib.syntax_blocks: + warnings.warn("inconsistent entry syntax blocks for %s" % blk, + RuntimeWarning) + + if info.stack_offset is None: + # No incoming blocks => assume it's the entry block + info.stack_offset = 0 + info.syntax_blocks = [] + info.stack_effect = 0 + + for callback in edge_callbacks: + callback(info) + + for offset in blk: + inst = self.bytecode[offset] + self.dispatch(info, inst) + return info + + def dump(self): + for blk in self.infos.values(): + blk.dump() + + def dispatch(self, info, inst): + fname = "op_%s" % inst.opname.replace('+', '_') + fn = getattr(self, fname, self.handle_unknown_opcode) + fn(info, inst) + + def handle_unknown_opcode(self, info, inst): + raise UnsupportedError( + "Use of unknown opcode '{}'".format(inst.opname), + loc=Loc(filename=self.bytecode.func_id.filename, + line=inst.lineno) + ) + + def dup_topx(self, info, inst, count): + orig = [info.pop() for _ in range(count)] + orig.reverse() + # We need to actually create new temporaries if we want the + # IR optimization pass to work correctly (see issue #580) + duped = [info.make_temp() for _ in range(count)] + info.append(inst, orig=orig, duped=duped) + for val in orig: + info.push(val) + for val in duped: + info.push(val) + + def add_syntax_block(self, info, block): + """ + Add an inner syntax block. + """ + block.stack_offset = info.stack_offset + info.syntax_blocks.append(block) + + def pop_syntax_block(self, info): + """ + Pop the innermost syntax block and revert its stack effect. 
+ """ + block = info.syntax_blocks.pop() + assert info.stack_offset >= block.stack_offset + while info.stack_offset + info.stack_effect > block.stack_offset: + info.pop(discard=True) + return block + + def op_NOP(self, info, inst): + pass + + def op_DUP_TOPX(self, info, inst): + count = inst.arg + assert 1 <= count <= 5, "Invalid DUP_TOPX count" + self.dup_topx(info, inst, count) + + def op_DUP_TOP(self, info, inst): + self.dup_topx(info, inst, count=1) + + def op_DUP_TOP_TWO(self, info, inst): + self.dup_topx(info, inst, count=2) + + def op_ROT_TWO(self, info, inst): + first = info.pop() + second = info.pop() + info.push(first) + info.push(second) + + def op_ROT_THREE(self, info, inst): + first = info.pop() + second = info.pop() + third = info.pop() + info.push(first) + info.push(third) + info.push(second) + + def op_ROT_FOUR(self, info, inst): + first = info.pop() + second = info.pop() + third = info.pop() + forth = info.pop() + info.push(first) + info.push(forth) + info.push(third) + info.push(second) + + def op_UNPACK_SEQUENCE(self, info, inst): + count = inst.arg + iterable = info.pop() + stores = [info.make_temp() for _ in range(count)] + tupleobj = info.make_temp() + info.append(inst, iterable=iterable, stores=stores, tupleobj=tupleobj) + for st in reversed(stores): + info.push(st) + + def op_FORMAT_VALUE(self, info, inst): + """ + FORMAT_VALUE(flags): flags argument specifies format spec which is + not supported yet. Currently, str() is simply called on the value. + Pops a value from stack and pushes results back. + Required for supporting f-strings. + https://docs.python.org/3/library/dis.html#opcode-FORMAT_VALUE + """ + if inst.arg != 0: + msg = "format spec in f-strings not supported yet" + raise UnsupportedError( + msg, + loc=Loc(filename=self.bytecode.func_id.filename, + line=inst.lineno) + ) + value = info.pop() + strvar = info.make_temp() + res = info.make_temp() + info.append(inst, value=value, res=res, strvar=strvar) + info.push(res) + + def op_BUILD_STRING(self, info, inst): + """ + BUILD_STRING(count): Concatenates count strings from the stack and + pushes the resulting string onto the stack. + Required for supporting f-strings. 
+ https://docs.python.org/3/library/dis.html#opcode-BUILD_STRING + """ + count = inst.arg + strings = list(reversed([info.pop() for _ in range(count)])) + # corner case: f"" + if count == 0: + tmps = [info.make_temp()] + else: + tmps = [info.make_temp() for _ in range(count - 1)] + info.append(inst, strings=strings, tmps=tmps) + info.push(tmps[-1]) + + def op_BUILD_TUPLE(self, info, inst): + count = inst.arg + items = list(reversed([info.pop() for _ in range(count)])) + tup = info.make_temp() + info.append(inst, items=items, res=tup) + info.push(tup) + + def op_BUILD_LIST(self, info, inst): + count = inst.arg + items = list(reversed([info.pop() for _ in range(count)])) + lst = info.make_temp() + info.append(inst, items=items, res=lst) + info.push(lst) + + def op_LIST_APPEND(self, info, inst): + value = info.pop() + index = inst.arg + target = info.peek(index) + appendvar = info.make_temp() + res = info.make_temp() + info.append(inst, target=target, value=value, appendvar=appendvar, res=res) + + def op_BUILD_MAP(self, info, inst): + dct = info.make_temp() + count = inst.arg + items = [] + # BUILD_MAP takes pairs from the stack + for i in range(count): + v, k = info.pop(), info.pop() + items.append((k, v)) + info.append(inst, items=items[::-1], size=count, res=dct) + info.push(dct) + + def op_MAP_ADD(self, info, inst): + key = info.pop() + value = info.pop() + index = inst.arg + target = info.peek(index) + setitemvar = info.make_temp() + res = info.make_temp() + info.append(inst, target=target, key=key, value=value, + setitemvar=setitemvar, res=res) + + def op_BUILD_SET(self, info, inst): + count = inst.arg + # Note: related python bug http://bugs.python.org/issue26020 + items = list(reversed([info.pop() for _ in range(count)])) + res = info.make_temp() + info.append(inst, items=items, res=res) + info.push(res) + + def op_POP_TOP(self, info, inst): + info.pop(discard=True) + + def op_STORE_ATTR(self, info, inst): + target = info.pop() + value = info.pop() + info.append(inst, target=target, value=value) + + def op_DELETE_ATTR(self, info, inst): + target = info.pop() + info.append(inst, target=target) + + def op_STORE_FAST(self, info, inst): + value = info.pop() + info.append(inst, value=value) + + def op_STORE_MAP(self, info, inst): + key = info.pop() + value = info.pop() + dct = info.tos + info.append(inst, dct=dct, key=key, value=value) + + def op_STORE_DEREF(self, info, inst): + value = info.pop() + info.append(inst, value=value) + + def op_LOAD_FAST(self, info, inst): + name = self.bytecode.co_varnames[inst.arg] + res = info.make_temp(name) + info.append(inst, res=res) + info.push(res) + + def op_LOAD_CONST(self, info, inst): + res = info.make_temp('const') + info.append(inst, res=res) + info.push(res) + + def op_LOAD_GLOBAL(self, info, inst): + res = info.make_temp() + info.append(inst, res=res) + info.push(res) + + def op_LOAD_DEREF(self, info, inst): + res = info.make_temp() + info.append(inst, res=res) + info.push(res) + + def op_LOAD_ATTR(self, info, inst): + item = info.pop() + res = info.make_temp() + info.append(inst, item=item, res=res) + info.push(res) + + def op_BINARY_SUBSCR(self, info, inst): + index = info.pop() + target = info.pop() + res = info.make_temp() + info.append(inst, index=index, target=target, res=res) + info.push(res) + + def op_STORE_SUBSCR(self, info, inst): + index = info.pop() + target = info.pop() + value = info.pop() + info.append(inst, target=target, index=index, value=value) + + def op_DELETE_SUBSCR(self, info, inst): + index = info.pop() + target = 
info.pop() + info.append(inst, target=target, index=index) + + def op_GET_ITER(self, info, inst): + value = info.pop() + res = info.make_temp() + info.append(inst, value=value, res=res) + info.push(res) + + def op_FOR_ITER(self, info, inst): + iterator = info.tos + pair = info.make_temp() + indval = info.make_temp() + pred = info.make_temp() + info.append(inst, iterator=iterator, pair=pair, indval=indval, pred=pred) + info.push(indval) + # Setup for stack POP (twice) at loop exit (before processing instruction at jump target) + def pop_info(info): + info.pop() + info.pop() + self.edge_process[(info.block.offset, inst.get_jump_target())] = pop_info + + def op_CALL_FUNCTION(self, info, inst): + narg = inst.arg + args = list(reversed([info.pop() for _ in range(narg)])) + func = info.pop() + + res = info.make_temp() + info.append(inst, func=func, args=args, res=res) + info.push(res) + + def op_CALL_FUNCTION_KW(self, info, inst): + narg = inst.arg + names = info.pop() # tuple of names + args = list(reversed([info.pop() for _ in range(narg)])) + func = info.pop() + + res = info.make_temp() + info.append(inst, func=func, args=args, names=names, res=res) + info.push(res) + + def op_CALL_FUNCTION_EX(self, info, inst): + if inst.arg & 1: + errmsg = 'CALL_FUNCTION_EX with **kwargs not supported' + raise NotImplementedError(errmsg) + vararg = info.pop() + func = info.pop() + res = info.make_temp() + info.append(inst, func=func, vararg=vararg, res=res) + info.push(res) + + def _build_tuple_unpack(self, info, inst): + # Builds tuple from other tuples on the stack + tuples = list(reversed([info.pop() for _ in range(inst.arg)])) + temps = [info.make_temp() for _ in range(len(tuples) - 1)] + # if the unpack is assign-like, e.g. x = (*y,), it needs handling + # differently. + is_assign = len(tuples) == 1 + if is_assign: + temps = [info.make_temp(),] + + info.append(inst, tuples=tuples, temps=temps, is_assign=is_assign) + # The result is in the last temp var + info.push(temps[-1]) + + def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, info, inst): + # just unpack the input tuple, call inst will be handled afterwards + self._build_tuple_unpack(info, inst) + + def op_BUILD_TUPLE_UNPACK(self, info, inst): + self._build_tuple_unpack(info, inst) + + def op_BUILD_CONST_KEY_MAP(self, info, inst): + keys = info.pop() + vals = list(reversed([info.pop() for _ in range(inst.arg)])) + keytmps = [info.make_temp() for _ in range(inst.arg)] + res = info.make_temp() + info.append(inst, keys=keys, keytmps=keytmps, values=vals, res=res) + info.push(res) + + def op_PRINT_ITEM(self, info, inst): + warnings.warn("Python2 style print partially supported. 
Please use " + "Python3 style print.", RuntimeWarning) + item = info.pop() + printvar = info.make_temp() + res = info.make_temp() + info.append(inst, item=item, printvar=printvar, res=res) + + def op_PRINT_NEWLINE(self, info, inst): + printvar = info.make_temp() + res = info.make_temp() + info.append(inst, printvar=printvar, res=res) + + def _unaryop(self, info, inst): + val = info.pop() + res = info.make_temp() + info.append(inst, value=val, res=res) + info.push(res) + + op_UNARY_NEGATIVE = _unaryop + op_UNARY_POSITIVE = _unaryop + op_UNARY_NOT = _unaryop + op_UNARY_INVERT = _unaryop + + def _binaryop(self, info, inst): + rhs = info.pop() + lhs = info.pop() + res = info.make_temp() + info.append(inst, lhs=lhs, rhs=rhs, res=res) + info.push(res) + + op_COMPARE_OP = _binaryop + op_IS_OP = _binaryop + op_CONTAINS_OP = _binaryop + + op_INPLACE_ADD = _binaryop + op_INPLACE_SUBTRACT = _binaryop + op_INPLACE_MULTIPLY = _binaryop + op_INPLACE_DIVIDE = _binaryop + op_INPLACE_TRUE_DIVIDE = _binaryop + op_INPLACE_FLOOR_DIVIDE = _binaryop + op_INPLACE_MODULO = _binaryop + op_INPLACE_POWER = _binaryop + op_INPLACE_MATRIX_MULTIPLY = _binaryop + + op_INPLACE_LSHIFT = _binaryop + op_INPLACE_RSHIFT = _binaryop + op_INPLACE_AND = _binaryop + op_INPLACE_OR = _binaryop + op_INPLACE_XOR = _binaryop + + op_BINARY_ADD = _binaryop + op_BINARY_SUBTRACT = _binaryop + op_BINARY_MULTIPLY = _binaryop + op_BINARY_DIVIDE = _binaryop + op_BINARY_TRUE_DIVIDE = _binaryop + op_BINARY_FLOOR_DIVIDE = _binaryop + op_BINARY_MODULO = _binaryop + op_BINARY_POWER = _binaryop + op_BINARY_MATRIX_MULTIPLY = _binaryop + + op_BINARY_LSHIFT = _binaryop + op_BINARY_RSHIFT = _binaryop + op_BINARY_AND = _binaryop + op_BINARY_OR = _binaryop + op_BINARY_XOR = _binaryop + + def op_SLICE_0(self, info, inst): + """ + TOS = TOS[:] + """ + tos = info.pop() + res = info.make_temp() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos, res=res, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + info.push(res) + + def op_SLICE_1(self, info, inst): + """ + TOS = TOS1[TOS:] + """ + tos = info.pop() + tos1 = info.pop() + res = info.make_temp() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos1, start=tos, res=res, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + info.push(res) + + def op_SLICE_2(self, info, inst): + """ + TOS = TOS1[:TOS] + """ + tos = info.pop() + tos1 = info.pop() + res = info.make_temp() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos1, stop=tos, res=res, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + info.push(res) + + def op_SLICE_3(self, info, inst): + """ + TOS = TOS2[TOS1:TOS] + """ + tos = info.pop() + tos1 = info.pop() + tos2 = info.pop() + res = info.make_temp() + slicevar = info.make_temp() + indexvar = info.make_temp() + info.append(inst, base=tos2, start=tos1, stop=tos, res=res, + slicevar=slicevar, indexvar=indexvar) + info.push(res) + + def op_STORE_SLICE_0(self, info, inst): + """ + TOS[:] = TOS1 + """ + tos = info.pop() + value = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos, value=value, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + + def op_STORE_SLICE_1(self, info, inst): + """ + TOS1[TOS:] = TOS2 + """ + tos = info.pop() + tos1 = info.pop() + value = info.pop() + slicevar = info.make_temp() 
+ indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos1, start=tos, slicevar=slicevar, + value=value, indexvar=indexvar, nonevar=nonevar) + + def op_STORE_SLICE_2(self, info, inst): + """ + TOS1[:TOS] = TOS2 + """ + tos = info.pop() + tos1 = info.pop() + value = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos1, stop=tos, value=value, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + + def op_STORE_SLICE_3(self, info, inst): + """ + TOS2[TOS1:TOS] = TOS3 + """ + tos = info.pop() + tos1 = info.pop() + tos2 = info.pop() + value = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + info.append(inst, base=tos2, start=tos1, stop=tos, value=value, + slicevar=slicevar, indexvar=indexvar) + + def op_DELETE_SLICE_0(self, info, inst): + """ + del TOS[:] + """ + tos = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + + def op_DELETE_SLICE_1(self, info, inst): + """ + del TOS1[TOS:] + """ + tos = info.pop() + tos1 = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos1, start=tos, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + + def op_DELETE_SLICE_2(self, info, inst): + """ + del TOS1[:TOS] + """ + tos = info.pop() + tos1 = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + nonevar = info.make_temp() + info.append(inst, base=tos1, stop=tos, slicevar=slicevar, + indexvar=indexvar, nonevar=nonevar) + + def op_DELETE_SLICE_3(self, info, inst): + """ + del TOS2[TOS1:TOS] + """ + tos = info.pop() + tos1 = info.pop() + tos2 = info.pop() + slicevar = info.make_temp() + indexvar = info.make_temp() + info.append(inst, base=tos2, start=tos1, stop=tos, + slicevar=slicevar, indexvar=indexvar) + + def op_BUILD_SLICE(self, info, inst): + """ + slice(TOS1, TOS) or slice(TOS2, TOS1, TOS) + """ + argc = inst.arg + if argc == 2: + tos = info.pop() + tos1 = info.pop() + start = tos1 + stop = tos + step = None + elif argc == 3: + tos = info.pop() + tos1 = info.pop() + tos2 = info.pop() + start = tos2 + stop = tos1 + step = tos + else: + raise Exception("unreachable") + slicevar = info.make_temp() + res = info.make_temp() + info.append(inst, start=start, stop=stop, step=step, res=res, + slicevar=slicevar) + info.push(res) + + def op_POP_JUMP_IF_TRUE(self, info, inst): + pred = info.pop() + info.append(inst, pred=pred) + info.terminator = inst + + def op_POP_JUMP_IF_FALSE(self, info, inst): + pred = info.pop() + info.append(inst, pred=pred) + info.terminator = inst + + def op_JUMP_IF_TRUE(self, info, inst): + pred = info.tos + info.append(inst, pred=pred) + info.terminator = inst + + def op_JUMP_IF_FALSE(self, info, inst): + pred = info.tos + info.append(inst, pred=pred) + info.terminator = inst + + op_JUMP_IF_FALSE_OR_POP = op_JUMP_IF_FALSE + op_JUMP_IF_TRUE_OR_POP = op_JUMP_IF_TRUE + + def op_JUMP_ABSOLUTE(self, info, inst): + info.append(inst) + info.terminator = inst + + def op_JUMP_FORWARD(self, info, inst): + info.append(inst) + info.terminator = inst + + def op_BREAK_LOOP(self, info, inst): + self.pop_syntax_block(info) + info.append(inst) + info.terminator = inst + + def op_RETURN_VALUE(self, info, inst): + info.append(inst, retval=info.pop(), castval=info.make_temp()) + info.terminator = inst + + def op_YIELD_VALUE(self, info, 
inst): + val = info.pop() + res = info.make_temp() + info.append(inst, value=val, res=res) + info.push(res) + + def op_SETUP_LOOP(self, info, inst): + self.add_syntax_block(info, LoopBlock()) + info.append(inst) + + def op_SETUP_WITH(self, info, inst): + cm = info.pop() # the context-manager + self.add_syntax_block(info, WithBlock()) + yielded = info.make_temp() + info.push(yielded) + info.append(inst, contextmanager=cm) + + def op_WITH_CLEANUP(self, info, inst): + """ + Note: py2 only opcode + """ + # TOS is the return value of __exit__() + info.pop() + info.append(inst) + + def op_WITH_CLEANUP_START(self, info, inst): + # TOS is the return value of __exit__() + info.pop() + info.append(inst) + + def op_WITH_CLEANUP_FINISH(self, info, inst): + info.append(inst) + + def op_END_FINALLY(self, info, inst): + info.append(inst) + + def op_POP_BLOCK(self, info, inst): + block = self.pop_syntax_block(info) + info.append(inst) + + def op_RAISE_VARARGS(self, info, inst): + if inst.arg == 0: + exc = None + elif inst.arg == 1: + exc = info.pop() + else: + raise ValueError("Multiple argument raise is not supported.") + info.append(inst, exc=exc) + + def op_MAKE_FUNCTION(self, info, inst, MAKE_CLOSURE=False): + name = info.pop() + code = info.pop() + closure = annotations = kwdefaults = defaults = None + if inst.arg & 0x8: + closure = info.pop() + if inst.arg & 0x4: + annotations = info.pop() + if inst.arg & 0x2: + kwdefaults = info.pop() + if inst.arg & 0x1: + defaults = info.pop() + res = info.make_temp() + info.append(inst, name=name, code=code, closure=closure, annotations=annotations, + kwdefaults=kwdefaults, defaults=defaults, res=res) + info.push(res) + + def op_MAKE_CLOSURE(self, info, inst): + self.op_MAKE_FUNCTION(info, inst, MAKE_CLOSURE=True) + + def op_LOAD_CLOSURE(self, info, inst): + res = info.make_temp() + info.append(inst, res=res) + info.push(res) + + #NOTE: Please see notes in `interpreter.py` surrounding the implementation + # of LOAD_METHOD and CALL_METHOD. + + def op_LOAD_METHOD(self, *args, **kws): + self.op_LOAD_ATTR(*args, **kws) + + def op_CALL_METHOD(self, *args, **kws): + self.op_CALL_FUNCTION(*args, **kws) + + def _ignored(self, info, inst): + pass + + +class LoopBlock(object): + __slots__ = ('stack_offset',) + + def __init__(self): + self.stack_offset = None + + +class WithBlock(object): + __slots__ = ('stack_offset',) + + def __init__(self): + self.stack_offset = None + + +class BlockInfo(object): + def __init__(self, block, offset, incoming_blocks): + self.block = block + self.offset = offset + # The list of incoming BlockInfo objects (obtained by control + # flow analysis). + self.incoming_blocks = incoming_blocks + self.stack = [] + # Outgoing variables from this block: + # { outgoing phi name -> var name } + self.outgoing_phis = {} + self.insts = [] + self.tempct = 0 + self._term = None + self.stack_offset = None + self.stack_effect = 0 + self.syntax_blocks = None + + def __repr__(self): + return "<%s at offset %d>" % (self.__class__.__name__, self.offset) + + def dump(self): + print("offset", self.offset, "{") + print(" stack: ", end='') + pprint(self.stack) + pprint(self.insts) + print("}") + + def make_temp(self, prefix=''): + self.tempct += 1 + name = '$%s%s.%s' % (prefix, self.offset, self.tempct) + return name + + def push(self, val): + self.stack_effect += 1 + self.stack.append(val) + + def pop(self, discard=False): + """ + Pop a variable from the stack, or request it from incoming blocks if + the stack is empty. 
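+        Requests propagate recursively through the incoming blocks (see
+        ``make_incoming`` and ``request_outgoing``), yielding phi variables.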
+ If *discard* is true, the variable isn't meant to be used anymore, + which allows reducing the number of temporaries created. + """ + if not self.stack: + self.stack_offset -= 1 + if not discard: + return self.make_incoming() + else: + self.stack_effect -= 1 + return self.stack.pop() + + def peek(self, k): + """ + Return the k'th element back from the top of the stack. + peek(1) is the top of the stack. + """ + num_pops = k + top_k = [self.pop() for _ in range(num_pops)] + r = top_k[-1] + for i in range(num_pops - 1, -1, -1): + self.push(top_k[i]) + return r + + def make_incoming(self): + """ + Create an incoming variable (due to not enough values being + available on our stack) and request its assignment from our + incoming blocks' own stacks. + """ + assert self.incoming_blocks + ret = self.make_temp('phi') + for ib in self.incoming_blocks: + stack_index = self.stack_offset + self.stack_effect + ib.request_outgoing(self, ret, stack_index) + return ret + + def request_outgoing(self, outgoing_block, phiname, stack_index): + """ + Request the assignment of the next available stack variable + for block *outgoing_block* with target name *phiname*. + """ + if phiname in self.outgoing_phis: + # If phiname was already requested, ignore this new request + # (can happen with a diamond-shaped block flow structure). + return + if stack_index < self.stack_offset: + assert self.incoming_blocks + for ib in self.incoming_blocks: + ib.request_outgoing(self, phiname, stack_index) + else: + varname = self.stack[stack_index - self.stack_offset] + self.outgoing_phis[phiname] = varname + + @property + def tos(self): + r = self.pop() + self.push(r) + return r + + def append(self, inst, **kws): + self.insts.append((inst.offset, kws)) + + @property + def terminator(self): + assert self._term is None + return self._term + + @terminator.setter + def terminator(self, inst): + self._term = inst + + @property + def active_try_block(self): + """Try except not supported. 
+
+        See byteflow.py
+        """
+        return None
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/__init__.py
new file mode 100644
index 000000000..2cb8b104e
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/__init__.py
@@ -0,0 +1,4 @@
+from .manager import DataModelManager
+from .packer import ArgPacker, DataPacker
+from .registry import register_default, default_manager, register
+from .models import PrimitiveModel, CompositeModel, StructModel
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/manager.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/manager.py
new file mode 100644
index 000000000..819f33511
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/manager.py
@@ -0,0 +1,47 @@
+import weakref
+
+from numba.core import types
+
+
+class DataModelManager(object):
+    """Manages the mapping of FE types to their corresponding data models.
+    """
+
+    def __init__(self):
+        # { numba type class -> model factory }
+        self._handlers = {}
+        # { numba type instance -> model instance }
+        self._cache = weakref.WeakKeyDictionary()
+
+    def register(self, fetypecls, handler):
+        """Register the datamodel factory corresponding to a frontend-type class
+        """
+        assert issubclass(fetypecls, types.Type)
+        self._handlers[fetypecls] = handler
+
+    def lookup(self, fetype):
+        """Returns the corresponding datamodel given the frontend-type instance
+        """
+        try:
+            return self._cache[fetype]
+        except KeyError:
+            pass
+        handler = self._handlers[type(fetype)]
+        model = self._cache[fetype] = handler(self, fetype)
+        return model
+
+    def __getitem__(self, fetype):
+        """Shorthand for lookup()
+        """
+        return self.lookup(fetype)
+
+    def copy(self):
+        """
+        Make a copy of the manager.
+        Use this to inherit from the default data model and specialize it
+        for a custom target.
+        """
+        dmm = DataModelManager()
+        dmm._handlers = self._handlers.copy()
+        return dmm
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/models.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/models.py
new file mode 100644
index 000000000..cc62f6035
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/models.py
@@ -0,0 +1,1384 @@
+from functools import partial
+from collections import deque
+
+from llvmlite import ir
+
+from numba.core.datamodel.registry import register_default
+from numba.core import types, cgutils
+from numba.np import numpy_support
+
+
+class DataModel(object):
+    """
+    DataModel describes how a FE type is represented in the LLVM IR in
+    different contexts.
+
+    Contexts are:
+
+    - value: representation inside the function body. May be stored on the
+      stack. The representation here is flexible.
+
+    - data: representation used when storing into containers (e.g. arrays).
+
+    - argument: representation used for function arguments. All composite
+      types are flattened into multiple primitive types.
+
+    - return: representation used for the return value.
+
+    Throughout the compiler pipeline, an LLVM value is usually passed around
+    in the "value" representation. All "as_"-prefixed functions convert from
+    the "value" representation; all "from_"-prefixed functions convert to the
+    "value" representation.
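+
+    For example, a Boolean is a 1-bit integer in the "value" representation
+    but widens to a full byte for the "data", "argument" and "return"
+    representations (see ``BooleanModel`` below).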
+ + """ + def __init__(self, dmm, fe_type): + self._dmm = dmm + self._fe_type = fe_type + + @property + def fe_type(self): + return self._fe_type + + def get_value_type(self): + raise NotImplementedError(self) + + def get_data_type(self): + return self.get_value_type() + + def get_argument_type(self): + """Return a LLVM type or nested tuple of LLVM type + """ + return self.get_value_type() + + def get_return_type(self): + return self.get_value_type() + + def as_data(self, builder, value): + raise NotImplementedError(self) + + def as_argument(self, builder, value): + """ + Takes one LLVM value + Return a LLVM value or nested tuple of LLVM value + """ + raise NotImplementedError(self) + + def as_return(self, builder, value): + raise NotImplementedError(self) + + def from_data(self, builder, value): + raise NotImplementedError(self) + + def from_argument(self, builder, value): + """ + Takes a LLVM value or nested tuple of LLVM value + Returns one LLVM value + """ + raise NotImplementedError(self) + + def from_return(self, builder, value): + raise NotImplementedError(self) + + def load_from_data_pointer(self, builder, ptr, align=None): + """ + Load value from a pointer to data. + This is the default implementation, sufficient for most purposes. + """ + return self.from_data(builder, builder.load(ptr, align=align)) + + def traverse(self, builder): + """ + Traverse contained members. + Returns a iterable of contained (types, getters). + Each getter is a one-argument function accepting a LLVM value. + """ + return [] + + def traverse_models(self): + """ + Recursively list all models involved in this model. + """ + return [self._dmm[t] for t in self.traverse_types()] + + def traverse_types(self): + """ + Recursively list all frontend types involved in this model. + """ + types = [self._fe_type] + queue = deque([self]) + while len(queue) > 0: + dm = queue.popleft() + + for i_dm in dm.inner_models(): + if i_dm._fe_type not in types: + queue.append(i_dm) + types.append(i_dm._fe_type) + + return types + + def inner_models(self): + """ + List all *inner* models. + """ + return [] + + def get_nrt_meminfo(self, builder, value): + """ + Returns the MemInfo object or None if it is not tracked. + It is only defined for types.meminfo_pointer + """ + return None + + def has_nrt_meminfo(self): + return False + + def contains_nrt_meminfo(self): + """ + Recursively check all contained types for need for NRT meminfo. + """ + return any(model.has_nrt_meminfo() for model in self.traverse_models()) + + def _compared_fields(self): + return (type(self), self._fe_type) + + def __hash__(self): + return hash(tuple(self._compared_fields())) + + def __eq__(self, other): + if type(self) is type(other): + return self._compared_fields() == other._compared_fields() + else: + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +@register_default(types.Omitted) +class OmittedArgDataModel(DataModel): + """ + A data model for omitted arguments. Only the "argument" representation + is defined, other representations raise a NotImplementedError. + """ + # Omitted arguments are using a dummy value type + def get_value_type(self): + return ir.LiteralStructType([]) + + # Omitted arguments don't produce any LLVM function argument. 
+ def get_argument_type(self): + return () + + def as_argument(self, builder, val): + return () + + def from_argument(self, builder, val): + assert val == (), val + return None + + +@register_default(types.Boolean) +@register_default(types.BooleanLiteral) +class BooleanModel(DataModel): + _bit_type = ir.IntType(1) + _byte_type = ir.IntType(8) + + def get_value_type(self): + return self._bit_type + + def get_data_type(self): + return self._byte_type + + def get_return_type(self): + return self.get_data_type() + + def get_argument_type(self): + return self.get_data_type() + + def as_data(self, builder, value): + return builder.zext(value, self.get_data_type()) + + def as_argument(self, builder, value): + return self.as_data(builder, value) + + def as_return(self, builder, value): + return self.as_data(builder, value) + + def from_data(self, builder, value): + ty = self.get_value_type() + resalloca = cgutils.alloca_once(builder, ty) + cond = builder.icmp_unsigned('==', value, value.type(0)) + with builder.if_else(cond) as (then, otherwise): + with then: + builder.store(ty(0), resalloca) + with otherwise: + builder.store(ty(1), resalloca) + return builder.load(resalloca) + + def from_argument(self, builder, value): + return self.from_data(builder, value) + + def from_return(self, builder, value): + return self.from_data(builder, value) + + +class PrimitiveModel(DataModel): + """A primitive type can be represented natively in the target in all + usage contexts. + """ + + def __init__(self, dmm, fe_type, be_type): + super(PrimitiveModel, self).__init__(dmm, fe_type) + self.be_type = be_type + + def get_value_type(self): + return self.be_type + + def as_data(self, builder, value): + return value + + def as_argument(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def from_data(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + +class ProxyModel(DataModel): + """ + Helper class for models which delegate to another model. + """ + + def get_value_type(self): + return self._proxied_model.get_value_type() + + def get_data_type(self): + return self._proxied_model.get_data_type() + + def get_return_type(self): + return self._proxied_model.get_return_type() + + def get_argument_type(self): + return self._proxied_model.get_argument_type() + + def as_data(self, builder, value): + return self._proxied_model.as_data(builder, value) + + def as_argument(self, builder, value): + return self._proxied_model.as_argument(builder, value) + + def as_return(self, builder, value): + return self._proxied_model.as_return(builder, value) + + def from_data(self, builder, value): + return self._proxied_model.from_data(builder, value) + + def from_argument(self, builder, value): + return self._proxied_model.from_argument(builder, value) + + def from_return(self, builder, value): + return self._proxied_model.from_return(builder, value) + + +@register_default(types.EnumMember) +@register_default(types.IntEnumMember) +class EnumModel(ProxyModel): + """ + Enum members are represented exactly like their values. 
+ """ + def __init__(self, dmm, fe_type): + super(EnumModel, self).__init__(dmm, fe_type) + self._proxied_model = dmm.lookup(fe_type.dtype) + + +@register_default(types.Opaque) +@register_default(types.PyObject) +@register_default(types.RawPointer) +@register_default(types.NoneType) +@register_default(types.StringLiteral) +@register_default(types.EllipsisType) +@register_default(types.Function) +@register_default(types.Type) +@register_default(types.Object) +@register_default(types.Module) +@register_default(types.Phantom) +@register_default(types.ContextManager) +@register_default(types.Dispatcher) +@register_default(types.ObjModeDispatcher) +@register_default(types.ExceptionClass) +@register_default(types.Dummy) +@register_default(types.ExceptionInstance) +@register_default(types.ExternalFunction) +@register_default(types.EnumClass) +@register_default(types.IntEnumClass) +@register_default(types.NumberClass) +@register_default(types.TypeRef) +@register_default(types.NamedTupleClass) +@register_default(types.DType) +@register_default(types.RecursiveCall) +@register_default(types.MakeFunctionLiteral) +@register_default(types.Poison) +class OpaqueModel(PrimitiveModel): + """ + Passed as opaque pointers + """ + _ptr_type = ir.IntType(8).as_pointer() + + def __init__(self, dmm, fe_type): + be_type = self._ptr_type + super(OpaqueModel, self).__init__(dmm, fe_type, be_type) + + +@register_default(types.MemInfoPointer) +class MemInfoModel(OpaqueModel): + + def inner_models(self): + return [self._dmm.lookup(self._fe_type.dtype)] + + def has_nrt_meminfo(self): + return True + + def get_nrt_meminfo(self, builder, value): + return value + + +@register_default(types.Integer) +@register_default(types.IntegerLiteral) +class IntegerModel(PrimitiveModel): + def __init__(self, dmm, fe_type): + be_type = ir.IntType(fe_type.bitwidth) + super(IntegerModel, self).__init__(dmm, fe_type, be_type) + + +@register_default(types.Float) +class FloatModel(PrimitiveModel): + def __init__(self, dmm, fe_type): + if fe_type == types.float32: + be_type = ir.FloatType() + elif fe_type == types.float64: + be_type = ir.DoubleType() + else: + raise NotImplementedError(fe_type) + super(FloatModel, self).__init__(dmm, fe_type, be_type) + + +@register_default(types.CPointer) +class PointerModel(PrimitiveModel): + def __init__(self, dmm, fe_type): + self._pointee_model = dmm.lookup(fe_type.dtype) + self._pointee_be_type = self._pointee_model.get_data_type() + be_type = self._pointee_be_type.as_pointer() + super(PointerModel, self).__init__(dmm, fe_type, be_type) + + +@register_default(types.EphemeralPointer) +class EphemeralPointerModel(PointerModel): + + def get_data_type(self): + return self._pointee_be_type + + def as_data(self, builder, value): + value = builder.load(value) + return self._pointee_model.as_data(builder, value) + + def from_data(self, builder, value): + raise NotImplementedError("use load_from_data_pointer() instead") + + def load_from_data_pointer(self, builder, ptr, align=None): + return builder.bitcast(ptr, self.get_value_type()) + + +@register_default(types.EphemeralArray) +class EphemeralArrayModel(PointerModel): + + def __init__(self, dmm, fe_type): + super(EphemeralArrayModel, self).__init__(dmm, fe_type) + self._data_type = ir.ArrayType(self._pointee_be_type, + self._fe_type.count) + + def get_data_type(self): + return self._data_type + + def as_data(self, builder, value): + values = [builder.load(cgutils.gep_inbounds(builder, value, i)) + for i in range(self._fe_type.count)] + return 
cgutils.pack_array(builder, values) + + def from_data(self, builder, value): + raise NotImplementedError("use load_from_data_pointer() instead") + + def load_from_data_pointer(self, builder, ptr, align=None): + return builder.bitcast(ptr, self.get_value_type()) + + +@register_default(types.ExternalFunctionPointer) +class ExternalFuncPointerModel(PrimitiveModel): + def __init__(self, dmm, fe_type): + sig = fe_type.sig + # Since the function is non-Numba, there is no adaptation + # of arguments and return value, hence get_value_type(). + retty = dmm.lookup(sig.return_type).get_value_type() + args = [dmm.lookup(t).get_value_type() for t in sig.args] + be_type = ir.PointerType(ir.FunctionType(retty, args)) + super(ExternalFuncPointerModel, self).__init__(dmm, fe_type, be_type) + + +@register_default(types.UniTuple) +@register_default(types.NamedUniTuple) +@register_default(types.StarArgUniTuple) +class UniTupleModel(DataModel): + def __init__(self, dmm, fe_type): + super(UniTupleModel, self).__init__(dmm, fe_type) + self._elem_model = dmm.lookup(fe_type.dtype) + self._count = len(fe_type) + self._value_type = ir.ArrayType(self._elem_model.get_value_type(), + self._count) + self._data_type = ir.ArrayType(self._elem_model.get_data_type(), + self._count) + + def get_value_type(self): + return self._value_type + + def get_data_type(self): + return self._data_type + + def get_return_type(self): + return self.get_value_type() + + def get_argument_type(self): + return (self._elem_model.get_argument_type(),) * self._count + + def as_argument(self, builder, value): + out = [] + for i in range(self._count): + v = builder.extract_value(value, [i]) + v = self._elem_model.as_argument(builder, v) + out.append(v) + return out + + def from_argument(self, builder, value): + out = ir.Constant(self.get_value_type(), ir.Undefined) + for i, v in enumerate(value): + v = self._elem_model.from_argument(builder, v) + out = builder.insert_value(out, v, [i]) + return out + + def as_data(self, builder, value): + out = ir.Constant(self.get_data_type(), ir.Undefined) + for i in range(self._count): + val = builder.extract_value(value, [i]) + dval = self._elem_model.as_data(builder, val) + out = builder.insert_value(out, dval, [i]) + return out + + def from_data(self, builder, value): + out = ir.Constant(self.get_value_type(), ir.Undefined) + for i in range(self._count): + val = builder.extract_value(value, [i]) + dval = self._elem_model.from_data(builder, val) + out = builder.insert_value(out, dval, [i]) + return out + + def as_return(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + def traverse(self, builder): + def getter(i, value): + return builder.extract_value(value, i) + return [(self._fe_type.dtype, partial(getter, i)) + for i in range(self._count)] + + def inner_models(self): + return [self._elem_model] + + +class CompositeModel(DataModel): + """Any model that is composed of multiple other models should subclass from + this. + """ + pass + + +class StructModel(CompositeModel): + _value_type = None + _data_type = None + + def __init__(self, dmm, fe_type, members): + super(StructModel, self).__init__(dmm, fe_type) + if members: + self._fields, self._members = zip(*members) + else: + self._fields = self._members = () + self._models = tuple([self._dmm.lookup(t) for t in self._members]) + + def get_member_fe_type(self, name): + """ + StructModel-specific: get the Numba type of the field named *name*. 
+ """ + pos = self.get_field_position(name) + return self._members[pos] + + def get_value_type(self): + if self._value_type is None: + self._value_type = ir.LiteralStructType([t.get_value_type() + for t in self._models]) + return self._value_type + + def get_data_type(self): + if self._data_type is None: + self._data_type = ir.LiteralStructType([t.get_data_type() + for t in self._models]) + return self._data_type + + def get_argument_type(self): + return tuple([t.get_argument_type() for t in self._models]) + + def get_return_type(self): + return self.get_data_type() + + def _as(self, methname, builder, value): + extracted = [] + for i, dm in enumerate(self._models): + extracted.append(getattr(dm, methname)(builder, + self.get(builder, value, i))) + return tuple(extracted) + + def _from(self, methname, builder, value): + struct = ir.Constant(self.get_value_type(), ir.Undefined) + + for i, (dm, val) in enumerate(zip(self._models, value)): + v = getattr(dm, methname)(builder, val) + struct = self.set(builder, struct, v, i) + + return struct + + def as_data(self, builder, value): + """ + Converts the LLVM struct in `value` into a representation suited for + storing into arrays. + + Note + ---- + Current implementation rarely changes how types are represented for + "value" and "data". This is usually a pointless rebuild of the + immutable LLVM struct value. Luckily, LLVM optimization removes all + redundancy. + + Sample usecase: Structures nested with pointers to other structures + that can be serialized into a flat representation when storing into + array. + """ + elems = self._as("as_data", builder, value) + struct = ir.Constant(self.get_data_type(), ir.Undefined) + for i, el in enumerate(elems): + struct = builder.insert_value(struct, el, [i]) + return struct + + def from_data(self, builder, value): + """ + Convert from "data" representation back into "value" representation. + Usually invoked when loading from array. + + See notes in `as_data()` + """ + vals = [builder.extract_value(value, [i]) + for i in range(len(self._members))] + return self._from("from_data", builder, vals) + + def load_from_data_pointer(self, builder, ptr, align=None): + values = [] + for i, model in enumerate(self._models): + elem_ptr = cgutils.gep_inbounds(builder, ptr, 0, i) + val = model.load_from_data_pointer(builder, elem_ptr, align) + values.append(val) + + struct = ir.Constant(self.get_value_type(), ir.Undefined) + for i, val in enumerate(values): + struct = self.set(builder, struct, val, i) + return struct + + def as_argument(self, builder, value): + return self._as("as_argument", builder, value) + + def from_argument(self, builder, value): + return self._from("from_argument", builder, value) + + def as_return(self, builder, value): + elems = self._as("as_data", builder, value) + struct = ir.Constant(self.get_data_type(), ir.Undefined) + for i, el in enumerate(elems): + struct = builder.insert_value(struct, el, [i]) + return struct + + def from_return(self, builder, value): + vals = [builder.extract_value(value, [i]) + for i in range(len(self._members))] + return self._from("from_data", builder, vals) + + def get(self, builder, val, pos): + """Get a field at the given position or the fieldname + + Args + ---- + builder: + LLVM IRBuilder + val: + value to be inserted + pos: int or str + field index or field name + + Returns + ------- + Extracted value + """ + if isinstance(pos, str): + pos = self.get_field_position(pos) + return builder.extract_value(val, [pos], + name="extracted." 
+ self._fields[pos]) + + def set(self, builder, stval, val, pos): + """Set a field at the given position or the fieldname + + Args + ---- + builder: + LLVM IRBuilder + stval: + LLVM struct value + val: + value to be inserted + pos: int or str + field index or field name + + Returns + ------- + A new LLVM struct with the value inserted + """ + if isinstance(pos, str): + pos = self.get_field_position(pos) + return builder.insert_value(stval, val, [pos], + name="inserted." + self._fields[pos]) + + def get_field_position(self, field): + try: + return self._fields.index(field) + except ValueError: + raise KeyError("%s does not have a field named %r" + % (self.__class__.__name__, field)) + + @property + def field_count(self): + return len(self._fields) + + def get_type(self, pos): + """Get the frontend type (numba type) of a field given the position + or the fieldname + + Args + ---- + pos: int or str + field index or field name + """ + if isinstance(pos, str): + pos = self.get_field_position(pos) + return self._members[pos] + + def get_model(self, pos): + """ + Get the datamodel of a field given the position or the fieldname. + + Args + ---- + pos: int or str + field index or field name + """ + return self._models[pos] + + def traverse(self, builder): + def getter(k, value): + if value.type != self.get_value_type(): + args = self.get_value_type(), value.type + raise TypeError("expecting {0} but got {1}".format(*args)) + return self.get(builder, value, k) + + return [(self.get_type(k), partial(getter, k)) for k in self._fields] + + def inner_models(self): + return self._models + + +@register_default(types.Complex) +class ComplexModel(StructModel): + _element_type = NotImplemented + + def __init__(self, dmm, fe_type): + members = [ + ('real', fe_type.underlying_float), + ('imag', fe_type.underlying_float), + ] + super(ComplexModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.LiteralList) +@register_default(types.LiteralStrKeyDict) +@register_default(types.Tuple) +@register_default(types.NamedTuple) +@register_default(types.StarArgTuple) +class TupleModel(StructModel): + def __init__(self, dmm, fe_type): + members = [('f' + str(i), t) for i, t in enumerate(fe_type)] + super(TupleModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.UnionType) +class UnionModel(StructModel): + def __init__(self, dmm, fe_type): + members = [ + ('tag', types.uintp), + # XXX: it should really be a MemInfoPointer(types.voidptr) + ('payload', types.Tuple.from_types(fe_type.types)), + ] + super(UnionModel, self).__init__(dmm, fe_type, members) + + + +@register_default(types.Pair) +class PairModel(StructModel): + def __init__(self, dmm, fe_type): + members = [('first', fe_type.first_type), + ('second', fe_type.second_type)] + super(PairModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.ListPayload) +class ListPayloadModel(StructModel): + def __init__(self, dmm, fe_type): + # The fields are mutable but the payload is always manipulated + # by reference. This scheme allows mutations of an array to + # be seen by its iterators. 
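+        # ListModel below keeps only a meminfo pointer to this payload, so
+        # the payload can be reallocated on resize without invalidating
+        # live list values.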
+        members = [
+            ('size', types.intp),
+            ('allocated', types.intp),
+            # This member is only used for reflected lists
+            ('dirty', types.boolean),
+            # Actually an inlined var-sized array
+            ('data', fe_type.container.dtype),
+        ]
+        super(ListPayloadModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.List)
+class ListModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        payload_type = types.ListPayload(fe_type)
+        members = [
+            # The meminfo data points to a ListPayload
+            ('meminfo', types.MemInfoPointer(payload_type)),
+            # This member is only used for reflected lists
+            ('parent', types.pyobject),
+        ]
+        super(ListModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.ListIter)
+class ListIterModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        payload_type = types.ListPayload(fe_type.container)
+        members = [
+            # The meminfo data points to a ListPayload (shared with the
+            # original list object)
+            ('meminfo', types.MemInfoPointer(payload_type)),
+            ('index', types.EphemeralPointer(types.intp)),
+        ]
+        super(ListIterModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.SetEntry)
+class SetEntryModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        dtype = fe_type.set_type.dtype
+        members = [
+            # -1 = empty, -2 = deleted
+            ('hash', types.intp),
+            ('key', dtype),
+        ]
+        super(SetEntryModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.SetPayload)
+class SetPayloadModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        entry_type = types.SetEntry(fe_type.container)
+        members = [
+            # Number of active + deleted entries
+            ('fill', types.intp),
+            # Number of active entries
+            ('used', types.intp),
+            # Allocated size - 1 (size being a power of 2)
+            ('mask', types.intp),
+            # Search finger
+            ('finger', types.intp),
+            # This member is only used for reflected sets
+            ('dirty', types.boolean),
+            # Actually an inlined var-sized array
+            ('entries', entry_type),
+        ]
+        super(SetPayloadModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.Set)
+class SetModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        payload_type = types.SetPayload(fe_type)
+        members = [
+            # The meminfo data points to a SetPayload
+            ('meminfo', types.MemInfoPointer(payload_type)),
+            # This member is only used for reflected sets
+            ('parent', types.pyobject),
+        ]
+        super(SetModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.SetIter)
+class SetIterModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        payload_type = types.SetPayload(fe_type.container)
+        members = [
+            # The meminfo data points to a SetPayload (shared with the
+            # original set object)
+            ('meminfo', types.MemInfoPointer(payload_type)),
+            # The index into the entries table
+            ('index', types.EphemeralPointer(types.intp)),
+        ]
+        super(SetIterModel, self).__init__(dmm, fe_type, members)
+
+
+@register_default(types.Array)
+@register_default(types.Buffer)
+@register_default(types.ByteArray)
+@register_default(types.Bytes)
+@register_default(types.MemoryView)
+@register_default(types.PyArray)
+class ArrayModel(StructModel):
+    def __init__(self, dmm, fe_type):
+        ndim = fe_type.ndim
+        members = [
+            ('meminfo', types.MemInfoPointer(fe_type.dtype)),
+            ('parent', types.pyobject),
+            ('nitems', types.intp),
+            ('itemsize', types.intp),
+            ('data', types.CPointer(fe_type.dtype)),
+            ('shape', types.UniTuple(types.intp, ndim)),
+            ('strides', types.UniTuple(types.intp, ndim)),
+        ]
+        super(ArrayModel, self).__init__(dmm, fe_type, members)
+
+
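+# Illustratively (a hedged sketch, not part of this file): the same machinery
+# can be attached to a user-defined frontend type with ``register_default``
+# and ``StructModel``, mirroring the models above. ``Interval`` here is a
+# hypothetical type:
+#
+#     class Interval(types.Type):
+#         def __init__(self):
+#             super(Interval, self).__init__(name='Interval')
+#
+#     @register_default(Interval)
+#     class IntervalModel(StructModel):
+#         def __init__(self, dmm, fe_type):
+#             members = [('lo', types.float64), ('hi', types.float64)]
+#             super(IntervalModel, self).__init__(dmm, fe_type, members)
+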
+@register_default(types.ArrayFlags) +class ArrayFlagsModel(StructModel): + def __init__(self, dmm, fe_type): + members = [ + ('parent', fe_type.array_type), + ] + super(ArrayFlagsModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.NestedArray) +class NestedArrayModel(ArrayModel): + def __init__(self, dmm, fe_type): + self._be_type = dmm.lookup(fe_type.dtype).get_data_type() + super(NestedArrayModel, self).__init__(dmm, fe_type) + + def as_storage_type(self): + """Return the LLVM type representation for the storage of + the nestedarray. + """ + ret = ir.ArrayType(self._be_type, self._fe_type.nitems) + return ret + + +@register_default(types.Optional) +class OptionalModel(StructModel): + def __init__(self, dmm, fe_type): + members = [ + ('data', fe_type.type), + ('valid', types.boolean), + ] + self._value_model = dmm.lookup(fe_type.type) + super(OptionalModel, self).__init__(dmm, fe_type, members) + + def get_return_type(self): + return self._value_model.get_return_type() + + def as_return(self, builder, value): + raise NotImplementedError + + def from_return(self, builder, value): + return self._value_model.from_return(builder, value) + + def traverse(self, builder): + def get_data(value): + valid = get_valid(value) + data = self.get(builder, value, "data") + return builder.select(valid, data, ir.Constant(data.type, None)) + def get_valid(value): + return self.get(builder, value, "valid") + + return [(self.get_type("data"), get_data), + (self.get_type("valid"), get_valid)] + + +@register_default(types.Record) +class RecordModel(CompositeModel): + def __init__(self, dmm, fe_type): + super(RecordModel, self).__init__(dmm, fe_type) + self._models = [self._dmm.lookup(t) for _, t in fe_type.members] + self._be_type = ir.ArrayType(ir.IntType(8), fe_type.size) + self._be_ptr_type = self._be_type.as_pointer() + + def get_value_type(self): + """Passed around as reference to underlying data + """ + return self._be_ptr_type + + def get_argument_type(self): + return self._be_ptr_type + + def get_return_type(self): + return self._be_ptr_type + + def get_data_type(self): + return self._be_type + + def as_data(self, builder, value): + return builder.load(value) + + def from_data(self, builder, value): + raise NotImplementedError("use load_from_data_pointer() instead") + + def as_argument(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + def load_from_data_pointer(self, builder, ptr, align=None): + return builder.bitcast(ptr, self.get_value_type()) + + +@register_default(types.UnicodeCharSeq) +class UnicodeCharSeq(DataModel): + def __init__(self, dmm, fe_type): + super(UnicodeCharSeq, self).__init__(dmm, fe_type) + charty = ir.IntType(numpy_support.sizeof_unicode_char * 8) + self._be_type = ir.ArrayType(charty, fe_type.count) + + def get_value_type(self): + return self._be_type + + def get_data_type(self): + return self._be_type + + def as_data(self, builder, value): + return value + + def from_data(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + def as_argument(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + +@register_default(types.CharSeq) +class CharSeq(DataModel): + def __init__(self, dmm, fe_type): + super(CharSeq, self).__init__(dmm, fe_type) + charty = 
ir.IntType(8) + self._be_type = ir.ArrayType(charty, fe_type.count) + + def get_value_type(self): + return self._be_type + + def get_data_type(self): + return self._be_type + + def as_data(self, builder, value): + return value + + def from_data(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def from_return(self, builder, value): + return value + + def as_argument(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + +class CContiguousFlatIter(StructModel): + def __init__(self, dmm, fe_type, need_indices): + assert fe_type.array_type.layout == 'C' + array_type = fe_type.array_type + dtype = array_type.dtype + ndim = array_type.ndim + members = [('array', array_type), + ('stride', types.intp), + ('index', types.EphemeralPointer(types.intp)), + ] + if need_indices: + # For ndenumerate() + members.append(('indices', types.EphemeralArray(types.intp, ndim))) + super(CContiguousFlatIter, self).__init__(dmm, fe_type, members) + + +class FlatIter(StructModel): + def __init__(self, dmm, fe_type): + array_type = fe_type.array_type + dtype = array_type.dtype + ndim = array_type.ndim + members = [('array', array_type), + ('pointers', types.EphemeralArray(types.CPointer(dtype), ndim)), + ('indices', types.EphemeralArray(types.intp, ndim)), + ('exhausted', types.EphemeralPointer(types.boolean)), + ] + super(FlatIter, self).__init__(dmm, fe_type, members) + + +@register_default(types.UniTupleIter) +class UniTupleIter(StructModel): + def __init__(self, dmm, fe_type): + members = [('index', types.EphemeralPointer(types.intp)), + ('tuple', fe_type.container,)] + super(UniTupleIter, self).__init__(dmm, fe_type, members) + + +@register_default(types.misc.SliceLiteral) +@register_default(types.SliceType) +class SliceModel(StructModel): + def __init__(self, dmm, fe_type): + members = [('start', types.intp), + ('stop', types.intp), + ('step', types.intp), + ] + super(SliceModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.NPDatetime) +@register_default(types.NPTimedelta) +class NPDatetimeModel(PrimitiveModel): + def __init__(self, dmm, fe_type): + be_type = ir.IntType(64) + super(NPDatetimeModel, self).__init__(dmm, fe_type, be_type) + + +@register_default(types.ArrayIterator) +class ArrayIterator(StructModel): + def __init__(self, dmm, fe_type): + # We use an unsigned index to avoid the cost of negative index tests. 
+ members = [('index', types.EphemeralPointer(types.uintp)), + ('array', fe_type.array_type)] + super(ArrayIterator, self).__init__(dmm, fe_type, members) + + +@register_default(types.EnumerateType) +class EnumerateType(StructModel): + def __init__(self, dmm, fe_type): + members = [('count', types.EphemeralPointer(types.intp)), + ('iter', fe_type.source_type)] + + super(EnumerateType, self).__init__(dmm, fe_type, members) + + +@register_default(types.ZipType) +class ZipType(StructModel): + def __init__(self, dmm, fe_type): + members = [('iter%d' % i, source_type.iterator_type) + for i, source_type in enumerate(fe_type.source_types)] + super(ZipType, self).__init__(dmm, fe_type, members) + + +@register_default(types.RangeIteratorType) +class RangeIteratorType(StructModel): + def __init__(self, dmm, fe_type): + int_type = fe_type.yield_type + members = [('iter', types.EphemeralPointer(int_type)), + ('stop', int_type), + ('step', int_type), + ('count', types.EphemeralPointer(int_type))] + super(RangeIteratorType, self).__init__(dmm, fe_type, members) + + +@register_default(types.Generator) +class GeneratorModel(CompositeModel): + def __init__(self, dmm, fe_type): + super(GeneratorModel, self).__init__(dmm, fe_type) + # XXX Fold this in DataPacker? + self._arg_models = [self._dmm.lookup(t) for t in fe_type.arg_types + if not isinstance(t, types.Omitted)] + self._state_models = [self._dmm.lookup(t) for t in fe_type.state_types] + + self._args_be_type = ir.LiteralStructType( + [t.get_data_type() for t in self._arg_models]) + self._state_be_type = ir.LiteralStructType( + [t.get_data_type() for t in self._state_models]) + # The whole generator closure + self._be_type = ir.LiteralStructType( + [self._dmm.lookup(types.int32).get_value_type(), + self._args_be_type, self._state_be_type]) + self._be_ptr_type = self._be_type.as_pointer() + + def get_value_type(self): + """ + The generator closure is passed around as a reference. 
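+
+        Illustratively (from ``__init__`` above), the underlying struct is
+        ``{ i32, { packed argument data }, { generator state data } }``;
+        only a pointer to it crosses function boundaries.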
+ """ + return self._be_ptr_type + + def get_argument_type(self): + return self._be_ptr_type + + def get_return_type(self): + return self._be_type + + def get_data_type(self): + return self._be_type + + def as_argument(self, builder, value): + return value + + def from_argument(self, builder, value): + return value + + def as_return(self, builder, value): + return self.as_data(builder, value) + + def from_return(self, builder, value): + return self.from_data(builder, value) + + def as_data(self, builder, value): + return builder.load(value) + + def from_data(self, builder, value): + stack = cgutils.alloca_once(builder, value.type) + builder.store(value, stack) + return stack + + +@register_default(types.ArrayCTypes) +class ArrayCTypesModel(StructModel): + def __init__(self, dmm, fe_type): + # ndim = fe_type.ndim + members = [('data', types.CPointer(fe_type.dtype)), + ('meminfo', types.MemInfoPointer(fe_type.dtype))] + super(ArrayCTypesModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.RangeType) +class RangeModel(StructModel): + def __init__(self, dmm, fe_type): + int_type = fe_type.iterator_type.yield_type + members = [('start', int_type), + ('stop', int_type), + ('step', int_type)] + super(RangeModel, self).__init__(dmm, fe_type, members) + + +# ============================================================================= + +@register_default(types.NumpyNdIndexType) +class NdIndexModel(StructModel): + def __init__(self, dmm, fe_type): + ndim = fe_type.ndim + members = [('shape', types.UniTuple(types.intp, ndim)), + ('indices', types.EphemeralArray(types.intp, ndim)), + ('exhausted', types.EphemeralPointer(types.boolean)), + ] + super(NdIndexModel, self).__init__(dmm, fe_type, members) + + +@register_default(types.NumpyFlatType) +def handle_numpy_flat_type(dmm, ty): + if ty.array_type.layout == 'C': + return CContiguousFlatIter(dmm, ty, need_indices=False) + else: + return FlatIter(dmm, ty) + +@register_default(types.NumpyNdEnumerateType) +def handle_numpy_ndenumerate_type(dmm, ty): + if ty.array_type.layout == 'C': + return CContiguousFlatIter(dmm, ty, need_indices=True) + else: + return FlatIter(dmm, ty) + +@register_default(types.BoundFunction) +def handle_bound_function(dmm, ty): + # The same as the underlying type + return dmm[ty.this] + + +@register_default(types.NumpyNdIterType) +class NdIter(StructModel): + def __init__(self, dmm, fe_type): + array_types = fe_type.arrays + ndim = fe_type.ndim + shape_len = ndim if fe_type.need_shaped_indexing else 1 + members = [('exhausted', types.EphemeralPointer(types.boolean)), + ('arrays', types.Tuple(array_types)), + # The iterator's main shape and indices + ('shape', types.UniTuple(types.intp, shape_len)), + ('indices', types.EphemeralArray(types.intp, shape_len)), + ] + # Indexing state for the various sub-iterators + # XXX use a tuple instead? + for i, sub in enumerate(fe_type.indexers): + kind, start_dim, end_dim, _ = sub + member_name = 'index%d' % i + if kind == 'flat': + # A single index into the flattened array + members.append((member_name, types.EphemeralPointer(types.intp))) + elif kind in ('scalar', 'indexed', '0d'): + # Nothing required + pass + else: + assert 0 + # Slots holding values of the scalar args + # XXX use a tuple instead? 
+ for i, ty in enumerate(fe_type.arrays): + if not isinstance(ty, types.Array): + member_name = 'scalar%d' % i + members.append((member_name, types.EphemeralPointer(ty))) + + super(NdIter, self).__init__(dmm, fe_type, members) + + +@register_default(types.DeferredType) +class DeferredStructModel(CompositeModel): + def __init__(self, dmm, fe_type): + super(DeferredStructModel, self).__init__(dmm, fe_type) + self.typename = "deferred.{0}".format(id(fe_type)) + self.actual_fe_type = fe_type.get() + + def get_value_type(self): + return ir.global_context.get_identified_type(self.typename + '.value') + + def get_data_type(self): + return ir.global_context.get_identified_type(self.typename + '.data') + + def get_argument_type(self): + return self._actual_model.get_argument_type() + + def as_argument(self, builder, value): + inner = self.get(builder, value) + return self._actual_model.as_argument(builder, inner) + + def from_argument(self, builder, value): + res = self._actual_model.from_argument(builder, value) + return self.set(builder, self.make_uninitialized(), res) + + def from_data(self, builder, value): + self._define() + elem = self.get(builder, value) + value = self._actual_model.from_data(builder, elem) + out = self.make_uninitialized() + return self.set(builder, out, value) + + def as_data(self, builder, value): + self._define() + elem = self.get(builder, value) + value = self._actual_model.as_data(builder, elem) + out = self.make_uninitialized(kind='data') + return self.set(builder, out, value) + + def from_return(self, builder, value): + return value + + def as_return(self, builder, value): + return value + + def get(self, builder, value): + return builder.extract_value(value, [0]) + + def set(self, builder, value, content): + return builder.insert_value(value, content, [0]) + + def make_uninitialized(self, kind='value'): + self._define() + if kind == 'value': + ty = self.get_value_type() + else: + ty = self.get_data_type() + return ir.Constant(ty, ir.Undefined) + + def _define(self): + valty = self.get_value_type() + self._define_value_type(valty) + datty = self.get_data_type() + self._define_data_type(datty) + + def _define_value_type(self, value_type): + if value_type.is_opaque: + value_type.set_body(self._actual_model.get_value_type()) + + def _define_data_type(self, data_type): + if data_type.is_opaque: + data_type.set_body(self._actual_model.get_data_type()) + + @property + def _actual_model(self): + return self._dmm.lookup(self.actual_fe_type) + + def traverse(self, builder): + return [(self.actual_fe_type, + lambda value: builder.extract_value(value, [0]))] + + +@register_default(types.StructRefPayload) +class StructPayloadModel(StructModel): + """Model for the payload of a mutable struct + """ + def __init__(self, dmm, fe_typ): + members = tuple(fe_typ.field_dict.items()) + super().__init__(dmm, fe_typ, members) + + +class StructRefModel(StructModel): + """Model for a mutable struct. 
+    A reference to the payload
+    """
+    def __init__(self, dmm, fe_typ):
+        dtype = fe_typ.get_data_type()
+        members = [
+            ("meminfo", types.MemInfoPointer(dtype)),
+        ]
+        super().__init__(dmm, fe_typ, members)
+
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/packer.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/packer.py
new file mode 100644
index 000000000..9efc51449
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/packer.py
@@ -0,0 +1,213 @@
+from collections import deque
+
+from numba.core import types, cgutils
+
+
+
+class DataPacker(object):
+    """
+    A helper to pack a number of typed arguments into a data structure.
+    Omitted arguments (i.e. values with the type `Omitted`) are automatically
+    skipped.
+    """
+    # XXX should DataPacker be a model for a dedicated type?
+
+    def __init__(self, dmm, fe_types):
+        self._dmm = dmm
+        self._fe_types = fe_types
+        self._models = [dmm.lookup(ty) for ty in fe_types]
+
+        self._pack_map = []
+        self._be_types = []
+        for i, ty in enumerate(fe_types):
+            if not isinstance(ty, types.Omitted):
+                self._pack_map.append(i)
+                self._be_types.append(self._models[i].get_data_type())
+
+    def as_data(self, builder, values):
+        """
+        Return the given values packed as a data structure.
+        """
+        elems = [self._models[i].as_data(builder, values[i])
+                 for i in self._pack_map]
+        return cgutils.make_anonymous_struct(builder, elems)
+
+    def _do_load(self, builder, ptr, formal_list=None):
+        res = []
+        for i, i_formal in enumerate(self._pack_map):
+            elem_ptr = cgutils.gep_inbounds(builder, ptr, 0, i)
+            val = self._models[i_formal].load_from_data_pointer(builder, elem_ptr)
+            if formal_list is None:
+                res.append((self._fe_types[i_formal], val))
+            else:
+                formal_list[i_formal] = val
+        return res
+
+    def load(self, builder, ptr):
+        """
+        Load the packed values and return a list of (type, value) tuples.
+        """
+        return self._do_load(builder, ptr)
+
+    def load_into(self, builder, ptr, formal_list):
+        """
+        Load the packed values into a sequence indexed by formal
+        argument number (skipping any Omitted position).
+        """
+        self._do_load(builder, ptr, formal_list)
+
+
+class ArgPacker(object):
+    """
+    Compute the position for each high-level typed argument.
+    It flattens every composite argument into primitive types.
+    It maintains a position map for unflattening the arguments.
+
+    Since structs (and especially nested structs) have specific ABI
+    requirements (e.g. alignment, pointer address-space, ...) on different
+    architectures (e.g. OpenCL, CUDA), flattening composite argument types
+    simplifies the call setup from the Python side. Functions then receive
+    only simple primitive types, of which there are only a handful.
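+
+    For example (an illustrative sketch; ``dmm``, ``builder`` and ``args``
+    are placeholders), a single ``Tuple(int64, float64)`` argument is
+    flattened into one ``i64`` and one ``double``::
+
+        packer = ArgPacker(dmm, [types.Tuple.from_types(
+            (types.int64, types.float64))])
+        packer.argument_types                  # -> (i64, double)
+        packer.from_arguments(builder, args)   # rebuilds the tuple value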
+ """ + + def __init__(self, dmm, fe_args): + self._dmm = dmm + self._fe_args = fe_args + self._nargs = len(fe_args) + + self._dm_args = [] + argtys = [] + for ty in fe_args: + dm = self._dmm.lookup(ty) + self._dm_args.append(dm) + argtys.append(dm.get_argument_type()) + self._unflattener = _Unflattener(argtys) + self._be_args = list(_flatten(argtys)) + + def as_arguments(self, builder, values): + """Flatten all argument values + """ + if len(values) != self._nargs: + raise TypeError("invalid number of args: expected %d, got %d" + % (self._nargs, len(values))) + + if not values: + return () + + args = [dm.as_argument(builder, val) + for dm, val in zip(self._dm_args, values) + ] + + args = tuple(_flatten(args)) + return args + + def from_arguments(self, builder, args): + """Unflatten all argument values + """ + + valtree = self._unflattener.unflatten(args) + values = [dm.from_argument(builder, val) + for dm, val in zip(self._dm_args, valtree) + ] + + return values + + def assign_names(self, args, names): + """Assign names for each flattened argument values. + """ + + valtree = self._unflattener.unflatten(args) + for aval, aname in zip(valtree, names): + self._assign_names(aval, aname) + + def _assign_names(self, val_or_nested, name, depth=()): + if isinstance(val_or_nested, (tuple, list)): + for pos, aval in enumerate(val_or_nested): + self._assign_names(aval, name, depth=depth + (pos,)) + else: + postfix = '.'.join(map(str, depth)) + parts = [name, postfix] + val_or_nested.name = '.'.join(filter(bool, parts)) + + @property + def argument_types(self): + """Return a list of LLVM types that are results of flattening + composite types. + """ + return tuple(ty for ty in self._be_args if ty != ()) + + +def _flatten(iterable): + """ + Flatten nested iterable of (tuple, list). + """ + def rec(iterable): + for i in iterable: + if isinstance(i, (tuple, list)): + for j in rec(i): + yield j + else: + yield i + return rec(iterable) + + +_PUSH_LIST = 1 +_APPEND_NEXT_VALUE = 2 +_APPEND_EMPTY_TUPLE = 3 +_POP = 4 + +class _Unflattener(object): + """ + An object used to unflatten nested sequences after a given pattern + (an arbitrarily nested sequence). + The pattern shows the nested sequence shape desired when unflattening; + the values it contains are irrelevant. + """ + + def __init__(self, pattern): + self._code = self._build_unflatten_code(pattern) + + def _build_unflatten_code(self, iterable): + """Build the unflatten opcode sequence for the given *iterable* structure + (an iterable of nested sequences). + """ + code = [] + def rec(iterable): + for i in iterable: + if isinstance(i, (tuple, list)): + if len(i) > 0: + code.append(_PUSH_LIST) + rec(i) + code.append(_POP) + else: + code.append(_APPEND_EMPTY_TUPLE) + else: + code.append(_APPEND_NEXT_VALUE) + + rec(iterable) + return code + + def unflatten(self, flatiter): + """Rebuild a nested tuple structure. 
+ """ + vals = deque(flatiter) + + res = [] + cur = res + stack = [] + for op in self._code: + if op is _PUSH_LIST: + stack.append(cur) + cur.append([]) + cur = cur[-1] + elif op is _APPEND_NEXT_VALUE: + cur.append(vals.popleft()) + elif op is _APPEND_EMPTY_TUPLE: + cur.append(()) + elif op is _POP: + cur = stack.pop() + + assert not stack, stack + assert not vals, vals + + return res diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/registry.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/registry.py new file mode 100644 index 000000000..18bdc475e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/registry.py @@ -0,0 +1,18 @@ +import functools +from .manager import DataModelManager + + +def register(dmm, typecls): + """Used as decorator to simplify datamodel registration. + Returns the object being decorated so that chaining is possible. + """ + def wraps(fn): + dmm.register(typecls, fn) + return fn + + return wraps + + +default_manager = DataModelManager() + +register_default = functools.partial(register, default_manager) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/testing.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/testing.py new file mode 100644 index 000000000..e2e8a2818 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/datamodel/testing.py @@ -0,0 +1,150 @@ +from llvmlite import ir +from llvmlite import binding as ll + +from numba.core import datamodel +import unittest + + +class DataModelTester(unittest.TestCase): + """ + Test the implementation of a DataModel for a frontend type. + """ + fe_type = NotImplemented + + def setUp(self): + self.module = ir.Module() + self.datamodel = datamodel.default_manager[self.fe_type] + + def test_as_arg(self): + """ + - Is as_arg() and from_arg() implemented? + - Are they the inverse of each other? + """ + fnty = ir.FunctionType(ir.VoidType(), []) + function = ir.Function(self.module, fnty, name="test_as_arg") + builder = ir.IRBuilder() + builder.position_at_end(function.append_basic_block()) + + undef_value = ir.Constant(self.datamodel.get_value_type(), None) + args = self.datamodel.as_argument(builder, undef_value) + self.assertIsNot(args, NotImplemented, "as_argument returned " + "NotImplementedError") + + if isinstance(args, (tuple, list)): + def recur_tuplize(args, func=None): + for arg in args: + if isinstance(arg, (tuple, list)): + yield tuple(recur_tuplize(arg, func=func)) + else: + if func is None: + yield arg + else: + yield func(arg) + + argtypes = tuple(recur_tuplize(args, func=lambda x: x.type)) + exptypes = tuple(recur_tuplize( + self.datamodel.get_argument_type())) + self.assertEqual(exptypes, argtypes) + else: + self.assertEqual(args.type, + self.datamodel.get_argument_type()) + + rev_value = self.datamodel.from_argument(builder, args) + self.assertEqual(rev_value.type, self.datamodel.get_value_type()) + + builder.ret_void() # end function + + # Ensure valid LLVM generation + materialized = ll.parse_assembly(str(self.module)) + str(materialized) + + def test_as_return(self): + """ + - Is as_return() and from_return() implemented? + - Are they the inverse of each other? 
+ """ + fnty = ir.FunctionType(ir.VoidType(), []) + function = ir.Function(self.module, fnty, name="test_as_return") + builder = ir.IRBuilder() + builder.position_at_end(function.append_basic_block()) + + undef_value = ir.Constant(self.datamodel.get_value_type(), None) + ret = self.datamodel.as_return(builder, undef_value) + self.assertIsNot(ret, NotImplemented, "as_return returned " + "NotImplementedError") + + self.assertEqual(ret.type, self.datamodel.get_return_type()) + + rev_value = self.datamodel.from_return(builder, ret) + self.assertEqual(rev_value.type, self.datamodel.get_value_type()) + + builder.ret_void() # end function + + # Ensure valid LLVM generation + materialized = ll.parse_assembly(str(self.module)) + str(materialized) + + +class SupportAsDataMixin(object): + """Test as_data() and from_data() + """ + # XXX test load_from_data_pointer() as well + + def test_as_data(self): + fnty = ir.FunctionType(ir.VoidType(), []) + function = ir.Function(self.module, fnty, name="test_as_data") + builder = ir.IRBuilder() + builder.position_at_end(function.append_basic_block()) + + undef_value = ir.Constant(self.datamodel.get_value_type(), None) + data = self.datamodel.as_data(builder, undef_value) + self.assertIsNot(data, NotImplemented, + "as_data returned NotImplemented") + + self.assertEqual(data.type, self.datamodel.get_data_type()) + + rev_value = self.datamodel.from_data(builder, data) + self.assertEqual(rev_value.type, + self.datamodel.get_value_type()) + + builder.ret_void() # end function + + # Ensure valid LLVM generation + materialized = ll.parse_assembly(str(self.module)) + str(materialized) + + +class NotSupportAsDataMixin(object): + """Ensure as_data() and from_data() raise NotImplementedError. + """ + + def test_as_data_not_supported(self): + fnty = ir.FunctionType(ir.VoidType(), []) + function = ir.Function(self.module, fnty, name="test_as_data") + builder = ir.IRBuilder() + builder.position_at_end(function.append_basic_block()) + + undef_value = ir.Constant(self.datamodel.get_value_type(), None) + with self.assertRaises(NotImplementedError): + data = self.datamodel.as_data(builder, undef_value) + with self.assertRaises(NotImplementedError): + rev_data = self.datamodel.from_data(builder, undef_value) + + +class DataModelTester_SupportAsDataMixin(DataModelTester, + SupportAsDataMixin): + pass + + +class DataModelTester_NotSupportAsDataMixin(DataModelTester, + NotSupportAsDataMixin): + pass + + +def test_factory(support_as_data=True): + """A helper for returning a unittest TestCase for testing + """ + if support_as_data: + return DataModelTester_SupportAsDataMixin + else: + return DataModelTester_NotSupportAsDataMixin diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/debuginfo.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/debuginfo.py new file mode 100644 index 000000000..692241a35 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/debuginfo.py @@ -0,0 +1,609 @@ +""" +Implements helpers to build LLVM debuginfo. 
+""" + + +import abc +import os.path +from contextlib import contextmanager + +from llvmlite import ir +from numba.core import cgutils, types +from numba.core.datamodel.models import ComplexModel, UniTupleModel +from numba.core import config + + +@contextmanager +def suspend_emission(builder): + """Suspends the emission of debug_metadata for the duration of the context + managed block.""" + ref = builder.debug_metadata + builder.debug_metadata = None + try: + yield + finally: + builder.debug_metadata = ref + + +class AbstractDIBuilder(metaclass=abc.ABCMeta): + @abc.abstractmethod + def mark_variable(self, builder, allocavalue, name, lltype, size, line, + datamodel=None, argidx=None): + """Emit debug info for the variable. + """ + pass + + @abc.abstractmethod + def mark_location(self, builder, line): + """Emit source location information to the given IRBuilder. + """ + pass + + @abc.abstractmethod + def mark_subprogram(self, function, qualname, argnames, argtypes, line): + """Emit source location information for the given function. + """ + pass + + @abc.abstractmethod + def initialize(self): + """Initialize the debug info. An opportunity for the debuginfo to + prepare any necessary data structures. + """ + + @abc.abstractmethod + def finalize(self): + """Finalize the debuginfo by emitting all necessary metadata. + """ + pass + + +class DummyDIBuilder(AbstractDIBuilder): + + def __init__(self, module, filepath, cgctx): + pass + + def mark_variable(self, builder, allocavalue, name, lltype, size, line, + datamodel=None, argidx=None): + pass + + def mark_location(self, builder, line): + pass + + def mark_subprogram(self, function, qualname, argnames, argtypes, line): + pass + + def initialize(self): + pass + + def finalize(self): + pass + + +_BYTE_SIZE = 8 + + +class DIBuilder(AbstractDIBuilder): + DWARF_VERSION = 4 + DEBUG_INFO_VERSION = 3 + DBG_CU_NAME = 'llvm.dbg.cu' + _DEBUG = False + + def __init__(self, module, filepath, cgctx): + self.module = module + self.filepath = os.path.abspath(filepath) + self.difile = self._di_file() + self.subprograms = [] + self.cgctx = cgctx + self.initialize() + + def initialize(self): + # Create the compile unit now because it is referenced when + # constructing subprograms + self.dicompileunit = self._di_compile_unit() + + def _var_type(self, lltype, size, datamodel=None): + if self._DEBUG: + print("-->", lltype, size, datamodel, + getattr(datamodel, 'fe_type', 'NO FE TYPE')) + m = self.module + bitsize = _BYTE_SIZE * size + + int_type = ir.IntType, + real_type = ir.FloatType, ir.DoubleType + # For simple numeric types, choose the closest encoding. + # We treat all integers as unsigned when there's no known datamodel. + if isinstance(lltype, int_type + real_type): + if datamodel is None: + # This is probably something like an `i8*` member of a struct + name = str(lltype) + if isinstance(lltype, int_type): + ditok = 'DW_ATE_unsigned' + else: + ditok = 'DW_ATE_float' + else: + # This is probably a known int/float scalar type + name = str(datamodel.fe_type) + if isinstance(datamodel.fe_type, types.Integer): + if datamodel.fe_type.signed: + ditok = 'DW_ATE_signed' + else: + ditok = 'DW_ATE_unsigned' + else: + ditok = 'DW_ATE_float' + mdtype = m.add_debug_info('DIBasicType', { + 'name': name, + 'size': bitsize, + 'encoding': ir.DIToken(ditok), + }) + elif isinstance(datamodel, ComplexModel): + # TODO: Is there a better way of determining "this is a complex + # number"? 
+            #
+            # NOTE: Commented below is the way to generate the metadata for a
+            # C99 complex type that's directly supported by DWARF. Numba however
+            # generates a struct with real/imag cf. CPython to give a more
+            # pythonic feel to inspection.
+            #
+            # mdtype = m.add_debug_info('DIBasicType', {
+            #     'name': f"{datamodel.fe_type} ({str(lltype)})",
+            #     'size': bitsize,
+            #     'encoding': ir.DIToken('DW_ATE_complex_float'),
+            #})
+            meta = []
+            offset = 0
+            for ix, name in enumerate(('real', 'imag')):
+                component = lltype.elements[ix]
+                component_size = self.cgctx.get_abi_sizeof(component)
+                component_basetype = m.add_debug_info('DIBasicType', {
+                    'name': str(component),
+                    'size': _BYTE_SIZE * component_size,  # bits
+                    'encoding': ir.DIToken('DW_ATE_float'),
+                })
+                derived_type = m.add_debug_info('DIDerivedType', {
+                    'tag': ir.DIToken('DW_TAG_member'),
+                    'name': name,
+                    'baseType': component_basetype,
+                    'size': _BYTE_SIZE * component_size,  # DW_TAG_member size is in bits
+                    'offset': offset,
+                })
+                meta.append(derived_type)
+                offset += (_BYTE_SIZE * component_size)  # offset is in bits
+            mdtype = m.add_debug_info('DICompositeType', {
+                'tag': ir.DIToken('DW_TAG_structure_type'),
+                'name': f"{datamodel.fe_type} ({str(lltype)})",
+                'identifier': str(lltype),
+                'elements': m.add_metadata(meta),
+                'size': offset,
+            }, is_distinct=True)
+        elif isinstance(datamodel, UniTupleModel):
+            element = lltype.element
+            el_size = self.cgctx.get_abi_sizeof(element)
+            basetype = self._var_type(element, el_size)
+            name = f"{datamodel.fe_type} ({str(lltype)})"
+            count = size // el_size
+            mdrange = m.add_debug_info('DISubrange', {
+                'count': count,
+            })
+            mdtype = m.add_debug_info('DICompositeType', {
+                'tag': ir.DIToken('DW_TAG_array_type'),
+                'baseType': basetype,
+                'name': name,
+                'size': bitsize,
+                'identifier': str(lltype),
+                'elements': m.add_metadata([mdrange]),
+            })
+        elif isinstance(lltype, ir.PointerType):
+            model = getattr(datamodel, '_pointee_model', None)
+            basetype = self._var_type(lltype.pointee,
+                                      self.cgctx.get_abi_sizeof(lltype.pointee),
+                                      model)
+            mdtype = m.add_debug_info('DIDerivedType', {
+                'tag': ir.DIToken('DW_TAG_pointer_type'),
+                'baseType': basetype,
+                'size': _BYTE_SIZE * self.cgctx.get_abi_sizeof(lltype)
+            })
+        elif isinstance(lltype, ir.LiteralStructType):
+            # Struct type
+            meta = []
+            offset = 0
+            if datamodel is None or not datamodel.inner_models():
+                name = f"Anonymous struct ({str(lltype)})"
+                for field_id, element in enumerate(lltype.elements):
+                    size = self.cgctx.get_abi_sizeof(element)
+                    basetype = self._var_type(element, size)
+                    derived_type = m.add_debug_info('DIDerivedType', {
+                        'tag': ir.DIToken('DW_TAG_member'),
+                        'name': f'<member_{field_id}>',
+                        'baseType': basetype,
+                        'size': _BYTE_SIZE * size,  # DW_TAG_member size is in bits
+                        'offset': offset,
+                    })
+                    meta.append(derived_type)
+                    offset += (_BYTE_SIZE * size)  # offset is in bits
+            else:
+                name = f"{datamodel.fe_type} ({str(lltype)})"
+                for element, field, model in zip(lltype.elements,
+                                                 datamodel._fields,
+                                                 datamodel.inner_models()):
+                    size = self.cgctx.get_abi_sizeof(element)
+                    basetype = self._var_type(element, size, datamodel=model)
+                    derived_type = m.add_debug_info('DIDerivedType', {
+                        'tag': ir.DIToken('DW_TAG_member'),
+                        'name': field,
+                        'baseType': basetype,
+                        'size': _BYTE_SIZE * size,  # DW_TAG_member size is in bits
+                        'offset': offset,
+                    })
+                    meta.append(derived_type)
+                    offset += (_BYTE_SIZE * size)  # offset is in bits
+
+            mdtype = m.add_debug_info('DICompositeType', {
+                'tag': ir.DIToken('DW_TAG_structure_type'),
+                'name': name,
+                'identifier': str(lltype),
+                'elements': m.add_metadata(meta),
+                'size': offset,
+            }, is_distinct=True)
+        elif isinstance(lltype, ir.ArrayType):
+            element = lltype.element
+            el_size = self.cgctx.get_abi_sizeof(element)
+            basetype = self._var_type(element, el_size)
+            count = size // el_size
+            mdrange = m.add_debug_info('DISubrange', {
+                'count': count,
+            })
+            mdtype = m.add_debug_info('DICompositeType', {
+                'tag': ir.DIToken('DW_TAG_array_type'),
+                'baseType': basetype,
+                'name': str(lltype),
+                'size': bitsize,
+                'identifier': str(lltype),
+                'elements': m.add_metadata([mdrange]),
+            })
+        else:
+            # For all other types, describe it as a sequence of bytes
+            count = size
+            mdrange = m.add_debug_info('DISubrange', {
+                'count': count,
+            })
+            mdbase = m.add_debug_info('DIBasicType', {
+                'name': 'byte',
+                'size': _BYTE_SIZE,
+                'encoding': ir.DIToken('DW_ATE_unsigned_char'),
+            })
+            mdtype = m.add_debug_info('DICompositeType', {
+                'tag': ir.DIToken('DW_TAG_array_type'),
+                'baseType': mdbase,
+                'name': str(lltype),
+                'size': bitsize,
+                'identifier': str(lltype),
+                'elements': m.add_metadata([mdrange]),
+            })
+
+        return mdtype
+
+    def mark_variable(self, builder, allocavalue, name, lltype, size, line,
+                      datamodel=None, argidx=None):
+
+        arg_index = 0 if argidx is None else argidx
+        m = self.module
+        fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3)
+        decl = cgutils.get_or_insert_function(m, fnty, 'llvm.dbg.declare')
+
+        mdtype = self._var_type(lltype, size, datamodel=datamodel)
+        name = name.replace('.', '$')  # for gdb to work correctly
+        mdlocalvar = m.add_debug_info('DILocalVariable', {
+            'name': name,
+            'arg': arg_index,
+            'scope': self.subprograms[-1],
+            'file': self.difile,
+            'line': line,
+            'type': mdtype,
+        })
+        mdexpr = m.add_debug_info('DIExpression', {})
+
+        return builder.call(decl, [allocavalue, mdlocalvar, mdexpr])
+
+    def mark_location(self, builder, line):
+        builder.debug_metadata = self._add_location(line)
+
+    def mark_subprogram(self, function, qualname, argnames, argtypes, line):
+        name = qualname
+        argmap = dict(zip(argnames, argtypes))
+        di_subp = self._add_subprogram(name=name, linkagename=function.name,
+                                       line=line, function=function,
+                                       argmap=argmap)
+        function.set_metadata("dbg", di_subp)
+
+        # Don't mark alwaysinline functions as noinline.
+        if 'alwaysinline' not in function.attributes:
+            # disable inlining for this function for easier debugging
+            function.attributes.add('noinline')
+
+    def finalize(self):
+        dbgcu = cgutils.get_or_insert_named_metadata(self.module, self.DBG_CU_NAME)
+        dbgcu.add(self.dicompileunit)
+        self._set_module_flags()
+
+    #
+    # Internal APIs
+    #
+
+    def _set_module_flags(self):
+        """Set the module flags metadata
+        """
+        module = self.module
+        mflags = cgutils.get_or_insert_named_metadata(module, 'llvm.module.flags')
+        # Set *require* behavior to warning
+        # See http://llvm.org/docs/LangRef.html#module-flags-metadata
+        require_warning_behavior = self._const_int(2)
+        if self.DWARF_VERSION is not None:
+            dwarf_version = module.add_metadata([
+                require_warning_behavior,
+                "Dwarf Version",
+                self._const_int(self.DWARF_VERSION)
+            ])
+            if dwarf_version not in mflags.operands:
+                mflags.add(dwarf_version)
+        debuginfo_version = module.add_metadata([
+            require_warning_behavior,
+            "Debug Info Version",
+            self._const_int(self.DEBUG_INFO_VERSION)
+        ])
+        if debuginfo_version not in mflags.operands:
+            mflags.add(debuginfo_version)
+
+    def _add_subprogram(self, name, linkagename, line, function, argmap):
+        """Emit subprogram metadata
+        """
+        subp = self._di_subprogram(name, linkagename, line, function, argmap)
+        self.subprograms.append(subp)
+        return subp
+
+    def _add_location(self, line):
+        """Emit location metadata
+        """
+        loc = self._di_location(line)
+        return loc
+
+    @classmethod
+    def _const_int(cls, num, bits=32):
+        """Util to create constant int in metadata
+        """
+        return ir.IntType(bits)(num)
+
+    @classmethod
+    def _const_bool(cls, boolean):
+        """Util to create constant boolean in metadata
+        """
+        return ir.IntType(1)(boolean)
+
+    #
+    # Helpers to emit the metadata nodes
+    #
+
+    def _di_file(self):
+        return self.module.add_debug_info('DIFile', {
+            'directory': os.path.dirname(self.filepath),
+            'filename': os.path.basename(self.filepath),
+        })
+
+    def _di_compile_unit(self):
+        return self.module.add_debug_info('DICompileUnit', {
+            'language': ir.DIToken('DW_LANG_C_plus_plus'),
+            'file': self.difile,
+            # Numba has to pretend to be clang to ensure the prologue is skipped
+            # correctly in gdb. See:
+            # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/amd64-tdep.c;h=e563d369d8cb3eb3c2f732c2fa850ec70ba8d63b;hb=a4b0231e179607e47b1cdf1fe15c5dc25e482fad#l2521
+            # Note the "producer_is_llvm" call to specialise the prologue
+            # handling, this is defined here:
+            # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/producer.c;h=cdfd80d904c09394febd18749bb90359b2d128cc;hb=a4b0231e179607e47b1cdf1fe15c5dc25e482fad#l124
+            # and to get a match for this condition the 'producer' must start
+            # with "clang ", hence the following...
+            'producer': 'clang (Numba)',
+            'runtimeVersion': 0,
+            'isOptimized': config.OPT != 0,
+            'emissionKind': 1,  # 0-NoDebug, 1-FullDebug
+        }, is_distinct=True)
+
+    def _di_subroutine_type(self, line, function, argmap):
+        # The function call conv needs encoding.
+ llfunc = function + md = [] + + for idx, llarg in enumerate(llfunc.args): + if not llarg.name.startswith('arg.'): + name = llarg.name.replace('.', '$') # for gdb to work correctly + lltype = llarg.type + size = self.cgctx.get_abi_sizeof(lltype) + mdtype = self._var_type(lltype, size, datamodel=None) + md.append(mdtype) + + for idx, (name, nbtype) in enumerate(argmap.items()): + name = name.replace('.', '$') # for gdb to work correctly + datamodel = self.cgctx.data_model_manager[nbtype] + lltype = self.cgctx.get_value_type(nbtype) + size = self.cgctx.get_abi_sizeof(lltype) + mdtype = self._var_type(lltype, size, datamodel=datamodel) + md.append(mdtype) + + return self.module.add_debug_info('DISubroutineType', { + 'types': self.module.add_metadata(md), + }) + + def _di_subprogram(self, name, linkagename, line, function, argmap): + return self.module.add_debug_info('DISubprogram', { + 'name': name, + 'linkageName': linkagename, + 'scope': self.difile, + 'file': self.difile, + 'line': line, + 'type': self._di_subroutine_type(line, function, argmap), + 'isLocal': False, + 'isDefinition': True, + 'scopeLine': line, + 'isOptimized': config.OPT != 0, + 'unit': self.dicompileunit, + }, is_distinct=True) + + def _di_location(self, line): + return self.module.add_debug_info('DILocation', { + 'line': line, + 'column': 1, + 'scope': self.subprograms[-1], + }) + + +class NvvmDIBuilder(DIBuilder): + """ + Only implemented the minimal metadata to get line number information. + See http://llvm.org/releases/3.4/docs/LangRef.html + """ + # These constants are copied from llvm3.4 + DW_LANG_Python = 0x0014 + DI_Compile_unit = 786449 + DI_Subroutine_type = 786453 + DI_Subprogram = 786478 + DI_File = 786473 + + DWARF_VERSION = None # don't emit DWARF version + DEBUG_INFO_VERSION = 1 # as required by NVVM IR Spec + # Rename DIComputeUnit MD to hide it from llvm.parse_assembly() + # which strips invalid/outdated debug metadata + DBG_CU_NAME = 'numba.llvm.dbg.cu' + + # Default member + # Used in mark_location to remember last lineno to avoid duplication + _last_lineno = None + + def mark_variable(self, builder, allocavalue, name, lltype, size, line, + datamodel=None, argidx=None): + # unsupported + pass + + def mark_location(self, builder, line): + # Avoid duplication + if self._last_lineno == line: + return + self._last_lineno = line + # Add call to an inline asm to mark line location + asmty = ir.FunctionType(ir.VoidType(), []) + asm = ir.InlineAsm(asmty, "// dbg {}".format(line), "", + side_effect=True) + call = builder.call(asm, []) + md = self._di_location(line) + call.set_metadata('numba.dbg', md) + + def mark_subprogram(self, function, qualname, argnames, argtypes, line): + argmap = dict(zip(argnames, argtypes)) + self._add_subprogram(name=qualname, linkagename=function.name, + line=line) + + def _add_subprogram(self, name, linkagename, line): + """Emit subprogram metadata + """ + subp = self._di_subprogram(name, linkagename, line) + self.subprograms.append(subp) + return subp + + # + # Helper methods to create the metadata nodes. 
+ # + + def _filepair(self): + return self.module.add_metadata([ + os.path.basename(self.filepath), + os.path.dirname(self.filepath), + ]) + + def _di_file(self): + return self.module.add_metadata([ + self._const_int(self.DI_File), + self._filepair(), + ]) + + def _di_compile_unit(self): + filepair = self._filepair() + empty = self.module.add_metadata([self._const_int(0)]) + sp_metadata = self.module.add_metadata(self.subprograms) + return self.module.add_metadata([ + self._const_int(self.DI_Compile_unit), # tag + filepair, # source directory and file pair + self._const_int(self.DW_LANG_Python), # language + 'Numba', # producer + self._const_bool(True), # optimized + "", # flags?? + self._const_int(0), # runtime version + empty, # enums types + empty, # retained types + self.module.add_metadata(self.subprograms), # subprograms + empty, # global variables + empty, # imported entities + "", # split debug filename + ]) + + def _di_subroutine_type(self): + types = self.module.add_metadata([None]) + return self.module.add_metadata([ + self._const_int(self.DI_Subroutine_type), # tag + self._const_int(0), + None, + "", + self._const_int(0), # line of definition + self._const_int(0, 64), # size in bits + self._const_int(0, 64), # offset in bits + self._const_int(0, 64), # align in bits + self._const_int(0), # flags + None, + types, + self._const_int(0), + None, + None, + None, + ]) + + def _di_subprogram(self, name, linkagename, line): + function_ptr = self.module.get_global(linkagename) + subroutine_type = self._di_subroutine_type() + funcvars = self.module.add_metadata([self._const_int(0)]) + context = self._di_file() + return self.module.add_metadata([ + self._const_int(self.DI_Subprogram), # tag + self._filepair(), # source dir & file + context, # context descriptor + name, # name + name, # display name + linkagename, # linkage name + self._const_int(line), # line + subroutine_type, # type descriptor + self._const_bool(False), # is local + self._const_bool(True), # is definition + self._const_int(0), # virtuality + self._const_int(0), # virtual function index + None, # vtable base type + self._const_int(0), # flags + self._const_bool(True), # is optimized + function_ptr, # pointer to function + None, # function template parameters + None, # function declaration descriptor + funcvars, # function variables + self._const_int(line) # scope line + ]) + + def _di_location(self, line): + return self.module.add_metadata([ + self._const_int(line), # line + self._const_int(0), # column + self.subprograms[-1], # scope + None, # original scope + ]) + + def initialize(self): + pass + + def finalize(self): + # We create the compile unit at this point because subprograms is + # populated and can be referred to by the compile unit. + self.dicompileunit = self._di_compile_unit() + super().finalize() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/decorators.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/decorators.py new file mode 100644 index 000000000..2dfc4633f --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/decorators.py @@ -0,0 +1,309 @@ +""" +Define @jit and related decorators. 
+""" + + +import sys +import warnings +import inspect +import logging + +from numba.core.errors import DeprecationError, NumbaDeprecationWarning +from numba.stencils.stencil import stencil +from numba.core import config, extending, sigutils, registry + +_logger = logging.getLogger(__name__) + + +# ----------------------------------------------------------------------------- +# Decorators + +_msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. " + "Signatures should be passed as the first " + "positional argument.") + + +def jit(signature_or_function=None, locals={}, cache=False, + pipeline_class=None, boundscheck=None, **options): + """ + This decorator is used to compile a Python function into native code. + + Args + ----- + signature_or_function: + The (optional) signature or list of signatures to be compiled. + If not passed, required signatures will be compiled when the + decorated function is called, depending on the argument values. + As a convenience, you can directly pass the function to be compiled + instead. + + locals: dict + Mapping of local variable names to Numba types. Used to override the + types deduced by Numba's type inference engine. + + pipeline_class: type numba.compiler.CompilerBase + The compiler pipeline type for customizing the compilation stages. + + options: + For a cpu target, valid options are: + nopython: bool + Set to True to disable the use of PyObjects and Python API + calls. The default behavior is to allow the use of PyObjects + and Python API. Default value is False. + + forceobj: bool + Set to True to force the use of PyObjects for every value. + Default value is False. + + looplift: bool + Set to True to enable jitting loops in nopython mode while + leaving surrounding code in object mode. This allows functions + to allocate NumPy arrays and use Python objects, while the + tight loops in the function can still be compiled in nopython + mode. Any arrays that the tight loop uses should be created + before the loop is entered. Default value is True. + + error_model: str + The error-model affects divide-by-zero behavior. + Valid values are 'python' and 'numpy'. The 'python' model + raises exception. The 'numpy' model sets the result to + *+/-inf* or *nan*. Default value is 'python'. + + inline: str or callable + The inline option will determine whether a function is inlined + at into its caller if called. String options are 'never' + (default) which will never inline, and 'always', which will + always inline. If a callable is provided it will be called with + the call expression node that is requesting inlining, the + caller's IR and callee's IR as arguments, it is expected to + return Truthy as to whether to inline. + NOTE: This inlining is performed at the Numba IR level and is in + no way related to LLVM inlining. + + boundscheck: bool or None + Set to True to enable bounds checking for array indices. Out + of bounds accesses will raise IndexError. The default is to + not do bounds checking. If False, bounds checking is disabled, + out of bounds accesses can produce garbage results or segfaults. + However, enabling bounds checking will slow down typical + functions, so it is recommended to only use this flag for + debugging. You can also set the NUMBA_BOUNDSCHECK environment + variable to 0 or 1 to globally override this flag. The default + value is None, which under normal execution equates to False, + but if debug is set to True then bounds checking will be + enabled. + + Returns + -------- + A callable usable as a compiled function. 
Actual compiling will be + done lazily if no explicit signatures are passed. + + Examples + -------- + The function can be used in the following ways: + + 1) jit(signatures, **targetoptions) -> jit(function) + + Equivalent to: + + d = dispatcher(function, targetoptions) + for signature in signatures: + d.compile(signature) + + Create a dispatcher object for a python function. Then, compile + the function with the given signature(s). + + Example: + + @jit("int32(int32, int32)") + def foo(x, y): + return x + y + + @jit(["int32(int32, int32)", "float32(float32, float32)"]) + def bar(x, y): + return x + y + + 2) jit(function, **targetoptions) -> dispatcher + + Create a dispatcher function object that specializes at call site. + + Examples: + + @jit + def foo(x, y): + return x + y + + @jit(nopython=True) + def bar(x, y): + return x + y + + """ + if 'argtypes' in options: + raise DeprecationError(_msg_deprecated_signature_arg.format('argtypes')) + if 'restype' in options: + raise DeprecationError(_msg_deprecated_signature_arg.format('restype')) + if options.get('nopython', False) and options.get('forceobj', False): + raise ValueError("Only one of 'nopython' or 'forceobj' can be True.") + + if "_target" in options: + # Set the "target_backend" option if "_target" is defined. + options['target_backend'] = options['_target'] + target = options.pop('_target', 'cpu') + + options['boundscheck'] = boundscheck + + # Handle signature + if signature_or_function is None: + # No signature, no function + pyfunc = None + sigs = None + elif isinstance(signature_or_function, list): + # A list of signatures is passed + pyfunc = None + sigs = signature_or_function + elif sigutils.is_signature(signature_or_function): + # A single signature is passed + pyfunc = None + sigs = [signature_or_function] + else: + # A function is passed + pyfunc = signature_or_function + sigs = None + + dispatcher_args = {} + if pipeline_class is not None: + dispatcher_args['pipeline_class'] = pipeline_class + wrapper = _jit(sigs, locals=locals, target=target, cache=cache, + targetoptions=options, **dispatcher_args) + if pyfunc is not None: + return wrapper(pyfunc) + else: + return wrapper + + +def _jit(sigs, locals, target, cache, targetoptions, **dispatcher_args): + + from numba.core.target_extension import resolve_dispatcher_from_str + dispatcher = resolve_dispatcher_from_str(target) + + def wrapper(func): + if extending.is_jitted(func): + raise TypeError( + "A jit decorator was called on an already jitted function " + f"{func}. If trying to access the original python " + f"function, use the {func}.py_func attribute." + ) + + if not inspect.isfunction(func): + raise TypeError( + "The decorated object is not a function (got type " + f"{type(func)})." + ) + + if config.ENABLE_CUDASIM and target == 'cuda': + from numba import cuda + return cuda.jit(func) + if config.DISABLE_JIT and not target == 'npyufunc': + return func + disp = dispatcher(py_func=func, locals=locals, + targetoptions=targetoptions, + **dispatcher_args) + if cache: + disp.enable_caching() + if sigs is not None: + # Register the Dispatcher to the type inference mechanism, + # even though the decorator hasn't returned yet. + from numba.core import typeinfer + with typeinfer.register_dispatcher(disp): + for sig in sigs: + disp.compile(sig) + disp.disable_compile() + return disp + + return wrapper + + +def generated_jit(function=None, cache=False, + pipeline_class=None, **options): + """ + This decorator allows flexible type-based compilation + of a jitted function. 
It works as `@jit`, except that the decorated + function is called at compile-time with the *types* of the arguments + and should return an implementation function for those types. + """ + dispatcher_args = {} + if pipeline_class is not None: + dispatcher_args['pipeline_class'] = pipeline_class + wrapper = _jit(sigs=None, locals={}, target='cpu', cache=cache, + targetoptions=options, impl_kind='generated', + **dispatcher_args) + if function is not None: + return wrapper(function) + else: + return wrapper + + +def njit(*args, **kws): + """ + Equivalent to jit(nopython=True) + + See documentation for jit function/decorator for full description. + """ + if 'nopython' in kws: + warnings.warn('nopython is set for njit and is ignored', RuntimeWarning) + if 'forceobj' in kws: + warnings.warn('forceobj is set for njit and is ignored', RuntimeWarning) + del kws['forceobj'] + kws.update({'nopython': True}) + return jit(*args, **kws) + + +def cfunc(sig, locals={}, cache=False, pipeline_class=None, **options): + """ + This decorator is used to compile a Python function into a C callback + usable with foreign C libraries. + + Usage:: + @cfunc("float64(float64, float64)", nopython=True, cache=True) + def add(a, b): + return a + b + + """ + sig = sigutils.normalize_signature(sig) + + def wrapper(func): + from numba.core.ccallback import CFunc + additional_args = {} + if pipeline_class is not None: + additional_args['pipeline_class'] = pipeline_class + res = CFunc(func, sig, locals=locals, options=options, **additional_args) + if cache: + res.enable_caching() + res.compile() + return res + + return wrapper + + +def jit_module(**kwargs): + """ Automatically ``jit``-wraps functions defined in a Python module + + Note that ``jit_module`` should only be called at the end of the module to + be jitted. In addition, only functions which are defined in the module + ``jit_module`` is called from are considered for automatic jit-wrapping. + See the Numba documentation for more information about what can/cannot be + jitted. + + :param kwargs: Keyword arguments to pass to ``jit`` such as ``nopython`` + or ``error_model``. + + """ + # Get the module jit_module is being called from + frame = inspect.stack()[1] + module = inspect.getmodule(frame[0]) + # Replace functions in module with jit-wrapped versions + for name, obj in module.__dict__.items(): + if inspect.isfunction(obj) and inspect.getmodule(obj) == module: + _logger.debug("Auto decorating function {} from module {} with jit " + "and options: {}".format(obj, module.__name__, kwargs)) + module.__dict__[name] = jit(obj, **kwargs) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/descriptors.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/descriptors.py new file mode 100644 index 000000000..9c0c367e9 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/descriptors.py @@ -0,0 +1,21 @@ +""" +Target Descriptors +""" + +from abc import ABCMeta, abstractmethod + + +class TargetDescriptor(metaclass=ABCMeta): + + def __init__(self, target_name): + self._target_name = target_name + + @property + @abstractmethod + def typing_context(self): + ... + + @property + @abstractmethod + def target_context(self): + ... 
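As a usage sketch for the ``jit_module`` helper in decorators.py above (a
hedged example; ``mymodule`` and its functions are hypothetical), the call is
placed at the end of the module whose functions should be wrapped:

    # mymodule.py
    def inc(x):
        return x + 1

    def double(x):
        return x * 2

    from numba import jit_module
    jit_module(nopython=True, error_model='numpy')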
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dispatcher.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dispatcher.py new file mode 100644 index 000000000..69414a7c5 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/dispatcher.py @@ -0,0 +1,1322 @@ +# -*- coding: utf-8 -*- + + +import collections +import functools +import sys +import types as pytypes +import uuid +import weakref +from contextlib import ExitStack + +from numba import _dispatcher +from numba.core import ( + utils, types, errors, typing, serialize, config, compiler, sigutils +) +from numba.core.compiler_lock import global_compiler_lock +from numba.core.typeconv.rules import default_type_manager +from numba.core.typing.templates import fold_arguments +from numba.core.typing.typeof import Purpose, typeof +from numba.core.bytecode import get_code_object +from numba.core.caching import NullCache, FunctionCache +from numba.core import entrypoints +from numba.core.retarget import BaseRetarget +import numba.core.event as ev + + +class _RetargetStack(utils.ThreadLocalStack, stack_name="retarget"): + def push(self, state): + super().push(state) + _dispatcher.set_use_tls_target_stack(len(self) > 0) + + def pop(self): + super().pop() + _dispatcher.set_use_tls_target_stack(len(self) > 0) + + +class TargetConfigurationStack: + """The target configuration stack. + + Uses the BORG pattern and stores states in threadlocal storage. + + WARNING: features associated with this class are experimental. The API + may change without notice. + """ + + def __init__(self): + self._stack = _RetargetStack() + + def get(self): + """Get the current target from the top of the stack. + + May raise IndexError if the stack is empty. Users should check the size + of the stack beforehand. + """ + return self._stack.top() + + def __len__(self): + """Size of the stack + """ + return len(self._stack) + + @classmethod + def switch_target(cls, retarget: BaseRetarget): + """Returns a contextmanager that pushes a new retarget handler, + an instance of `numba.core.retarget.BaseRetarget`, onto the + target-config stack for the duration of the context-manager. + """ + return cls()._stack.enter(retarget) + + +class OmittedArg(object): + """ + A placeholder for omitted arguments with a default value. + """ + + def __init__(self, value): + self.value = value + + def __repr__(self): + return "omitted arg(%r)" % (self.value,) + + @property + def _numba_type_(self): + return types.Omitted(self.value) + + +class _FunctionCompiler(object): + def __init__(self, py_func, targetdescr, targetoptions, locals, + pipeline_class): + self.py_func = py_func + self.targetdescr = targetdescr + self.targetoptions = targetoptions + self.locals = locals + self.pysig = utils.pysignature(self.py_func) + self.pipeline_class = pipeline_class + # Remember key=(args, return_type) combinations that will fail + # compilation to avoid compilation attempt on them. The values are + # the exceptions. + self._failed_cache = {} + + def fold_argument_types(self, args, kws): + """ + Given positional and named argument types, fold keyword arguments + and resolve defaults by inserting types.Omitted() instances. + + A (pysig, argument types) tuple is returned. 
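The retarget stack above is explicitly experimental; the following is only a sketch of how `switch_target` is meant to be used, with `EchoRetarget` as a hypothetical no-op handler (not part of Numba):

    from numba import njit
    from numba.core.retarget import BaseRetarget
    from numba.core.dispatcher import TargetConfigurationStack

    class EchoRetarget(BaseRetarget):
        # Hypothetical handler: accepts every dispatcher and hands the
        # same one back, so behaviour is unchanged.
        def check_compatible(self, orig_disp):
            pass
        def retarget(self, orig_disp):
            return orig_disp

    @njit
    def f(x):
        return x + 1

    with TargetConfigurationStack.switch_target(EchoRetarget()):
        print(f(1))  # routed through _get_retarget_dispatcher()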
+ """ + def normal_handler(index, param, value): + return value + + def default_handler(index, param, default): + return types.Omitted(default) + + def stararg_handler(index, param, values): + return types.StarArgTuple(values) + # For now, we take argument values from the @jit function, even + # in the case of generated jit. + args = fold_arguments(self.pysig, args, kws, + normal_handler, + default_handler, + stararg_handler) + return self.pysig, args + + def compile(self, args, return_type): + status, retval = self._compile_cached(args, return_type) + if status: + return retval + else: + raise retval + + def _compile_cached(self, args, return_type): + key = tuple(args), return_type + try: + return False, self._failed_cache[key] + except KeyError: + pass + + try: + retval = self._compile_core(args, return_type) + except errors.TypingError as e: + self._failed_cache[key] = e + return False, e + else: + return True, retval + + def _compile_core(self, args, return_type): + flags = compiler.Flags() + self.targetdescr.options.parse_as_flags(flags, self.targetoptions) + flags = self._customize_flags(flags) + + impl = self._get_implementation(args, {}) + cres = compiler.compile_extra(self.targetdescr.typing_context, + self.targetdescr.target_context, + impl, + args=args, return_type=return_type, + flags=flags, locals=self.locals, + pipeline_class=self.pipeline_class) + # Check typing error if object mode is used + if cres.typing_error is not None and not flags.enable_pyobject: + raise cres.typing_error + return cres + + def get_globals_for_reduction(self): + return serialize._get_function_globals_for_reduction(self.py_func) + + def _get_implementation(self, args, kws): + return self.py_func + + def _customize_flags(self, flags): + return flags + + +class _GeneratedFunctionCompiler(_FunctionCompiler): + + def __init__(self, py_func, targetdescr, targetoptions, locals, + pipeline_class): + super(_GeneratedFunctionCompiler, self).__init__( + py_func, targetdescr, targetoptions, locals, pipeline_class) + self.impls = set() + + def get_globals_for_reduction(self): + # This will recursively get the globals used by any nested + # implementation function. + return serialize._get_function_globals_for_reduction(self.py_func) + + def _get_implementation(self, args, kws): + impl = self.py_func(*args, **kws) + # Check the generating function and implementation signatures are + # compatible, otherwise compiling would fail later. + pysig = utils.pysignature(self.py_func) + implsig = utils.pysignature(impl) + ok = len(pysig.parameters) == len(implsig.parameters) + if ok: + for pyparam, implparam in zip(pysig.parameters.values(), + implsig.parameters.values()): + # We allow the implementation to omit default values, but + # if it mentions them, they should have the same value... + if (pyparam.name != implparam.name or + pyparam.kind != implparam.kind or + (implparam.default is not implparam.empty and + implparam.default != pyparam.default)): + ok = False + if not ok: + raise TypeError("generated implementation %s should be compatible " + "with signature '%s', but has signature '%s'" + % (impl, pysig, implsig)) + self.impls.add(impl) + return impl + + +_CompileStats = collections.namedtuple( + '_CompileStats', ('cache_path', 'cache_hits', 'cache_misses')) + + +class CompilingCounter(object): + """ + A simple counter that increment in __enter__ and decrement in __exit__. 
+ """ + + def __init__(self): + self.counter = 0 + + def __enter__(self): + assert self.counter >= 0 + self.counter += 1 + + def __exit__(self, *args, **kwargs): + self.counter -= 1 + assert self.counter >= 0 + + def __bool__(self): + return self.counter > 0 + + __nonzero__ = __bool__ + + +class _DispatcherBase(_dispatcher.Dispatcher): + """ + Common base class for dispatcher Implementations. + """ + + __numba__ = "py_func" + + def __init__(self, arg_count, py_func, pysig, can_fallback, + exact_match_required): + self._tm = default_type_manager + + # A mapping of signatures to compile results + self.overloads = collections.OrderedDict() + + self.py_func = py_func + # other parts of Numba assume the old Python 2 name for code object + self.func_code = get_code_object(py_func) + # but newer python uses a different name + self.__code__ = self.func_code + # a place to keep an active reference to the types of the active call + self._types_active_call = [] + # Default argument values match the py_func + self.__defaults__ = py_func.__defaults__ + + argnames = tuple(pysig.parameters) + default_values = self.py_func.__defaults__ or () + defargs = tuple(OmittedArg(val) for val in default_values) + try: + lastarg = list(pysig.parameters.values())[-1] + except IndexError: + has_stararg = False + else: + has_stararg = lastarg.kind == lastarg.VAR_POSITIONAL + _dispatcher.Dispatcher.__init__(self, self._tm.get_pointer(), + arg_count, self._fold_args, + argnames, defargs, + can_fallback, + has_stararg, + exact_match_required) + + self.doc = py_func.__doc__ + self._compiling_counter = CompilingCounter() + weakref.finalize(self, self._make_finalizer()) + + def _compilation_chain_init_hook(self): + """ + This will be called ahead of any part of compilation taking place (this + even includes being ahead of working out the types of the arguments). + This permits activities such as initialising extension entry points so + that the compiler knows about additional externally defined types etc + before it does anything. + """ + entrypoints.init_all() + + def _reset_overloads(self): + self._clear() + self.overloads.clear() + + def _make_finalizer(self): + """ + Return a finalizer function that will release references to + related compiled functions. + """ + overloads = self.overloads + targetctx = self.targetctx + + # Early-bind utils.shutting_down() into the function's local namespace + # (see issue #689) + def finalizer(shutting_down=utils.shutting_down): + # The finalizer may crash at shutdown, skip it (resources + # will be cleared by the process exiting, anyway). + if shutting_down(): + return + # This function must *not* hold any reference to self: + # we take care to bind the necessary objects in the closure. + for cres in overloads.values(): + try: + targetctx.remove_user_function(cres.entry_point) + except KeyError: + pass + + return finalizer + + @property + def signatures(self): + """ + Returns a list of compiled function signatures. + """ + return list(self.overloads) + + @property + def nopython_signatures(self): + return [cres.signature for cres in self.overloads.values() + if not cres.objectmode] + + def disable_compile(self, val=True): + """Disable the compilation of new signatures at call time. 
+ """ + # If disabling compilation then there must be at least one signature + assert (not val) or len(self.signatures) > 0 + self._can_compile = not val + + def add_overload(self, cres): + args = tuple(cres.signature.args) + sig = [a._code for a in args] + self._insert(sig, cres.entry_point, cres.objectmode) + self.overloads[args] = cres + + def fold_argument_types(self, args, kws): + return self._compiler.fold_argument_types(args, kws) + + def get_call_template(self, args, kws): + """ + Get a typing.ConcreteTemplate for this dispatcher and the given + *args* and *kws* types. This allows to resolve the return type. + + A (template, pysig, args, kws) tuple is returned. + """ + # XXX how about a dispatcher template class automating the + # following? + + # Fold keyword arguments and resolve default values + pysig, args = self._compiler.fold_argument_types(args, kws) + kws = {} + # Ensure an overload is available + if self._can_compile: + self.compile(tuple(args)) + + # Create function type for typing + func_name = self.py_func.__name__ + name = "CallTemplate({0})".format(func_name) + # The `key` isn't really used except for diagnosis here, + # so avoid keeping a reference to `cfunc`. + call_template = typing.make_concrete_template( + name, key=func_name, signatures=self.nopython_signatures) + return call_template, pysig, args, kws + + def get_overload(self, sig): + """ + Return the compiled function for the given signature. + """ + args, return_type = sigutils.normalize_signature(sig) + return self.overloads[tuple(args)].entry_point + + @property + def is_compiling(self): + """ + Whether a specialization is currently being compiled. + """ + return self._compiling_counter + + def _compile_for_args(self, *args, **kws): + """ + For internal use. Compile a specialized version of the function + for the given *args* and *kws*, and return the resulting callable. + """ + assert not kws + # call any initialisation required for the compilation chain (e.g. + # extension point registration). + self._compilation_chain_init_hook() + + def error_rewrite(e, issue_type): + """ + Rewrite and raise Exception `e` with help supplied based on the + specified issue_type. + """ + if config.SHOW_HELP: + help_msg = errors.error_extras[issue_type] + e.patch_message('\n'.join((str(e).rstrip(), help_msg))) + if config.FULL_TRACEBACKS: + raise e + else: + raise e.with_traceback(None) + + argtypes = [] + for a in args: + if isinstance(a, OmittedArg): + argtypes.append(types.Omitted(a.value)) + else: + argtypes.append(self.typeof_pyval(a)) + + return_val = None + try: + return_val = self.compile(tuple(argtypes)) + except errors.ForceLiteralArg as e: + # Received request for compiler re-entry with the list of arguments + # indicated by e.requested_args. + # First, check if any of these args are already Literal-ized + already_lit_pos = [i for i in e.requested_args + if isinstance(args[i], types.Literal)] + if already_lit_pos: + # Abort compilation if any argument is already a Literal. + # Letting this continue will cause infinite compilation loop. + m = ("Repeated literal typing request.\n" + "{}.\n" + "This is likely caused by an error in typing. " + "Please see nested and suppressed exceptions.") + info = ', '.join('Arg #{} is {}'.format(i, args[i]) + for i in sorted(already_lit_pos)) + raise errors.CompilerError(m.format(info)) + # Convert requested arguments into a Literal. 
+ args = [(types.literal + if i in e.requested_args + else lambda x: x)(args[i]) + for i, v in enumerate(args)] + # Re-enter compilation with the Literal-ized arguments + return_val = self._compile_for_args(*args) + + except errors.TypingError as e: + # Intercept typing error that may be due to an argument + # that failed inferencing as a Numba type + failed_args = [] + for i, arg in enumerate(args): + val = arg.value if isinstance(arg, OmittedArg) else arg + try: + tp = typeof(val, Purpose.argument) + except ValueError as typeof_exc: + failed_args.append((i, str(typeof_exc))) + else: + if tp is None: + failed_args.append( + (i, f"cannot determine Numba type of value {val}")) + if failed_args: + # Patch error message to ease debugging + args_str = "\n".join( + f"- argument {i}: {err}" for i, err in failed_args + ) + msg = (f"{str(e).rstrip()} \n\nThis error may have been caused " + f"by the following argument(s):\n{args_str}\n") + e.patch_message(msg) + + error_rewrite(e, 'typing') + except errors.UnsupportedError as e: + # Something unsupported is present in the user code, add help info + error_rewrite(e, 'unsupported_error') + except (errors.NotDefinedError, errors.RedefinedError, + errors.VerificationError) as e: + # These errors are probably from an issue with either the code + # supplied being syntactically or otherwise invalid + error_rewrite(e, 'interpreter') + except errors.ConstantInferenceError as e: + # this is from trying to infer something as constant when it isn't + # or isn't supported as a constant + error_rewrite(e, 'constant_inference') + except Exception as e: + if config.SHOW_HELP: + if hasattr(e, 'patch_message'): + help_msg = errors.error_extras['reportable'] + e.patch_message('\n'.join((str(e).rstrip(), help_msg))) + # ignore the FULL_TRACEBACKS config, this needs reporting! + raise e + finally: + self._types_active_call = [] + return return_val + + def inspect_llvm(self, signature=None): + """Get the LLVM intermediate representation generated by compilation. + + Parameters + ---------- + signature : tuple of numba types, optional + Specify a signature for which to obtain the LLVM IR. If None, the + IR is returned for all available signatures. + + Returns + ------- + llvm : dict[signature, str] or str + Either the LLVM IR string for the specified signature, or, if no + signature was given, a dictionary mapping signatures to LLVM IR + strings. + """ + if signature is not None: + lib = self.overloads[signature].library + return lib.get_llvm_str() + + return dict((sig, self.inspect_llvm(sig)) for sig in self.signatures) + + def inspect_asm(self, signature=None): + """Get the generated assembly code. + + Parameters + ---------- + signature : tuple of numba types, optional + Specify a signature for which to obtain the assembly code. If + None, the assembly code is returned for all available signatures. + + Returns + ------- + asm : dict[signature, str] or str + Either the assembly code for the specified signature, or, if no + signature was given, a dictionary mapping signatures to assembly + code. + """ + if signature is not None: + lib = self.overloads[signature].library + return lib.get_asm_str() + + return dict((sig, self.inspect_asm(sig)) for sig in self.signatures) + + def inspect_types(self, file=None, signature=None, + pretty=False, style='default', **kwargs): + """Print/return Numba intermediate representation (IR)-annotated code. + + Parameters + ---------- + file : file-like object, optional + File to which to print. Defaults to sys.stdout if None. 
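A short usage sketch for the two inspection helpers above; a specialization must exist before there is anything to inspect:

    from numba import njit

    @njit
    def square(x):
        return x * x

    square(3)  # compile one specialization first

    for sig, ir in square.inspect_llvm().items():
        print(sig, "->", len(ir), "chars of LLVM IR")
    asm = square.inspect_asm(square.signatures[0])
    print(asm.splitlines()[0])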
Must be + None if ``pretty=True``. + signature : tuple of numba types, optional + Print/return the intermediate representation for only the given + signature. If None, the IR is printed for all available signatures. + pretty : bool, optional + If True, an Annotate object will be returned that can render the + IR with color highlighting in Jupyter and IPython. ``file`` must + be None if ``pretty`` is True. Additionally, the ``pygments`` + library must be installed for ``pretty=True``. + style : str, optional + Choose a style for rendering. Ignored if ``pretty`` is ``False``. + This is directly consumed by ``pygments`` formatters. To see a + list of available styles, import ``pygments`` and run + ``list(pygments.styles.get_all_styles())``. + + Returns + ------- + annotated : Annotate object, optional + Only returned if ``pretty=True``, otherwise this function is only + used for its printing side effect. If ``pretty=True``, an Annotate + object is returned that can render itself in Jupyter and IPython. + """ + overloads = self.overloads + if signature is not None: + overloads = {signature: self.overloads[signature]} + + if not pretty: + if file is None: + file = sys.stdout + + for ver, res in overloads.items(): + print("%s %s" % (self.py_func.__name__, ver), file=file) + print('-' * 80, file=file) + print(res.type_annotation, file=file) + print('=' * 80, file=file) + else: + if file is not None: + raise ValueError("`file` must be None if `pretty=True`") + from numba.core.annotations.pretty_annotate import Annotate + return Annotate(self, signature=signature, style=style) + + def inspect_cfg(self, signature=None, show_wrapper=None, **kwargs): + """ + For inspecting the CFG of the function. + + By default the CFG of the user function is shown. The *show_wrapper* + option can be set to "python" or "cfunc" to show the python wrapper + function or the *cfunc* wrapper function, respectively. + + Parameters accepted in kwargs + ----------------------------- + filename : string, optional + the name of the output file, if given this will write the output to + filename + view : bool, optional + whether to immediately view the optional output file + highlight : bool, set, dict, optional + what, if anything, to highlight, options are: + { incref : bool, # highlight NRT_incref calls + decref : bool, # highlight NRT_decref calls + returns : bool, # highlight exits which are normal returns + raises : bool, # highlight exits which are from raise + meminfo : bool, # highlight calls to NRT*meminfo + branches : bool, # highlight true/false branches + } + Default is True which sets all of the above to True. Supplying a set + of strings is also accepted, these are interpreted as key:True with + respect to the above dictionary. e.g. {'incref', 'decref'} would + switch on highlighting on increfs and decrefs. + interleave: bool, set, dict, optional + what, if anything, to interleave in the LLVM IR, options are: + { python: bool # interleave python source code with the LLVM IR + lineinfo: bool # interleave line information markers with the LLVM + # IR + } + Default is True which sets all of the above to True. Supplying a set + of strings is also accepted, these are interpreted as key:True with + respect to the above dictionary. e.g. {'python',} would + switch on interleaving of python source code in the LLVM IR. + strip_ir : bool, optional + Default is False. If set to True all LLVM IR that is superfluous to + that requested in kwarg `highlight` will be removed. + show_key : bool, optional + Default is True. 
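Correspondingly, `inspect_types` can print to stdout, write to a file, or (with `pretty=True` and `pygments` installed) return an `Annotate` object; for instance:

    from numba import njit

    @njit
    def total(a, b):
        return a + b

    total(1, 2)
    total(1.0, 2.0)

    total.inspect_types()  # annotated IR for all signatures, to stdout

    with open("annotations.txt", "w") as f:
        total.inspect_types(file=f, signature=total.signatures[0])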
Create a "key" for the highlighting in the rendered + CFG. + fontsize : int, optional + Default is 8. Set the fontsize in the output to this value. + """ + if signature is not None: + cres = self.overloads[signature] + lib = cres.library + if show_wrapper == 'python': + fname = cres.fndesc.llvm_cpython_wrapper_name + elif show_wrapper == 'cfunc': + fname = cres.fndesc.llvm_cfunc_wrapper_name + else: + fname = cres.fndesc.mangled_name + return lib.get_function_cfg(fname, py_func=self.py_func, **kwargs) + + return dict((sig, self.inspect_cfg(sig, show_wrapper=show_wrapper)) + for sig in self.signatures) + + def inspect_disasm_cfg(self, signature=None): + """ + For inspecting the CFG of the disassembly of the function. + + Requires python package: r2pipe + Requires radare2 binary on $PATH. + Notebook rendering requires python package: graphviz + + signature : tuple of Numba types, optional + Print/return the disassembly CFG for only the given signatures. + If None, the IR is printed for all available signatures. + """ + if signature is not None: + cres = self.overloads[signature] + lib = cres.library + return lib.get_disasm_cfg(cres.fndesc.mangled_name) + + return dict((sig, self.inspect_disasm_cfg(sig)) + for sig in self.signatures) + + def get_annotation_info(self, signature=None): + """ + Gets the annotation information for the function specified by + signature. If no signature is supplied a dictionary of signature to + annotation information is returned. + """ + signatures = self.signatures if signature is None else [signature] + out = collections.OrderedDict() + for sig in signatures: + cres = self.overloads[sig] + ta = cres.type_annotation + key = (ta.func_id.filename + ':' + str(ta.func_id.firstlineno + 1), + ta.signature) + out[key] = ta.annotate_raw()[key] + return out + + def _explain_ambiguous(self, *args, **kws): + """ + Callback for the C _Dispatcher object. + """ + assert not kws, "kwargs not handled" + args = tuple([self.typeof_pyval(a) for a in args]) + # The order here must be deterministic for testing purposes, which + # is ensured by the OrderedDict. + sigs = self.nopython_signatures + # This will raise + self.typingctx.resolve_overload(self.py_func, sigs, args, kws, + allow_ambiguous=False) + + def _explain_matching_error(self, *args, **kws): + """ + Callback for the C _Dispatcher object. + """ + assert not kws, "kwargs not handled" + args = [self.typeof_pyval(a) for a in args] + msg = ("No matching definition for argument type(s) %s" + % ', '.join(map(str, args))) + raise TypeError(msg) + + def _search_new_conversions(self, *args, **kws): + """ + Callback for the C _Dispatcher object. + Search for approximately matching signatures for the given arguments, + and ensure the corresponding conversions are registered in the C++ + type manager. + """ + assert not kws, "kwargs not handled" + args = [self.typeof_pyval(a) for a in args] + found = False + for sig in self.nopython_signatures: + conv = self.typingctx.install_possible_conversions(args, sig.args) + if conv: + found = True + return found + + def __repr__(self): + return "%s(%s)" % (type(self).__name__, self.py_func) + + def typeof_pyval(self, val): + """ + Resolve the Numba type of Python value *val*. + This is called from numba._dispatcher as a fallback if the native code + cannot decide the type. + """ + # Not going through the resolve_argument_type() indirection + # can save a couple µs. 
+ try: + tp = typeof(val, Purpose.argument) + except ValueError: + tp = types.pyobject + else: + if tp is None: + tp = types.pyobject + self._types_active_call.append(tp) + return tp + + def _callback_add_timer(self, duration, cres, lock_name): + md = cres.metadata + # md can be None when code is loaded from cache + if md is not None: + timers = md.setdefault("timers", {}) + if lock_name not in timers: + # Only write if the metadata does not exist + timers[lock_name] = duration + else: + msg = f"'{lock_name} metadata is already defined." + raise AssertionError(msg) + + def _callback_add_compiler_timer(self, duration, cres): + return self._callback_add_timer(duration, cres, + lock_name="compiler_lock") + + def _callback_add_llvm_timer(self, duration, cres): + return self._callback_add_timer(duration, cres, + lock_name="llvm_lock") + + +class _MemoMixin: + __uuid = None + # A {uuid -> instance} mapping, for deserialization + _memo = weakref.WeakValueDictionary() + # hold refs to last N functions deserialized, retaining them in _memo + # regardless of whether there is another reference + _recent = collections.deque(maxlen=config.FUNCTION_CACHE_SIZE) + + @property + def _uuid(self): + """ + An instance-specific UUID, to avoid multiple deserializations of + a given instance. + + Note: this is lazily-generated, for performance reasons. + """ + u = self.__uuid + if u is None: + u = str(uuid.uuid4()) + self._set_uuid(u) + return u + + def _set_uuid(self, u): + assert self.__uuid is None + self.__uuid = u + self._memo[u] = self + self._recent.append(self) + + +class Dispatcher(serialize.ReduceMixin, _MemoMixin, _DispatcherBase): + """ + Implementation of user-facing dispatcher objects (i.e. created using + the @jit decorator). + This is an abstract base class. Subclasses should define the targetdescr + class attribute. + """ + _fold_args = True + _impl_kinds = { + 'direct': _FunctionCompiler, + 'generated': _GeneratedFunctionCompiler, + } + + __numba__ = 'py_func' + + def __init__(self, py_func, locals={}, targetoptions={}, + impl_kind='direct', pipeline_class=compiler.Compiler): + """ + Parameters + ---------- + py_func: function object to be compiled + locals: dict, optional + Mapping of local variable names to Numba types. Used to override + the types deduced by the type inference engine. + targetoptions: dict, optional + Target-specific config options. + impl_kind: str + Select the compiler mode for `@jit` and `@generated_jit` + pipeline_class: type numba.compiler.CompilerBase + The compiler pipeline type. 
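If the timer callbacks above ran for a given build (they are skipped when an overload is loaded from the on-disk cache), the recorded durations surface in the overload metadata; a hedged sketch:

    from numba import njit

    @njit
    def g(x):
        return x - 1

    g(10)
    md = g.get_metadata(g.signatures[0])
    timers = md.get("timers", {})  # absent for cache-loaded overloads
    print(timers.get("compiler_lock"), timers.get("llvm_lock"))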
+ """ + self.typingctx = self.targetdescr.typing_context + self.targetctx = self.targetdescr.target_context + + pysig = utils.pysignature(py_func) + arg_count = len(pysig.parameters) + can_fallback = not targetoptions.get('nopython', False) + + _DispatcherBase.__init__(self, arg_count, py_func, pysig, can_fallback, + exact_match_required=False) + + functools.update_wrapper(self, py_func) + + self.targetoptions = targetoptions + self.locals = locals + self._cache = NullCache() + compiler_class = self._impl_kinds[impl_kind] + self._impl_kind = impl_kind + self._compiler = compiler_class(py_func, self.targetdescr, + targetoptions, locals, pipeline_class) + self._cache_hits = collections.Counter() + self._cache_misses = collections.Counter() + + self._type = types.Dispatcher(self) + self.typingctx.insert_global(self, self._type) + + # Remember target restriction + self._required_target_backend = targetoptions.get('target_backend') + + def dump(self, tab=''): + print(f'{tab}DUMP {type(self).__name__}[{self.py_func.__name__}' + f', type code={self._type._code}]') + for cres in self.overloads.values(): + cres.dump(tab=tab + ' ') + print(f'{tab}END DUMP {type(self).__name__}[{self.py_func.__name__}]') + + @property + def _numba_type_(self): + return types.Dispatcher(self) + + def enable_caching(self): + self._cache = FunctionCache(self.py_func) + + def __get__(self, obj, objtype=None): + '''Allow a JIT function to be bound as a method to an object''' + if obj is None: # Unbound method + return self + else: # Bound method + return pytypes.MethodType(self, obj) + + def _reduce_states(self): + """ + Reduce the instance for pickling. This will serialize + the original function as well the compilation options and + compiled signatures, but not the compiled code itself. + + NOTE: part of ReduceMixin protocol + """ + if self._can_compile: + sigs = [] + else: + sigs = [cr.signature for cr in self.overloads.values()] + + return dict( + uuid=str(self._uuid), + py_func=self.py_func, + locals=self.locals, + targetoptions=self.targetoptions, + impl_kind=self._impl_kind, + can_compile=self._can_compile, + sigs=sigs, + ) + + @classmethod + def _rebuild(cls, uuid, py_func, locals, targetoptions, impl_kind, + can_compile, sigs): + """ + Rebuild an Dispatcher instance after it was __reduce__'d. 
+ + NOTE: part of ReduceMixin protocol + """ + try: + return cls._memo[uuid] + except KeyError: + pass + self = cls(py_func, locals, targetoptions, impl_kind) + # Make sure this deserialization will be merged with subsequent ones + self._set_uuid(uuid) + for sig in sigs: + self.compile(sig) + self._can_compile = can_compile + return self + + def compile(self, sig): + disp = self._get_dispatcher_for_current_target() + if disp is not self: + return disp.compile(sig) + + with ExitStack() as scope: + cres = None + + def cb_compiler(dur): + if cres is not None: + self._callback_add_compiler_timer(dur, cres) + + def cb_llvm(dur): + if cres is not None: + self._callback_add_llvm_timer(dur, cres) + + scope.enter_context(ev.install_timer("numba:compiler_lock", + cb_compiler)) + scope.enter_context(ev.install_timer("numba:llvm_lock", cb_llvm)) + scope.enter_context(global_compiler_lock) + + if not self._can_compile: + raise RuntimeError("compilation disabled") + # Use counter to track recursion compilation depth + with self._compiling_counter: + args, return_type = sigutils.normalize_signature(sig) + # Don't recompile if signature already exists + existing = self.overloads.get(tuple(args)) + if existing is not None: + return existing.entry_point + # Try to load from disk cache + cres = self._cache.load_overload(sig, self.targetctx) + if cres is not None: + self._cache_hits[sig] += 1 + # XXX fold this in add_overload()? (also see compiler.py) + if not cres.objectmode: + self.targetctx.insert_user_function(cres.entry_point, + cres.fndesc, + [cres.library]) + self.add_overload(cres) + return cres.entry_point + + self._cache_misses[sig] += 1 + ev_details = dict( + dispatcher=self, + args=args, + return_type=return_type, + ) + with ev.trigger_event("numba:compile", data=ev_details): + try: + cres = self._compiler.compile(args, return_type) + except errors.ForceLiteralArg as e: + def folded(args, kws): + return self._compiler.fold_argument_types(args, + kws)[1] + raise e.bind_fold_arguments(folded) + self.add_overload(cres) + self._cache.save_overload(sig, cres) + return cres.entry_point + + def get_compile_result(self, sig): + """Compile (if needed) and return the compilation result with the + given signature. + """ + atypes = tuple(sig.args) + if atypes not in self.overloads: + self.compile(atypes) + return self.overloads[atypes] + + def recompile(self): + """ + Recompile all signatures afresh. + """ + sigs = list(self.overloads) + old_can_compile = self._can_compile + # Ensure the old overloads are disposed of, + # including compiled functions. + self._make_finalizer()() + self._reset_overloads() + self._cache.flush() + self._can_compile = True + try: + for sig in sigs: + self.compile(sig) + finally: + self._can_compile = old_can_compile + + @property + def stats(self): + return _CompileStats( + cache_path=self._cache.cache_path, + cache_hits=self._cache_hits, + cache_misses=self._cache_misses, + ) + + def parallel_diagnostics(self, signature=None, level=1): + """ + Print parallel diagnostic information for the given signature. If no + signature is present it is printed for all known signatures. level is + used to adjust the verbosity, level=1 (default) is minimal verbosity, + and 2, 3, and 4 provide increasing levels of verbosity. + """ + def dump(sig): + ol = self.overloads[sig] + pfdiag = ol.metadata.get('parfor_diagnostics', None) + if pfdiag is None: + msg = "No parfors diagnostic available, is 'parallel=True' set?" 
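With on-disk caching enabled, the `stats` property above exposes the hit/miss counters maintained in `compile()`; for example:

    from numba import njit

    @njit(cache=True)   # enable_caching() installs a FunctionCache
    def triple(x):
        return 3 * x

    triple(2)
    print(triple.stats.cache_path)         # on-disk cache location
    print(dict(triple.stats.cache_hits))   # Counter keyed by signature
    print(dict(triple.stats.cache_misses))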
+ raise ValueError(msg) + pfdiag.dump(level) + if signature is not None: + dump(signature) + else: + [dump(sig) for sig in self.signatures] + + def get_metadata(self, signature=None): + """ + Obtain the compilation metadata for a given signature. + """ + if signature is not None: + return self.overloads[signature].metadata + else: + return dict( + (sig,self.overloads[sig].metadata) for sig in self.signatures + ) + + def get_function_type(self): + """Return unique function type of dispatcher when possible, otherwise + return None. + + A Dispatcher instance has unique function type when it + contains exactly one compilation result and its compilation + has been disabled (via its disable_compile method). + """ + if not self._can_compile and len(self.overloads) == 1: + cres = tuple(self.overloads.values())[0] + return types.FunctionType(cres.signature) + + def _get_retarget_dispatcher(self): + """Returns a dispatcher for the retarget request. + """ + # Check TLS target configuration + tc = TargetConfigurationStack() + retarget = tc.get() + retarget.check_compatible(self) + disp = retarget.retarget(self) + return disp + + def _get_dispatcher_for_current_target(self): + """Returns a dispatcher for the current target registered in + `TargetConfigurationStack`. `self` is returned if no target is + specified. + """ + tc = TargetConfigurationStack() + if tc: + return self._get_retarget_dispatcher() + else: + return self + + def _call_tls_target(self, *args, **kwargs): + """This is called when the C dispatcher logic sees a retarget request. + """ + disp = self._get_retarget_dispatcher() + # Call the new dispatcher + return disp(*args, **kwargs) + + +class LiftedCode(serialize.ReduceMixin, _MemoMixin, _DispatcherBase): + """ + Implementation of the hidden dispatcher objects used for lifted code + (a lifted loop is really compiled as a separate function). + """ + _fold_args = False + can_cache = False + + def __init__(self, func_ir, typingctx, targetctx, flags, locals): + self.func_ir = func_ir + self.lifted_from = None + + self.typingctx = typingctx + self.targetctx = targetctx + self.flags = flags + self.locals = locals + + _DispatcherBase.__init__(self, self.func_ir.arg_count, + self.func_ir.func_id.func, + self.func_ir.func_id.pysig, + can_fallback=True, + exact_match_required=False) + + def _reduce_states(self): + """ + Reduce the instance for pickling. This will serialize + the original function as well the compilation options and + compiled signatures, but not the compiled code itself. + + NOTE: part of ReduceMixin protocol + """ + return dict( + uuid=self._uuid, func_ir=self.func_ir, flags=self.flags, + locals=self.locals, extras=self._reduce_extras(), + ) + + def _reduce_extras(self): + """ + NOTE: sub-class can override to add extra states + """ + return {} + + @classmethod + def _rebuild(cls, uuid, func_ir, flags, locals, extras): + """ + Rebuild an Dispatcher instance after it was __reduce__'d. + + NOTE: part of ReduceMixin protocol + """ + try: + return cls._memo[uuid] + except KeyError: + pass + + # NOTE: We are assuming that this is must be cpu_target, which is true + # for now. + # TODO: refactor this to not assume on `cpu_target` + + from numba.core import registry + typingctx = registry.cpu_target.typing_context + targetctx = registry.cpu_target.target_context + + self = cls(func_ir, typingctx, targetctx, flags, locals, **extras) + self._set_uuid(uuid) + return self + + def get_source_location(self): + """Return the starting line number of the loop. 
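As `get_function_type` describes, an eagerly compiled dispatcher (exactly one overload, compilation disabled) has a unique first-class function type; a sketch:

    from numba import njit

    @njit("float64(float64)")   # eager compile; _jit() disables recompiles
    def halve(x):
        return x / 2.0

    print(halve.get_function_type())   # FunctionType for float64(float64)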
+ """ + return self.func_ir.loc.line + + def _pre_compile(self, args, return_type, flags): + """Pre-compile actions + """ + pass + + def compile(self, sig): + with ExitStack() as scope: + cres = None + + def cb_compiler(dur): + if cres is not None: + self._callback_add_compiler_timer(dur, cres) + + def cb_llvm(dur): + if cres is not None: + self._callback_add_llvm_timer(dur, cres) + + scope.enter_context(ev.install_timer("numba:compiler_lock", + cb_compiler)) + scope.enter_context(ev.install_timer("numba:llvm_lock", cb_llvm)) + scope.enter_context(global_compiler_lock) + + # Use counter to track recursion compilation depth + with self._compiling_counter: + # XXX this is mostly duplicated from Dispatcher. + flags = self.flags + args, return_type = sigutils.normalize_signature(sig) + + # Don't recompile if signature already exists + # (e.g. if another thread compiled it before we got the lock) + existing = self.overloads.get(tuple(args)) + if existing is not None: + return existing.entry_point + + self._pre_compile(args, return_type, flags) + + # Clone IR to avoid (some of the) mutation in the rewrite pass + cloned_func_ir = self.func_ir.copy() + + ev_details = dict( + dispatcher=self, + args=args, + return_type=return_type, + ) + with ev.trigger_event("numba:compile", data=ev_details): + cres = compiler.compile_ir(typingctx=self.typingctx, + targetctx=self.targetctx, + func_ir=cloned_func_ir, + args=args, + return_type=return_type, + flags=flags, locals=self.locals, + lifted=(), + lifted_from=self.lifted_from, + is_lifted_loop=True,) + + # Check typing error if object mode is used + if (cres.typing_error is not None and + not flags.enable_pyobject): + raise cres.typing_error + self.add_overload(cres) + return cres.entry_point + + def _get_dispatcher_for_current_target(self): + # Lifted code does not honor the target switch currently. + # No work has been done to check if this can be allowed. + return self + + +class LiftedLoop(LiftedCode): + def _pre_compile(self, args, return_type, flags): + assert not flags.enable_looplift, "Enable looplift flags is on" + + +class LiftedWith(LiftedCode): + + can_cache = True + + def _reduce_extras(self): + return dict(output_types=self.output_types) + + @property + def _numba_type_(self): + return types.Dispatcher(self) + + def get_call_template(self, args, kws): + """ + Get a typing.ConcreteTemplate for this dispatcher and the given + *args* and *kws* types. This enables the resolving of the return type. + + A (template, pysig, args, kws) tuple is returned. + """ + # Ensure an overload is available + if self._can_compile: + self.compile(tuple(args)) + + pysig = None + # Create function type for typing + func_name = self.py_func.__name__ + name = "CallTemplate({0})".format(func_name) + # The `key` isn't really used except for diagnosis here, + # so avoid keeping a reference to `cfunc`. 
+ call_template = typing.make_concrete_template( + name, key=func_name, signatures=self.nopython_signatures) + return call_template, pysig, args, kws + + +class ObjModeLiftedWith(LiftedWith): + def __init__(self, *args, **kwargs): + self.output_types = kwargs.pop('output_types', None) + super(LiftedWith, self).__init__(*args, **kwargs) + if not self.flags.force_pyobject: + raise ValueError("expecting `flags.force_pyobject`") + if self.output_types is None: + raise TypeError('`output_types` must be provided') + # switch off rewrites, they have no effect + self.flags.no_rewrites = True + + @property + def _numba_type_(self): + return types.ObjModeDispatcher(self) + + def get_call_template(self, args, kws): + """ + Get a typing.ConcreteTemplate for this dispatcher and the given + *args* and *kws* types. This enables the resolving of the return type. + + A (template, pysig, args, kws) tuple is returned. + """ + assert not kws + self._legalize_arg_types(args) + # Coerce to object mode + args = [types.ffi_forced_object] * len(args) + + if self._can_compile: + self.compile(tuple(args)) + + signatures = [typing.signature(self.output_types, *args)] + pysig = None + func_name = self.py_func.__name__ + name = "CallTemplate({0})".format(func_name) + call_template = typing.make_concrete_template( + name, key=func_name, signatures=signatures) + + return call_template, pysig, args, kws + + def _legalize_arg_types(self, args): + for i, a in enumerate(args, start=1): + if isinstance(a, types.List): + msg = ( + 'Does not support list type inputs into ' + 'with-context for arg {}' + ) + raise errors.TypingError(msg.format(i)) + elif isinstance(a, types.Dispatcher): + msg = ( + 'Does not support function type inputs into ' + 'with-context for arg {}' + ) + raise errors.TypingError(msg.format(i)) + + @global_compiler_lock + def compile(self, sig): + args, _ = sigutils.normalize_signature(sig) + sig = (types.ffi_forced_object,) * len(args) + return super().compile(sig) + + +# Initialize typeof machinery +_dispatcher.typeof_init( + OmittedArg, + dict((str(t), t._code) for t in types.number_domain)) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/entrypoints.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/entrypoints.py new file mode 100644 index 000000000..ad42782df --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/entrypoints.py @@ -0,0 +1,58 @@ +import logging +import warnings + +from numba.core.config import PYVERSION + +if PYVERSION < (3, 9): + try: + import importlib_metadata + except ImportError as ex: + raise ImportError( + "importlib_metadata backport is required for Python version < 3.9, " + "try:\n" + "$ conda/pip install importlib_metadata" + ) from ex +else: + from importlib import metadata as importlib_metadata + + +_already_initialized = False +logger = logging.getLogger(__name__) + + +def init_all(): + """Execute all `numba_extensions` entry points with the name `init` + + If extensions have already been initialized, this function does nothing. + """ + global _already_initialized + if _already_initialized: + return + + # Must put this here to avoid extensions re-triggering initialization + _already_initialized = True + + def load_ep(entry_point): + """Loads a given entry point. Warns and logs on failure. 
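`init_all` discovers extensions through the `numba_extensions` entry-point group with the name `init`. A sketch of how a hypothetical package `numba_ext_demo` would register itself:

    # setup.py of the hypothetical extension package
    from setuptools import setup

    setup(
        name="numba_ext_demo",
        py_modules=["numba_ext_demo"],
        entry_points={
            "numba_extensions": [
                "init = numba_ext_demo:init_func",
            ],
        },
    )

    # numba_ext_demo.py
    def init_func():
        # register typing/lowering extensions here; init_all() calls
        # this once, ahead of the first compilation
        print("extension initialised")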
+ """ + logger.debug('Loading extension: %s', entry_point) + try: + func = entry_point.load() + func() + except Exception as e: + msg = (f"Numba extension module '{entry_point.module}' " + f"failed to load due to '{type(e).__name__}({str(e)})'.") + warnings.warn(msg, stacklevel=3) + logger.debug('Extension loading failed for: %s', entry_point) + + eps = importlib_metadata.entry_points() + # Split, Python 3.10+ and importlib_metadata 3.6+ have the "selectable" + # interface, versions prior to that do not. See "compatibility note" in: + # https://docs.python.org/3.10/library/importlib.metadata.html#entry-points + if hasattr(eps, 'select'): + for entry_point in eps.select(group="numba_extensions", name="init"): + load_ep(entry_point) + else: + for entry_point in eps.get("numba_extensions", ()): + if entry_point.name == "init": + load_ep(entry_point) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/environment.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/environment.py new file mode 100644 index 000000000..ffc5feb59 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/environment.py @@ -0,0 +1,64 @@ +import weakref +import importlib + +from numba import _dynfunc + + +class Environment(_dynfunc.Environment): + """Stores globals and constant pyobjects for runtime. + + It is often needed to convert b/w nopython objects and pyobjects. + """ + __slots__ = ('env_name', '__weakref__') + # A weak-value dictionary to store live environment with env_name as the + # key. + _memo = weakref.WeakValueDictionary() + + @classmethod + def from_fndesc(cls, fndesc): + try: + # Avoid creating new Env + return cls._memo[fndesc.env_name] + except KeyError: + inst = cls(fndesc.lookup_globals()) + inst.env_name = fndesc.env_name + cls._memo[fndesc.env_name] = inst + return inst + + def can_cache(self): + is_dyn = '__name__' not in self.globals + return not is_dyn + + def __reduce__(self): + return _rebuild_env, ( + self.globals.get('__name__'), + self.consts, + self.env_name, + ) + + def __del__(self): + return + + def __repr__(self): + return f"" + + +def _rebuild_env(modname, consts, env_name): + env = lookup_environment(env_name) + if env is not None: + return env + + mod = importlib.import_module(modname) + env = Environment(mod.__dict__) + env.consts[:] = consts + env.env_name = env_name + # Cache loaded object + Environment._memo[env_name] = env + return env + + +def lookup_environment(env_name): + """Returns the Environment object for the given name; + or None if not found + """ + return Environment._memo.get(env_name) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/errors.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/errors.py new file mode 100644 index 000000000..7c3a1ac96 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/errors.py @@ -0,0 +1,848 @@ +""" +Numba-specific errors and warnings. +""" + + +import abc +import contextlib +import os +import sys +import warnings +import numba.core.config +import numpy as np +from collections import defaultdict +from numba.core.utils import (chain_exception, use_old_style_errors, + use_new_style_errors) +from functools import wraps +from abc import abstractmethod + +# Filled at the end +__all__ = [] + + +class NumbaWarning(Warning): + """ + Base category for all Numba compiler warnings. 
+ """ + + def __init__(self, msg, loc=None, highlighting=True, ): + self.msg = msg + self.loc = loc + if highlighting: + highlight = termcolor().errmsg + else: + def highlight(x): + return x + if loc: + super(NumbaWarning, self).__init__( + highlight("%s\n%s\n" % (msg, loc.strformat()))) + else: + super(NumbaWarning, self).__init__(highlight("%s" % (msg,))) + + +class NumbaPerformanceWarning(NumbaWarning): + """ + Warning category for when an operation might not be + as fast as expected. + """ + + +class NumbaDeprecationWarning(NumbaWarning): + """ + Warning category for use of a deprecated feature. + """ + + +class NumbaPendingDeprecationWarning(NumbaWarning): + """ + Warning category for use of a feature that is pending deprecation. + """ + + +class NumbaParallelSafetyWarning(NumbaWarning): + """ + Warning category for when an operation in a prange + might not have parallel semantics. + """ + + +class NumbaTypeSafetyWarning(NumbaWarning): + """ + Warning category for unsafe casting operations. + """ + + +class NumbaExperimentalFeatureWarning(NumbaWarning): + """ + Warning category for using an experimental feature. + """ + + +class NumbaInvalidConfigWarning(NumbaWarning): + """ + Warning category for using an invalid configuration. + """ + + +class NumbaPedanticWarning(NumbaWarning): + """ + Warning category for reporting pedantic messages. + """ + def __init__(self, msg, **kwargs): + super().__init__(f"{msg}\n{pedantic_warning_info}") + + +class NumbaIRAssumptionWarning(NumbaPedanticWarning): + """ + Warning category for reporting an IR assumption violation. + """ + + +class NumbaDebugInfoWarning(NumbaWarning): + """ + Warning category for an issue with the emission of debug information. + """ + +# These are needed in the color formatting of errors setup + + +class _ColorScheme(metaclass=abc.ABCMeta): + + @abstractmethod + def code(self, msg): + pass + + @abstractmethod + def errmsg(self, msg): + pass + + @abstractmethod + def filename(self, msg): + pass + + @abstractmethod + def indicate(self, msg): + pass + + @abstractmethod + def highlight(self, msg): + pass + + @abstractmethod + def reset(self, msg): + pass + + +class _DummyColorScheme(_ColorScheme): + + def __init__(self, theme=None): + pass + + def code(self, msg): + pass + + def errmsg(self, msg): + pass + + def filename(self, msg): + pass + + def indicate(self, msg): + pass + + def highlight(self, msg): + pass + + def reset(self, msg): + pass + + +# holds reference to the instance of the terminal color scheme in use +_termcolor_inst = None + +try: + import colorama + + # If the colorama version is < 0.3.9 it can break stdout/stderr in some + # situations, as a result if this condition is met colorama is disabled and + # the user is warned. Note that early versions did not have a __version__. + colorama_version = getattr(colorama, '__version__', '0.0.0') + + if tuple([int(x) for x in colorama_version.split('.')]) < (0, 3, 9): + msg = ("Insufficiently recent colorama version found. " + "Numba requires colorama >= 0.3.9") + # warn the user + warnings.warn(msg) + # trip the exception to disable color errors + raise ImportError + + # If Numba is running in testsuite mode then do not use error message + # coloring so CI system output is consistently readable without having + # to read between shell escape characters. 
+ if os.environ.get('NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING', None): + raise ImportError # just to trigger the exception handler below + +except ImportError: + + class NOPColorScheme(_DummyColorScheme): + def __init__(self, theme=None): + if theme is not None: + raise ValueError("specifying a theme has no effect") + _DummyColorScheme.__init__(self, theme=theme) + + def code(self, msg): + return msg + + def errmsg(self, msg): + return msg + + def filename(self, msg): + return msg + + def indicate(self, msg): + return msg + + def highlight(self, msg): + return msg + + def reset(self, msg): + return msg + + def termcolor(): + global _termcolor_inst + if _termcolor_inst is None: + _termcolor_inst = NOPColorScheme() + return _termcolor_inst + +else: + + from colorama import init, reinit, deinit, Fore, Style + + class ColorShell(object): + _has_initialized = False + + def __init__(self): + init() + self._has_initialized = True + + def __enter__(self): + if self._has_initialized: + reinit() + + def __exit__(self, *exc_detail): + Style.RESET_ALL + deinit() + + class reset_terminal(object): + def __init__(self): + self._buf = bytearray(b'') + + def __enter__(self): + return self._buf + + def __exit__(self, *exc_detail): + self._buf += bytearray(Style.RESET_ALL.encode('utf-8')) + + # define some default themes, if more are added, update the envvars docs! + themes = {} + + # No color added, just bold weighting + themes['no_color'] = {'code': None, + 'errmsg': None, + 'filename': None, + 'indicate': None, + 'highlight': None, + 'reset': None, } + + # suitable for terminals with a dark background + themes['dark_bg'] = {'code': Fore.BLUE, + 'errmsg': Fore.YELLOW, + 'filename': Fore.WHITE, + 'indicate': Fore.GREEN, + 'highlight': Fore.RED, + 'reset': Style.RESET_ALL, } + + # suitable for terminals with a light background + themes['light_bg'] = {'code': Fore.BLUE, + 'errmsg': Fore.BLACK, + 'filename': Fore.MAGENTA, + 'indicate': Fore.BLACK, + 'highlight': Fore.RED, + 'reset': Style.RESET_ALL, } + + # suitable for terminals with a blue background + themes['blue_bg'] = {'code': Fore.WHITE, + 'errmsg': Fore.YELLOW, + 'filename': Fore.MAGENTA, + 'indicate': Fore.CYAN, + 'highlight': Fore.RED, + 'reset': Style.RESET_ALL, } + + # suitable for use in jupyter notebooks + themes['jupyter_nb'] = {'code': Fore.BLACK, + 'errmsg': Fore.BLACK, + 'filename': Fore.GREEN, + 'indicate': Fore.CYAN, + 'highlight': Fore.RED, + 'reset': Style.RESET_ALL, } + + default_theme = themes['no_color'] + + class HighlightColorScheme(_DummyColorScheme): + def __init__(self, theme=default_theme): + self._code = theme['code'] + self._errmsg = theme['errmsg'] + self._filename = theme['filename'] + self._indicate = theme['indicate'] + self._highlight = theme['highlight'] + self._reset = theme['reset'] + _DummyColorScheme.__init__(self, theme=theme) + + def _markup(self, msg, color=None, style=Style.BRIGHT): + features = '' + if color: + features += color + if style: + features += style + with ColorShell(): + with reset_terminal() as mu: + mu += features.encode('utf-8') + mu += (msg).encode('utf-8') + return mu.decode('utf-8') + + def code(self, msg): + return self._markup(msg, self._code) + + def errmsg(self, msg): + return self._markup(msg, self._errmsg) + + def filename(self, msg): + return self._markup(msg, self._filename) + + def indicate(self, msg): + return self._markup(msg, self._indicate) + + def highlight(self, msg): + return self._markup(msg, self._highlight) + + def reset(self, msg): + return self._markup(msg, self._reset) + + 
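The theme picked up by `termcolor()` comes from `numba.core.config.COLOR_SCHEME`, which is populated from the `NUMBA_COLOR_SCHEME` environment variable; it must be set before Numba is imported:

    import os
    os.environ["NUMBA_COLOR_SCHEME"] = "dark_bg"  # or light_bg, blue_bg,
                                                  # jupyter_nb, no_color
    from numba import njit  # error highlighting now uses dark_bg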
def termcolor(): + global _termcolor_inst + if _termcolor_inst is None: + scheme = themes[numba.core.config.COLOR_SCHEME] + _termcolor_inst = HighlightColorScheme(scheme) + return _termcolor_inst + + +pedantic_warning_info = """ +This warning came from an internal pedantic check. Please report the warning +message and traceback, along with a minimal reproducer at: +https://github.com/numba/numba/issues/new?template=bug_report.md +""" + +feedback_details = """ +Please report the error message and traceback, along with a minimal reproducer +at: https://github.com/numba/numba/issues/new?template=bug_report.md + +If more help is needed please feel free to speak to the Numba core developers +directly at: https://gitter.im/numba/numba + +Thanks in advance for your help in improving Numba! +""" + +unsupported_error_info = """ +Unsupported functionality was found in the code Numba was trying to compile. + +If this functionality is important to you please file a feature request at: +https://github.com/numba/numba/issues/new?template=feature_request.md +""" + +interpreter_error_info = """ +Unsupported Python functionality was found in the code Numba was trying to +compile. This error could be due to invalid code, does the code work +without Numba? (To temporarily disable Numba JIT, set the `NUMBA_DISABLE_JIT` +environment variable to non-zero, and then rerun the code). + +If the code is valid and the unsupported functionality is important to you +please file a feature request at: +https://github.com/numba/numba/issues/new?template=feature_request.md + +To see Python/NumPy features supported by the latest release of Numba visit: +https://numba.readthedocs.io/en/stable/reference/pysupported.html +and +https://numba.readthedocs.io/en/stable/reference/numpysupported.html +""" + +constant_inference_info = """ +Numba could not make a constant out of something that it decided should be +a constant. This could well be a current limitation in Numba's internals, +however please first check that your code is valid for compilation, +particularly with respect to string interpolation (not supported!) and +the requirement of compile time constants as arguments to exceptions: +https://numba.readthedocs.io/en/stable/reference/pysupported.html?highlight=exceptions#constructs + +If the code is valid and the unsupported functionality is important to you +please file a feature request at: +https://github.com/numba/numba/issues/new?template=feature_request.md + +If you think your code should work with Numba. %s +""" % feedback_details + +typing_error_info = """ +This is not usually a problem with Numba itself but instead often caused by +the use of unsupported features or an issue in resolving types. + +To see Python/NumPy features supported by the latest release of Numba visit: +https://numba.readthedocs.io/en/stable/reference/pysupported.html +and +https://numba.readthedocs.io/en/stable/reference/numpysupported.html + +For more information about typing errors and how to debug them visit: +https://numba.readthedocs.io/en/stable/user/troubleshoot.html#my-code-doesn-t-compile + +If you think your code should work with Numba, please report the error message +and traceback, along with a minimal reproducer at: +https://github.com/numba/numba/issues/new?template=bug_report.md +""" + +reportable_issue_info = """ +------------------------------------------------------------------------------- +This should not have happened, a problem has occurred in Numba's internals. +You are currently using Numba version %s. 
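The `NUMBA_DISABLE_JIT` switch mentioned in the help text above is read at import time; setting it turns every jit decorator into a pass-through, which is handy when debugging with plain Python:

    import os
    os.environ["NUMBA_DISABLE_JIT"] = "1"  # before importing numba

    from numba import njit

    @njit
    def f(x):
        return x * 2

    print(f(21))  # runs as ordinary Python; no compilation occurs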
+%s +""" % (numba.__version__, feedback_details) + +error_extras = dict() +error_extras['unsupported_error'] = unsupported_error_info +error_extras['typing'] = typing_error_info +error_extras['reportable'] = reportable_issue_info +error_extras['interpreter'] = interpreter_error_info +error_extras['constant_inference'] = constant_inference_info + + +def deprecated(arg): + """Define a deprecation decorator. + An optional string should refer to the new API to be used instead. + + Example: + @deprecated + def old_func(): ... + + @deprecated('new_func') + def old_func(): ...""" + + subst = arg if isinstance(arg, str) else None + + def decorator(func): + def wrapper(*args, **kwargs): + msg = "Call to deprecated function \"{}\"." + if subst: + msg += "\n Use \"{}\" instead." + warnings.warn(msg.format(func.__name__, subst), + category=DeprecationWarning, stacklevel=2) + return func(*args, **kwargs) + + return wraps(func)(wrapper) + + if not subst: + return decorator(arg) + else: + return decorator + + +class WarningsFixer(object): + """ + An object "fixing" warnings of a given category caught during + certain phases. The warnings can have their filename and lineno fixed, + and they are deduplicated as well. + """ + + def __init__(self, category): + self._category = category + # {(filename, lineno, category) -> messages} + self._warnings = defaultdict(set) + + @contextlib.contextmanager + def catch_warnings(self, filename=None, lineno=None): + """ + Store warnings and optionally fix their filename and lineno. + """ + with warnings.catch_warnings(record=True) as wlist: + warnings.simplefilter('always', self._category) + yield + + for w in wlist: + msg = str(w.message) + if issubclass(w.category, self._category): + # Store warnings of this category for deduplication + filename = filename or w.filename + lineno = lineno or w.lineno + self._warnings[filename, lineno, w.category].add(msg) + else: + # Simply emit other warnings again + warnings.warn_explicit(msg, w.category, + w.filename, w.lineno) + + def flush(self): + """ + Emit all stored warnings. + """ + def key(arg): + # It is possible through codegen to create entirely identical + # warnings, this leads to comparing types when sorting which breaks + # on Python 3. Key as str() and if the worse happens then `id` + # creates some uniqueness + return str(arg) + str(id(arg)) + + for (filename, lineno, category), messages in sorted( + self._warnings.items(), key=key): + for msg in sorted(messages): + warnings.warn_explicit(msg, category, filename, lineno) + self._warnings.clear() + + +class NumbaError(Exception): + + def __init__(self, msg, loc=None, highlighting=True): + self.msg = msg + self.loc = loc + if highlighting: + highlight = termcolor().errmsg + else: + def highlight(x): + return x + + if loc: + new_msg = "%s\n%s\n" % (msg, loc.strformat()) + else: + new_msg = "%s" % (msg,) + super(NumbaError, self).__init__(highlight(new_msg)) + + @property + def contexts(self): + try: + return self._contexts + except AttributeError: + self._contexts = lst = [] + return lst + + def add_context(self, msg): + """ + Add contextual info. The exception message is expanded with the new + contextual information. + """ + self.contexts.append(msg) + f = termcolor().errmsg('{0}\n') + termcolor().filename('During: {1}') + newmsg = f.format(self, msg) + self.args = (newmsg,) + return self + + def patch_message(self, new_message): + """ + Change the error message to the given new message. 
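The `add_context`/`patch_message` pair is how compilation phases attach "During: ..." breadcrumbs to an error as it propagates; a small sketch:

    from numba.core.errors import NumbaError

    try:
        raise NumbaError("low-level failure")
    except NumbaError as e:
        e.add_context("typing of function 'f'")
        print(e)  # message now ends with: During: typing of function 'f'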
+ """ + self.args = (new_message,) + self.args[1:] + + +class UnsupportedError(NumbaError): + """ + Numba does not have an implementation for this functionality. + """ + pass + + +class UnsupportedRewriteError(UnsupportedError): + """UnsupportedError from rewrite passes + """ + pass + + +class IRError(NumbaError): + """ + An error occurred during Numba IR generation. + """ + pass + + +class RedefinedError(IRError): + """ + An error occurred during interpretation of IR due to variable redefinition. + """ + pass + + +class NotDefinedError(IRError): + """ + An undefined variable is encountered during interpretation of IR. + """ + + def __init__(self, name, loc=None): + self.name = name + msg = ("The compiler failed to analyze the bytecode. " + "Variable '%s' is not defined." % name) + super(NotDefinedError, self).__init__(msg, loc=loc) + + +class VerificationError(IRError): + """ + An error occurred during IR verification. Once Numba's internal + representation (IR) is constructed it is then verified to ensure that + terminators are both present and in the correct places within the IR. If + it is the case that this condition is not met, a VerificationError is + raised. + """ + pass + + +class DeprecationError(NumbaError): + """ + Functionality is deprecated. + """ + pass + + +class LoweringError(NumbaError): + """ + An error occurred during lowering. + """ + + def __init__(self, msg, loc=None): + super(LoweringError, self).__init__(msg, loc=loc) + + +class UnsupportedParforsError(NumbaError): + """ + An error occurred because parfors is not supported on the platform. + """ + pass + + +class ForbiddenConstruct(LoweringError): + """ + A forbidden Python construct was encountered (e.g. use of locals()). + """ + pass + + +class TypingError(NumbaError): + """ + A type inference failure. + """ + pass + + +class UntypedAttributeError(TypingError): + def __init__(self, value, attr, loc=None): + module = getattr(value, 'pymod', None) + if module is not None and module == np: + # unsupported numpy feature. + msg = ("Use of unsupported NumPy function 'numpy.%s' " + "or unsupported use of the function.") % attr + else: + msg = "Unknown attribute '{attr}' of type {type}" + msg = msg.format(type=value, attr=attr) + super(UntypedAttributeError, self).__init__(msg, loc=loc) + + +class ByteCodeSupportError(NumbaError): + """ + Failure to extract the bytecode of the user's function. + """ + + def __init__(self, msg, loc=None): + super(ByteCodeSupportError, self).__init__(msg, loc=loc) + + +class CompilerError(NumbaError): + """ + Some high-level error in the compiler. + """ + pass + + +class ConstantInferenceError(NumbaError): + """ + Failure during constant inference. + """ + + def __init__(self, value, loc=None): + super(ConstantInferenceError, self).__init__(value, loc=loc) + + +class InternalError(NumbaError): + """ + For wrapping internal error occurred within the compiler + """ + + def __init__(self, exception): + super(InternalError, self).__init__(str(exception)) + self.old_exception = exception + + +class InternalTargetMismatchError(InternalError): + """For signalling a target mismatch error occurred internally within the + compiler. + """ + def __init__(self, kind, target_hw, hw_clazz): + msg = (f"{kind.title()} being resolved on a target from which it does " + f"not inherit. 
Local target is {target_hw}, declared "
+               f"target class is {hw_clazz}.")
+        super().__init__(msg)
+
+
+class RequireLiteralValue(TypingError):
+    """
+    For signalling that a function's typing requires a constant value for
+    some of its arguments.
+    """
+    pass
+
+
+class ForceLiteralArg(NumbaError):
+    """A pseudo-exception to signal the dispatcher to type an argument
+    literally.
+
+    Attributes
+    ----------
+    requested_args : frozenset[int]
+        requested positions of the arguments.
+    """
+    def __init__(self, arg_indices, fold_arguments=None, loc=None):
+        """
+        Parameters
+        ----------
+        arg_indices : Sequence[int]
+            requested positions of the arguments.
+        fold_arguments: callable
+            A function ``(tuple, dict) -> tuple`` that binds and flattens
+            the ``args`` and ``kwargs``.
+        loc : numba.ir.Loc or None
+        """
+        super(ForceLiteralArg, self).__init__(
+            "Pseudo-exception to force literal arguments in the dispatcher",
+            loc=loc,
+        )
+        self.requested_args = frozenset(arg_indices)
+        self.fold_arguments = fold_arguments
+
+    def bind_fold_arguments(self, fold_arguments):
+        """Bind the fold_arguments function.
+        """
+        e = ForceLiteralArg(self.requested_args, fold_arguments,
+                            loc=self.loc)
+        return chain_exception(e, self)
+
+    def combine(self, other):
+        """Returns a new instance by or'ing the requested_args.
+        """
+        if not isinstance(other, ForceLiteralArg):
+            m = '*other* must be a {} but got a {} instead'
+            raise TypeError(m.format(ForceLiteralArg, type(other)))
+        return ForceLiteralArg(self.requested_args | other.requested_args)
+
+    def __or__(self, other):
+        """Same as self.combine(other)
+        """
+        return self.combine(other)
+
+
+class LiteralTypingError(TypingError):
+    """
+    Failure in typing a Literal type
+    """
+    pass
+
+
+# These Exception classes are just Numba copies of their Python equivalents for
+# use internally in cases where we want e.g. type inference to keep on trying.
+# Exceptions extending from NumbaError are considered "special" by Numba's
+# internals and are treated differently to standard Python exceptions, which
+# are permitted to just propagate up the stack.
+
+class NumbaValueError(TypingError):
+    pass
+
+
+class NumbaTypeError(TypingError):
+    pass
+
+
+class NumbaAttributeError(TypingError):
+    pass
+
+
+class NumbaAssertionError(TypingError):
+    pass
+
+
+class NumbaNotImplementedError(TypingError):
+    pass
+
+
+class NumbaKeyError(TypingError):
+    pass
+
+
+class NumbaIndexError(TypingError):
+    pass
+
+
+class NumbaRuntimeError(NumbaError):
+    pass
+
+
+def _format_msg(fmt, args, kwargs):
+    return fmt.format(*args, **kwargs)
+
+
+_numba_path = os.path.dirname(__file__)
+loc_info = {}
+
+
+@contextlib.contextmanager
+def new_error_context(fmt_, *args, **kwargs):
+    """
+    A contextmanager that prepends contextual information to any exception
+    raised within. If the exception type is not an instance of NumbaError,
+    it will be wrapped into an InternalError. The exception class can be
+    changed by providing an "errcls_" keyword argument with the exception
+    constructor.
+
+    The first argument is a message that describes the context. It can be a
+    format string. If there are additional arguments, they will be used as
+    ``fmt_.format(*args, **kwargs)`` to produce the final message string.
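+
+    A minimal usage sketch (the message, argument and ``loc`` are
+    illustrative, not a real call site)::
+
+        with new_error_context("resolving {0}", name, loc=loc):
+            ...  # a NumbaError raised here gains a "During: ..." context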
+ """ + errcls = kwargs.pop('errcls_', InternalError) + + loc = kwargs.get('loc', None) + if loc is not None and not loc.filename.startswith(_numba_path): + loc_info.update(kwargs) + + try: + yield + except NumbaError as e: + e.add_context(_format_msg(fmt_, args, kwargs)) + raise + except AssertionError: + # Let assertion error pass through for shorter traceback in debugging + raise + except Exception as e: + if use_old_style_errors(): + newerr = errcls(e).add_context(_format_msg(fmt_, args, kwargs)) + if numba.core.config.FULL_TRACEBACKS: + tb = sys.exc_info()[2] + else: + tb = None + raise newerr.with_traceback(tb) + elif use_new_style_errors(): + raise e + else: + msg = ("Unknown CAPTURED_ERRORS style: " + f"'{numba.core.config.CAPTURED_ERRORS}'.") + assert 0, msg + + +__all__ += [name for (name, value) in globals().items() + if not name.startswith('_') and isinstance(value, type) + and issubclass(value, (Exception, Warning))] diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/event.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/event.py new file mode 100644 index 000000000..e2a1b0bde --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/event.py @@ -0,0 +1,491 @@ +""" +The ``numba.core.event`` module provides a simple event system for applications +to register callbacks to listen to specific compiler events. + +The following events are built in: + +- ``"numba:compile"`` is broadcast when a dispatcher is compiling. Events of + this kind have ``data`` defined to be a ``dict`` with the following + key-values: + + - ``"dispatcher"``: the dispatcher object that is compiling. + - ``"args"``: the argument types. + - ``"return_type"``: the return type. + +- ``"numba:compiler_lock"`` is broadcast when the internal compiler-lock is + acquired. This is mostly used internally to measure time spent with the lock + acquired. + +- ``"numba:llvm_lock"`` is broadcast when the internal LLVM-lock is acquired. + This is used internally to measure time spent with the lock acquired. + +- ``"numba:run_pass"`` is broadcast when a compiler pass is running. + + - ``"name"``: pass name. + - ``"qualname"``: qualified name of the function being compiled. + - ``"module"``: module name of the function being compiled. + - ``"flags"``: compilation flags. + - ``"args"``: argument types. + - ``"return_type"`` return type. + +Applications can register callbacks that are listening for specific events using +``register(kind: str, listener: Listener)``, where ``listener`` is an instance +of ``Listener`` that defines custom actions on occurrence of the specific event. +""" + +import os +import json +import atexit +import abc +import enum +import time +import threading +from timeit import default_timer as timer +from contextlib import contextmanager, ExitStack +from collections import defaultdict + +from numba.core import config + + +class EventStatus(enum.Enum): + """Status of an event. + """ + START = enum.auto() + END = enum.auto() + + +# Builtin event kinds. +_builtin_kinds = frozenset([ + "numba:compiler_lock", + "numba:compile", + "numba:llvm_lock", + "numba:run_pass", +]) + + +def _guard_kind(kind): + """Guard to ensure that an event kind is valid. + + All event kinds with a "numba:" prefix must be defined in the pre-defined + ``numba.core.event._builtin_kinds``. + Custom event kinds are allowed by not using the above prefix. 
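+
+    For example, ``"myapp:compile"`` would be accepted as a custom kind,
+    while an unknown ``"numba:custom"`` kind would raise ``ValueError``
+    (the kind names here are illustrative).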
+
+    Parameters
+    ----------
+    kind : str
+
+    Returns
+    -------
+    res : str
+    """
+    if kind.startswith("numba:") and kind not in _builtin_kinds:
+        msg = (f"{kind} is not a valid event kind, "
+               "it starts with the reserved prefix 'numba:'")
+        raise ValueError(msg)
+    return kind
+
+
+class Event:
+    """An event.
+
+    Parameters
+    ----------
+    kind : str
+    status : EventStatus
+    data : any; optional
+        Additional data for the event.
+    exc_details : 3-tuple; optional
+        Same 3-tuple for ``__exit__``.
+    """
+    def __init__(self, kind, status, data=None, exc_details=None):
+        self._kind = _guard_kind(kind)
+        self._status = status
+        self._data = data
+        self._exc_details = (None
+                             if exc_details is None or exc_details[0] is None
+                             else exc_details)
+
+    @property
+    def kind(self):
+        """Event kind
+
+        Returns
+        -------
+        res : str
+        """
+        return self._kind
+
+    @property
+    def status(self):
+        """Event status
+
+        Returns
+        -------
+        res : EventStatus
+        """
+        return self._status
+
+    @property
+    def data(self):
+        """Event data
+
+        Returns
+        -------
+        res : object
+        """
+        return self._data
+
+    @property
+    def is_start(self):
+        """Is it a *START* event?
+
+        Returns
+        -------
+        res : bool
+        """
+        return self._status == EventStatus.START
+
+    @property
+    def is_end(self):
+        """Is it an *END* event?
+
+        Returns
+        -------
+        res : bool
+        """
+        return self._status == EventStatus.END
+
+    @property
+    def is_failed(self):
+        """Is the event carrying an exception?
+
+        This is used for *END* events. This method will never return ``True``
+        in a *START* event.
+
+        Returns
+        -------
+        res : bool
+        """
+        return self._exc_details is not None
+
+    def __str__(self):
+        data = (f"{type(self.data).__qualname__}"
+                if self.data is not None else "None")
+        return f"Event({self._kind}, {self._status}, data: {data})"
+
+    __repr__ = __str__
+
+
+_registered = defaultdict(list)
+
+
+def register(kind, listener):
+    """Register a listener for a given event kind.
+
+    Parameters
+    ----------
+    kind : str
+    listener : Listener
+    """
+    assert isinstance(listener, Listener)
+    kind = _guard_kind(kind)
+    _registered[kind].append(listener)
+
+
+def unregister(kind, listener):
+    """Unregister a listener for a given event kind.
+
+    Parameters
+    ----------
+    kind : str
+    listener : Listener
+    """
+    assert isinstance(listener, Listener)
+    kind = _guard_kind(kind)
+    lst = _registered[kind]
+    lst.remove(listener)
+
+
+def broadcast(event):
+    """Broadcast an event to all registered listeners.
+
+    Parameters
+    ----------
+    event : Event
+    """
+    for listener in _registered[event.kind]:
+        listener.notify(event)
+
+
+class Listener(abc.ABC):
+    """Base class for all event listeners.
+    """
+    @abc.abstractmethod
+    def on_start(self, event):
+        """Called when there is a *START* event.
+
+        Parameters
+        ----------
+        event : Event
+        """
+        pass
+
+    @abc.abstractmethod
+    def on_end(self, event):
+        """Called when there is an *END* event.
+
+        Parameters
+        ----------
+        event : Event
+        """
+        pass
+
+    def notify(self, event):
+        """Notify this Listener with the given Event.
+
+        Parameters
+        ----------
+        event : Event
+        """
+        if event.is_start:
+            self.on_start(event)
+        elif event.is_end:
+            self.on_end(event)
+        else:
+            raise AssertionError("unreachable")
+
+
+class TimingListener(Listener):
+    """A listener that measures the total time spent between *START* and
+    *END* events during the time this listener is active.
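+
+    A minimal usage sketch (``install_timer`` below wraps this pattern in a
+    context manager; the event kind is illustrative):
+
+    >>> listener = TimingListener()
+    >>> register("numba:compile", listener)
+    >>> # ... trigger some compilation ...
+    >>> unregister("numba:compile", listener)
+    >>> if listener.done:
+    ...     print(listener.duration)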
+ """ + def __init__(self): + self._depth = 0 + + def on_start(self, event): + if self._depth == 0: + self._ts = timer() + self._depth += 1 + + def on_end(self, event): + self._depth -= 1 + if self._depth == 0: + last = getattr(self, "_duration", 0) + self._duration = (timer() - self._ts) + last + + @property + def done(self): + """Returns a ``bool`` indicating whether a measurement has been made. + + When this returns ``False``, the matching event has never fired. + If and only if this returns ``True``, ``.duration`` can be read without + error. + """ + return hasattr(self, "_duration") + + @property + def duration(self): + """Returns the measured duration. + + This may raise ``AttributeError``. Users can use ``.done`` to check + that a measurement has been made. + """ + return self._duration + + +class RecordingListener(Listener): + """A listener that records all events and stores them in the ``.buffer`` + attribute as a list of 2-tuple ``(float, Event)``, where the first element + is the time the event occurred as returned by ``time.time()`` and the second + element is the event. + """ + def __init__(self): + self.buffer = [] + + def on_start(self, event): + self.buffer.append((time.time(), event)) + + def on_end(self, event): + self.buffer.append((time.time(), event)) + + +@contextmanager +def install_listener(kind, listener): + """Install a listener for event "kind" temporarily within the duration of + the context. + + Returns + ------- + res : Listener + The *listener* provided. + + Examples + -------- + + >>> with install_listener("numba:compile", listener): + >>> some_code() # listener will be active here. + >>> other_code() # listener will be unregistered by this point. + + """ + register(kind, listener) + try: + yield listener + finally: + unregister(kind, listener) + + +@contextmanager +def install_timer(kind, callback): + """Install a TimingListener temporarily to measure the duration of + an event. + + If the context completes successfully, the *callback* function is executed. + The *callback* function is expected to take a float argument for the + duration in seconds. + + Returns + ------- + res : TimingListener + + Examples + -------- + + This is equivalent to: + + >>> with install_listener(kind, TimingListener()) as res: + >>> ... + """ + tl = TimingListener() + with install_listener(kind, tl): + yield tl + + if tl.done: + callback(tl.duration) + + +@contextmanager +def install_recorder(kind): + """Install a RecordingListener temporarily to record all events. + + Once the context is closed, users can use ``RecordingListener.buffer`` + to access the recorded events. + + Returns + ------- + res : RecordingListener + + Examples + -------- + + This is equivalent to: + + >>> with install_listener(kind, RecordingListener()) as res: + >>> ... + """ + rl = RecordingListener() + with install_listener(kind, rl): + yield rl + + +def start_event(kind, data=None): + """Trigger the start of an event of *kind* with *data*. + + Parameters + ---------- + kind : str + Event kind. + data : any; optional + Extra event data. + """ + evt = Event(kind=kind, status=EventStatus.START, data=data) + broadcast(evt) + + +def end_event(kind, data=None, exc_details=None): + """Trigger the end of an event of *kind*, *exc_details*. + + Parameters + ---------- + kind : str + Event kind. + data : any; optional + Extra event data. + exc_details : 3-tuple; optional + Same 3-tuple for ``__exit__``. Or, ``None`` if no error. 
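+
+    A hedged sketch of manual START/END pairing (``do_work`` is a
+    hypothetical placeholder; ``trigger_event`` below is the preferred
+    context-manager form):
+
+    >>> start_event("myapp:stage", data={"name": "stage"})
+    >>> try:
+    ...     do_work()
+    ... finally:
+    ...     end_event("myapp:stage", data={"name": "stage"})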
+    """
+    evt = Event(
+        kind=kind, status=EventStatus.END, data=data, exc_details=exc_details,
+    )
+    broadcast(evt)
+
+
+@contextmanager
+def trigger_event(kind, data=None):
+    """A context manager to trigger the start and end events of *kind* with
+    *data*. The start event is triggered when entering the context.
+    The end event is triggered when exiting the context.
+
+    Parameters
+    ----------
+    kind : str
+        Event kind.
+    data : any; optional
+        Extra event data.
+    """
+    with ExitStack() as scope:
+        @scope.push
+        def on_exit(*exc_details):
+            end_event(kind, data=data, exc_details=exc_details)
+
+        start_event(kind, data=data)
+        yield
+
+
+def _get_native_ident():
+    try:
+        return threading.get_native_id()
+    except AttributeError:
+        # Fallback for python <3.8, which lacks threading.get_native_id()
+        return threading.get_ident()
+
+
+def _prepare_chrome_trace_data(listener: RecordingListener):
+    """Prepare events in `listener` for serializing as chrome trace data.
+    """
+    # The spec for the trace event format can be found at:
+    # https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/edit  # noqa
+    # This code only uses the JSON Array Format for simplicity.
+    pid = os.getpid()
+    tid = _get_native_ident()
+    evs = []
+    for ts, rec in listener.buffer:
+        data = rec.data
+        cat = str(rec.kind)
+        ph = 'B' if rec.is_start else 'E'
+        name = data['name']
+        args = data
+        ev = dict(
+            cat=cat, pid=pid, tid=tid, ts=ts, ph=ph, name=name, args=args,
+        )
+        evs.append(ev)
+    return evs
+
+
+def _setup_chrome_trace_exit_handler():
+    """Set up a RecordingListener and an exit handler that writes the
+    captured events to file.
+    """
+    listener = RecordingListener()
+    register("numba:run_pass", listener)
+    filename = config.CHROME_TRACE
+
+    @atexit.register
+    def _write_chrome_trace():
+        # The following output file is not multi-process safe.
+        evs = _prepare_chrome_trace_data(listener)
+        with open(filename, "w") as out:
+            json.dump(evs, out)
+
+
+if config.CHROME_TRACE:
+    _setup_chrome_trace_exit_handler()
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/extending.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/extending.py
new file mode 100644
index 000000000..9d005fe74
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/extending.py
@@ -0,0 +1,583 @@
+import os
+import uuid
+import weakref
+import collections
+import functools
+
+import numba
+from numba.core import types, errors, utils, config
+
+# Exported symbols
+from numba.core.typing.typeof import typeof_impl  # noqa: F401
+from numba.core.typing.asnumbatype import as_numba_type  # noqa: F401
+from numba.core.typing.templates import infer, infer_getattr  # noqa: F401
+from numba.core.imputils import (  # noqa: F401
+    lower_builtin, lower_getattr, lower_getattr_generic,  # noqa: F401
+    lower_setattr, lower_setattr_generic, lower_cast)  # noqa: F401
+from numba.core.datamodel import models  # noqa: F401
+from numba.core.datamodel import register_default as register_model  # noqa: F401, E501
+from numba.core.pythonapi import box, unbox, reflect, NativeValue  # noqa: F401
+from numba._helperlib import _import_cython_function  # noqa: F401
+from numba.core.serialize import ReduceMixin
+
+
+def type_callable(func):
+    """
+    Decorate a function as implementing typing for the callable *func*.
+    *func* can be a callable object (probably a global) or a string
+    denoting a built-in operation (such as 'getitem' or '__array_wrap__').
+    """
+    from numba.core.typing.templates import (CallableTemplate, infer,
+                                             infer_global)
+    if not callable(func) and not isinstance(func, str):
+        raise TypeError("`func` should be a function or string")
+    try:
+        func_name = func.__name__
+    except AttributeError:
+        func_name = str(func)
+
+    def decorate(typing_func):
+        def generic(self):
+            return typing_func(self.context)
+
+        name = "%s_CallableTemplate" % (func_name,)
+        bases = (CallableTemplate,)
+        class_dict = dict(key=func, generic=generic)
+        template = type(name, bases, class_dict)
+        infer(template)
+        if callable(func):
+            infer_global(func, types.Function(template))
+        return typing_func
+
+    return decorate
+
+
+# By default, an *overload* does not have a cpython wrapper because it is not
+# callable from python.
+_overload_default_jit_options = {'no_cpython_wrapper': True}
+
+
+def overload(func, jit_options={}, strict=True, inline='never',
+             prefer_literal=False, **kwargs):
+    """
+    A decorator marking the decorated function as typing and implementing
+    *func* in nopython mode.
+
+    The decorated function will have the same formal parameters as *func*
+    and be passed the Numba types of those parameters. It should return
+    a function implementing *func* for the given types.
+
+    Here is an example implementing len() for tuple types::
+
+        @overload(len)
+        def tuple_len(seq):
+            if isinstance(seq, types.BaseTuple):
+                n = len(seq)
+                def len_impl(seq):
+                    return n
+                return len_impl
+
+    Compiler options can be passed as a dictionary using the **jit_options**
+    argument.
+
+    Overloading strictness (that the typing and implementing signatures match)
+    is enforced by the **strict** keyword argument; it is recommended that
+    this is set to True (the default).
+
+    To handle a function that accepts imprecise types, an overload
+    definition can return a 2-tuple of ``(signature, impl_function)``, where
+    the ``signature`` is a ``typing.Signature`` specifying the precise
+    signature to be used; and ``impl_function`` is the same implementation
+    function as in the simple case.
+
+    The **inline** kwarg determines whether the overload is inlined in the
+    calling function and can be one of three values:
+    * 'never' (default) - the overload is never inlined.
+    * 'always' - the overload is always inlined.
+    * a function that takes two arguments, both of which are instances of a
+      namedtuple with fields:
+        * func_ir
+        * typemap
+        * calltypes
+        * signature
+      The first argument holds the information from the caller, the second
+      holds the information from the callee. The function should return a
+      truthy value to trigger inlining; this essentially permits custom
+      inlining rules (a typical use might be cost models).
+
+    The *prefer_literal* option allows users to control whether literal types
+    should be tried first or last. The default (`False`) is to use non-literal
+    types. Implementations that can specialize based on literal values should
+    set the option to `True`. Note, this option may be expanded in the near
+    future to allow for more control (e.g. disabling non-literal types).
+
+    **kwargs prescribes additional arguments passed through to the overload
+    template. The only accepted key at present is 'target' which is a string
+    corresponding to the target that this overload should be bound against.
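+
+    A sketch combining several of these options (the option values here are
+    illustrative only, not recommendations)::
+
+        @overload(len, jit_options={'fastmath': True}, inline='always',
+                  prefer_literal=False, target='cpu')
+        def tuple_len(seq):
+            ...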
+ """ + from numba.core.typing.templates import make_overload_template, infer_global + + # set default options + opts = _overload_default_jit_options.copy() + opts.update(jit_options) # let user options override + + # TODO: abort now if the kwarg 'target' relates to an unregistered target, + # this requires sorting out the circular imports first. + + def decorate(overload_func): + template = make_overload_template(func, overload_func, opts, strict, + inline, prefer_literal, **kwargs) + infer(template) + if callable(func): + infer_global(func, types.Function(template)) + return overload_func + + return decorate + + +def register_jitable(*args, **kwargs): + """ + Register a regular python function that can be executed by the python + interpreter and can be compiled into a nopython function when referenced + by other jit'ed functions. Can be used as:: + + @register_jitable + def foo(x, y): + return x + y + + Or, with compiler options:: + + @register_jitable(_nrt=False) # disable runtime allocation + def foo(x, y): + return x + y + + """ + def wrap(fn): + # It is just a wrapper for @overload + inline = kwargs.pop('inline', 'never') + + @overload(fn, jit_options=kwargs, inline=inline, strict=False) + def ov_wrap(*args, **kwargs): + return fn + return fn + + if kwargs: + return wrap + else: + return wrap(*args) + + +def overload_attribute(typ, attr, **kwargs): + """ + A decorator marking the decorated function as typing and implementing + attribute *attr* for the given Numba type in nopython mode. + + *kwargs* are passed to the underlying `@overload` call. + + Here is an example implementing .nbytes for array types:: + + @overload_attribute(types.Array, 'nbytes') + def array_nbytes(arr): + def get(arr): + return arr.size * arr.itemsize + return get + """ + # TODO implement setters + from numba.core.typing.templates import make_overload_attribute_template + + def decorate(overload_func): + template = make_overload_attribute_template( + typ, attr, overload_func, + inline=kwargs.get('inline', 'never'), + ) + infer_getattr(template) + overload(overload_func, **kwargs)(overload_func) + return overload_func + + return decorate + + +def _overload_method_common(typ, attr, **kwargs): + """Common code for overload_method and overload_classmethod + """ + from numba.core.typing.templates import make_overload_method_template + + def decorate(overload_func): + copied_kwargs = kwargs.copy() # avoid mutating parent dict + template = make_overload_method_template( + typ, attr, overload_func, + inline=copied_kwargs.pop('inline', 'never'), + prefer_literal=copied_kwargs.pop('prefer_literal', False), + **copied_kwargs, + ) + infer_getattr(template) + overload(overload_func, **kwargs)(overload_func) + return overload_func + + return decorate + + +def overload_method(typ, attr, **kwargs): + """ + A decorator marking the decorated function as typing and implementing + method *attr* for the given Numba type in nopython mode. + + *kwargs* are passed to the underlying `@overload` call. 
+ + Here is an example implementing .take() for array types:: + + @overload_method(types.Array, 'take') + def array_take(arr, indices): + if isinstance(indices, types.Array): + def take_impl(arr, indices): + n = indices.shape[0] + res = np.empty(n, arr.dtype) + for i in range(n): + res[i] = arr[indices[i]] + return res + return take_impl + """ + return _overload_method_common(typ, attr, **kwargs) + + +def overload_classmethod(typ, attr, **kwargs): + """ + A decorator marking the decorated function as typing and implementing + classmethod *attr* for the given Numba type in nopython mode. + + + Similar to ``overload_method``. + + + Here is an example implementing a classmethod on the Array type to call + ``np.arange()``:: + + @overload_classmethod(types.Array, "make") + def ov_make(cls, nitems): + def impl(cls, nitems): + return np.arange(nitems) + return impl + + The above code will allow the following to work in jit-compiled code:: + + @njit + def foo(n): + return types.Array.make(n) + """ + return _overload_method_common(types.TypeRef(typ), attr, **kwargs) + + +def make_attribute_wrapper(typeclass, struct_attr, python_attr): + """ + Make an automatic attribute wrapper exposing member named *struct_attr* + as a read-only attribute named *python_attr*. + The given *typeclass*'s model must be a StructModel subclass. + """ + from numba.core.typing.templates import AttributeTemplate + from numba.core.datamodel import default_manager + from numba.core.datamodel.models import StructModel + from numba.core.imputils import impl_ret_borrowed + from numba.core import cgutils + + if not isinstance(typeclass, type) or not issubclass(typeclass, types.Type): + raise TypeError("typeclass should be a Type subclass, got %s" + % (typeclass,)) + + def get_attr_fe_type(typ): + """ + Get the Numba type of member *struct_attr* in *typ*. + """ + model = default_manager.lookup(typ) + if not isinstance(model, StructModel): + raise TypeError("make_struct_attribute_wrapper() needs a type " + "with a StructModel, but got %s" % (model,)) + return model.get_member_fe_type(struct_attr) + + @infer_getattr + class StructAttribute(AttributeTemplate): + key = typeclass + + def generic_resolve(self, typ, attr): + if attr == python_attr: + return get_attr_fe_type(typ) + + @lower_getattr(typeclass, python_attr) + def struct_getattr_impl(context, builder, typ, val): + val = cgutils.create_struct_proxy(typ)(context, builder, value=val) + attrty = get_attr_fe_type(typ) + attrval = getattr(val, struct_attr) + return impl_ret_borrowed(context, builder, attrty, attrval) + + +class _Intrinsic(ReduceMixin): + """ + Dummy callable for intrinsic + """ + _memo = weakref.WeakValueDictionary() + # hold refs to last N functions deserialized, retaining them in _memo + # regardless of whether there is another reference + _recent = collections.deque(maxlen=config.FUNCTION_CACHE_SIZE) + + __uuid = None + + def __init__(self, name, defn, **kwargs): + self._ctor_kwargs = kwargs + self._name = name + self._defn = defn + functools.update_wrapper(self, defn) + + @property + def _uuid(self): + """ + An instance-specific UUID, to avoid multiple deserializations of + a given instance. + + Note this is lazily-generated, for performance reasons. 
+        """
+        u = self.__uuid
+        if u is None:
+            u = str(uuid.uuid1())
+            self._set_uuid(u)
+        return u
+
+    def _set_uuid(self, u):
+        assert self.__uuid is None
+        self.__uuid = u
+        self._memo[u] = self
+        self._recent.append(self)
+
+    def _register(self):
+        # _ctor_kwargs
+        from numba.core.typing.templates import (make_intrinsic_template,
+                                                 infer_global)
+
+        template = make_intrinsic_template(self, self._defn, self._name,
+                                           self._ctor_kwargs)
+        infer(template)
+        infer_global(self, types.Function(template))
+
+    def __call__(self, *args, **kwargs):
+        """
+        This is only defined to pretend to be a callable from CPython.
+        """
+        msg = '{0} is not usable in pure-python'.format(self)
+        raise NotImplementedError(msg)
+
+    def __repr__(self):
+        return "<intrinsic {0}>".format(self._name)
+
+    def __deepcopy__(self, memo):
+        # NOTE: Intrinsic objects are immutable and do not need to be copied.
+        # This is triggered from deepcopy of statements.
+        return self
+
+    def _reduce_states(self):
+        """
+        NOTE: part of ReduceMixin protocol
+        """
+        return dict(uuid=self._uuid, name=self._name, defn=self._defn)
+
+    @classmethod
+    def _rebuild(cls, uuid, name, defn):
+        """
+        NOTE: part of ReduceMixin protocol
+        """
+        try:
+            return cls._memo[uuid]
+        except KeyError:
+            llc = cls(name=name, defn=defn)
+            llc._register()
+            llc._set_uuid(uuid)
+            return llc
+
+
+def intrinsic(*args, **kwargs):
+    """
+    A decorator marking the decorated function as typing and implementing
+    *func* in nopython mode using the llvmlite IRBuilder API. This is an
+    escape hatch for expert users to build custom LLVM IR that will be
+    inlined into the caller.
+
+    The first argument to *func* is the typing context. The rest of the
+    arguments correspond to the types of the arguments of the decorated
+    function. These arguments are also used as the formal arguments of the
+    decorated function. If *func* has the signature
+    ``foo(typing_context, arg0, arg1)``, the decorated function will have the
+    signature ``foo(arg0, arg1)``.
+
+    The return value of *func* should be a 2-tuple of the expected type
+    signature and a code-generation function that will be passed to
+    ``lower_builtin``. For an unsupported operation, return None.
+
+    Here is an example implementing a ``cast_int_to_byte_ptr`` that casts
+    any integer to a byte pointer::
+
+        @intrinsic
+        def cast_int_to_byte_ptr(typingctx, src):
+            # check for accepted types
+            if isinstance(src, types.Integer):
+                # create the expected type signature
+                result_type = types.CPointer(types.uint8)
+                sig = result_type(types.uintp)
+                # defines the custom code generation
+                def codegen(context, builder, signature, args):
+                    # llvm IRBuilder code here
+                    [src] = args
+                    rtype = signature.return_type
+                    llrtype = context.get_value_type(rtype)
+                    return builder.inttoptr(src, llrtype)
+                return sig, codegen
+    """
+    # Make inner function for the actual work
+    def _intrinsic(func):
+        name = getattr(func, '__name__', str(func))
+        llc = _Intrinsic(name, func, **kwargs)
+        llc._register()
+        return llc
+
+    if not kwargs:
+        # No option is given
+        return _intrinsic(*args)
+    else:
+        # options are given, create a new callable to receive the
+        # definition function
+        def wrapper(func):
+            return _intrinsic(func)
+        return wrapper
+
+
+def get_cython_function_address(module_name, function_name):
+    """
+    Get the address of a Cython function.
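+
+    For example (the module and function names here are illustrative; any
+    Cython module exposing its functions through ``__pyx_capi__`` would
+    work)::
+
+        addr = get_cython_function_address("scipy.special.cython_special",
+                                           "j0")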
+ + Args + ---- + module_name: + Name of the Cython module + function_name: + Name of the Cython function + + Returns + ------- + A Python int containing the address of the function + + """ + return _import_cython_function(module_name, function_name) + + +def include_path(): + """Returns the C include directory path. + """ + include_dir = os.path.dirname(os.path.dirname(numba.__file__)) + path = os.path.abspath(include_dir) + return path + + +def sentry_literal_args(pysig, literal_args, args, kwargs): + """Ensures that the given argument types (in *args* and *kwargs*) are + literally typed for a function with the python signature *pysig* and the + list of literal argument names in *literal_args*. + + Alternatively, this is the same as:: + + SentryLiteralArgs(literal_args).for_pysig(pysig).bind(*args, **kwargs) + """ + boundargs = pysig.bind(*args, **kwargs) + + # Find literal argument positions and whether it is satisfied. + request_pos = set() + missing = False + for i, (k, v) in enumerate(boundargs.arguments.items()): + if k in literal_args: + request_pos.add(i) + if not isinstance(v, types.Literal): + missing = True + if missing: + # Yes, there are missing required literal arguments + e = errors.ForceLiteralArg(request_pos) + + # A helper function to fold arguments + def folded(args, kwargs): + out = pysig.bind(*args, **kwargs).arguments.values() + return tuple(out) + + raise e.bind_fold_arguments(folded) + + +class SentryLiteralArgs(collections.namedtuple( + '_SentryLiteralArgs', ['literal_args'])): + """ + Parameters + ---------- + literal_args : Sequence[str] + A sequence of names for literal arguments + + Examples + -------- + + The following line: + + >>> SentryLiteralArgs(literal_args).for_pysig(pysig).bind(*args, **kwargs) + + is equivalent to: + + >>> sentry_literal_args(pysig, literal_args, args, kwargs) + """ + def for_function(self, func): + """Bind the sentry to the signature of *func*. + + Parameters + ---------- + func : Function + A python function. + + Returns + ------- + obj : BoundLiteralArgs + """ + return self.for_pysig(utils.pysignature(func)) + + def for_pysig(self, pysig): + """Bind the sentry to the given signature *pysig*. + + Parameters + ---------- + pysig : inspect.Signature + + + Returns + ------- + obj : BoundLiteralArgs + """ + return BoundLiteralArgs( + pysig=pysig, + literal_args=self.literal_args, + ) + + +class BoundLiteralArgs(collections.namedtuple( + 'BoundLiteralArgs', ['pysig', 'literal_args'])): + """ + This class is usually created by SentryLiteralArgs. + """ + def bind(self, *args, **kwargs): + """Bind to argument types. + """ + return sentry_literal_args( + self.pysig, + self.literal_args, + args, + kwargs, + ) + + +def is_jitted(function): + """Returns True if a function is wrapped by one of the Numba @jit + decorators, for example: numba.jit, numba.njit + + The purpose of this function is to provide a means to check if a function is + already JIT decorated. + """ + + # don't want to export this so import locally + from numba.core.dispatcher import Dispatcher + return isinstance(function, Dispatcher) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/externals.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/externals.py new file mode 100644 index 000000000..e181b5f43 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/externals.py @@ -0,0 +1,155 @@ +""" +Register external C functions necessary for Numba code generation. 
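+
+This covers the ``multi3`` 128-bit multiplication helper needed on 32-bit
+platforms, some missing conversion symbols, and the C math functions exposed
+through ``numba._helperlib``.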
+""" + +import sys + +from llvmlite import ir +import llvmlite.binding as ll + +from numba.core import utils, intrinsics +from numba import _helperlib + + +def _add_missing_symbol(symbol, addr): + """Add missing symbol into LLVM internal symtab + """ + if not ll.address_of_symbol(symbol): + ll.add_symbol(symbol, addr) + + +def _get_msvcrt_symbol(symbol): + """ + Under Windows, look up a symbol inside the C runtime + and return the raw pointer value as an integer. + """ + from ctypes import cdll, cast, c_void_p + f = getattr(cdll.msvcrt, symbol) + return cast(f, c_void_p).value + + +def compile_multi3(context): + """ + Compile the multi3() helper function used by LLVM + for 128-bit multiplication on 32-bit platforms. + """ + codegen = context.codegen() + library = codegen.create_library("multi3") + + ir_mod = library.create_ir_module("multi3") + + i64 = ir.IntType(64) + i128 = ir.IntType(128) + lower_mask = ir.Constant(i64, 0xffffffff) + _32 = ir.Constant(i64, 32) + _64 = ir.Constant(i128, 64) + + fn_type = ir.FunctionType(i128, [i128, i128]) + fn = ir.Function(ir_mod, fn_type, name="multi3") + + a, b = fn.args + bb = fn.append_basic_block() + builder = ir.IRBuilder(bb) + + # This implementation mimics compiler-rt's. + al = builder.trunc(a, i64) + bl = builder.trunc(b, i64) + ah = builder.trunc(builder.ashr(a, _64), i64) + bh = builder.trunc(builder.ashr(b, _64), i64) + + # Compute {rh, rl} = al * bl (unsigned 64-bit multiplication) + # rl = (al & 0xffffffff) * (bl & 0xffffffff) + rl = builder.mul(builder.and_(al, lower_mask), builder.and_(bl, lower_mask)) + # t = rl >> 32 + t = builder.lshr(rl, _32) + # rl &= 0xffffffff + rl = builder.and_(rl, lower_mask) + # t += (al >> 32) * (bl & 0xffffffff) + t = builder.add(t, builder.mul(builder.lshr(al, _32), + builder.and_(bl, lower_mask))) + # rl += t << 32 + rl = builder.add(rl, builder.shl(t, _32)) + # rh = t >> 32 + rh = builder.lshr(t, _32) + # t = rl >> 32 + t = builder.lshr(rl, _32) + # rl &= 0xffffffff + rl = builder.and_(rl, lower_mask) + # t += (bl >> 32) * (al & 0xffffffff) + t = builder.add(t, builder.mul(builder.lshr(bl, _32), + builder.and_(al, lower_mask))) + # rl += t << 32 + rl = builder.add(rl, builder.shl(t, _32)) + # rh += t >> 32 + rh = builder.add(rh, builder.lshr(t, _32)) + # rh += (al >> 32) * (bl >> 32) + rh = builder.add(rh, builder.mul(builder.lshr(al, _32), + builder.lshr(bl, _32))) + + # rh += (bh * al) + (bl * ah) + rh = builder.add(rh, builder.mul(bh, al)) + rh = builder.add(rh, builder.mul(bl, ah)) + + # r = rl + (rh << 64) + r = builder.zext(rl, i128) + r = builder.add(r, builder.shl(builder.zext(rh, i128), _64)) + builder.ret(r) + + library.add_ir_module(ir_mod) + library.finalize() + + return library + + +class _Installer(object): + + _installed = False + + def install(self, context): + """ + Install the functions into LLVM. This only needs to be done once, + as the mappings are persistent during the process lifetime. + """ + if not self._installed: + self._do_install(context) + self._installed = True + + +class _ExternalMathFunctions(_Installer): + """ + Map the math functions from the C runtime library into the LLVM + execution environment. + """ + + def _do_install(self, context): + is32bit = utils.MACHINE_BITS == 32 + c_helpers = _helperlib.c_helpers + + if sys.platform.startswith('win32') and is32bit: + # For Windows XP _ftol2 is not defined, we will just use + # _ftol as a replacement. + # On Windows 7, this is not necessary but will work anyway. 
+ ftol = _get_msvcrt_symbol("_ftol") + _add_missing_symbol("_ftol2", ftol) + + elif sys.platform.startswith('linux') and is32bit: + _add_missing_symbol("__fixunsdfdi", c_helpers["fptoui"]) + _add_missing_symbol("__fixunssfdi", c_helpers["fptouif"]) + + if is32bit: + # Make the library immortal + self._multi3_lib = compile_multi3(context) + ptr = self._multi3_lib.get_pointer_to_function("multi3") + assert ptr + _add_missing_symbol("__multi3", ptr) + + # List available C-math + for fname in intrinsics.INTR_MATH: + # Force binding from CPython's C runtime library. + # (under Windows, different versions of the C runtime can + # be loaded at the same time, for example msvcrt100 by + # CPython and msvcrt120 by LLVM) + ll.add_symbol(fname, c_helpers[fname]) + + +c_math_functions = _ExternalMathFunctions() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/fastmathpass.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/fastmathpass.py new file mode 100644 index 000000000..d6dd1b89c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/fastmathpass.py @@ -0,0 +1,44 @@ +from llvmlite import ir +from llvmlite.ir.transforms import Visitor, CallVisitor + + +class FastFloatBinOpVisitor(Visitor): + """ + A pass to add fastmath flag to float-binop instruction if they don't have + any flags. + """ + float_binops = frozenset(['fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp']) + + def __init__(self, flags): + self.flags = flags + + def visit_Instruction(self, instr): + if instr.opname in self.float_binops: + if not instr.flags: + for flag in self.flags: + instr.flags.append(flag) + + +class FastFloatCallVisitor(CallVisitor): + """ + A pass to change all float function calls to use fastmath. + """ + + def __init__(self, flags): + self.flags = flags + + def visit_Call(self, instr): + # Add to any call that has float/double return type + if instr.type in (ir.FloatType(), ir.DoubleType()): + for flag in self.flags: + instr.fastmath.add(flag) + + +def rewrite_module(mod, options): + """ + Rewrite the given LLVM module to use fastmath everywhere. + """ + flags = options.flags + FastFloatBinOpVisitor(flags).visit(mod) + FastFloatCallVisitor(flags).visit(mod) + diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/funcdesc.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/funcdesc.py new file mode 100644 index 000000000..2a1a4fe17 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/funcdesc.py @@ -0,0 +1,230 @@ +""" +Function descriptors. +""" + +from collections import defaultdict +import importlib + +from numba.core import types, itanium_mangler +from numba.core.utils import _dynamic_modname, _dynamic_module + + +def default_mangler(name, argtypes, *, abi_tags=(), uid=None): + return itanium_mangler.mangle(name, argtypes, abi_tags=abi_tags, uid=uid) + + +def qualifying_prefix(modname, qualname): + """ + Returns a new string that is used for the first half of the mangled name. + """ + # XXX choose a different convention for object mode + return '{}.{}'.format(modname, qualname) if modname else qualname + + +class FunctionDescriptor(object): + """ + Base class for function descriptors: an object used to carry + useful metadata about a natively callable function. + + Note that while `FunctionIdentity` denotes a Python function + which is being concretely compiled by Numba, `FunctionDescriptor` + may be more "abstract": e.g. a function decorated with `@generated_jit`. 
+    """
+    __slots__ = ('native', 'modname', 'qualname', 'doc', 'typemap',
+                 'calltypes', 'args', 'kws', 'restype', 'argtypes',
+                 'mangled_name', 'unique_name', 'env_name', 'global_dict',
+                 'inline', 'noalias', 'abi_tags', 'uid')
+
+    def __init__(self, native, modname, qualname, unique_name, doc,
+                 typemap, restype, calltypes, args, kws, mangler=None,
+                 argtypes=None, inline=False, noalias=False, env_name=None,
+                 global_dict=None, abi_tags=(), uid=None):
+        self.native = native
+        self.modname = modname
+        self.global_dict = global_dict
+        self.qualname = qualname
+        self.unique_name = unique_name
+        self.doc = doc
+        # XXX typemap and calltypes should be on the compile result,
+        # not the FunctionDescriptor
+        self.typemap = typemap
+        self.calltypes = calltypes
+        self.args = args
+        self.kws = kws
+        self.restype = restype
+        # Argument types
+        if argtypes is not None:
+            assert isinstance(argtypes, tuple), argtypes
+            self.argtypes = argtypes
+        else:
+            # Get argument types from the type inference result
+            # (note the "arg.FOO" convention as used in typeinfer)
+            self.argtypes = tuple(self.typemap['arg.' + a] for a in args)
+        mangler = default_mangler if mangler is None else mangler
+        # The mangled name *must* be unique, else the wrong function can
+        # be chosen at link time.
+        qualprefix = qualifying_prefix(self.modname, self.qualname)
+        self.uid = uid
+        self.mangled_name = mangler(
+            qualprefix, self.argtypes, abi_tags=abi_tags, uid=uid,
+        )
+        if env_name is None:
+            env_name = mangler(".NumbaEnv.{}".format(qualprefix),
+                               self.argtypes, abi_tags=abi_tags, uid=uid)
+        self.env_name = env_name
+        self.inline = inline
+        self.noalias = noalias
+        self.abi_tags = abi_tags
+
+    def lookup_globals(self):
+        """
+        Return the global dictionary of the function.
+        It may not match the Module's globals if the function is created
+        dynamically (i.e. via ``exec``).
+        """
+        return self.global_dict or self.lookup_module().__dict__
+
+    def lookup_module(self):
+        """
+        Return the module in which this function is supposed to exist.
+        This may be a dummy module if the function was dynamically
+        generated or the module can't be found.
+        """
+        if self.modname == _dynamic_modname:
+            return _dynamic_module
+        else:
+            try:
+                # ensure the module exists
+                return importlib.import_module(self.modname)
+            except ImportError:
+                return _dynamic_module
+
+    def lookup_function(self):
+        """
+        Return the original function object described by this object.
+        """
+        return getattr(self.lookup_module(), self.qualname)
+
+    @property
+    def llvm_func_name(self):
+        """
+        The LLVM-registered name for the raw function.
+        """
+        return self.mangled_name
+
+    # XXX refactor this
+
+    @property
+    def llvm_cpython_wrapper_name(self):
+        """
+        The LLVM-registered name for a CPython-compatible wrapper of the
+        raw function (i.e. a PyCFunctionWithKeywords).
+        """
+        return itanium_mangler.prepend_namespace(self.mangled_name,
+                                                 ns='cpython')
+
+    @property
+    def llvm_cfunc_wrapper_name(self):
+        """
+        The LLVM-registered name for a C-compatible wrapper of the
+        raw function.
+        """
+        return 'cfunc.' + self.mangled_name
+
+    def __repr__(self):
+        return "<function descriptor %r>" % (self.unique_name)
+
+    @classmethod
+    def _get_function_info(cls, func_ir):
+        """
+        Returns
+        -------
+        qualname, unique_name, modname, doc, args, kws, globals
+
+        ``unique_name`` must be a unique name.
+ """ + func = func_ir.func_id.func + qualname = func_ir.func_id.func_qualname + # XXX to func_id + modname = func.__module__ + doc = func.__doc__ or '' + args = tuple(func_ir.arg_names) + kws = () # TODO + global_dict = None + + if modname is None: + # Dynamically generated function. + modname = _dynamic_modname + # Retain a reference to the dictionary of the function. + # This disables caching, serialization and pickling. + global_dict = func_ir.func_id.func.__globals__ + + unique_name = func_ir.func_id.unique_name + + return qualname, unique_name, modname, doc, args, kws, global_dict + + @classmethod + def _from_python_function(cls, func_ir, typemap, restype, + calltypes, native, mangler=None, + inline=False, noalias=False, abi_tags=()): + (qualname, unique_name, modname, doc, args, kws, global_dict, + ) = cls._get_function_info(func_ir) + + self = cls(native, modname, qualname, unique_name, doc, + typemap, restype, calltypes, + args, kws, mangler=mangler, inline=inline, noalias=noalias, + global_dict=global_dict, abi_tags=abi_tags, + uid=func_ir.func_id.unique_id) + return self + + +class PythonFunctionDescriptor(FunctionDescriptor): + """ + A FunctionDescriptor subclass for Numba-compiled functions. + """ + __slots__ = () + + @classmethod + def from_specialized_function(cls, func_ir, typemap, restype, calltypes, + mangler, inline, noalias, abi_tags): + """ + Build a FunctionDescriptor for a given specialization of a Python + function (in nopython mode). + """ + return cls._from_python_function(func_ir, typemap, restype, calltypes, + native=True, mangler=mangler, + inline=inline, noalias=noalias, + abi_tags=abi_tags) + + @classmethod + def from_object_mode_function(cls, func_ir): + """ + Build a FunctionDescriptor for an object mode variant of a Python + function. + """ + typemap = defaultdict(lambda: types.pyobject) + calltypes = typemap.copy() + restype = types.pyobject + return cls._from_python_function(func_ir, typemap, restype, calltypes, + native=False) + + +class ExternalFunctionDescriptor(FunctionDescriptor): + """ + A FunctionDescriptor subclass for opaque external functions + (e.g. raw C functions). + """ + __slots__ = () + + def __init__(self, name, restype, argtypes): + args = ["arg%d" % i for i in range(len(argtypes))] + + def mangler(a, x, abi_tags, uid=None): + return a + super(ExternalFunctionDescriptor, self + ).__init__(native=True, modname=None, qualname=name, + unique_name=name, doc='', typemap=None, + restype=restype, calltypes=None, args=args, + kws=None, + mangler=mangler, + argtypes=argtypes) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/generators.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/generators.py new file mode 100644 index 000000000..6060d160c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/generators.py @@ -0,0 +1,356 @@ +""" +Support for lowering generators. +""" + +import llvmlite.ir +from llvmlite.ir import Constant, IRBuilder + +from numba.core import types, config, cgutils +from numba.core.funcdesc import FunctionDescriptor + + +class GeneratorDescriptor(FunctionDescriptor): + """ + The descriptor for a generator's next function. + """ + __slots__ = () + + @classmethod + def from_generator_fndesc(cls, func_ir, fndesc, gentype, mangler): + """ + Build a GeneratorDescriptor for the generator returned by the + function described by *fndesc*, with type *gentype*. + + The generator inherits the env_name from the *fndesc*. + All emitted functions for the generator shares the same Env. 
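+
+        For example, a generator function whose qualname is ``"f"`` gets a
+        next-function descriptor with qualname ``"f.next"`` (an illustrative
+        name; see the body below).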
+ """ + assert isinstance(gentype, types.Generator) + restype = gentype.yield_type + args = ['gen'] + argtypes = (gentype,) + qualname = fndesc.qualname + '.next' + unique_name = fndesc.unique_name + '.next' + self = cls(fndesc.native, fndesc.modname, qualname, unique_name, + fndesc.doc, fndesc.typemap, restype, fndesc.calltypes, + args, fndesc.kws, argtypes=argtypes, mangler=mangler, + inline=False, env_name=fndesc.env_name) + return self + + @property + def llvm_finalizer_name(self): + """ + The LLVM name of the generator's finalizer function + (if .has_finalizer is true). + """ + return 'finalize_' + self.mangled_name + + +class BaseGeneratorLower(object): + """ + Base support class for lowering generators. + """ + + def __init__(self, lower): + self.context = lower.context + self.fndesc = lower.fndesc + self.library = lower.library + self.call_conv = lower.call_conv + self.func_ir = lower.func_ir + + self.geninfo = lower.generator_info + self.gentype = self.get_generator_type() + self.gendesc = GeneratorDescriptor.from_generator_fndesc( + lower.func_ir, self.fndesc, self.gentype, self.context.mangler) + # Helps packing non-omitted arguments into a structure + self.arg_packer = self.context.get_data_packer(self.fndesc.argtypes) + + self.resume_blocks = {} + + def get_args_ptr(self, builder, genptr): + return cgutils.gep_inbounds(builder, genptr, 0, 1) + + def get_resume_index_ptr(self, builder, genptr): + return cgutils.gep_inbounds(builder, genptr, 0, 0, + name='gen.resume_index') + + def get_state_ptr(self, builder, genptr): + return cgutils.gep_inbounds(builder, genptr, 0, 2, + name='gen.state') + + def lower_init_func(self, lower): + """ + Lower the generator's initialization function (which will fill up + the passed-by-reference generator structure). + """ + lower.setup_function(self.fndesc) + + builder = lower.builder + + # Insert the generator into the target context in order to allow + # calling from other Numba-compiled functions. + lower.context.insert_generator(self.gentype, self.gendesc, + [self.library]) + + # Init argument values + lower.extract_function_arguments() + + lower.pre_lower() + + # Initialize the return structure (i.e. the generator structure). + retty = self.context.get_return_type(self.gentype) + # Structure index #0: the initial resume index (0 == start of generator) + resume_index = self.context.get_constant(types.int32, 0) + # Structure index #1: the function arguments + argsty = retty.elements[1] + statesty = retty.elements[2] + + lower.debug_print("# low_init_func incref") + # Incref all NRT arguments before storing into generator states + if self.context.enable_nrt: + for argty, argval in zip(self.fndesc.argtypes, lower.fnargs): + self.context.nrt.incref(builder, argty, argval) + + # Filter out omitted arguments + argsval = self.arg_packer.as_data(builder, lower.fnargs) + + # Zero initialize states + statesval = Constant(statesty, None) + gen_struct = cgutils.make_anonymous_struct(builder, + [resume_index, argsval, + statesval], + retty) + + retval = self.box_generator_struct(lower, gen_struct) + + lower.debug_print("# low_init_func before return") + self.call_conv.return_value(builder, retval) + lower.post_lower() + + def lower_next_func(self, lower): + """ + Lower the generator's next() function (which takes the + passed-by-reference generator structure and returns the next + yielded value). 
+ """ + lower.setup_function(self.gendesc) + lower.debug_print("# lower_next_func: {0}".format(self.gendesc.unique_name)) + assert self.gendesc.argtypes[0] == self.gentype + builder = lower.builder + function = lower.function + + # Extract argument values and other information from generator struct + genptr, = self.call_conv.get_arguments(function) + self.arg_packer.load_into(builder, + self.get_args_ptr(builder, genptr), + lower.fnargs) + + self.resume_index_ptr = self.get_resume_index_ptr(builder, genptr) + self.gen_state_ptr = self.get_state_ptr(builder, genptr) + + prologue = function.append_basic_block("generator_prologue") + + # Lower the generator's Python code + entry_block_tail = lower.lower_function_body() + + # Add block for StopIteration on entry + stop_block = function.append_basic_block("stop_iteration") + builder.position_at_end(stop_block) + self.call_conv.return_stop_iteration(builder) + + # Add prologue switch to resume blocks + builder.position_at_end(prologue) + # First Python block is also the resume point on first next() call + first_block = self.resume_blocks[0] = lower.blkmap[lower.firstblk] + + # Create front switch to resume points + switch = builder.switch(builder.load(self.resume_index_ptr), + stop_block) + for index, block in self.resume_blocks.items(): + switch.add_case(index, block) + + # Close tail of entry block + builder.position_at_end(entry_block_tail) + builder.branch(prologue) + + def lower_finalize_func(self, lower): + """ + Lower the generator's finalizer. + """ + fnty = llvmlite.ir.FunctionType(llvmlite.ir.VoidType(), + [self.context.get_value_type(self.gentype)]) + function = cgutils.get_or_insert_function( + lower.module, fnty, self.gendesc.llvm_finalizer_name) + entry_block = function.append_basic_block('entry') + builder = IRBuilder(entry_block) + + genptrty = self.context.get_value_type(self.gentype) + genptr = builder.bitcast(function.args[0], genptrty) + self.lower_finalize_func_body(builder, genptr) + + def return_from_generator(self, lower): + """ + Emit a StopIteration at generator end and mark the generator exhausted. + """ + indexval = Constant(self.resume_index_ptr.type.pointee, -1) + lower.builder.store(indexval, self.resume_index_ptr) + self.call_conv.return_stop_iteration(lower.builder) + + def create_resumption_block(self, lower, index): + block_name = "generator_resume%d" % (index,) + block = lower.function.append_basic_block(block_name) + lower.builder.position_at_end(block) + self.resume_blocks[index] = block + + def debug_print(self, builder, msg): + if config.DEBUG_JIT: + self.context.debug_print(builder, "DEBUGJIT: {0}".format(msg)) + +class GeneratorLower(BaseGeneratorLower): + """ + Support class for lowering nopython generators. + """ + + def get_generator_type(self): + return self.fndesc.restype + + def box_generator_struct(self, lower, gen_struct): + return gen_struct + + def lower_finalize_func_body(self, builder, genptr): + """ + Lower the body of the generator's finalizer: decref all live + state variables. + """ + self.debug_print(builder, "# generator: finalize") + if self.context.enable_nrt: + + # Always dereference all arguments + # self.debug_print(builder, "# generator: clear args") + args_ptr = self.get_args_ptr(builder, genptr) + for ty, val in self.arg_packer.load(builder, args_ptr): + self.context.nrt.decref(builder, ty, val) + + self.debug_print(builder, "# generator: finalize end") + builder.ret_void() + +class PyGeneratorLower(BaseGeneratorLower): + """ + Support class for lowering object mode generators. 
+ """ + + def get_generator_type(self): + """ + Compute the actual generator type (the generator function's return + type is simply "pyobject"). + """ + return types.Generator( + gen_func=self.func_ir.func_id.func, + yield_type=types.pyobject, + arg_types=(types.pyobject,) * self.func_ir.arg_count, + state_types=(types.pyobject,) * len(self.geninfo.state_vars), + has_finalizer=True, + ) + + def box_generator_struct(self, lower, gen_struct): + """ + Box the raw *gen_struct* as a Python object. + """ + gen_ptr = cgutils.alloca_once_value(lower.builder, gen_struct) + return lower.pyapi.from_native_generator(gen_ptr, self.gentype, lower.envarg) + + def init_generator_state(self, lower): + """ + NULL-initialize all generator state variables, to avoid spurious + decref's on cleanup. + """ + lower.builder.store(Constant(self.gen_state_ptr.type.pointee, None), + self.gen_state_ptr) + + def lower_finalize_func_body(self, builder, genptr): + """ + Lower the body of the generator's finalizer: decref all live + state variables. + """ + pyapi = self.context.get_python_api(builder) + resume_index_ptr = self.get_resume_index_ptr(builder, genptr) + resume_index = builder.load(resume_index_ptr) + # If resume_index is 0, next() was never called + # If resume_index is -1, generator terminated cleanly + # (note function arguments are saved in state variables, + # so they don't need a separate cleanup step) + need_cleanup = builder.icmp_signed( + '>', resume_index, Constant(resume_index.type, 0)) + + with cgutils.if_unlikely(builder, need_cleanup): + # Decref all live vars (some may be NULL) + gen_state_ptr = self.get_state_ptr(builder, genptr) + for state_index in range(len(self.gentype.state_types)): + state_slot = cgutils.gep_inbounds(builder, gen_state_ptr, + 0, state_index) + ty = self.gentype.state_types[state_index] + val = self.context.unpack_value(builder, ty, state_slot) + pyapi.decref(val) + + builder.ret_void() + + +class LowerYield(object): + """ + Support class for lowering a particular yield point. + """ + + def __init__(self, lower, yield_point, live_vars): + self.lower = lower + self.context = lower.context + self.builder = lower.builder + self.genlower = lower.genlower + self.gentype = self.genlower.gentype + + self.gen_state_ptr = self.genlower.gen_state_ptr + self.resume_index_ptr = self.genlower.resume_index_ptr + self.yp = yield_point + self.inst = self.yp.inst + self.live_vars = live_vars + self.live_var_indices = [lower.generator_info.state_vars.index(v) + for v in live_vars] + + def lower_yield_suspend(self): + self.lower.debug_print("# generator suspend") + # Save live vars in state + for state_index, name in zip(self.live_var_indices, self.live_vars): + state_slot = cgutils.gep_inbounds(self.builder, self.gen_state_ptr, + 0, state_index) + ty = self.gentype.state_types[state_index] + # The yield might be in a loop, in which case the state might + # contain a predicate var that branches back to the loop head, in + # this case the var is live but in sequential lowering won't have + # been alloca'd yet, so do this here. 
+ fetype = self.lower.typeof(name) + self.lower._alloca_var(name, fetype) + val = self.lower.loadvar(name) + # IncRef newly stored value + if self.context.enable_nrt: + self.context.nrt.incref(self.builder, ty, val) + + self.context.pack_value(self.builder, ty, val, state_slot) + # Save resume index + indexval = Constant(self.resume_index_ptr.type.pointee, + self.inst.index) + self.builder.store(indexval, self.resume_index_ptr) + self.lower.debug_print("# generator suspend end") + + def lower_yield_resume(self): + # Emit resumption point + self.genlower.create_resumption_block(self.lower, self.inst.index) + self.lower.debug_print("# generator resume") + # Reload live vars from state + for state_index, name in zip(self.live_var_indices, self.live_vars): + state_slot = cgutils.gep_inbounds(self.builder, self.gen_state_ptr, + 0, state_index) + ty = self.gentype.state_types[state_index] + val = self.context.unpack_value(self.builder, ty, state_slot) + self.lower.storevar(val, name) + # Previous storevar is making an extra incref + if self.context.enable_nrt: + self.context.nrt.decref(self.builder, ty, val) + self.lower.debug_print("# generator resume end") diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/imputils.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/imputils.py new file mode 100644 index 000000000..06c35abb9 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/imputils.py @@ -0,0 +1,469 @@ +""" +Utilities to simplify the boilerplate for native lowering. +""" + + +import collections +import contextlib +import inspect +import functools +from enum import Enum + +from numba.core import typing, types, utils, cgutils +from numba.core.typing.templates import BaseRegistryLoader + + +class Registry(object): + """ + A registry of function and attribute implementations. + """ + def __init__(self, name='unspecified'): + self.name = name + self.functions = [] + self.getattrs = [] + self.setattrs = [] + self.casts = [] + self.constants = [] + + def lower(self, func, *argtys): + """ + Decorate an implementation of *func* for the given argument types. + *func* may be an actual global function object, or any + pseudo-function supported by Numba, such as "getitem". + + The decorated implementation has the signature + (context, builder, sig, args). + """ + def decorate(impl): + self.functions.append((impl, func, argtys)) + return impl + return decorate + + def _decorate_attr(self, impl, ty, attr, impl_list, decorator): + real_impl = decorator(impl, ty, attr) + impl_list.append((real_impl, attr, real_impl.signature)) + return impl + + def lower_getattr(self, ty, attr): + """ + Decorate an implementation of __getattr__ for type *ty* and + the attribute *attr*. + + The decorated implementation will have the signature + (context, builder, typ, val). + """ + def decorate(impl): + return self._decorate_attr(impl, ty, attr, self.getattrs, + _decorate_getattr) + return decorate + + def lower_getattr_generic(self, ty): + """ + Decorate the fallback implementation of __getattr__ for type *ty*. + + The decorated implementation will have the signature + (context, builder, typ, val, attr). The implementation is + called for attributes which haven't been explicitly registered + with lower_getattr(). + """ + return self.lower_getattr(ty, None) + + def lower_setattr(self, ty, attr): + """ + Decorate an implementation of __setattr__ for type *ty* and + the attribute *attr*. + + The decorated implementation will have the signature + (context, builder, sig, args). 
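+
+        A hedged usage sketch (``MyType`` and its ``value`` attribute are
+        assumed purely for illustration)::
+
+            @registry.lower_setattr(MyType, 'value')
+            def mytype_set_value(context, builder, sig, args):
+                target, val = args
+                # ... store val into the target's data model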
+ """ + def decorate(impl): + return self._decorate_attr(impl, ty, attr, self.setattrs, + _decorate_setattr) + return decorate + + def lower_setattr_generic(self, ty): + """ + Decorate the fallback implementation of __setattr__ for type *ty*. + + The decorated implementation will have the signature + (context, builder, sig, args, attr). The implementation is + called for attributes which haven't been explicitly registered + with lower_setattr(). + """ + return self.lower_setattr(ty, None) + + def lower_cast(self, fromty, toty): + """ + Decorate the implementation of implicit conversion between + *fromty* and *toty*. + + The decorated implementation will have the signature + (context, builder, fromty, toty, val). + """ + def decorate(impl): + self.casts.append((impl, (fromty, toty))) + return impl + return decorate + + def lower_constant(self, ty): + """ + Decorate the implementation for creating a constant of type *ty*. + + The decorated implementation will have the signature + (context, builder, ty, pyval). + """ + def decorate(impl): + self.constants.append((impl, (ty,))) + return impl + return decorate + + def __repr__(self): + return f"Lowering Registry<{self.name}>" + + +class RegistryLoader(BaseRegistryLoader): + """ + An incremental loader for a target registry. + """ + registry_items = ('functions', 'getattrs', 'setattrs', 'casts', 'constants') + + +# Global registry for implementations of builtin operations +# (functions, attributes, type casts) +builtin_registry = Registry('builtin_registry') + +lower_builtin = builtin_registry.lower +lower_getattr = builtin_registry.lower_getattr +lower_getattr_generic = builtin_registry.lower_getattr_generic +lower_setattr = builtin_registry.lower_setattr +lower_setattr_generic = builtin_registry.lower_setattr_generic +lower_cast = builtin_registry.lower_cast +lower_constant = builtin_registry.lower_constant + + +def _decorate_getattr(impl, ty, attr): + real_impl = impl + + if attr is not None: + def res(context, builder, typ, value, attr): + return real_impl(context, builder, typ, value) + else: + def res(context, builder, typ, value, attr): + return real_impl(context, builder, typ, value, attr) + + res.signature = (ty,) + res.attr = attr + return res + +def _decorate_setattr(impl, ty, attr): + real_impl = impl + + if attr is not None: + def res(context, builder, sig, args, attr): + return real_impl(context, builder, sig, args) + else: + def res(context, builder, sig, args, attr): + return real_impl(context, builder, sig, args, attr) + + res.signature = (ty, types.Any) + res.attr = attr + return res + + +def fix_returning_optional(context, builder, sig, status, retval): + # Reconstruct optional return type + if isinstance(sig.return_type, types.Optional): + value_type = sig.return_type.type + optional_none = context.make_optional_none(builder, value_type) + retvalptr = cgutils.alloca_once_value(builder, optional_none) + with builder.if_then(builder.not_(status.is_none)): + optional_value = context.make_optional_value( + builder, value_type, retval, + ) + builder.store(optional_value, retvalptr) + retval = builder.load(retvalptr) + return retval + +def user_function(fndesc, libs): + """ + A wrapper inserting code calling Numba-compiled *fndesc*. 
+ """ + + def imp(context, builder, sig, args): + func = context.declare_function(builder.module, fndesc) + # env=None assumes this is a nopython function + status, retval = context.call_conv.call_function( + builder, func, fndesc.restype, fndesc.argtypes, args) + with cgutils.if_unlikely(builder, status.is_error): + context.call_conv.return_status_propagate(builder, status) + assert sig.return_type == fndesc.restype + # Reconstruct optional return type + retval = fix_returning_optional(context, builder, sig, status, retval) + # If the data representations don't match up + if retval.type != context.get_value_type(sig.return_type): + msg = "function returned {0} but expect {1}" + raise TypeError(msg.format(retval.type, sig.return_type)) + + return impl_ret_new_ref(context, builder, fndesc.restype, retval) + + imp.signature = fndesc.argtypes + imp.libs = tuple(libs) + return imp + + +def user_generator(gendesc, libs): + """ + A wrapper inserting code calling Numba-compiled *gendesc*. + """ + + def imp(context, builder, sig, args): + func = context.declare_function(builder.module, gendesc) + # env=None assumes this is a nopython function + status, retval = context.call_conv.call_function( + builder, func, gendesc.restype, gendesc.argtypes, args) + # Return raw status for caller to process StopIteration + return status, retval + + imp.libs = tuple(libs) + return imp + + +def iterator_impl(iterable_type, iterator_type): + """ + Decorator a given class as implementing *iterator_type* + (by providing an `iternext()` method). + """ + + def wrapper(cls): + # These are unbound methods + iternext = cls.iternext + + @iternext_impl(RefType.BORROWED) + def iternext_wrapper(context, builder, sig, args, result): + (value,) = args + iterobj = cls(context, builder, value) + return iternext(iterobj, context, builder, result) + + lower_builtin('iternext', iterator_type)(iternext_wrapper) + return cls + + return wrapper + + +class _IternextResult(object): + """ + A result wrapper for iteration, passed by iternext_impl() into the + wrapped function. + """ + __slots__ = ('_context', '_builder', '_pairobj') + + def __init__(self, context, builder, pairobj): + self._context = context + self._builder = builder + self._pairobj = pairobj + + def set_exhausted(self): + """ + Mark the iterator as exhausted. + """ + self._pairobj.second = self._context.get_constant(types.boolean, False) + + def set_valid(self, is_valid=True): + """ + Mark the iterator as valid according to *is_valid* (which must + be either a Python boolean or a LLVM inst). + """ + if is_valid in (False, True): + is_valid = self._context.get_constant(types.boolean, is_valid) + self._pairobj.second = is_valid + + def yield_(self, value): + """ + Mark the iterator as yielding the given *value* (a LLVM inst). + """ + self._pairobj.first = value + + def is_valid(self): + """ + Return whether the iterator is marked valid. + """ + return self._context.get_argument_value(self._builder, + types.boolean, + self._pairobj.second) + + def yielded_value(self): + """ + Return the iterator's yielded value, if any. + """ + return self._pairobj.first + +class RefType(Enum): + """ + Enumerate the reference type + """ + """ + A new reference + """ + NEW = 1 + """ + A borrowed reference + """ + BORROWED = 2 + """ + An untracked reference + """ + UNTRACKED = 3 + +def iternext_impl(ref_type=None): + """ + Wrap the given iternext() implementation so that it gets passed + an _IternextResult() object easing the returning of the iternext() + result pair. 
+ + ref_type: a numba.targets.imputils.RefType value, the reference type used is + that specified through the RefType enum. + + The wrapped function will be called with the following signature: + (context, builder, sig, args, iternext_result) + """ + if ref_type not in [x for x in RefType]: + raise ValueError("ref_type must be an enum member of imputils.RefType") + + def outer(func): + def wrapper(context, builder, sig, args): + pair_type = sig.return_type + pairobj = context.make_helper(builder, pair_type) + func(context, builder, sig, args, + _IternextResult(context, builder, pairobj)) + if ref_type == RefType.NEW: + impl_ret = impl_ret_new_ref + elif ref_type == RefType.BORROWED: + impl_ret = impl_ret_borrowed + elif ref_type == RefType.UNTRACKED: + impl_ret = impl_ret_untracked + else: + raise ValueError("Unknown ref_type encountered") + return impl_ret(context, builder, + pair_type, pairobj._getvalue()) + return wrapper + return outer + + +def call_getiter(context, builder, iterable_type, val): + """ + Call the `getiter()` implementation for the given *iterable_type* + of value *val*, and return the corresponding LLVM inst. + """ + getiter_sig = typing.signature(iterable_type.iterator_type, iterable_type) + getiter_impl = context.get_function('getiter', getiter_sig) + return getiter_impl(builder, (val,)) + + +def call_iternext(context, builder, iterator_type, val): + """ + Call the `iternext()` implementation for the given *iterator_type* + of value *val*, and return a convenience _IternextResult() object + reflecting the results. + """ + itemty = iterator_type.yield_type + pair_type = types.Pair(itemty, types.boolean) + iternext_sig = typing.signature(pair_type, iterator_type) + iternext_impl = context.get_function('iternext', iternext_sig) + val = iternext_impl(builder, (val,)) + pairobj = context.make_helper(builder, pair_type, val) + return _IternextResult(context, builder, pairobj) + + +def call_len(context, builder, ty, val): + """ + Call len() on the given value. Return None if len() isn't defined on + this type. + """ + try: + len_impl = context.get_function(len, typing.signature(types.intp, ty,)) + except NotImplementedError: + return None + else: + return len_impl(builder, (val,)) + + +_ForIterLoop = collections.namedtuple('_ForIterLoop', + ('value', 'do_break')) + + +@contextlib.contextmanager +def for_iter(context, builder, iterable_type, val): + """ + Simulate a for loop on the given iterable. Yields a namedtuple with + the given members: + - `value` is the value being yielded + - `do_break` is a callable to early out of the loop + """ + iterator_type = iterable_type.iterator_type + iterval = call_getiter(context, builder, iterable_type, val) + + bb_body = builder.append_basic_block('for_iter.body') + bb_end = builder.append_basic_block('for_iter.end') + + def do_break(): + builder.branch(bb_end) + + builder.branch(bb_body) + + with builder.goto_block(bb_body): + res = call_iternext(context, builder, iterator_type, iterval) + with builder.if_then(builder.not_(res.is_valid()), likely=False): + builder.branch(bb_end) + yield _ForIterLoop(res.yielded_value(), do_break) + builder.branch(bb_body) + + builder.position_at_end(bb_end) + if context.enable_nrt: + context.nrt.decref(builder, iterator_type, iterval) + + +def impl_ret_new_ref(ctx, builder, retty, ret): + """ + The implementation returns a new reference. + """ + return ret + + +def impl_ret_borrowed(ctx, builder, retty, ret): + """ + The implementation returns a borrowed reference. 
+    This function automatically increfs the value so that the
+    implementation is effectively returning a new reference.
+    """
+    if ctx.enable_nrt:
+        ctx.nrt.incref(builder, retty, ret)
+    return ret
+
+
+def impl_ret_untracked(ctx, builder, retty, ret):
+    """
+    The return type is not a NRT object.
+    """
+    return ret
+
+
+@contextlib.contextmanager
+def force_error_model(context, model_name='numpy'):
+    """
+    Temporarily change the context's error model.
+    """
+    from numba.core import callconv
+
+    old_error_model = context.error_model
+    context.error_model = callconv.create_error_model(model_name, context)
+    try:
+        yield
+    finally:
+        context.error_model = old_error_model
+
+
+def numba_typeref_ctor(*args, **kwargs):
+    """A stub for use internally by Numba when a call is emitted
+    on a TypeRef.
+    """
+    raise NotImplementedError("This function should not be executed.")
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/inline_closurecall.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/inline_closurecall.py
new file mode 100644
index 000000000..badb6b49d
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/inline_closurecall.py
@@ -0,0 +1,1557 @@
+import types as pytypes  # avoid confusion with numba.types
+import copy
+import ctypes
+import numba.core.analysis
+from numba.core import utils, types, typing, errors, ir, rewrites, config, ir_utils
+from numba import prange
+from numba.parfors.parfor import internal_prange
+from numba.core.ir_utils import (
+    mk_unique_var,
+    next_label,
+    add_offset_to_labels,
+    replace_vars,
+    remove_dels,
+    rename_labels,
+    find_topo_order,
+    merge_adjacent_blocks,
+    GuardException,
+    require,
+    guard,
+    get_definition,
+    find_callname,
+    find_build_sequence,
+    get_np_ufunc_typ,
+    get_ir_of_code,
+    simplify_CFG,
+    canonicalize_array_math,
+    dead_code_elimination,
+    )
+
+from numba.core.analysis import (
+    compute_cfg_from_blocks,
+    compute_use_defs,
+    compute_live_variables)
+from numba.core import postproc
+from numba.np.unsafe.ndarray import empty_inferred as unsafe_empty_inferred
+import numpy as np
+import operator
+import numba.misc.special
+
+"""
+Variable enable_inline_arraycall is only used for testing purposes.
+"""
+enable_inline_arraycall = True
+
+
+def callee_ir_validator(func_ir):
+    """Checks that the IR of a callee is supported for inlining
+    """
+    for blk in func_ir.blocks.values():
+        for stmt in blk.find_insts(ir.Assign):
+            if isinstance(stmt.value, ir.Yield):
+                msg = "The use of yield in a closure is unsupported."
+                raise errors.UnsupportedError(msg, loc=stmt.loc)
+
+
+def _created_inlined_var_name(function_name, var_name):
+    """Creates a name for an inlined variable based on the function name and the
+    variable name. It does this "safely" to avoid the use of characters that are
+    illegal in python variable names as there are occasions when function
+    generation needs valid python name tokens."""
+    inlined_name = f'{function_name}.{var_name}'
+    # Replace angle brackets, e.g. "<locals>" is replaced with "_locals_"
+    new_name = inlined_name.replace('<', '_').replace('>', '_')
+    # The "version" of the closure function, e.g. foo$2 (id 2), is
+    # rewritten as "foo_v2". Further "." is also replaced with "_".
+    new_name = new_name.replace('.', '_').replace('$', '_v')
+    return new_name
+
+
+class InlineClosureCallPass(object):
+    """InlineClosureCallPass class looks for direct calls to locally defined
+    closures, and inlines the body of the closure function to the call site.
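+
+    For example (an illustrative sketch only), in::
+
+        def outer(x):
+            def inner(y):
+                return y + 1
+            return inner(x)
+
+    the body of ``inner`` is spliced into ``outer`` at the ``inner(x)``
+    call site.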
+ """ + + def __init__(self, func_ir, parallel_options, swapped={}, typed=False): + self.func_ir = func_ir + self.parallel_options = parallel_options + self.swapped = swapped + self._processed_stencils = [] + self.typed = typed + + def run(self): + """Run inline closure call pass. + """ + # Analysis relies on ir.Del presence, strip out later + pp = postproc.PostProcessor(self.func_ir) + pp.run(True) + + modified = False + work_list = list(self.func_ir.blocks.items()) + debug_print = _make_debug_print("InlineClosureCallPass") + debug_print("START") + while work_list: + label, block = work_list.pop() + for i, instr in enumerate(block.body): + if isinstance(instr, ir.Assign): + lhs = instr.target + expr = instr.value + if isinstance(expr, ir.Expr) and expr.op == 'call': + call_name = guard(find_callname, self.func_ir, expr) + func_def = guard(get_definition, self.func_ir, expr.func) + + if guard(self._inline_reduction, + work_list, block, i, expr, call_name): + modified = True + break # because block structure changed + + if guard(self._inline_closure, + work_list, block, i, func_def): + modified = True + break # because block structure changed + + if guard(self._inline_stencil, + instr, call_name, func_def): + modified = True + + if enable_inline_arraycall: + # Identify loop structure + if modified: + # Need to do some cleanups if closure inlining kicked in + merge_adjacent_blocks(self.func_ir.blocks) + cfg = compute_cfg_from_blocks(self.func_ir.blocks) + debug_print("start inline arraycall") + _debug_dump(cfg) + loops = cfg.loops() + sized_loops = [(k, len(loops[k].body)) for k in loops.keys()] + visited = [] + # We go over all loops, bigger loops first (outer first) + for k, s in sorted(sized_loops, key=lambda tup: tup[1], reverse=True): + visited.append(k) + if guard(_inline_arraycall, self.func_ir, cfg, visited, loops[k], + self.swapped, self.parallel_options.comprehension, + self.typed): + modified = True + if modified: + _fix_nested_array(self.func_ir) + + if modified: + # clean up now dead/unreachable blocks, e.g. unconditionally raising + # an exception in an inlined function would render some parts of the + # inliner unreachable + cfg = compute_cfg_from_blocks(self.func_ir.blocks) + for dead in cfg.dead_nodes(): + del self.func_ir.blocks[dead] + + # run dead code elimination + dead_code_elimination(self.func_ir) + # do label renaming + self.func_ir.blocks = rename_labels(self.func_ir.blocks) + + # inlining done, strip dels + remove_dels(self.func_ir.blocks) + + debug_print("END") + + def _inline_reduction(self, work_list, block, i, expr, call_name): + # only inline reduction in sequential execution, parallel handling + # is done in ParforPass. 
+ require(not self.parallel_options.reduction) + require(call_name == ('reduce', 'builtins') or + call_name == ('reduce', '_functools')) + if len(expr.args) not in (2, 3): + raise TypeError("invalid reduce call, " + "two arguments are required (optional initial " + "value can also be specified)") + check_reduce_func(self.func_ir, expr.args[0]) + def reduce_func(f, A, v=None): + it = iter(A) + if v is not None: + s = v + else: + s = next(it) + for a in it: + s = f(s, a) + return s + inline_closure_call(self.func_ir, + self.func_ir.func_id.func.__globals__, + block, i, reduce_func, work_list=work_list, + callee_validator=callee_ir_validator) + return True + + def _inline_stencil(self, instr, call_name, func_def): + from numba.stencils.stencil import StencilFunc + lhs = instr.target + expr = instr.value + # We keep the escaping variables of the stencil kernel + # alive by adding them to the actual kernel call as extra + # keyword arguments, which is ignored anyway. + if (isinstance(func_def, ir.Global) and + func_def.name == 'stencil' and + isinstance(func_def.value, StencilFunc)): + if expr.kws: + expr.kws += func_def.value.kws + else: + expr.kws = func_def.value.kws + return True + # Otherwise we proceed to check if it is a call to numba.stencil + require(call_name == ('stencil', 'numba.stencils.stencil') or + call_name == ('stencil', 'numba')) + require(expr not in self._processed_stencils) + self._processed_stencils.append(expr) + if not len(expr.args) == 1: + raise ValueError("As a minimum Stencil requires" + " a kernel as an argument") + stencil_def = guard(get_definition, self.func_ir, expr.args[0]) + require(isinstance(stencil_def, ir.Expr) and + stencil_def.op == "make_function") + kernel_ir = get_ir_of_code(self.func_ir.func_id.func.__globals__, + stencil_def.code) + options = dict(expr.kws) + if 'neighborhood' in options: + fixed = guard(self._fix_stencil_neighborhood, options) + if not fixed: + raise ValueError("stencil neighborhood option should be a tuple" + " with constant structure such as ((-w, w),)") + if 'index_offsets' in options: + fixed = guard(self._fix_stencil_index_offsets, options) + if not fixed: + raise ValueError("stencil index_offsets option should be a tuple" + " with constant structure such as (offset, )") + sf = StencilFunc(kernel_ir, 'constant', options) + sf.kws = expr.kws # hack to keep variables live + sf_global = ir.Global('stencil', sf, expr.loc) + self.func_ir._definitions[lhs.name] = [sf_global] + instr.value = sf_global + return True + + def _fix_stencil_neighborhood(self, options): + """ + Extract the two-level tuple representing the stencil neighborhood + from the program IR to provide a tuple to StencilFunc. + """ + # build_tuple node with neighborhood for each dimension + dims_build_tuple = get_definition(self.func_ir, options['neighborhood']) + require(hasattr(dims_build_tuple, 'items')) + res = [] + for window_var in dims_build_tuple.items: + win_build_tuple = get_definition(self.func_ir, window_var) + require(hasattr(win_build_tuple, 'items')) + res.append(tuple(win_build_tuple.items)) + options['neighborhood'] = tuple(res) + return True + + def _fix_stencil_index_offsets(self, options): + """ + Extract the tuple representing the stencil index offsets + from the program IR to provide to StencilFunc. 
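+
+        For example, an ``index_offsets=(1, 0)`` option in the source is
+        recovered here as the constant tuple ``(1, 0)``.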
+ """ + offset_tuple = get_definition(self.func_ir, options['index_offsets']) + require(hasattr(offset_tuple, 'items')) + options['index_offsets'] = tuple(offset_tuple.items) + return True + + def _inline_closure(self, work_list, block, i, func_def): + require(isinstance(func_def, ir.Expr) and + func_def.op == "make_function") + inline_closure_call(self.func_ir, + self.func_ir.func_id.func.__globals__, + block, i, func_def, work_list=work_list, + callee_validator=callee_ir_validator) + return True + +def check_reduce_func(func_ir, func_var): + """Checks the function at func_var in func_ir to make sure it's amenable + for inlining. Returns the function itself""" + reduce_func = guard(get_definition, func_ir, func_var) + if reduce_func is None: + raise ValueError("Reduce function cannot be found for njit \ + analysis") + if isinstance(reduce_func, (ir.FreeVar, ir.Global)): + if not isinstance(reduce_func.value, + numba.core.registry.CPUDispatcher): + raise ValueError("Invalid reduction function") + # pull out the python function for inlining + reduce_func = reduce_func.value.py_func + elif not (hasattr(reduce_func, 'code') + or hasattr(reduce_func, '__code__')): + raise ValueError("Invalid reduction function") + f_code = (reduce_func.code if hasattr(reduce_func, 'code') + else reduce_func.__code__) + if not f_code.co_argcount == 2: + raise TypeError("Reduction function should take 2 arguments") + return reduce_func + + +class InlineWorker(object): + """ A worker class for inlining, this is a more advanced version of + `inline_closure_call` in that it permits inlining from function type, Numba + IR and code object. It also, runs the entire untyped compiler pipeline on + the inlinee to ensure that it is transformed as though it were compiled + directly. + """ + + def __init__(self, + typingctx=None, + targetctx=None, + locals=None, + pipeline=None, + flags=None, + validator=callee_ir_validator, + typemap=None, + calltypes=None): + """ + Instantiate a new InlineWorker, all arguments are optional though some + must be supplied together for certain use cases. The methods will refuse + to run if the object isn't configured in the manner needed. Args are the + same as those in a numba.core.Compiler.state, except the validator which + is a function taking Numba IR and validating it for use when inlining + (this is optional and really to just provide better error messages about + things which the inliner cannot handle like yield in closure). 
+ """ + def check(arg, name): + if arg is None: + raise TypeError("{} must not be None".format(name)) + + from numba.core.compiler import DefaultPassBuilder + + # check the stuff needed to run the more advanced compilation pipeline + # is valid if any of it is provided + compiler_args = (targetctx, locals, pipeline, flags) + compiler_group = [x is not None for x in compiler_args] + if any(compiler_group) and not all(compiler_group): + check(targetctx, 'targetctx') + check(locals, 'locals') + check(pipeline, 'pipeline') + check(flags, 'flags') + elif all(compiler_group): + check(typingctx, 'typingctx') + + self._compiler_pipeline = DefaultPassBuilder.define_untyped_pipeline + + self.typingctx = typingctx + self.targetctx = targetctx + self.locals = locals + self.pipeline = pipeline + self.flags = flags + self.validator = validator + self.debug_print = _make_debug_print("InlineWorker") + + # check whether this inliner can also support typemap and calltypes + # update and if what's provided is valid + pair = (typemap, calltypes) + pair_is_none = [x is None for x in pair] + if any(pair_is_none) and not all(pair_is_none): + msg = ("typemap and calltypes must both be either None or have a " + "value, got: %s, %s") + raise TypeError(msg % pair) + self._permit_update_type_and_call_maps = not all(pair_is_none) + self.typemap = typemap + self.calltypes = calltypes + + + def inline_ir(self, caller_ir, block, i, callee_ir, callee_freevars, + arg_typs=None): + """ Inlines the callee_ir in the caller_ir at statement index i of block + `block`, callee_freevars are the free variables for the callee_ir. If + the callee_ir is derived from a function `func` then this is + `func.__code__.co_freevars`. If `arg_typs` is given and the InlineWorker + instance was initialized with a typemap and calltypes then they will be + appropriately updated based on the arg_typs. + """ + + # Always copy the callee IR, it gets mutated + def copy_ir(the_ir): + kernel_copy = the_ir.copy() + kernel_copy.blocks = {} + for block_label, block in the_ir.blocks.items(): + new_block = copy.deepcopy(the_ir.blocks[block_label]) + new_block.body = [] + for stmt in the_ir.blocks[block_label].body: + scopy = copy.deepcopy(stmt) + new_block.body.append(scopy) + kernel_copy.blocks[block_label] = new_block + return kernel_copy + + callee_ir = copy_ir(callee_ir) + + # check that the contents of the callee IR is something that can be + # inlined if a validator is present + if self.validator is not None: + self.validator(callee_ir) + + # save an unmutated copy of the callee_ir to return + callee_ir_original = copy_ir(callee_ir) + scope = block.scope + instr = block.body[i] + call_expr = instr.value + callee_blocks = callee_ir.blocks + + # 1. relabel callee_ir by adding an offset + max_label = max(ir_utils._the_max_label.next(), max(caller_ir.blocks.keys())) + callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1) + callee_blocks = simplify_CFG(callee_blocks) + callee_ir.blocks = callee_blocks + min_label = min(callee_blocks.keys()) + max_label = max(callee_blocks.keys()) + # reset globals in ir_utils before we use it + ir_utils._the_max_label.update(max_label) + self.debug_print("After relabel") + _debug_dump(callee_ir) + + # 2. 
rename all local variables in callee_ir with new locals created in + # caller_ir + callee_scopes = _get_all_scopes(callee_blocks) + self.debug_print("callee_scopes = ", callee_scopes) + # one function should only have one local scope + assert(len(callee_scopes) == 1) + callee_scope = callee_scopes[0] + var_dict = {} + for var in tuple(callee_scope.localvars._con.values()): + if not (var.name in callee_freevars): + inlined_name = _created_inlined_var_name( + callee_ir.func_id.unique_name, var.name) + # Update the caller scope with the new names + new_var = scope.redefine(inlined_name, loc=var.loc) + # Also update the callee scope with the new names. Should the + # type and call maps need updating (which requires SSA form) the + # transformation to SSA is valid as the IR object is internally + # consistent. + callee_scope.redefine(inlined_name, loc=var.loc) + var_dict[var.name] = new_var + self.debug_print("var_dict = ", var_dict) + replace_vars(callee_blocks, var_dict) + self.debug_print("After local var rename") + _debug_dump(callee_ir) + + # 3. replace formal parameters with actual arguments + callee_func = callee_ir.func_id.func + args = _get_callee_args(call_expr, callee_func, block.body[i].loc, + caller_ir) + + # 4. Update typemap + if self._permit_update_type_and_call_maps: + if arg_typs is None: + raise TypeError('arg_typs should have a value not None') + self.update_type_and_call_maps(callee_ir, arg_typs) + # update_type_and_call_maps replaces blocks + callee_blocks = callee_ir.blocks + + self.debug_print("After arguments rename: ") + _debug_dump(callee_ir) + + _replace_args_with(callee_blocks, args) + # 5. split caller blocks into two + new_blocks = [] + new_block = ir.Block(scope, block.loc) + new_block.body = block.body[i + 1:] + new_label = next_label() + caller_ir.blocks[new_label] = new_block + new_blocks.append((new_label, new_block)) + block.body = block.body[:i] + block.body.append(ir.Jump(min_label, instr.loc)) + + # 6. replace Return with assignment to LHS + topo_order = find_topo_order(callee_blocks) + _replace_returns(callee_blocks, instr.target, new_label) + + # remove the old definition of instr.target too + if (instr.target.name in caller_ir._definitions + and call_expr in caller_ir._definitions[instr.target.name]): + # NOTE: target can have multiple definitions due to control flow + caller_ir._definitions[instr.target.name].remove(call_expr) + + # 7. insert all new blocks, and add back definitions + for label in topo_order: + # block scope must point to parent's + block = callee_blocks[label] + block.scope = scope + _add_definitions(caller_ir, block) + caller_ir.blocks[label] = block + new_blocks.append((label, block)) + self.debug_print("After merge in") + _debug_dump(caller_ir) + + return callee_ir_original, callee_blocks, var_dict, new_blocks + + def inline_function(self, caller_ir, block, i, function, arg_typs=None): + """ Inlines the function in the caller_ir at statement index i of block + `block`. If `arg_typs` is given and the InlineWorker instance was + initialized with a typemap and calltypes then they will be appropriately + updated based on the arg_typs. + """ + callee_ir = self.run_untyped_passes(function) + freevars = function.__code__.co_freevars + return self.inline_ir(caller_ir, block, i, callee_ir, freevars, + arg_typs=arg_typs) + + def run_untyped_passes(self, func, enable_ssa=False): + """ + Run the compiler frontend's untyped passes over the given Python + function, and return the function's canonical Numba IR. 
+ + Disable SSA transformation by default, since the call site won't be in SSA + form and self.inline_ir depends on this being the case. + """ + from numba.core.compiler import StateDict, _CompileStatus + from numba.core.untyped_passes import ExtractByteCode, WithLifting + from numba.core import bytecode + from numba.parfors.parfor import ParforDiagnostics + state = StateDict() + state.func_ir = None + state.typingctx = self.typingctx + state.targetctx = self.targetctx + state.locals = self.locals + state.pipeline = self.pipeline + state.flags = self.flags + state.flags.enable_ssa = enable_ssa + + state.func_id = bytecode.FunctionIdentity.from_function(func) + + state.typemap = None + state.calltypes = None + state.type_annotation = None + state.status = _CompileStatus(False) + state.return_type = None + state.parfor_diagnostics = ParforDiagnostics() + state.metadata = {} + + ExtractByteCode().run_pass(state) + # This is a lie, just need *some* args for the case where an obj mode + # with lift is needed + state.args = len(state.bc.func_id.pysig.parameters) * (types.pyobject,) + + pm = self._compiler_pipeline(state) + + pm.finalize() + pm.run(state) + return state.func_ir + + def update_type_and_call_maps(self, callee_ir, arg_typs): + """ Updates the type and call maps based on calling callee_ir with arguments + from arg_typs""" + from numba.core.ssa import reconstruct_ssa + from numba.core.typed_passes import PreLowerStripPhis + + if not self._permit_update_type_and_call_maps: + msg = ("InlineWorker instance not configured correctly, typemap or " + "calltypes missing in initialization.") + raise ValueError(msg) + from numba.core import typed_passes + # call branch pruning to simplify IR and avoid inference errors + callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks) + numba.core.analysis.dead_branch_prune(callee_ir, arg_typs) + # callee's typing may require SSA + callee_ir = reconstruct_ssa(callee_ir) + callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks) + f_typemap, f_return_type, f_calltypes, _ = typed_passes.type_inference_stage( + self.typingctx, self.targetctx, callee_ir, arg_typs, None) + callee_ir = PreLowerStripPhis()._strip_phi_nodes(callee_ir) + callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks) + canonicalize_array_math(callee_ir, f_typemap, + f_calltypes, self.typingctx) + # remove argument entries like arg.a from typemap + arg_names = [vname for vname in f_typemap if vname.startswith("arg.")] + for a in arg_names: + f_typemap.pop(a) + self.typemap.update(f_typemap) + self.calltypes.update(f_calltypes) + + +def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None, + targetctx=None, arg_typs=None, typemap=None, + calltypes=None, work_list=None, callee_validator=None, + replace_freevars=True): + """Inline the body of `callee` at its callsite (`i`-th instruction of `block`) + + `func_ir` is the func_ir object of the caller function and `glbls` is its + global variable environment (func_ir.func_id.func.__globals__). + `block` is the IR block of the callsite and `i` is the index of the + callsite's node. `callee` is either the called function or a + make_function node. `typingctx`, `typemap` and `calltypes` are typing + data structures of the caller, available if we are in a typed pass. + `arg_typs` includes the types of the arguments at the callsite. 
+ `callee_validator` is an optional callable which can be used to validate the + IR of the callee to ensure that it contains IR supported for inlining, it + takes one argument, the func_ir of the callee + + Returns IR blocks of the callee and the variable renaming dictionary used + for them to facilitate further processing of new blocks. + """ + scope = block.scope + instr = block.body[i] + call_expr = instr.value + debug_print = _make_debug_print("inline_closure_call") + debug_print("Found closure call: ", instr, " with callee = ", callee) + # support both function object and make_function Expr + callee_code = callee.code if hasattr(callee, 'code') else callee.__code__ + callee_closure = callee.closure if hasattr(callee, 'closure') else callee.__closure__ + # first, get the IR of the callee + if isinstance(callee, pytypes.FunctionType): + from numba.core import compiler + callee_ir = compiler.run_frontend(callee, inline_closures=True) + else: + callee_ir = get_ir_of_code(glbls, callee_code) + + # check that the contents of the callee IR is something that can be inlined + # if a validator is supplied + if callee_validator is not None: + callee_validator(callee_ir) + + callee_blocks = callee_ir.blocks + + # 1. relabel callee_ir by adding an offset + max_label = max(ir_utils._the_max_label.next(), max(func_ir.blocks.keys())) + callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1) + callee_blocks = simplify_CFG(callee_blocks) + callee_ir.blocks = callee_blocks + min_label = min(callee_blocks.keys()) + max_label = max(callee_blocks.keys()) + # reset globals in ir_utils before we use it + ir_utils._the_max_label.update(max_label) + debug_print("After relabel") + _debug_dump(callee_ir) + + # 2. rename all local variables in callee_ir with new locals created in func_ir + callee_scopes = _get_all_scopes(callee_blocks) + debug_print("callee_scopes = ", callee_scopes) + # one function should only have one local scope + assert(len(callee_scopes) == 1) + callee_scope = callee_scopes[0] + var_dict = {} + for var in callee_scope.localvars._con.values(): + if not (var.name in callee_code.co_freevars): + inlined_name = _created_inlined_var_name( + callee_ir.func_id.unique_name, var.name) + new_var = scope.redefine(inlined_name, loc=var.loc) + var_dict[var.name] = new_var + debug_print("var_dict = ", var_dict) + replace_vars(callee_blocks, var_dict) + debug_print("After local var rename") + _debug_dump(callee_ir) + + # 3. replace formal parameters with actual arguments + args = _get_callee_args(call_expr, callee, block.body[i].loc, func_ir) + + debug_print("After arguments rename: ") + _debug_dump(callee_ir) + + # 4. 
replace freevar with actual closure var
+    if callee_closure and replace_freevars:
+        closure = func_ir.get_definition(callee_closure)
+        debug_print("callee's closure = ", closure)
+        if isinstance(closure, tuple):
+            cellget = ctypes.pythonapi.PyCell_Get
+            cellget.restype = ctypes.py_object
+            cellget.argtypes = (ctypes.py_object,)
+            items = tuple(cellget(x) for x in closure)
+        else:
+            assert(isinstance(closure, ir.Expr)
+                   and closure.op == 'build_tuple')
+            items = closure.items
+        assert(len(callee_code.co_freevars) == len(items))
+        _replace_freevars(callee_blocks, items)
+        debug_print("After closure rename")
+        _debug_dump(callee_ir)
+
+    if typingctx:
+        from numba.core import typed_passes
+        # call branch pruning to simplify IR and avoid inference errors
+        callee_ir._definitions = ir_utils.build_definitions(callee_ir.blocks)
+        numba.core.analysis.dead_branch_prune(callee_ir, arg_typs)
+        f_typemap, f_return_type, f_calltypes, _ = typed_passes.type_inference_stage(
+                typingctx, targetctx, callee_ir, arg_typs, None)
+        canonicalize_array_math(callee_ir, f_typemap,
+                                f_calltypes, typingctx)
+        # remove argument entries like arg.a from typemap
+        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
+        for a in arg_names:
+            f_typemap.pop(a)
+        typemap.update(f_typemap)
+        calltypes.update(f_calltypes)
+
+    _replace_args_with(callee_blocks, args)
+    # 5. split caller blocks into two
+    new_blocks = []
+    new_block = ir.Block(scope, block.loc)
+    new_block.body = block.body[i + 1:]
+    new_label = next_label()
+    func_ir.blocks[new_label] = new_block
+    new_blocks.append((new_label, new_block))
+    block.body = block.body[:i]
+    block.body.append(ir.Jump(min_label, instr.loc))
+
+    # 6. replace Return with assignment to LHS
+    topo_order = find_topo_order(callee_blocks)
+    _replace_returns(callee_blocks, instr.target, new_label)
+
+    # remove the old definition of instr.target too
+    if (instr.target.name in func_ir._definitions
+            and call_expr in func_ir._definitions[instr.target.name]):
+        # NOTE: target can have multiple definitions due to control flow
+        func_ir._definitions[instr.target.name].remove(call_expr)
+
+    # 7. insert all new blocks, and add back definitions
+    for label in topo_order:
+        # block scope must point to parent's
+        block = callee_blocks[label]
+        block.scope = scope
+        _add_definitions(func_ir, block)
+        func_ir.blocks[label] = block
+        new_blocks.append((label, block))
+    debug_print("After merge in")
+    _debug_dump(func_ir)
+
+    if work_list is not None:
+        for block in new_blocks:
+            work_list.append(block)
+    return callee_blocks, var_dict
+
+
+def _get_callee_args(call_expr, callee, loc, func_ir):
+    """Get arguments for calling 'callee', including the default arguments.
+    Keyword arguments are currently only handled when 'callee' is a function.
+    """
+    if call_expr.op == 'call':
+        args = list(call_expr.args)
+        if call_expr.vararg:
+            msg = "Calling a closure with *args is unsupported."
+            raise errors.UnsupportedError(msg, call_expr.loc)
+    elif call_expr.op == 'getattr':
+        args = [call_expr.value]
+    elif ir_utils.is_operator_or_getitem(call_expr):
+        args = call_expr.list_vars()
+    else:
+        raise TypeError("Unsupported ir.Expr.{}".format(call_expr.op))
+
+    debug_print = _make_debug_print("inline_closure_call default handling")
+
+    # handle defaults and kw arguments using pysignature if callee is function
+    if isinstance(callee, pytypes.FunctionType):
+        pysig = numba.core.utils.pysignature(callee)
+        normal_handler = lambda index, param, default: default
+        default_handler = lambda index, param, default: ir.Const(default, loc)
+        # Throw error for stararg
+        # TODO: handle stararg
+        def stararg_handler(index, param, default):
+            raise NotImplementedError(
+                "Stararg not supported in inliner for arg {} {}".format(
+                    index, param))
+        if call_expr.op == 'call':
+            kws = dict(call_expr.kws)
+        else:
+            kws = {}
+        return numba.core.typing.fold_arguments(
+            pysig, args, kws, normal_handler, default_handler,
+            stararg_handler)
+    else:
+        # TODO: handle arguments for make_function case similar to function
+        # case above
+        callee_defaults = (callee.defaults if hasattr(callee, 'defaults')
+                           else callee.__defaults__)
+        if callee_defaults:
+            debug_print("defaults = ", callee_defaults)
+            if isinstance(callee_defaults, tuple):  # Python 3.5
+                defaults_list = []
+                for x in callee_defaults:
+                    if isinstance(x, ir.Var):
+                        defaults_list.append(x)
+                    else:
+                        # this branch is predominantly for kwargs from
+                        # inlinable functions
+                        defaults_list.append(ir.Const(value=x, loc=loc))
+                args = args + defaults_list
+            elif (isinstance(callee_defaults, ir.Var)
+                    or isinstance(callee_defaults, str)):
+                default_tuple = func_ir.get_definition(callee_defaults)
+                assert(isinstance(default_tuple, ir.Expr))
+                assert(default_tuple.op == "build_tuple")
+                const_vals = [func_ir.get_definition(x) for
+                              x in default_tuple.items]
+                args = args + const_vals
+            else:
+                raise NotImplementedError(
+                    "Unsupported defaults to make_function: {}".format(
+                        callee_defaults))
+        return args
+
+
+def _make_debug_print(prefix):
+    def debug_print(*args):
+        if config.DEBUG_INLINE_CLOSURE:
+            print(prefix + ": " + "".join(str(x) for x in args))
+    return debug_print
+
+
+def _debug_dump(func_ir):
+    if config.DEBUG_INLINE_CLOSURE:
+        func_ir.dump()
+
+
+def _get_all_scopes(blocks):
+    """Get all block-local scopes from an IR.
+    """
+    all_scopes = []
+    for label, block in blocks.items():
+        if not (block.scope in all_scopes):
+            all_scopes.append(block.scope)
+    return all_scopes
+
+
+def _replace_args_with(blocks, args):
+    """
+    Replace ir.Arg(...) with real arguments from call site
+    """
+    for label, block in blocks.items():
+        assigns = block.find_insts(ir.Assign)
+        for stmt in assigns:
+            if isinstance(stmt.value, ir.Arg):
+                idx = stmt.value.index
+                assert(idx < len(args))
+                stmt.value = args[idx]
+
+
+def _replace_freevars(blocks, args):
+    """
+    Replace ir.FreeVar(...) with real variables from parent function
+    """
+    for label, block in blocks.items():
+        assigns = block.find_insts(ir.Assign)
+        for stmt in assigns:
+            if isinstance(stmt.value, ir.FreeVar):
+                idx = stmt.value.index
+                assert(idx < len(args))
+                if isinstance(args[idx], ir.Var):
+                    stmt.value = args[idx]
+                else:
+                    stmt.value = ir.Const(args[idx], stmt.loc)
+
+
+def _replace_returns(blocks, target, return_label):
+    """
+    Replace each return statement with an assignment to *target* followed
+    by a jump.
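+
+    Schematically, ``return x`` becomes::
+
+        target = x
+        jump return_label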
+ """ + for label, block in blocks.items(): + casts = [] + for i in range(len(block.body)): + stmt = block.body[i] + if isinstance(stmt, ir.Return): + assert(i + 1 == len(block.body)) + block.body[i] = ir.Assign(stmt.value, target, stmt.loc) + block.body.append(ir.Jump(return_label, stmt.loc)) + # remove cast of the returned value + for cast in casts: + if cast.target.name == stmt.value.name: + cast.value = cast.value.value + elif isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr) and stmt.value.op == 'cast': + casts.append(stmt) + +def _add_definitions(func_ir, block): + """ + Add variable definitions found in a block to parent func_ir. + """ + definitions = func_ir._definitions + assigns = block.find_insts(ir.Assign) + for stmt in assigns: + definitions[stmt.target.name].append(stmt.value) + +def _find_arraycall(func_ir, block): + """Look for statement like "x = numpy.array(y)" or "x[..] = y" + immediately after the closure call that creates list y (the i-th + statement in block). Return the statement index if found, or + raise GuardException. + """ + array_var = None + array_call_index = None + list_var_dead_after_array_call = False + list_var = None + + i = 0 + while i < len(block.body): + instr = block.body[i] + if isinstance(instr, ir.Del): + # Stop the process if list_var becomes dead + if list_var and array_var and instr.value == list_var.name: + list_var_dead_after_array_call = True + break + pass + elif isinstance(instr, ir.Assign): + # Found array_var = array(list_var) + lhs = instr.target + expr = instr.value + if (guard(find_callname, func_ir, expr) == ('array', 'numpy') and + isinstance(expr.args[0], ir.Var)): + list_var = expr.args[0] + array_var = lhs + array_stmt_index = i + array_kws = dict(expr.kws) + elif (isinstance(instr, ir.SetItem) and + isinstance(instr.value, ir.Var) and + not list_var): + list_var = instr.value + # Found array_var[..] = list_var, the case for nested array + array_var = instr.target + array_def = get_definition(func_ir, array_var) + require(guard(_find_unsafe_empty_inferred, func_ir, array_def)) + array_stmt_index = i + array_kws = {} + else: + # Bail out otherwise + break + i = i + 1 + # require array_var is found, and list_var is dead after array_call. + require(array_var and list_var_dead_after_array_call) + _make_debug_print("find_array_call")(block.body[array_stmt_index]) + return list_var, array_stmt_index, array_kws + + +def _find_iter_range(func_ir, range_iter_var, swapped): + """Find the iterator's actual range if it is either range(n), or range(m, n), + otherwise return raise GuardException. 
+ """ + debug_print = _make_debug_print("find_iter_range") + range_iter_def = get_definition(func_ir, range_iter_var) + debug_print("range_iter_var = ", range_iter_var, " def = ", range_iter_def) + require(isinstance(range_iter_def, ir.Expr) and range_iter_def.op == 'getiter') + range_var = range_iter_def.value + range_def = get_definition(func_ir, range_var) + debug_print("range_var = ", range_var, " range_def = ", range_def) + require(isinstance(range_def, ir.Expr) and range_def.op == 'call') + func_var = range_def.func + func_def = get_definition(func_ir, func_var) + debug_print("func_var = ", func_var, " func_def = ", func_def) + require(isinstance(func_def, ir.Global) and + (func_def.value == range or func_def.value == numba.misc.special.prange)) + nargs = len(range_def.args) + swapping = [('"array comprehension"', 'closure of'), range_def.func.loc] + if nargs == 1: + swapped[range_def.func.name] = swapping + stop = get_definition(func_ir, range_def.args[0], lhs_only=True) + return (0, range_def.args[0], func_def) + elif nargs == 2: + swapped[range_def.func.name] = swapping + start = get_definition(func_ir, range_def.args[0], lhs_only=True) + stop = get_definition(func_ir, range_def.args[1], lhs_only=True) + return (start, stop, func_def) + else: + raise GuardException + +def _inline_arraycall(func_ir, cfg, visited, loop, swapped, enable_prange=False, + typed=False): + """Look for array(list) call in the exit block of a given loop, and turn list operations into + array operations in the loop if the following conditions are met: + 1. The exit block contains an array call on the list; + 2. The list variable is no longer live after array call; + 3. The list is created in the loop entry block; + 4. The loop is created from an range iterator whose length is known prior to the loop; + 5. There is only one list_append operation on the list variable in the loop body; + 6. The block that contains list_append dominates the loop head, which ensures list + length is the same as loop length; + If any condition check fails, no modification will be made to the incoming IR. + """ + debug_print = _make_debug_print("inline_arraycall") + # There should only be one loop exit + require(len(loop.exits) == 1) + exit_block = next(iter(loop.exits)) + list_var, array_call_index, array_kws = _find_arraycall(func_ir, func_ir.blocks[exit_block]) + + # check if dtype is present in array call + dtype_def = None + dtype_mod_def = None + if 'dtype' in array_kws: + require(isinstance(array_kws['dtype'], ir.Var)) + # We require that dtype argument to be a constant of getattr Expr, and we'll + # remember its definition for later use. + dtype_def = get_definition(func_ir, array_kws['dtype']) + require(isinstance(dtype_def, ir.Expr) and dtype_def.op == 'getattr') + dtype_mod_def = get_definition(func_ir, dtype_def.value) + + list_var_def = get_definition(func_ir, list_var) + debug_print("list_var = ", list_var, " def = ", list_var_def) + if isinstance(list_var_def, ir.Expr) and list_var_def.op == 'cast': + list_var_def = get_definition(func_ir, list_var_def.value) + # Check if the definition is a build_list + require(isinstance(list_var_def, ir.Expr) and list_var_def.op == 'build_list') + # The build_list must be empty + require(len(list_var_def.items) == 0) + + # Look for list_append in "last" block in loop body, which should be a block that is + # a post-dominator of the loop header. + list_append_stmts = [] + for label in loop.body: + # We have to consider blocks of this loop, but not sub-loops. 
+ # To achieve this, we require the set of "in_loops" of "label" to be visited loops. + in_visited_loops = [l.header in visited for l in cfg.in_loops(label)] + if not all(in_visited_loops): + continue + block = func_ir.blocks[label] + debug_print("check loop body block ", label) + for stmt in block.find_insts(ir.Assign): + lhs = stmt.target + expr = stmt.value + if isinstance(expr, ir.Expr) and expr.op == 'call': + func_def = get_definition(func_ir, expr.func) + if isinstance(func_def, ir.Expr) and func_def.op == 'getattr' \ + and func_def.attr == 'append': + list_def = get_definition(func_ir, func_def.value) + debug_print("list_def = ", list_def, list_def is list_var_def) + if list_def is list_var_def: + # found matching append call + list_append_stmts.append((label, block, stmt)) + + # Require only one list_append, otherwise we won't know the indices + require(len(list_append_stmts) == 1) + append_block_label, append_block, append_stmt = list_append_stmts[0] + + # Check if append_block (besides loop entry) dominates loop header. + # Since CFG doesn't give us this info without loop entry, we approximate + # by checking if the predecessor set of the header block is the same + # as loop_entries plus append_block, which is certainly more restrictive + # than necessary, and can be relaxed if needed. + preds = set(l for l, b in cfg.predecessors(loop.header)) + debug_print("preds = ", preds, (loop.entries | set([append_block_label]))) + require(preds == (loop.entries | set([append_block_label]))) + + # Find iterator in loop header + iter_vars = [] + iter_first_vars = [] + loop_header = func_ir.blocks[loop.header] + for stmt in loop_header.find_insts(ir.Assign): + expr = stmt.value + if isinstance(expr, ir.Expr): + if expr.op == 'iternext': + iter_def = get_definition(func_ir, expr.value) + debug_print("iter_def = ", iter_def) + iter_vars.append(expr.value) + elif expr.op == 'pair_first': + iter_first_vars.append(stmt.target) + + # Require only one iterator in loop header + require(len(iter_vars) == 1 and len(iter_first_vars) == 1) + iter_var = iter_vars[0] # variable that holds the iterator object + iter_first_var = iter_first_vars[0] # variable that holds the value out of iterator + + # Final requirement: only one loop entry, and we're going to modify it by: + # 1. replacing the list definition with an array definition; + # 2. adding a counter for the array iteration. + require(len(loop.entries) == 1) + loop_entry = func_ir.blocks[next(iter(loop.entries))] + terminator = loop_entry.terminator + scope = loop_entry.scope + loc = loop_entry.loc + stmts = [] + removed = [] + def is_removed(val, removed): + if isinstance(val, ir.Var): + for x in removed: + if x.name == val.name: + return True + return False + # Skip list construction and skip terminator, add the rest to stmts + for i in range(len(loop_entry.body) - 1): + stmt = loop_entry.body[i] + if isinstance(stmt, ir.Assign) and (stmt.value is list_def or is_removed(stmt.value, removed)): + removed.append(stmt.target) + else: + stmts.append(stmt) + debug_print("removed variables: ", removed) + + # Define an index_var to index the array. + # If the range happens to be single step ranges like range(n), or range(m, n), + # then the index_var correlates to iterator index; otherwise we'll have to + # define a new counter. 
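+    # Schematically (an illustrative sketch, assuming all the guards above
+    # held), the overall rewrite turns
+    #     L = []
+    #     for i in range(n):
+    #         L.append(f(i))
+    #     A = np.array(L)
+    # into
+    #     A = np.empty((n,), dtype)   # or unsafe_empty_inferred
+    #     for i in range(n):
+    #         A[i] = f(i)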
+ range_def = guard(_find_iter_range, func_ir, iter_var, swapped) + index_var = ir.Var(scope, mk_unique_var("index"), loc) + if range_def and range_def[0] == 0: + # iterator starts with 0, index_var can just be iter_first_var + index_var = iter_first_var + else: + # index_var = -1 # starting the index with -1 since it will incremented in loop header + stmts.append(_new_definition(func_ir, index_var, ir.Const(value=-1, loc=loc), loc)) + + # Insert statement to get the size of the loop iterator + size_var = ir.Var(scope, mk_unique_var("size"), loc) + if range_def: + start, stop, range_func_def = range_def + if start == 0: + size_val = stop + else: + size_val = ir.Expr.binop(fn=operator.sub, lhs=stop, rhs=start, loc=loc) + # we can parallelize this loop if enable_prange = True, by changing + # range function from range, to prange. + if enable_prange and isinstance(range_func_def, ir.Global): + range_func_def.name = 'internal_prange' + range_func_def.value = internal_prange + + else: + # this doesn't work in objmode as it's effectively untyped + if typed: + len_func_var = ir.Var(scope, mk_unique_var("len_func"), loc) + from numba.cpython.rangeobj import length_of_iterator + stmts.append(_new_definition(func_ir, len_func_var, + ir.Global('length_of_iterator', + length_of_iterator, + loc=loc), + loc)) + size_val = ir.Expr.call(len_func_var, (iter_var,), (), loc=loc) + else: + raise GuardException + + + stmts.append(_new_definition(func_ir, size_var, size_val, loc)) + + size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc) + stmts.append(_new_definition(func_ir, size_tuple_var, + ir.Expr.build_tuple(items=[size_var], loc=loc), loc)) + + # Insert array allocation + array_var = ir.Var(scope, mk_unique_var("array"), loc) + empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc) + if dtype_def and dtype_mod_def: + # when dtype is present, we'll call empty with dtype + dtype_mod_var = ir.Var(scope, mk_unique_var("dtype_mod"), loc) + dtype_var = ir.Var(scope, mk_unique_var("dtype"), loc) + stmts.append(_new_definition(func_ir, dtype_mod_var, dtype_mod_def, loc)) + stmts.append(_new_definition(func_ir, dtype_var, + ir.Expr.getattr(dtype_mod_var, dtype_def.attr, loc), loc)) + stmts.append(_new_definition(func_ir, empty_func, + ir.Global('empty', np.empty, loc=loc), loc)) + array_kws = [('dtype', dtype_var)] + else: + # this doesn't work in objmode as it's effectively untyped + if typed: + # otherwise we'll call unsafe_empty_inferred + stmts.append(_new_definition(func_ir, empty_func, + ir.Global('unsafe_empty_inferred', + unsafe_empty_inferred, loc=loc), loc)) + array_kws = [] + else: + raise GuardException + + # array_var = empty_func(size_tuple_var) + stmts.append(_new_definition(func_ir, array_var, + ir.Expr.call(empty_func, (size_tuple_var,), list(array_kws), loc=loc), loc)) + + # Add back removed just in case they are used by something else + for var in removed: + stmts.append(_new_definition(func_ir, var, array_var, loc)) + + # Add back terminator + stmts.append(terminator) + # Modify loop_entry + loop_entry.body = stmts + + if range_def: + if range_def[0] != 0: + # when range doesn't start from 0, index_var becomes loop index + # (iter_first_var) minus an offset (range_def[0]) + terminator = loop_header.terminator + assert(isinstance(terminator, ir.Branch)) + # find the block in the loop body that header jumps to + block_id = terminator.truebr + blk = func_ir.blocks[block_id] + loc = blk.loc + blk.body.insert(0, _new_definition(func_ir, index_var, + ir.Expr.binop(fn=operator.sub, 
lhs=iter_first_var, + rhs=range_def[0], loc=loc), + loc)) + else: + # Insert index_var increment to the end of loop header + loc = loop_header.loc + terminator = loop_header.terminator + stmts = loop_header.body[0:-1] + next_index_var = ir.Var(scope, mk_unique_var("next_index"), loc) + one = ir.Var(scope, mk_unique_var("one"), loc) + # one = 1 + stmts.append(_new_definition(func_ir, one, + ir.Const(value=1,loc=loc), loc)) + # next_index_var = index_var + 1 + stmts.append(_new_definition(func_ir, next_index_var, + ir.Expr.binop(fn=operator.add, lhs=index_var, rhs=one, loc=loc), loc)) + # index_var = next_index_var + stmts.append(_new_definition(func_ir, index_var, next_index_var, loc)) + stmts.append(terminator) + loop_header.body = stmts + + # In append_block, change list_append into array assign + for i in range(len(append_block.body)): + if append_block.body[i] is append_stmt: + debug_print("Replace append with SetItem") + append_block.body[i] = ir.SetItem(target=array_var, index=index_var, + value=append_stmt.value.args[0], loc=append_stmt.loc) + + # replace array call, by changing "a = array(b)" to "a = b" + stmt = func_ir.blocks[exit_block].body[array_call_index] + # stmt can be either array call or SetItem, we only replace array call + if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): + stmt.value = array_var + func_ir._definitions[stmt.target.name] = [stmt.value] + + return True + + +def _find_unsafe_empty_inferred(func_ir, expr): + unsafe_empty_inferred + require(isinstance(expr, ir.Expr) and expr.op == 'call') + callee = expr.func + callee_def = get_definition(func_ir, callee) + require(isinstance(callee_def, ir.Global)) + _make_debug_print("_find_unsafe_empty_inferred")(callee_def.value) + return callee_def.value == unsafe_empty_inferred + + +def _fix_nested_array(func_ir): + """Look for assignment like: a[..] = b, where both a and b are numpy arrays, and + try to eliminate array b by expanding a with an extra dimension. + """ + blocks = func_ir.blocks + cfg = compute_cfg_from_blocks(blocks) + usedefs = compute_use_defs(blocks) + empty_deadmap = dict([(label, set()) for label in blocks.keys()]) + livemap = compute_live_variables(cfg, blocks, usedefs.defmap, empty_deadmap) + + def find_array_def(arr): + """Find numpy array definition such as + arr = numba.unsafe.ndarray.empty_inferred(...). + If it is arr = b[...], find array definition of b recursively. + """ + arr_def = get_definition(func_ir, arr) + _make_debug_print("find_array_def")(arr, arr_def) + if isinstance(arr_def, ir.Expr): + if guard(_find_unsafe_empty_inferred, func_ir, arr_def): + return arr_def + elif arr_def.op == 'getitem': + return find_array_def(arr_def.value) + raise GuardException + + def fix_dependencies(expr, varlist): + """Double check if all variables in varlist are defined before + expr is used. Try to move constant definition when the check fails. + Bails out by raising GuardException if it can't be moved. + """ + debug_print = _make_debug_print("fix_dependencies") + for label, block in blocks.items(): + scope = block.scope + body = block.body + defined = set() + for i in range(len(body)): + inst = body[i] + if isinstance(inst, ir.Assign): + defined.add(inst.target.name) + if inst.value is expr: + new_varlist = [] + for var in varlist: + # var must be defined before this inst, or live + # and not later defined. 
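# --- Editor's sketch (illustration only, not part of the patch): a
# source-level pattern that can produce the `a[...] = b` nested-array IR
# _fix_nested_array targets, where each row is first materialised as its own
# array. The pass grows the outer allocation by one extra dimension instead.
# Hypothetical example.
import numpy as np

def table(n, m):
    # each inner np.array(...) is a candidate "b" stored into a row of "a"
    return np.array([np.array([i * m + j for j in range(m)])
                     for i in range(n)])

print(table(2, 3))            # [[0 1 2] [3 4 5]]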
+ if (var.name in defined or + (var.name in livemap[label] and + not (var.name in usedefs.defmap[label]))): + debug_print(var.name, " already defined") + new_varlist.append(var) + else: + debug_print(var.name, " not yet defined") + var_def = get_definition(func_ir, var.name) + if isinstance(var_def, ir.Const): + loc = var.loc + new_var = ir.Var(scope, mk_unique_var("new_var"), loc) + new_const = ir.Const(var_def.value, loc) + new_vardef = _new_definition(func_ir, + new_var, new_const, loc) + new_body = [] + new_body.extend(body[:i]) + new_body.append(new_vardef) + new_body.extend(body[i:]) + block.body = new_body + new_varlist.append(new_var) + else: + raise GuardException + return new_varlist + # when expr is not found in block + raise GuardException + + def fix_array_assign(stmt): + """For assignment like lhs[idx] = rhs, where both lhs and rhs are arrays, do the + following: + 1. find the definition of rhs, which has to be a call to numba.unsafe.ndarray.empty_inferred + 2. find the source array creation for lhs, insert an extra dimension of size of b. + 3. replace the definition of rhs = numba.unsafe.ndarray.empty_inferred(...) with rhs = lhs[idx] + """ + require(isinstance(stmt, ir.SetItem)) + require(isinstance(stmt.value, ir.Var)) + debug_print = _make_debug_print("fix_array_assign") + debug_print("found SetItem: ", stmt) + lhs = stmt.target + # Find the source array creation of lhs + lhs_def = find_array_def(lhs) + debug_print("found lhs_def: ", lhs_def) + rhs_def = get_definition(func_ir, stmt.value) + debug_print("found rhs_def: ", rhs_def) + require(isinstance(rhs_def, ir.Expr)) + if rhs_def.op == 'cast': + rhs_def = get_definition(func_ir, rhs_def.value) + require(isinstance(rhs_def, ir.Expr)) + require(_find_unsafe_empty_inferred(func_ir, rhs_def)) + # Find the array dimension of rhs + dim_def = get_definition(func_ir, rhs_def.args[0]) + require(isinstance(dim_def, ir.Expr) and dim_def.op == 'build_tuple') + debug_print("dim_def = ", dim_def) + extra_dims = [ get_definition(func_ir, x, lhs_only=True) for x in dim_def.items ] + debug_print("extra_dims = ", extra_dims) + # Expand size tuple when creating lhs_def with extra_dims + size_tuple_def = get_definition(func_ir, lhs_def.args[0]) + require(isinstance(size_tuple_def, ir.Expr) and size_tuple_def.op == 'build_tuple') + debug_print("size_tuple_def = ", size_tuple_def) + extra_dims = fix_dependencies(size_tuple_def, extra_dims) + size_tuple_def.items += extra_dims + # In-place modify rhs_def to be getitem + rhs_def.op = 'getitem' + rhs_def.fn = operator.getitem + rhs_def.value = get_definition(func_ir, lhs, lhs_only=True) + rhs_def.index = stmt.index + del rhs_def._kws['func'] + del rhs_def._kws['args'] + del rhs_def._kws['vararg'] + del rhs_def._kws['kws'] + # success + return True + + for label in find_topo_order(func_ir.blocks): + block = func_ir.blocks[label] + for stmt in block.body: + if guard(fix_array_assign, stmt): + block.body.remove(stmt) + +def _new_definition(func_ir, var, value, loc): + func_ir._definitions[var.name] = [value] + return ir.Assign(value=value, target=var, loc=loc) + +@rewrites.register_rewrite('after-inference') +class RewriteArrayOfConsts(rewrites.Rewrite): + '''The RewriteArrayOfConsts class is responsible for finding + 1D array creations from a constant list, and rewriting it into + direct initialization of array elements without creating the list. 
+    '''
+    def __init__(self, state, *args, **kws):
+        self.typingctx = state.typingctx
+        super(RewriteArrayOfConsts, self).__init__(*args, **kws)
+
+    def match(self, func_ir, block, typemap, calltypes):
+        if len(calltypes) == 0:
+            return False
+        self.crnt_block = block
+        self.new_body = guard(_inline_const_arraycall, block, func_ir,
+                              self.typingctx, typemap, calltypes)
+        return self.new_body is not None
+
+    def apply(self):
+        self.crnt_block.body = self.new_body
+        return self.crnt_block
+
+
+def _inline_const_arraycall(block, func_ir, context, typemap, calltypes):
+    """Look for array(list) calls where the list is a constant list created
+    by build_list, and turn them into direct array creation and
+    initialization, if the following conditions are met:
+      1. The build_list call immediately precedes the array call;
+      2. The list variable is no longer live after the array call.
+    If any condition check fails, no modification will be made.
+    """
+    debug_print = _make_debug_print("inline_const_arraycall")
+    scope = block.scope
+
+    def inline_array(array_var, expr, stmts, list_vars, dels):
+        """Check to see if the given "array_var" is created from a list
+        of constants, and try to inline the list definition as array
+        initialization.
+
+        Extra statements produced will be appended to "stmts".
+        """
+        callname = guard(find_callname, func_ir, expr)
+        require(callname and callname[1] == 'numpy' and callname[0] == 'array')
+        require(expr.args[0].name in list_vars)
+        ret_type = calltypes[expr].return_type
+        require(isinstance(ret_type, types.ArrayCompatible) and
+                ret_type.ndim == 1)
+        loc = expr.loc
+        list_var = expr.args[0]
+        # Get the type of the array to be created.
+        array_typ = typemap[array_var.name]
+        debug_print("inline array_var = ", array_var, " list_var = ", list_var)
+        # Get the element type of the array to be created.
+        dtype = array_typ.dtype
+        # Get the sequence of operations to provide values to the new array.
+        seq, _ = find_build_sequence(func_ir, list_var)
+        size = len(seq)
+        # Create a tuple to pass to empty below to specify the new array size.
+        size_var = ir.Var(scope, mk_unique_var("size"), loc)
+        size_tuple_var = ir.Var(scope, mk_unique_var("size_tuple"), loc)
+        size_typ = types.intp
+        size_tuple_typ = types.UniTuple(size_typ, 1)
+        typemap[size_var.name] = size_typ
+        typemap[size_tuple_var.name] = size_tuple_typ
+        stmts.append(_new_definition(func_ir, size_var,
+                     ir.Const(size, loc=loc), loc))
+        stmts.append(_new_definition(func_ir, size_tuple_var,
+                     ir.Expr.build_tuple(items=[size_var], loc=loc), loc))
+
+        # The general approach is to create an empty array and then fill
+        # the elements in one-by-one from their specification.
+
+        # Get the numpy type to pass to empty.
+        nptype = types.DType(dtype)
+
+        # Create a variable to hold the numpy empty function.
+        empty_func = ir.Var(scope, mk_unique_var("empty_func"), loc)
+        fnty = get_np_ufunc_typ(np.empty)
+        sig = context.resolve_function_type(fnty, (size_typ,), {'dtype': nptype})
+
+        typemap[empty_func.name] = fnty
+
+        stmts.append(_new_definition(func_ir, empty_func,
+                     ir.Global('empty', np.empty, loc=loc), loc))
+
+        # We pass two arguments to empty, first the size tuple and second
+        # the dtype of the new array. Here, we create typ_var, which is
+        # the dtype argument of the new array. typ_var in turn is created
+        # by a getattr of the dtype string on the numpy module.
+
+        # Create var for numpy module.
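# --- Editor's sketch (illustration only, not part of the patch): what
# _inline_const_arraycall effectively computes. A call like
# np.array([1.0, 2.0, 3.0]) is replaced by an np.empty allocation plus one
# setitem per element, so the intermediate list is never built. Both
# functions below are hypothetical examples.
import numpy as np

def before():
    return np.array([1.0, 2.0, 3.0])

def after():                        # roughly the rewritten IR's behaviour
    a = np.empty(3, np.float64)     # empty(size_tuple, dtype)
    a[0] = 1.0
    a[1] = 2.0
    a[2] = 3.0
    return a

assert (before() == after()).all()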
+ g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) + typemap[g_np_var.name] = types.misc.Module(np) + g_np = ir.Global('np', np, loc) + stmts.append(_new_definition(func_ir, g_np_var, g_np, loc)) + + # Create var for result of numpy.. + typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc) + typemap[typ_var.name] = nptype + dtype_str = str(dtype) + if dtype_str == 'bool': + dtype_str = 'bool_' + # Get dtype attribute of numpy module. + np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc) + stmts.append(_new_definition(func_ir, typ_var, np_typ_getattr, loc)) + + # Create the call to numpy.empty passing the size tuple and dtype var. + empty_call = ir.Expr.call(empty_func, [size_var, typ_var], {}, loc=loc) + calltypes[empty_call] = typing.signature(array_typ, size_typ, nptype) + stmts.append(_new_definition(func_ir, array_var, empty_call, loc)) + + # Fill in the new empty array one-by-one. + for i in range(size): + index_var = ir.Var(scope, mk_unique_var("index"), loc) + index_typ = types.intp + typemap[index_var.name] = index_typ + stmts.append(_new_definition(func_ir, index_var, + ir.Const(i, loc), loc)) + setitem = ir.SetItem(array_var, index_var, seq[i], loc) + calltypes[setitem] = typing.signature(types.none, array_typ, + index_typ, dtype) + stmts.append(setitem) + + stmts.extend(dels) + return True + + class State(object): + """ + This class is used to hold the state in the following loop so as to make + it easy to reset the state of the variables tracking the various + statement kinds + """ + + def __init__(self): + # list_vars keep track of the variable created from the latest + # build_list instruction, as well as its synonyms. + self.list_vars = [] + # dead_vars keep track of those in list_vars that are considered dead. + self.dead_vars = [] + # list_items keep track of the elements used in build_list. + self.list_items = [] + self.stmts = [] + # dels keep track of the deletion of list_items, which will need to be + # moved after array initialization. + self.dels = [] + # tracks if a modification has taken place + self.modified = False + + def reset(self): + """ + Resets the internal state of the variables used for tracking + """ + self.list_vars = [] + self.dead_vars = [] + self.list_items = [] + self.dels = [] + + def list_var_used(self, inst): + """ + Returns True if the list being analysed is used between the + build_list and the array call. + """ + return any([x.name in self.list_vars for x in inst.list_vars()]) + + state = State() + + for inst in block.body: + if isinstance(inst, ir.Assign): + if isinstance(inst.value, ir.Var): + if inst.value.name in state.list_vars: + state.list_vars.append(inst.target.name) + state.stmts.append(inst) + continue + elif isinstance(inst.value, ir.Expr): + expr = inst.value + if expr.op == 'build_list': + # new build_list encountered, reset state + state.reset() + state.list_items = [x.name for x in expr.items] + state.list_vars = [inst.target.name] + state.stmts.append(inst) + continue + elif expr.op == 'call' and expr in calltypes: + arr_var = inst.target + if guard(inline_array, inst.target, expr, + state.stmts, state.list_vars, state.dels): + state.modified = True + continue + elif isinstance(inst, ir.Del): + removed_var = inst.value + if removed_var in state.list_items: + state.dels.append(inst) + continue + elif removed_var in state.list_vars: + # one of the list_vars is considered dead. 
+ state.dead_vars.append(removed_var) + state.list_vars.remove(removed_var) + state.stmts.append(inst) + if state.list_vars == []: + # if all list_vars are considered dead, we need to filter + # them out from existing stmts to completely remove + # build_list. + # Note that if a translation didn't take place, dead_vars + # will also be empty when we reach this point. + body = [] + for inst in state.stmts: + if ((isinstance(inst, ir.Assign) and + inst.target.name in state.dead_vars) or + (isinstance(inst, ir.Del) and + inst.value in state.dead_vars)): + continue + body.append(inst) + state.stmts = body + state.dead_vars = [] + state.modified = True + continue + state.stmts.append(inst) + + # If the list is used in any capacity between build_list and array + # call, then we must call off the translation for this list because + # it could be mutated and list_items would no longer be applicable. + if state.list_var_used(inst): + state.reset() + + return state.stmts if state.modified else None diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/interpreter.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/interpreter.py new file mode 100644 index 000000000..71ea49831 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/interpreter.py @@ -0,0 +1,2876 @@ +import builtins +import collections +import dis +import operator +import logging +import textwrap + +from numba.core import errors, dataflow, controlflow, ir, config +from numba.core.errors import NotDefinedError, UnsupportedError, error_extras +from numba.core.ir_utils import get_definition, guard +from numba.core.utils import (PYVERSION, BINOPS_TO_OPERATORS, + INPLACE_BINOPS_TO_OPERATORS,) +from numba.core.byteflow import Flow, AdaptDFA, AdaptCFA +from numba.core.unsafe import eh +from numba.cpython.unsafe.tuple import unpack_single_tuple + + +class _UNKNOWN_VALUE(object): + """Represents an unknown value, this is for ease of debugging purposes only. + """ + + def __init__(self, varname): + self._varname = varname + + def __repr__(self): + return "_UNKNOWN_VALUE({})".format(self._varname) + + +_logger = logging.getLogger(__name__) + + +class Assigner(object): + """ + This object keeps track of potential assignment simplifications + inside a code block. + For example `$O.1 = x` followed by `y = $0.1` can be simplified + into `y = x`, but it's not possible anymore if we have `x = z` + in-between those two instructions. + + NOTE: this is not only an optimization, but is actually necessary + due to certain limitations of Numba - such as only accepting the + returning of an array passed as function argument. + """ + + def __init__(self): + # { destination variable name -> source Var object } + self.dest_to_src = {} + # Basically a reverse mapping of dest_to_src: + # { source variable name -> all destination names in dest_to_src } + self.src_invalidate = collections.defaultdict(list) + self.unused_dests = set() + + def assign(self, srcvar, destvar): + """ + Assign *srcvar* to *destvar*. Return either *srcvar* or a possible + simplified assignment source (earlier assigned to *srcvar*). 
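# --- Editor's sketch (illustration only, not part of the patch): a toy model
# of the simplification Assigner performs on temporaries. `$t = x` followed by
# `y = $t` collapses to `y = x`, and any recorded simplification through a
# name is dropped once that name is reassigned. All names are hypothetical.
def collapse(stmts):
    src_of = {}                                   # temp name -> source name
    out = []
    for dest, src in stmts:                       # (dest, src) in program order
        src = src_of.get(src, src)                # reuse an earlier source
        src_of = {d: s for d, s in src_of.items() if s != dest}
        if dest.startswith("$"):                  # only temporaries collapse
            src_of[dest] = src
        out.append((dest, src))
    return out

print(collapse([("$0.1", "x"), ("y", "$0.1")]))             # y = x
print(collapse([("$0.1", "x"), ("x", "z"), ("y", "$0.1")])) # y = $0.1 is kept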
+ """ + srcname = srcvar.name + destname = destvar.name + if destname in self.src_invalidate: + # destvar will change, invalidate all previously known + # simplifications + for d in self.src_invalidate.pop(destname): + self.dest_to_src.pop(d) + if srcname in self.dest_to_src: + srcvar = self.dest_to_src[srcname] + if destvar.is_temp: + self.dest_to_src[destname] = srcvar + self.src_invalidate[srcname].append(destname) + self.unused_dests.add(destname) + return srcvar + + def get_assignment_source(self, destname): + """ + Get a possible assignment source (a ir.Var instance) to replace + *destname*, otherwise None. + """ + if destname in self.dest_to_src: + return self.dest_to_src[destname] + self.unused_dests.discard(destname) + return None + + +def _remove_assignment_definition(old_body, idx, func_ir, already_deleted_defs): + """ + Deletes the definition defined for old_body at index idx + from func_ir. We assume this stmt will be deleted from + new_body. + + In some optimizations we may update the same variable multiple times. + In this situation, we only need to delete a particular definition once, + this is tracked in already_deleted_def, which is a map from + assignment name to the set of values that have already been + deleted. + """ + lhs = old_body[idx].target.name + rhs = old_body[idx].value + if rhs in func_ir._definitions[lhs]: + func_ir._definitions[lhs].remove(rhs) + already_deleted_defs[lhs].add(rhs) + elif rhs not in already_deleted_defs[lhs]: + raise UnsupportedError( + "Inconsistency found in the definitions while executing" + " a peephole optimization. This suggests an internal" + " error or inconsistency elsewhere in the compiler." + ) + + +def _call_function_ex_replace_kws_small( + old_body, + keyword_expr, + new_body, + buildmap_idx, + func_ir, + already_deleted_defs +): + """ + Extracts the kws args passed as varkwarg + for CALL_FUNCTION_EX. This pass is taken when + n_kws <= 15 and the bytecode looks like: + + # Start for each argument + LOAD_FAST # Load each argument. + # End for each argument + ... + BUILD_CONST_KEY_MAP # Build a map + + In the generated IR, the varkwarg refers + to a single build_map that contains all of the + kws. In addition to returning the kws, this + function updates new_body to remove all usage + of the map. + """ + kws = keyword_expr.items.copy() + # kws are required to have constant keys. + # We update these with the value_indexes + value_indexes = keyword_expr.value_indexes + for key, index in value_indexes.items(): + kws[index] = (key, kws[index][1]) + # Remove the build_map by setting the list + # index to None. Nones will be removed later. + new_body[buildmap_idx] = None + # Remove the definition. + _remove_assignment_definition( + old_body, buildmap_idx, func_ir, already_deleted_defs + ) + return kws + + +def _call_function_ex_replace_kws_large( + old_body, + buildmap_name, + buildmap_idx, + search_end, + new_body, + func_ir, + errmsg, + already_deleted_defs +): + """ + Extracts the kws args passed as varkwarg + for CALL_FUNCTION_EX. This pass is taken when + n_kws > 15 and the bytecode looks like: + + BUILD_MAP # Construct the map + # Start for each argument + LOAD_CONST # Load a constant for the name of the argument + LOAD_FAST # Load each argument. + MAP_ADD # Append the (key, value) pair to the map + # End for each argument + + In the IR generated, the initial build map is empty and a series + of setitems are applied afterwards. THE IR looks like: + + $build_map_var = build_map(items=[]) + $constvar = const(str, ...) 
# create the const key + # CREATE THE ARGUMENT, This may take multiple lines. + $created_arg = ... + $var = getattr( + value=$build_map_var, + attr=__setitem__, + ) + $unused_var = call $var($constvar, $created_arg) + + We iterate through the IR, deleting all usages of the buildmap + from the new_body, and adds the kws to a new kws list. + """ + # Remove the build_map from the body. + new_body[buildmap_idx] = None + # Remove the definition. + _remove_assignment_definition( + old_body, buildmap_idx, func_ir, already_deleted_defs + ) + kws = [] + search_start = buildmap_idx + 1 + while search_start <= search_end: + # The first value must be a constant. + const_stmt = old_body[search_start] + if not ( + isinstance(const_stmt, ir.Assign) + and isinstance(const_stmt.value, ir.Const) + ): + # We cannot handle this format so raise the + # original error message. + raise UnsupportedError(errmsg) + key_var_name = const_stmt.target.name + key_val = const_stmt.value.value + search_start += 1 + # Now we need to search for a getattr with setitem + found_getattr = False + while ( + search_start <= search_end + and not found_getattr + ): + getattr_stmt = old_body[search_start] + if ( + isinstance(getattr_stmt, ir.Assign) + and isinstance(getattr_stmt.value, ir.Expr) + and getattr_stmt.value.op == "getattr" + and ( + getattr_stmt.value.value.name + == buildmap_name + ) + and getattr_stmt.value.attr == "__setitem__" + ): + found_getattr = True + else: + # If the argument is "created" in JIT, then there + # will be intermediate operations in between setitems. + # For example we have arg5=pow(arg5, 2), + # then the IR would look like: + # + # # Creation of the constant key. + # $const44.26 = const(str, arg5) + # + # # Argument creation. This is the section we are skipping + # $46load_global.27 = global(pow: ) + # $const50.29 = const(int, 2) + # $call.30 = call $46load_global.27(arg5, $const50.29) + # + # # Setitem with arg5 + # $54map_add.31 = getattr(value=$map.2, attr=__setitem__) + # $54map_add.32 = call $54map_add.31($const44.26, $call.30) + search_start += 1 + if ( + not found_getattr + or search_start == search_end + ): + # We cannot handle this format so raise the + # original error message. + raise UnsupportedError(errmsg) + setitem_stmt = old_body[search_start + 1] + if not ( + isinstance(setitem_stmt, ir.Assign) + and isinstance(setitem_stmt.value, ir.Expr) + and setitem_stmt.value.op == "call" + and ( + setitem_stmt.value.func.name + == getattr_stmt.target.name + ) + and len(setitem_stmt.value.args) == 2 + and ( + setitem_stmt.value.args[0].name + == key_var_name + ) + ): + # A call statement should always immediately follow the + # getattr. If for some reason this doesn't match the code + # format, we raise the original error message. This check + # is meant as a precaution. + raise UnsupportedError(errmsg) + arg_var = setitem_stmt.value.args[1] + # Append the (key, value) pair. + kws.append((key_val, arg_var)) + # Remove the __setitem__ getattr and call + new_body[search_start] = None + new_body[search_start + 1] = None + # Remove the definitions. + _remove_assignment_definition( + old_body, search_start, func_ir, already_deleted_defs + ) + _remove_assignment_definition( + old_body, search_start + 1, func_ir, already_deleted_defs + ) + search_start += 2 + return kws + + +def _call_function_ex_replace_args_small( + old_body, + tuple_expr, + new_body, + buildtuple_idx, + func_ir, + already_deleted_defs +): + """ + Extracts the args passed as vararg + for CALL_FUNCTION_EX. 
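# --- Editor's sketch (illustration only, not part of the patch): reproducing
# the bytecode shape the kws helpers above unpick. With enough keyword
# arguments, CPython 3.10 compiles the call through CALL_FUNCTION_EX over a
# built-up map rather than CALL_FUNCTION_KW. Runnable on any recent Python;
# the opcodes shown vary by version.
import dis

src = "f(" + ", ".join("k%d=%d" % (i, i) for i in range(16)) + ")"
dis.dis(compile(src, "<demo>", "eval"))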
This pass is taken when + n_args <= 30 and the bytecode looks like: + + # Start for each argument + LOAD_FAST # Load each argument. + # End for each argument + ... + BUILD_TUPLE # Create a tuple of the arguments + + In the IR generated, the vararg refer + to a single build_tuple that contains all of the + args. In addition to returning the args, this + function updates new_body to remove all usage + of the tuple. + """ + # Delete the build tuple + new_body[buildtuple_idx] = None + # Remove the definition. + _remove_assignment_definition( + old_body, buildtuple_idx, func_ir, already_deleted_defs + ) + # Return the args. + return tuple_expr.items + + +def _call_function_ex_replace_args_large( + old_body, + vararg_stmt, + new_body, + search_end, + func_ir, + errmsg, + already_deleted_defs +): + """ + Extracts the args passed as vararg + for CALL_FUNCTION_EX. This pass is taken when + n_args > 30 and the bytecode looks like: + + BUILD_TUPLE # Create a list to append to + # Start for each argument + LOAD_FAST # Load each argument. + LIST_APPEND # Add the argument to the list + # End for each argument + ... + LIST_TO_TUPLE # Convert the args to a tuple. + + In the IR generated, the tuple is created by concatenating + together several 1 element tuples to an initial empty tuple. + We traverse backwards in the IR, collecting args, until we + find the original empty tuple. For example, the IR might + look like: + + $orig_tuple = build_tuple(items=[]) + $first_var = build_tuple(items=[Var(arg0, test.py:6)]) + $next_tuple = $orig_tuple + $first_var + ... + $final_var = build_tuple(items=[Var(argn, test.py:6)]) + $final_tuple = $prev_tuple + $final_var + $varargs_var = $final_tuple + """ + # We traverse to the front of the block to look for the original + # tuple. + search_start = 0 + total_args = [] + if ( + isinstance(vararg_stmt, ir.Assign) + and isinstance(vararg_stmt.value, ir.Var) + ): + target_name = vararg_stmt.value.name + # If there is an initial assignment, delete it + new_body[search_end] = None + # Remove the definition. + _remove_assignment_definition( + old_body, search_end, func_ir, already_deleted_defs + ) + search_end -= 1 + else: + # There must always be an initial assignment + # https://github.com/numba/numba/blob/59fa2e335be68148b3bd72a29de3ff011430038d/numba/core/interpreter.py#L259-L260 + # If this changes we may need to support this branch. + raise AssertionError("unreachable") + # Traverse backwards to find all concatenations + # until eventually reaching the original empty tuple. + while search_end >= search_start: + concat_stmt = old_body[search_end] + if ( + isinstance(concat_stmt, ir.Assign) + and concat_stmt.target.name == target_name + and isinstance(concat_stmt.value, ir.Expr) + and concat_stmt.value.op == "build_tuple" + and not concat_stmt.value.items + ): + new_body[search_end] = None + # Remove the definition. + _remove_assignment_definition( + old_body, search_end, func_ir, already_deleted_defs + ) + # If we have reached the build_tuple we exit. + break + else: + # We expect to find another arg to append. + # The first stmt must be a binop "add" + if (search_end == search_start) or not ( + isinstance(concat_stmt, ir.Assign) + and ( + concat_stmt.target.name + == target_name + ) + and isinstance( + concat_stmt.value, ir.Expr + ) + and concat_stmt.value.op == "binop" + and concat_stmt.value.fn == operator.add + ): + # We cannot handle this format. 
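# --- Editor's sketch (illustration only, not part of the patch): the
# accumulator chain that _call_function_ex_replace_args_large walks. Each
# argument arrives as a one-element tuple added onto the accumulator, so
# scanning the assignments backwards collects the arguments in reverse,
# hence the final total_args[::-1]. Hypothetical values.
chain = [("a0",), ("a1",), ("a2",)]        # one build_tuple per argument
total_args = []
for one_elem in reversed(chain):           # traverse backwards, as the pass does
    total_args.append(one_elem[0])
print(total_args[::-1])                    # ['a0', 'a1', 'a2']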
+ raise UnsupportedError(errmsg) + lhs_name = concat_stmt.value.lhs.name + rhs_name = concat_stmt.value.rhs.name + # The previous statement should be a + # build_tuple containing the arg. + arg_tuple_stmt = old_body[search_end - 1] + if not ( + isinstance(arg_tuple_stmt, ir.Assign) + and isinstance( + arg_tuple_stmt.value, ir.Expr + ) + and ( + arg_tuple_stmt.value.op + == "build_tuple" + ) + and len(arg_tuple_stmt.value.items) == 1 + ): + # We cannot handle this format. + raise UnsupportedError(errmsg) + if arg_tuple_stmt.target.name == lhs_name: + # The tuple should always be generated on the RHS. + raise AssertionError("unreachable") + elif arg_tuple_stmt.target.name == rhs_name: + target_name = lhs_name + else: + # We cannot handle this format. + raise UnsupportedError(errmsg) + total_args.append( + arg_tuple_stmt.value.items[0] + ) + new_body[search_end] = None + new_body[search_end - 1] = None + # Remove the definitions. + _remove_assignment_definition( + old_body, search_end, func_ir, already_deleted_defs + ) + _remove_assignment_definition( + old_body, search_end - 1, func_ir, already_deleted_defs + ) + search_end -= 2 + # Avoid any space between appends + keep_looking = True + while search_end >= search_start and keep_looking: + next_stmt = old_body[search_end] + if ( + isinstance(next_stmt, ir.Assign) + and ( + next_stmt.target.name + == target_name + ) + ): + keep_looking = False + else: + # If the argument is "created" in JIT, then there + # will be intermediate operations in between appends. + # For example if the next arg after arg4 is pow(arg5, 2), + # then the IR would look like: + # + # # Appending arg4 + # $arg4_tup = build_tuple(items=[arg4]) + # $append_var.5 = $append_var.4 + $arg4_tup + # + # # Creation of arg5. + # # This is the section that we are skipping. + # $32load_global.20 = global(pow: ) + # $const36.22 = const(int, 2) + # $call.23 = call $32load_global.20(arg5, $const36.22) + # + # # Appending arg5 + # $arg5_tup = build_tuple(items=[$call.23]) + # $append_var.6 = $append_var.5 + $arg5_tup + search_end -= 1 + if search_end == search_start: + # If we reached the start we never found the build_tuple. + # We cannot handle this format so raise the + # original error message. + raise UnsupportedError(errmsg) + # Reverse the arguments so we get the correct order. + return total_args[::-1] + + +def peep_hole_call_function_ex_to_call_function_kw(func_ir): + """ + This peephole rewrites a bytecode sequence unique to Python 3.10 + where CALL_FUNCTION_EX is used instead of CALL_FUNCTION_KW because of + stack limitations set by CPython. This limitation is imposed whenever + a function call has too many arguments or keyword arguments. + + https://github.com/python/cpython/blob/a58ebcc701dd6c43630df941481475ff0f615a81/Python/compile.c#L55 + https://github.com/python/cpython/blob/a58ebcc701dd6c43630df941481475ff0f615a81/Python/compile.c#L4442 + + In particular, this change is imposed whenever (n_args / 2) + n_kws > 15. + + Different bytecode is generated for args depending on if n_args > 30 + or n_args <= 30 and similarly if n_kws > 15 or n_kws <= 15. + + This function unwraps the *args and **kwargs in the function call + and places these values directly into the args and kwargs of the call. + """ + # All changes are local to the a single block + # so it can be traversed in any order. + errmsg = textwrap.dedent(""" + CALL_FUNCTION_EX with **kwargs not supported. 
+ If you are not using **kwargs this may indicate that + you have a large number of kwargs and are using inlined control + flow. You can resolve this issue by moving the control flow out of + the function call. For example, if you have + + f(a=1 if flag else 0, ...) + + Replace that with: + + a_val = 1 if flag else 0 + f(a=a_val, ...)""") + + # Track which definitions have already been deleted + already_deleted_defs = collections.defaultdict(set) + for blk in func_ir.blocks.values(): + blk_changed = False + new_body = [] + for i, stmt in enumerate(blk.body): + if ( + isinstance(stmt, ir.Assign) + and isinstance(stmt.value, ir.Expr) + and stmt.value.op == "call" + and stmt.value.varkwarg is not None + ): + blk_changed = True + call = stmt.value + args = call.args + kws = call.kws + # We need to check the call expression contents if + # it contains either vararg or varkwarg. If it contains + # varkwarg we need to update the IR. If it just contains + # vararg we don't need to update the IR, but we need to + # check if peep_hole_list_to_tuple failed to replace the + # vararg list with a tuple. If so, we output an error + # message with suggested code changes. + vararg = call.vararg + varkwarg = call.varkwarg + start_search = i - 1 + # varkwarg should be defined second so we start there. + varkwarg_loc = start_search + keyword_def = None + found = False + while varkwarg_loc >= 0 and not found: + keyword_def = blk.body[varkwarg_loc] + if ( + isinstance(keyword_def, ir.Assign) + and keyword_def.target.name == varkwarg.name + ): + found = True + else: + varkwarg_loc -= 1 + if ( + kws + or not found + or not ( + isinstance(keyword_def.value, ir.Expr) + and keyword_def.value.op == "build_map" + ) + ): + # If we couldn't find where the kwargs are created + # then it should be a normal **kwargs call + # so we produce an unsupported message. + raise UnsupportedError(errmsg) + # Determine the kws + if keyword_def.value.items: + # n_kws <= 15 case. + # Here the IR looks like a series of + # constants, then the arguments and finally + # a build_map that contains all of the pairs. + # For Example: + # + # $const_n = const("arg_name") + # $arg_n = ... + # $kwargs_var = build_map(items=[ + # ($const_0, $arg_0), + # ..., + # ($const_n, $arg_n),]) + kws = _call_function_ex_replace_kws_small( + blk.body, + keyword_def.value, + new_body, + varkwarg_loc, + func_ir, + already_deleted_defs, + ) + else: + # n_kws > 15 case. + # Here the IR is an initial empty build_map + # followed by a series of setitems with a constant + # key and then the argument. + # For example: + # + # $kwargs_var = build_map(items=[]) + # $const_0 = const("arg_name") + # $arg_0 = ... + # $my_attr = getattr(const_0, attr=__setitem__) + # $unused_var = call $my_attr($const_0, $arg_0) + # ... + kws = _call_function_ex_replace_kws_large( + blk.body, + varkwarg.name, + varkwarg_loc, + i - 1, + new_body, + func_ir, + errmsg, + already_deleted_defs, + ) + start_search = varkwarg_loc + # Vararg isn't required to be provided. + if vararg is not None: + if args: + # If we have vararg then args is expected to + # be an empty list. + raise UnsupportedError(errmsg) + vararg_loc = start_search + args_def = None + found = False + while vararg_loc >= 0 and not found: + args_def = blk.body[vararg_loc] + if ( + isinstance(args_def, ir.Assign) + and args_def.target.name == vararg.name + ): + found = True + else: + vararg_loc -= 1 + if not found: + # If we couldn't find where the args are created + # then we can't handle this format. 
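# --- Editor's sketch (illustration only, not part of the patch): the CPython
# stack-limit threshold quoted above. CALL_FUNCTION_EX is chosen whenever
# n_args / 2 + n_kws > 15, so 10 positional plus 11 keyword arguments trips
# the rewrite while 10 plus 10 does not.
def uses_call_function_ex(n_args, n_kws):
    return n_args / 2 + n_kws > 15

assert uses_call_function_ex(10, 11)       # 16 > 15
assert not uses_call_function_ex(10, 10)   # 15 is not > 15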
+ raise UnsupportedError(errmsg) + if ( + isinstance(args_def.value, ir.Expr) + and args_def.value.op == "build_tuple" + ): + # n_args <= 30 case. + # Here the IR is a simple build_tuple containing + # all of the args. + # For example: + # + # $arg_n = ... + # $varargs = build_tuple( + # items=[$arg_0, ..., $arg_n] + # ) + args = _call_function_ex_replace_args_small( + blk.body, + args_def.value, + new_body, + vararg_loc, + func_ir, + already_deleted_defs, + ) + elif ( + isinstance(args_def.value, ir.Expr) + and args_def.value.op == "list_to_tuple" + ): + # If there is a call with vararg we need to check + # if the list -> tuple conversion failed and if so + # throw an error. + raise UnsupportedError(errmsg) + else: + # Here the IR is an initial empty build_tuple. + # Then for each arg, a new tuple with a single + # element is created and one by one these are + # added to a growing tuple. + # For example: + # + # $combo_tup_0 = build_tuple(items=[]) + # $arg0 = ... + # $arg0_tup = build_tuple(items=[$arg0]) + # $combo_tup_1 = $combo_tup_0 + $arg0_tup + # $arg1 = ... + # $arg1_tup = build_tuple(items=[$arg1]) + # $combo_tup_2 = $combo_tup_1 + $arg1_tup + # ... + # $combo_tup_n = $combo_tup_{n-1} + $argn_tup + # + # In addition, the IR contains a final + # assignment for the varargs that looks like: + # + # $varargs_var = $combo_tup_n + # + # Here args_def is expected to be a simple assignment. + args = _call_function_ex_replace_args_large( + blk.body, + args_def, + new_body, + vararg_loc, + func_ir, + errmsg, + already_deleted_defs, + ) + # Create a new call updating the args and kws + new_call = ir.Expr.call( + call.func, args, kws, call.loc, target=call.target + ) + # Drop the existing definition for this stmt. + _remove_assignment_definition( + blk.body, i, func_ir, already_deleted_defs + ) + # Update the statement + stmt = ir.Assign(new_call, stmt.target, stmt.loc) + # Update the definition + func_ir._definitions[stmt.target.name].append(new_call) + elif ( + isinstance(stmt, ir.Assign) + and isinstance(stmt.value, ir.Expr) + and stmt.value.op == "call" + and stmt.value.vararg is not None + ): + # If there is a call with vararg we need to check + # if the list -> tuple conversion failed and if so + # throw an error. + call = stmt.value + vararg_name = call.vararg.name + if ( + vararg_name in func_ir._definitions + and len(func_ir._definitions[vararg_name]) == 1 + ): + # If this value is still a list to tuple raise the + # exception. + expr = func_ir._definitions[vararg_name][0] + if isinstance(expr, ir.Expr) and expr.op == "list_to_tuple": + raise UnsupportedError(errmsg) + + new_body.append(stmt) + # Replace the block body if we changed the IR + if blk_changed: + blk.body.clear() + blk.body.extend([x for x in new_body if x is not None]) + return func_ir + + +def peep_hole_list_to_tuple(func_ir): + """ + This peephole rewrites a bytecode sequence new to Python 3.9 that looks + like e.g.: + + def foo(a): + return (*a,) + + 41 0 BUILD_LIST 0 + 2 LOAD_FAST 0 (a) + 4 LIST_EXTEND 1 + 6 LIST_TO_TUPLE + 8 RETURN_VAL + + essentially, the unpacking of tuples is written as a list which is appended + to/extended and then "magicked" into a tuple by the new LIST_TO_TUPLE + opcode. + + This peephole repeatedly analyses the bytecode in a block looking for a + window between a `LIST_TO_TUPLE` and `BUILD_LIST` and... + + 1. Turns the BUILD_LIST into a BUILD_TUPLE + 2. Sets an accumulator's initial value as the target of the BUILD_TUPLE + 3. 
Searches for 'extend' on the original list and turns these into binary
+       additions on the accumulator.
+    4. Searches for 'append' on the original list and turns these into a
+       `BUILD_TUPLE` which is then appended via binary addition to the
+       accumulator.
+    5. Assigns the accumulator to the variable that exits the peephole and the
+       rest of the block/code refers to as the result of the unpack operation.
+    6. Patches up the variable definitions so the rewritten IR stays
+       consistent.
+    """
+    _DEBUG = False
+
+    # For all blocks
+    for offset, blk in func_ir.blocks.items():
+        # keep doing the peephole rewrite until nothing is left that matches
+        while True:
+            # first try and find a matching region,
+            # i.e. BUILD_LIST......LIST_TO_TUPLE
+            def find_positive_region():
+                found = False
+                for idx in reversed(range(len(blk.body))):
+                    stmt = blk.body[idx]
+                    if isinstance(stmt, ir.Assign):
+                        value = stmt.value
+                        if (isinstance(value, ir.Expr) and
+                                value.op == 'list_to_tuple'):
+                            target_list = value.info[0]
+                            found = True
+                            bt = (idx, stmt)
+                    if found:
+                        if isinstance(stmt, ir.Assign):
+                            if stmt.target.name == target_list:
+                                region = (bt, (idx, stmt))
+                                return region
+
+            region = find_positive_region()
+            # if there's a peephole region then do something with it
+            if region is not None:
+                peep_hole = blk.body[region[1][0] : region[0][0]]
+                if _DEBUG:
+                    print("\nWINDOW:")
+                    for x in peep_hole:
+                        print(x)
+                    print("")
+
+                appends = []
+                extends = []
+                init = region[1][1]
+                const_list = init.target.name
+                # Walk through the peep_hole and find things that are being
+                # "extend"ed and "append"ed to the BUILD_LIST
+                for x in peep_hole:
+                    if isinstance(x, ir.Assign):
+                        if isinstance(x.value, ir.Expr):
+                            expr = x.value
+                            if (expr.op == 'getattr' and
+                                    expr.value.name == const_list):
+                                # it's not strictly necessary to split out
+                                # extends and appends, but it helps with
+                                # debugging to do so!
+                                if expr.attr == 'extend':
+                                    extends.append(x.target.name)
+                                elif expr.attr == 'append':
+                                    appends.append(x.target.name)
+                                else:
+                                    assert 0
+                # go back through the peephole and build new IR based on it
+                new_hole = []
+
+                def append_and_fix(x):
+                    """Adds to the new_hole and fixes up definitions"""
+                    new_hole.append(x)
+                    if x.target.name in func_ir._definitions:
+                        # if there's already a definition, drop it; there
+                        # should only be 1, as the way cpython emits the
+                        # sequence for `list_to_tuple` should ensure this.
+                        assert len(func_ir._definitions[x.target.name]) == 1
+                        func_ir._definitions[x.target.name].clear()
+                    func_ir._definitions[x.target.name].append(x.value)
+
+                the_build_list = init.target
+
+                # Do the transform on the peephole
+                if _DEBUG:
+                    print("\nBLOCK:")
+                    blk.dump()
+
+                # This section basically accumulates list appends and extends
+                # as binop(+) on tuples; it drops all the getattr() for extend
+                # and append as they are now dead and replaced with binop(+).
+                # It also switches out the build_list for a build_tuple and
+                # then ensures everything is wired up and defined ok.
+                t2l_agn = region[0][1]
+                acc = the_build_list
+                for x in peep_hole:
+                    if isinstance(x, ir.Assign):
+                        if isinstance(x.value, ir.Expr):
+                            expr = x.value
+                            if expr.op == 'getattr':
+                                if (x.target.name in extends or
+                                        x.target.name in appends):
+                                    # drop definition, it's being wholesale
+                                    # replaced.
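# --- Editor's sketch (illustration only, not part of the patch): reproducing
# the 3.9/3.10 sequence peep_hole_list_to_tuple rewrites. On those versions
# the disassembly shows BUILD_LIST / LIST_EXTEND / LIST_TO_TUPLE, as in the
# docstring above; other versions print different opcodes.
import dis

def foo(a):
    return (*a,)

dis.dis(foo)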
+ func_ir._definitions.pop(x.target.name) + continue + else: + # a getattr on something we're not + # interested in + new_hole.append(x) + elif expr.op == 'call': + fname = expr.func.name + if fname in extends or fname in appends: + arg = expr.args[0] + if isinstance(arg, ir.Var): + tmp_name = "%s_var_%s" % (fname, + arg.name) + if fname in appends: + bt = ir.Expr.build_tuple([arg,], + expr.loc) + else: + # Extend as tuple + gv_tuple = ir.Global( + name="tuple", value=tuple, + loc=expr.loc, + ) + tuple_var = arg.scope.redefine( + "$_list_extend_gv_tuple", + loc=expr.loc, + ) + new_hole.append( + ir.Assign( + target=tuple_var, + value=gv_tuple, + loc=expr.loc, + ), + ) + bt = ir.Expr.call( + tuple_var, (arg,), (), + loc=expr.loc, + ) + var = ir.Var(arg.scope, tmp_name, + expr.loc) + asgn = ir.Assign(bt, var, expr.loc) + append_and_fix(asgn) + arg = var + + # this needs to be a binary add + new = ir.Expr.binop(fn=operator.add, + lhs=acc, + rhs=arg, + loc=x.loc) + asgn = ir.Assign(new, x.target, expr.loc) + append_and_fix(asgn) + acc = asgn.target + else: + # there could be a call in the unpack, like + # *(a, x.append(y)) + new_hole.append(x) + elif (expr.op == 'build_list' and + x.target.name == const_list): + new = ir.Expr.build_tuple(expr.items, expr.loc) + asgn = ir.Assign(new, x.target, expr.loc) + # Not a temporary any more + append_and_fix(asgn) + else: + new_hole.append(x) + else: + new_hole.append(x) + + else: + # stick everything else in as-is + new_hole.append(x) + # Finally write the result back into the original build list as + # everything refers to it. + append_and_fix(ir.Assign(acc, t2l_agn.target, + the_build_list.loc)) + if _DEBUG: + print("\nNEW HOLE:") + for x in new_hole: + print(x) + + # and then update the block body with the modified region + cpy = blk.body[:] + head = cpy[:region[1][0]] + tail = blk.body[region[0][0] + 1:] + tmp = head + new_hole + tail + blk.body.clear() + blk.body.extend(tmp) + + if _DEBUG: + print("\nDUMP post hole:") + blk.dump() + + else: + # else escape + break + + return func_ir + + +def peep_hole_delete_with_exit(func_ir): + """ + This rewrite removes variables used to store the `__exit__` function + loaded by SETUP_WITH. + """ + dead_vars = set() + + for blk in func_ir.blocks.values(): + for stmt in blk.body: + # Any statement that uses a variable with the '$setup_with_exitfn' + # prefix is considered dead. + used = set(stmt.list_vars()) + for v in used: + if v.name.startswith('$setup_with_exitfn'): + dead_vars.add(v) + # Any assignment that uses any of the dead variable is considered + # dead. + if used & dead_vars: + if isinstance(stmt, ir.Assign): + dead_vars.add(stmt.target) + + new_body = [] + for stmt in blk.body: + # Skip any statements that uses anyone of the dead variable. + if not (set(stmt.list_vars()) & dead_vars): + new_body.append(stmt) + blk.body.clear() + blk.body.extend(new_body) + + return func_ir + + +def peep_hole_fuse_dict_add_updates(func_ir): + """ + This rewrite removes d1._update_from_bytecode(d2) + calls that are between two dictionaries, d1 and d2, + in the same basic block. This pattern can appear as a + result of Python 3.10 bytecode emission changes, which + prevent large constant literal dictionaries + (> 15 elements) from being constant. If both dictionaries + are constant dictionaries defined in the same block and + neither is used between the update call, then we replace d1 + with a new definition that combines the two dictionaries. 
At
+    the bytecode translation stage we convert DICT_UPDATE into
+    _update_from_bytecode, so we know that _update_from_bytecode
+    always comes from the bytecode change and not user code.
+
+    Python 3.10 may also rewrite the individual dictionaries
+    as an empty build_map + many map_add. Here we again look
+    for an _update_from_bytecode, and if so we replace these
+    with a single constant dictionary.
+
+    When running this algorithm we can always safely remove d2.
+
+    This is the relevant section of the CPython 3.10 compiler that causes
+    this bytecode change:
+    https://github.com/python/cpython/blob/3.10/Python/compile.c#L4048
+    """
+
+    # This algorithm fuses build_map expressions into the largest
+    # possible build map before use. For example, if we have an
+    # IR that looks like this:
+    #
+    #   $d1 = build_map([])
+    #   $key = const("a")
+    #   $value = const(2)
+    #   $setitem_func = getattr($d1, "__setitem__")
+    #   $unused1 = call (setitem_func, ($key, $value))
+    #   $key2 = const("b")
+    #   $value2 = const(3)
+    #   $d2 = build_map([($key2, $value2)])
+    #   $update_func = getattr($d1, "_update_from_bytecode")
+    #   $unused2 = call ($update_func, ($d2,))
+    #   $othervar = None
+    #   $retvar = cast($othervar)
+    #   return $retvar
+    #
+    # Then the IR is rewritten such that any __setitem__ and
+    # _update_from_bytecode operations are fused into the original build_map.
+    # The new build_map is then added at the last location where it
+    # previously encountered a __setitem__, _update_from_bytecode, or
+    # build_map, before any other uses. The new IR would look like:
+    #
+    #   $key = const("a")
+    #   $value = const(2)
+    #   $key2 = const("b")
+    #   $value2 = const(3)
+    #   $d1 = build_map([($key, $value), ($key2, $value2)])
+    #   $othervar = None
+    #   $retvar = cast($othervar)
+    #   return $retvar
+    #
+    # Note that we don't push $d1 to the bottom of the block. This is because
+    # some values may be found below this block (e.g. pop_block) that are
+    # pattern matched in other locations, such as objmode handling. It should
+    # be safe to move a map to the last location at which there was an
+    # _update_from_bytecode.
+
+    errmsg = textwrap.dedent("""
+        A DICT_UPDATE op-code was encountered that could not be replaced.
+        If you have created a large constant dictionary, this may
+        be an indication that you are using inlined control
+        flow. You can resolve this issue by moving the control flow out of
+        the dictionary constructor. For example, if you have
+
+        d = {a: 1 if flag else 0, ...}
+
+        Replace that with:
+
+        a_val = 1 if flag else 0
+        d = {a: a_val, ...}""")
+
+    already_deleted_defs = collections.defaultdict(set)
+    for blk in func_ir.blocks.values():
+        new_body = []
+        # literal map var name -> block idx of the original build_map
+        lit_map_def_idx = {}
+        # literal map var name -> list(map_uses)
+        # This is the index of every build_map or __setitem__
+        # in the IR that will need to be removed if the map
+        # is updated.
+        lit_map_use_idx = collections.defaultdict(list)
+        # literal map var name -> list of key/value items for the build_map
+        map_updates = {}
+        blk_changed = False
+
+        for i, stmt in enumerate(blk.body):
+            # What instruction should we append
+            new_inst = stmt
+            # Name that should be skipped when tracking used
+            # vars in a statement. This is always the lhs with
+            # a build_map.
+            stmt_build_map_out = None
+            if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
+                if stmt.value.op == "build_map":
+                    # Skip the output build_map when looking for used vars.
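# --- Editor's sketch (illustration only, not part of the patch): a
# Python-level trigger for the fusion described above. On 3.10, a literal
# dict with more than 15 entries is compiled as partial maps stitched
# together with DICT_UPDATE; this pass fuses them back into a single
# build_map. Runnable anywhere; the opcodes shown vary by version.
import dis

src = "{" + ", ".join("'k%d': %d" % (i, i) for i in range(16)) + "}"
dis.dis(compile(src, "<demo>", "eval"))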
+ stmt_build_map_out = stmt.target.name + # If we encounter a build map add it to the + # tracked maps. + lit_map_def_idx[stmt.target.name] = i + lit_map_use_idx[stmt.target.name].append(i) + map_updates[stmt.target.name] = stmt.value.items.copy() + elif stmt.value.op == "call" and i > 0: + # If we encounter a call we may need to replace + # the body + func_name = stmt.value.func.name + # If we have an update or a setitem + # it will be the previous expression. + getattr_stmt = blk.body[i - 1] + args = stmt.value.args + if ( + isinstance(getattr_stmt, ir.Assign) + and getattr_stmt.target.name == func_name + and isinstance(getattr_stmt.value, ir.Expr) + and getattr_stmt.value.op == "getattr" + and getattr_stmt.value.attr in ( + "__setitem__", "_update_from_bytecode" + ) + ): + update_map_name = getattr_stmt.value.value.name + attr = getattr_stmt.value.attr + if (attr == "__setitem__" + and update_map_name in lit_map_use_idx): + # If we have a setitem, update the lists + map_updates[update_map_name].append(args) + # Update the list of instructions that would + # need to be removed to include the setitem + # and the the getattr + lit_map_use_idx[update_map_name].extend([i - 1, i]) + elif attr == "_update_from_bytecode": + d2_map_name = args[0].name + if (update_map_name in lit_map_use_idx + and d2_map_name in lit_map_use_idx): + # If we have an update and the arg is also + # a literal dictionary, fuse the lists. + map_updates[update_map_name].extend( + map_updates[d2_map_name] + ) + # Delete the old IR for d1 and d2 + lit_map_use_idx[update_map_name].extend( + lit_map_use_idx[d2_map_name] + ) + lit_map_use_idx[update_map_name].append(i - 1) + for linenum in lit_map_use_idx[update_map_name]: + # Drop the existing definition. + _remove_assignment_definition( + blk.body, + linenum, + func_ir, + already_deleted_defs, + ) + # Delete it from the new block + new_body[linenum] = None + # Delete the maps from dicts + del lit_map_def_idx[d2_map_name] + del lit_map_use_idx[d2_map_name] + del map_updates[d2_map_name] + # Add d1 as the new instruction, removing the + # old definition. + _remove_assignment_definition( + blk.body, i, func_ir, already_deleted_defs + ) + new_inst = _build_new_build_map( + func_ir, + update_map_name, + blk.body, + lit_map_def_idx[update_map_name], + map_updates[update_map_name], + ) + # Update d1 in lit_map_use_idx to just the new + # definition and clear the previous list. + lit_map_use_idx[update_map_name].clear() + lit_map_use_idx[update_map_name].append(i) + # Mark that this block has been modified + blk_changed = True + else: + # If we cannot remove _update_from_bytecode + # Then raise an error for the user. + raise UnsupportedError(errmsg) + + # Check if we need to drop any maps from being tracked. + # Skip the setitem/_update_from_bytecode getattr that + # will be removed when handling their call in the next + # iteration. + if not ( + isinstance(stmt, ir.Assign) + and isinstance(stmt.value, ir.Expr) + and stmt.value.op == "getattr" + and stmt.value.value.name in lit_map_use_idx + and stmt.value.attr in ("__setitem__", "_update_from_bytecode") + ): + for var in stmt.list_vars(): + # If a map is used it cannot be fused later in + # the block. 
As a result we delete it from + # the dicitonaries + if ( + var.name in lit_map_use_idx + and var.name != stmt_build_map_out + ): + del lit_map_def_idx[var.name] + del lit_map_use_idx[var.name] + del map_updates[var.name] + + # Append the instruction to the new block + new_body.append(new_inst) + + if blk_changed: + # If the block is changed replace the block body. + blk.body.clear() + blk.body.extend([x for x in new_body if x is not None]) + + return func_ir + + +def _build_new_build_map(func_ir, name, old_body, old_lineno, new_items): + """ + Create a new build_map with a new set of key/value items + but all the other info the same. + """ + old_assign = old_body[old_lineno] + old_target = old_assign.target + old_bm = old_assign.value + # Build the literals + literal_keys = [] + # Track the constant key/values to set the literal_value + # field of build_map properly + values = [] + for pair in new_items: + k, v = pair + key_def = guard(get_definition, func_ir, k) + if isinstance(key_def, (ir.Const, ir.Global, ir.FreeVar)): + literal_keys.append(key_def.value) + value_def = guard(get_definition, func_ir, v) + if isinstance(value_def, (ir.Const, ir.Global, ir.FreeVar)): + values.append(value_def.value) + else: + # Append unknown value if not a literal. + values.append(_UNKNOWN_VALUE(v.name)) + + value_indexes = {} + if len(literal_keys) == len(new_items): + # All keys must be literals to have any literal values. + literal_value = {x: y for x, y in zip(literal_keys, values)} + for i, k in enumerate(literal_keys): + value_indexes[k] = i + else: + literal_value = None + + # Construct a new build map. + new_bm = ir.Expr.build_map( + items=new_items, + size=len(new_items), + literal_value=literal_value, + value_indexes=value_indexes, + loc=old_bm.loc, + ) + + # The previous definition has already been removed + # when updating the IR in peep_hole_fuse_dict_add_updates + func_ir._definitions[name].append(new_bm) + + # Return a new assign. + return ir.Assign( + new_bm, ir.Var(old_target.scope, name, old_target.loc), new_bm.loc + ) + + +class Interpreter(object): + """A bytecode interpreter that builds up the IR. + """ + + def __init__(self, func_id): + self.func_id = func_id + self.arg_count = func_id.arg_count + self.arg_names = func_id.arg_names + self.loc = self.first_loc = ir.Loc.from_function_id(func_id) + self.is_generator = func_id.is_generator + + # { inst offset : ir.Block } + self.blocks = {} + # { name: [definitions] } of local variables + self.definitions = collections.defaultdict(list) + # A set to keep track of all exception variables. + # To be used in _legalize_exception_vars() + self._exception_vars = set() + + def interpret(self, bytecode): + """ + Generate IR for this bytecode. 
+ """ + self.bytecode = bytecode + + self.scopes = [] + global_scope = ir.Scope(parent=None, loc=self.loc) + self.scopes.append(global_scope) + + if PYVERSION < (3, 7): + # Control flow analysis + self.cfa = controlflow.ControlFlowAnalysis(bytecode) + self.cfa.run() + if config.DUMP_CFG: + self.cfa.dump() + + # Data flow analysis + self.dfa = dataflow.DataFlowAnalysis(self.cfa) + self.dfa.run() + else: + flow = Flow(bytecode) + flow.run() + self.dfa = AdaptDFA(flow) + self.cfa = AdaptCFA(flow) + if config.DUMP_CFG: + self.cfa.dump() + + # Temp states during interpretation + self.current_block = None + self.current_block_offset = None + self.syntax_blocks = [] + self.dfainfo = None + + self.scopes.append(ir.Scope(parent=self.current_scope, loc=self.loc)) + # Interpret loop + for inst, kws in self._iter_inst(): + self._dispatch(inst, kws) + self._legalize_exception_vars() + # Prepare FunctionIR + func_ir = ir.FunctionIR(self.blocks, self.is_generator, self.func_id, + self.first_loc, self.definitions, + self.arg_count, self.arg_names) + _logger.debug(func_ir.dump_to_string()) + + # post process the IR to rewrite opcodes/byte sequences that are too + # involved to risk handling as part of direct interpretation + peepholes = [] + if PYVERSION in [(3, 9), (3, 10)]: + peepholes.append(peep_hole_list_to_tuple) + peepholes.append(peep_hole_delete_with_exit) + if PYVERSION == (3, 10): + # peep_hole_call_function_ex_to_call_function_kw + # depends on peep_hole_list_to_tuple converting + # any large number of arguments from a list to a + # tuple. + peepholes.append(peep_hole_call_function_ex_to_call_function_kw) + peepholes.append(peep_hole_fuse_dict_add_updates) + + post_processed_ir = self.post_process(peepholes, func_ir) + return post_processed_ir + + def post_process(self, peepholes, func_ir): + for peep in peepholes: + func_ir = peep(func_ir) + return func_ir + + def _legalize_exception_vars(self): + """Search for unsupported use of exception variables. + Note, they cannot be stored into user variable. + """ + # Build a set of exception variables + excvars = self._exception_vars.copy() + # Propagate the exception variables to LHS of assignment + for varname, defnvars in self.definitions.items(): + for v in defnvars: + if isinstance(v, ir.Var): + k = v.name + if k in excvars: + excvars.add(varname) + # Filter out the user variables. + uservar = list(filter(lambda x: not x.startswith('$'), excvars)) + if uservar: + # Complain about the first user-variable storing an exception + first = uservar[0] + loc = self.current_scope.get(first).loc + msg = "Exception object cannot be stored into variable ({})." 
+            raise errors.UnsupportedError(msg.format(first), loc=loc)
+
+    def init_first_block(self):
+        # Define variables receiving the function arguments
+        for index, name in enumerate(self.arg_names):
+            val = ir.Arg(index=index, name=name, loc=self.loc)
+            self.store(val, name)
+
+    def _iter_inst(self):
+        for blkct, block in enumerate(self.cfa.iterliveblocks()):
+            firstinst = self.bytecode[block.offset]
+            self.loc = self.loc.with_lineno(firstinst.lineno)
+            self._start_new_block(block.offset)
+            if blkct == 0:
+                # Is first block
+                self.init_first_block()
+            for offset, kws in self.dfainfo.insts:
+                inst = self.bytecode[offset]
+                self.loc = self.loc.with_lineno(inst.lineno)
+                yield inst, kws
+            self._end_current_block()
+
+    def _start_new_block(self, offset):
+        oldblock = self.current_block
+        self.insert_block(offset)
+        # Ensure the last block is terminated
+        if oldblock is not None and not oldblock.is_terminated:
+            # Handle ending try block.
+            tryblk = self.dfainfo.active_try_block
+            # If there's an active try-block and the handler block is live.
+            if tryblk is not None and tryblk['end'] in self.cfa.graph.nodes():
+                # We are in a try-block, insert a branch to except-block.
+                # This logic cannot be in self._end_current_block()
+                # because we need the non-raising next block-offset.
+                branch = ir.Branch(
+                    cond=self.get('$exception_check'),
+                    truebr=tryblk['end'],
+                    falsebr=offset,
+                    loc=self.loc,
+                )
+                oldblock.append(branch)
+            # Handle normal case
+            else:
+                jmp = ir.Jump(offset, loc=self.loc)
+                oldblock.append(jmp)
+        # Get DFA block info
+        self.dfainfo = self.dfa.infos[self.current_block_offset]
+        self.assigner = Assigner()
+        # Check out-of-scope syntactic-block
+        while self.syntax_blocks:
+            if offset >= self.syntax_blocks[-1].exit:
+                self.syntax_blocks.pop()
+            else:
+                break
+
+    def _end_current_block(self):
+        # Handle try block
+        if not self.current_block.is_terminated:
+            tryblk = self.dfainfo.active_try_block
+            if tryblk is not None:
+                self._insert_exception_check()
+        # Handle normal block cleanup
+        self._remove_unused_temporaries()
+        self._insert_outgoing_phis()
+
+    def _inject_call(self, func, gv_name, res_name=None):
+        """A helper function to inject a call to *func* which is a python
+        function.
+        Parameters
+        ----------
+        func : callable
+            The function object to be called.
+        gv_name : str
+            The variable name to be used to store the function object.
+        res_name : str; optional
+            The variable name to be used to store the call result.
+            If ``None``, a name is created automatically.
+        """
+        gv_fn = ir.Global(gv_name, func, loc=self.loc)
+        self.store(value=gv_fn, name=gv_name, redefine=True)
+        callres = ir.Expr.call(self.get(gv_name), (), (), loc=self.loc)
+        res_name = res_name or '$callres_{}'.format(gv_name)
+        self.store(value=callres, name=res_name, redefine=True)
+
+    def _insert_try_block_begin(self):
+        """Insert IR-nodes to mark the start of a `try` block.
+        """
+        self._inject_call(eh.mark_try_block, 'mark_try_block')
+
+    def _insert_try_block_end(self):
+        """Insert IR-nodes to mark the end of a `try` block.
+        """
+        self._inject_call(eh.end_try_block, 'end_try_block')
+
+    def _insert_exception_variables(self):
+        """Insert IR-nodes to initialize the exception variables.
+ """ + tryblk = self.dfainfo.active_try_block + # Get exception variables + endblk = tryblk['end'] + edgepushed = self.dfainfo.outgoing_edgepushed.get(endblk) + # Note: the last value on the stack is the exception value + # Note: due to the current limitation, all exception variables are None + if edgepushed: + const_none = ir.Const(value=None, loc=self.loc) + # For each variable going to the handler block. + for var in edgepushed: + if var in self.definitions: + raise AssertionError( + "exception variable CANNOT be defined by other code", + ) + self.store(value=const_none, name=var) + self._exception_vars.add(var) + + def _insert_exception_check(self): + """Called before the end of a block to inject checks if raised. + """ + self._insert_exception_variables() + # Do exception check + self._inject_call(eh.exception_check, 'exception_check', + '$exception_check') + + def _remove_unused_temporaries(self): + """ + Remove assignments to unused temporary variables from the + current block. + """ + new_body = [] + replaced_var = {} + for inst in self.current_block.body: + # the same temporary is assigned to multiple variables in cases + # like a = b[i] = 1, so need to handle replaced temporaries in + # later setitem/setattr nodes + if (isinstance(inst, (ir.SetItem, ir.SetAttr)) + and inst.value.name in replaced_var): + inst.value = replaced_var[inst.value.name] + elif isinstance(inst, ir.Assign): + if (inst.target.is_temp + and inst.target.name in self.assigner.unused_dests): + continue + # the same temporary is assigned to multiple variables in cases + # like a = b = 1, so need to handle replaced temporaries in + # later assignments + if (isinstance(inst.value, ir.Var) + and inst.value.name in replaced_var): + inst.value = replaced_var[inst.value.name] + new_body.append(inst) + continue + # chained unpack cases may reuse temporary + # e.g. a = (b, c) = (x, y) + if (isinstance(inst.value, ir.Expr) + and inst.value.op == "exhaust_iter" + and inst.value.value.name in replaced_var): + inst.value.value = replaced_var[inst.value.value.name] + new_body.append(inst) + continue + # eliminate temporary variables that are assigned to user + # variables right after creation. 
E.g.: + # $1 = f(); a = $1 -> a = f() + # the temporary variable is not reused elsewhere since CPython + # bytecode is stack-based and this pattern corresponds to a pop + if (isinstance(inst.value, ir.Var) and inst.value.is_temp + and new_body and isinstance(new_body[-1], ir.Assign)): + prev_assign = new_body[-1] + # _var_used_in_binop check makes sure we don't create a new + # inplace binop operation which can fail + # (see TestFunctionType.test_in_iter_func_call) + if (prev_assign.target.name == inst.value.name + and not self._var_used_in_binop( + inst.target.name, prev_assign.value)): + replaced_var[inst.value.name] = inst.target + prev_assign.target = inst.target + # replace temp var definition in target with proper defs + self.definitions[inst.target.name].remove(inst.value) + self.definitions[inst.target.name].extend( + self.definitions.pop(inst.value.name) + ) + continue + + new_body.append(inst) + + self.current_block.body = new_body + + def _var_used_in_binop(self, varname, expr): + """return True if 'expr' is a binary expression and 'varname' is used + in it as an argument + """ + return (isinstance(expr, ir.Expr) + and expr.op in ("binop", "inplace_binop") + and (varname == expr.lhs.name or varname == expr.rhs.name)) + + def _insert_outgoing_phis(self): + """ + Add assignments to forward requested outgoing values + to subsequent blocks. + """ + for phiname, varname in self.dfainfo.outgoing_phis.items(): + target = self.current_scope.get_or_define(phiname, + loc=self.loc) + stmt = ir.Assign(value=self.get(varname), target=target, + loc=self.loc) + self.definitions[target.name].append(stmt.value) + if not self.current_block.is_terminated: + self.current_block.append(stmt) + else: + self.current_block.insert_before_terminator(stmt) + + def get_global_value(self, name): + """ + Get a global value from the func_global (first) or + as a builtins (second). If both failed, return a ir.UNDEFINED. + """ + try: + return self.func_id.func.__globals__[name] + except KeyError: + return getattr(builtins, name, ir.UNDEFINED) + + def get_closure_value(self, index): + """ + Get a value from the cell contained in this function's closure. + If not set, return a ir.UNDEFINED. + """ + cell = self.func_id.func.__closure__[index] + try: + return cell.cell_contents + except ValueError: + return ir.UNDEFINED + + @property + def current_scope(self): + return self.scopes[-1] + + @property + def code_consts(self): + return self.bytecode.co_consts + + @property + def code_locals(self): + return self.bytecode.co_varnames + + @property + def code_names(self): + return self.bytecode.co_names + + @property + def code_cellvars(self): + return self.bytecode.co_cellvars + + @property + def code_freevars(self): + return self.bytecode.co_freevars + + def _dispatch(self, inst, kws): + assert self.current_block is not None + fname = "op_%s" % inst.opname.replace('+', '_') + try: + fn = getattr(self, fname) + except AttributeError: + raise NotImplementedError(inst) + else: + try: + return fn(inst, **kws) + except errors.NotDefinedError as e: + if e.loc is None: + loc = self.loc + else: + loc = e.loc + + err = errors.NotDefinedError(e.name, loc=loc) + if not config.FULL_TRACEBACKS: + raise err from None + else: + raise err + + # --- Scope operations --- + + def store(self, value, name, redefine=False): + """ + Store *value* (a Expr or Var instance) into the variable named *name* + (a str object). Returns the target variable. 
+ """ + if redefine or self.current_block_offset in self.cfa.backbone: + rename = not (name in self.code_cellvars) + target = self.current_scope.redefine(name, loc=self.loc, + rename=rename) + else: + target = self.current_scope.get_or_define(name, loc=self.loc) + if isinstance(value, ir.Var): + value = self.assigner.assign(value, target) + stmt = ir.Assign(value=value, target=target, loc=self.loc) + self.current_block.append(stmt) + self.definitions[target.name].append(value) + return target + + def get(self, name): + """ + Get the variable (a Var instance) with the given *name*. + """ + # Implicit argument for comprehension starts with '.' + # See Parameter class in inspect.py (from Python source) + if name[0] == '.' and name[1:].isdigit(): + name = 'implicit{}'.format(name[1:]) + + # Try to simplify the variable lookup by returning an earlier + # variable assigned to *name*. + var = self.assigner.get_assignment_source(name) + if var is None: + var = self.current_scope.get(name) + return var + + # --- Block operations --- + + def insert_block(self, offset, scope=None, loc=None): + scope = scope or self.current_scope + loc = loc or self.loc + blk = ir.Block(scope=scope, loc=loc) + self.blocks[offset] = blk + self.current_block = blk + self.current_block_offset = offset + return blk + + # --- Bytecode handlers --- + + def op_NOP(self, inst): + pass + + def op_PRINT_ITEM(self, inst, item, printvar, res): + item = self.get(item) + printgv = ir.Global("print", print, loc=self.loc) + self.store(value=printgv, name=printvar) + call = ir.Expr.call(self.get(printvar), (item,), (), loc=self.loc) + self.store(value=call, name=res) + + def op_PRINT_NEWLINE(self, inst, printvar, res): + printgv = ir.Global("print", print, loc=self.loc) + self.store(value=printgv, name=printvar) + call = ir.Expr.call(self.get(printvar), (), (), loc=self.loc) + self.store(value=call, name=res) + + def op_UNPACK_SEQUENCE(self, inst, iterable, stores, tupleobj): + count = len(stores) + # Exhaust the iterable into a tuple-like object + tup = ir.Expr.exhaust_iter(value=self.get(iterable), loc=self.loc, + count=count) + self.store(name=tupleobj, value=tup) + + # then index the tuple-like object to extract the values + for i, st in enumerate(stores): + expr = ir.Expr.static_getitem(self.get(tupleobj), + index=i, index_var=None, + loc=self.loc) + self.store(expr, st) + + def op_FORMAT_VALUE(self, inst, value, res, strvar): + """ + FORMAT_VALUE(flags): flags argument specifies format spec which is not + supported yet. Currently, str() is simply called on the value. + https://docs.python.org/3/library/dis.html#opcode-FORMAT_VALUE + """ + value = self.get(value) + strgv = ir.Global("str", str, loc=self.loc) + self.store(value=strgv, name=strvar) + call = ir.Expr.call(self.get(strvar), (value,), (), loc=self.loc) + self.store(value=call, name=res) + + def op_BUILD_STRING(self, inst, strings, tmps): + """ + BUILD_STRING(count): Concatenates count strings. + Required for supporting f-strings. 
+ https://docs.python.org/3/library/dis.html#opcode-BUILD_STRING + """ + count = inst.arg + # corner case: f"" + if count == 0: + const = ir.Const("", loc=self.loc) + self.store(const, tmps[-1]) + return + + prev = self.get(strings[0]) + for other, tmp in zip(strings[1:], tmps): + other = self.get(other) + expr = ir.Expr.binop( + operator.add, lhs=prev, rhs=other, loc=self.loc + ) + self.store(expr, tmp) + prev = self.get(tmp) + + def op_BUILD_SLICE(self, inst, start, stop, step, res, slicevar): + start = self.get(start) + stop = self.get(stop) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + if step is None: + sliceinst = ir.Expr.call(self.get(slicevar), (start, stop), (), + loc=self.loc) + else: + step = self.get(step) + sliceinst = ir.Expr.call(self.get(slicevar), (start, stop, step), + (), loc=self.loc) + self.store(value=sliceinst, name=res) + + def op_SLICE_0(self, inst, base, res, slicevar, indexvar, nonevar): + base = self.get(base) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) + self.store(value=index, name=indexvar) + + expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) + self.store(value=expr, name=res) + + def op_SLICE_1(self, inst, base, start, nonevar, res, slicevar, indexvar): + base = self.get(base) + start = self.get(start) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (start, none), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) + self.store(value=expr, name=res) + + def op_SLICE_2(self, inst, base, nonevar, stop, res, slicevar, indexvar): + base = self.get(base) + stop = self.get(stop) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (none, stop,), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) + self.store(value=expr, name=res) + + def op_SLICE_3(self, inst, base, start, stop, res, slicevar, indexvar): + base = self.get(base) + start = self.get(start) + stop = self.get(stop) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (start, stop), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + expr = ir.Expr.getitem(base, self.get(indexvar), loc=self.loc) + self.store(value=expr, name=res) + + def op_STORE_SLICE_0(self, inst, base, value, slicevar, indexvar, nonevar): + base = self.get(base) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) + self.store(value=index, name=indexvar) + + stmt = ir.SetItem(base, self.get(indexvar), self.get(value), + loc=self.loc) + 
self.current_block.append(stmt) + + def op_STORE_SLICE_1(self, inst, base, start, nonevar, value, slicevar, + indexvar): + base = self.get(base) + start = self.get(start) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (start, none), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + stmt = ir.SetItem(base, self.get(indexvar), self.get(value), + loc=self.loc) + self.current_block.append(stmt) + + def op_STORE_SLICE_2(self, inst, base, nonevar, stop, value, slicevar, + indexvar): + base = self.get(base) + stop = self.get(stop) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (none, stop,), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + stmt = ir.SetItem(base, self.get(indexvar), self.get(value), + loc=self.loc) + self.current_block.append(stmt) + + def op_STORE_SLICE_3(self, inst, base, start, stop, value, slicevar, + indexvar): + base = self.get(base) + start = self.get(start) + stop = self.get(stop) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (start, stop), (), + loc=self.loc) + self.store(value=index, name=indexvar) + stmt = ir.SetItem(base, self.get(indexvar), self.get(value), + loc=self.loc) + self.current_block.append(stmt) + + def op_DELETE_SLICE_0(self, inst, base, slicevar, indexvar, nonevar): + base = self.get(base) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + index = ir.Expr.call(self.get(slicevar), (none, none), (), loc=self.loc) + self.store(value=index, name=indexvar) + + stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) + self.current_block.append(stmt) + + def op_DELETE_SLICE_1(self, inst, base, start, nonevar, slicevar, indexvar): + base = self.get(base) + start = self.get(start) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (start, none), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) + self.current_block.append(stmt) + + def op_DELETE_SLICE_2(self, inst, base, nonevar, stop, slicevar, indexvar): + base = self.get(base) + stop = self.get(stop) + + nonegv = ir.Const(None, loc=self.loc) + self.store(value=nonegv, name=nonevar) + none = self.get(nonevar) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (none, stop,), (), + loc=self.loc) + self.store(value=index, name=indexvar) + + stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) + self.current_block.append(stmt) + + def op_DELETE_SLICE_3(self, inst, base, start, stop, slicevar, indexvar): + base = self.get(base) + start = self.get(start) + stop = self.get(stop) + + slicegv = ir.Global("slice", slice, loc=self.loc) + self.store(value=slicegv, 
name=slicevar) + + index = ir.Expr.call(self.get(slicevar), (start, stop), (), + loc=self.loc) + self.store(value=index, name=indexvar) + stmt = ir.DelItem(base, self.get(indexvar), loc=self.loc) + self.current_block.append(stmt) + + def op_LOAD_FAST(self, inst, res): + srcname = self.code_locals[inst.arg] + self.store(value=self.get(srcname), name=res) + + def op_STORE_FAST(self, inst, value): + dstname = self.code_locals[inst.arg] + value = self.get(value) + self.store(value=value, name=dstname) + + def op_DELETE_FAST(self, inst): + dstname = self.code_locals[inst.arg] + self.current_block.append(ir.Del(dstname, loc=self.loc)) + + def op_DUP_TOPX(self, inst, orig, duped): + for src, dst in zip(orig, duped): + self.store(value=self.get(src), name=dst) + + op_DUP_TOP = op_DUP_TOPX + op_DUP_TOP_TWO = op_DUP_TOPX + + def op_STORE_ATTR(self, inst, target, value): + attr = self.code_names[inst.arg] + sa = ir.SetAttr(target=self.get(target), value=self.get(value), + attr=attr, loc=self.loc) + self.current_block.append(sa) + + def op_DELETE_ATTR(self, inst, target): + attr = self.code_names[inst.arg] + sa = ir.DelAttr(target=self.get(target), attr=attr, loc=self.loc) + self.current_block.append(sa) + + def op_LOAD_ATTR(self, inst, item, res): + item = self.get(item) + attr = self.code_names[inst.arg] + getattr = ir.Expr.getattr(item, attr, loc=self.loc) + self.store(getattr, res) + + def op_LOAD_CONST(self, inst, res): + value = self.code_consts[inst.arg] + if isinstance(value, tuple): + st = [] + for x in value: + nm = '$const_%s' % str(x) + val_const = ir.Const(x, loc=self.loc) + target = self.store(val_const, name=nm, redefine=True) + st.append(target) + const = ir.Expr.build_tuple(st, loc=self.loc) + elif isinstance(value, frozenset): + st = [] + for x in value: + nm = '$const_%s' % str(x) + val_const = ir.Const(x, loc=self.loc) + target = self.store(val_const, name=nm, redefine=True) + st.append(target) + const = ir.Expr.build_set(st, loc=self.loc) + else: + const = ir.Const(value, loc=self.loc) + self.store(const, res) + + def op_LOAD_GLOBAL(self, inst, res): + name = self.code_names[inst.arg] + value = self.get_global_value(name) + gl = ir.Global(name, value, loc=self.loc) + self.store(gl, res) + + def op_LOAD_DEREF(self, inst, res): + n_cellvars = len(self.code_cellvars) + if inst.arg < n_cellvars: + name = self.code_cellvars[inst.arg] + gl = self.get(name) + else: + idx = inst.arg - n_cellvars + name = self.code_freevars[idx] + value = self.get_closure_value(idx) + gl = ir.FreeVar(idx, name, value, loc=self.loc) + self.store(gl, res) + + def op_STORE_DEREF(self, inst, value): + n_cellvars = len(self.code_cellvars) + if inst.arg < n_cellvars: + dstname = self.code_cellvars[inst.arg] + else: + dstname = self.code_freevars[inst.arg - n_cellvars] + value = self.get(value) + self.store(value=value, name=dstname) + + def op_SETUP_LOOP(self, inst): + assert self.blocks[inst.offset] is self.current_block + loop = ir.Loop(inst.offset, exit=(inst.next + inst.arg)) + self.syntax_blocks.append(loop) + + def op_SETUP_WITH(self, inst, contextmanager, exitfn=None): + assert self.blocks[inst.offset] is self.current_block + # Handle with + exitpt = inst.next + inst.arg + wth = ir.With(inst.offset, exit=exitpt) + self.syntax_blocks.append(wth) + ctxmgr = self.get(contextmanager) + self.current_block.append(ir.EnterWith(contextmanager=ctxmgr, + begin=inst.offset, + end=exitpt, loc=self.loc,)) + + # Store exit fn + exit_fn_obj = ir.Const(None, loc=self.loc) + self.store(value=exit_fn_obj, name=exitfn) + + 
def op_SETUP_EXCEPT(self, inst): + # Removed since python3.8 + self._insert_try_block_begin() + + def op_SETUP_FINALLY(self, inst): + self._insert_try_block_begin() + + def op_WITH_CLEANUP(self, inst): + "no-op" + + def op_WITH_CLEANUP_START(self, inst): + "no-op" + + def op_WITH_CLEANUP_FINISH(self, inst): + "no-op" + + def op_END_FINALLY(self, inst): + "no-op" + + def op_BEGIN_FINALLY(self, inst, temps): + # The *temps* are the exception variables + const_none = ir.Const(None, loc=self.loc) + for tmp in temps: + # Set to None for now + self.store(const_none, name=tmp) + self._exception_vars.add(tmp) + + if PYVERSION < (3, 6): + + def op_CALL_FUNCTION(self, inst, func, args, kws, res, vararg): + func = self.get(func) + args = [self.get(x) for x in args] + if vararg is not None: + vararg = self.get(vararg) + + # Process keywords + keyvalues = [] + removethese = [] + for k, v in kws: + k, v = self.get(k), self.get(v) + for inst in self.current_block.body: + if isinstance(inst, ir.Assign) and inst.target is k: + removethese.append(inst) + keyvalues.append((inst.value.value, v)) + + # Remove keyword constant statements + for inst in removethese: + self.current_block.remove(inst) + + expr = ir.Expr.call(func, args, keyvalues, loc=self.loc, + vararg=vararg) + self.store(expr, res) + + op_CALL_FUNCTION_VAR = op_CALL_FUNCTION + else: + def op_CALL_FUNCTION(self, inst, func, args, res): + func = self.get(func) + args = [self.get(x) for x in args] + expr = ir.Expr.call(func, args, (), loc=self.loc) + self.store(expr, res) + + def op_CALL_FUNCTION_KW(self, inst, func, args, names, res): + func = self.get(func) + args = [self.get(x) for x in args] + # Find names const + names = self.get(names) + for inst in self.current_block.body: + if isinstance(inst, ir.Assign) and inst.target is names: + self.current_block.remove(inst) + # scan up the block looking for the values, remove them + # and find their name strings + named_items = [] + for x in inst.value.items: + for y in self.current_block.body[::-1]: + if x == y.target: + self.current_block.remove(y) + named_items.append(y.value.value) + break + keys = named_items + break + + nkeys = len(keys) + posvals = args[:-nkeys] + kwvals = args[-nkeys:] + keyvalues = list(zip(keys, kwvals)) + + expr = ir.Expr.call(func, posvals, keyvalues, loc=self.loc) + self.store(expr, res) + + def op_CALL_FUNCTION_EX(self, inst, func, vararg, varkwarg, res): + func = self.get(func) + vararg = self.get(vararg) + if varkwarg is not None: + varkwarg = self.get(varkwarg) + expr = ir.Expr.call( + func, [], [], loc=self.loc, vararg=vararg, varkwarg=varkwarg + ) + self.store(expr, res) + + def _build_tuple_unpack(self, inst, tuples, temps, is_assign): + first = self.get(tuples[0]) + if is_assign: + # it's assign-like, defer handling to an intrinsic that will have + # type information. + # Can deal with tuples only, i.e. y = (*x,). 
where x = + gv_name = "unpack_single_tuple" + gv_fn = ir.Global(gv_name, unpack_single_tuple, loc=self.loc,) + self.store(value=gv_fn, name=gv_name, redefine=True) + exc = ir.Expr.call(self.get(gv_name), args=(first,), kws=(), + loc=self.loc,) + self.store(exc, temps[0]) + else: + loc = self.loc + for other, tmp in zip(map(self.get, tuples[1:]), temps): + # Emit as `first + tuple(other)` + gv_tuple = ir.Global( + name="tuple", value=tuple, + loc=loc, + ) + tuple_var = self.store( + gv_tuple, "$_list_extend_gv_tuple", redefine=True, + ) + tuplify_val = ir.Expr.call( + tuple_var, (other,), (), + loc=loc, + ) + tuplify_var = self.store(tuplify_val, "$_tuplify", + redefine=True) + out = ir.Expr.binop( + fn=operator.add, lhs=first, rhs=self.get(tuplify_var.name), + loc=self.loc, + ) + self.store(out, tmp) + first = self.get(tmp) + + def op_BUILD_TUPLE_UNPACK_WITH_CALL(self, inst, tuples, temps, is_assign): + # just unpack the input tuple, call inst will be handled afterwards + self._build_tuple_unpack(inst, tuples, temps, is_assign) + + def op_BUILD_TUPLE_UNPACK(self, inst, tuples, temps, is_assign): + self._build_tuple_unpack(inst, tuples, temps, is_assign) + + def op_LIST_TO_TUPLE(self, inst, const_list, res): + expr = ir.Expr.dummy('list_to_tuple', (const_list,), loc=self.loc) + self.store(expr, res) + + def op_BUILD_CONST_KEY_MAP(self, inst, keys, keytmps, values, res): + # Unpack the constant key-tuple and reused build_map which takes + # a sequence of (key, value) pair. + keyvar = self.get(keys) + # TODO: refactor this pattern. occurred several times. + for inst in self.current_block.body: + if isinstance(inst, ir.Assign) and inst.target is keyvar: + self.current_block.remove(inst) + # scan up the block looking for the values, remove them + # and find their name strings + named_items = [] + for x in inst.value.items: + for y in self.current_block.body[::-1]: + if x == y.target: + self.current_block.remove(y) + named_items.append(y.value.value) + break + keytup = named_items + break + assert len(keytup) == len(values) + keyconsts = [ir.Const(value=x, loc=self.loc) for x in keytup] + for kval, tmp in zip(keyconsts, keytmps): + self.store(kval, tmp) + items = list(zip(map(self.get, keytmps), map(self.get, values))) + + # sort out literal values + literal_items = [] + for v in values: + defns = self.definitions[v] + if len(defns) != 1: + break + defn = defns[0] + if not isinstance(defn, ir.Const): + break + literal_items.append(defn.value) + + def resolve_const(v): + defns = self.definitions[v] + if len(defns) != 1: + return _UNKNOWN_VALUE(self.get(v).name) + defn = defns[0] + if not isinstance(defn, ir.Const): + return _UNKNOWN_VALUE(self.get(v).name) + return defn.value + + if len(literal_items) != len(values): + literal_dict = {x: resolve_const(y) for x, y in + zip(keytup, values)} + else: + literal_dict = {x:y for x, y in zip(keytup, literal_items)} + + # to deal with things like {'a': 1, 'a': 'cat', 'b': 2, 'a': 2j} + # store the index of the actual used value for a given key, this is + # used when lowering to pull the right value out into the tuple repr + # of a mixed value type dictionary. 
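+        # For example, for {'a': 1, 'a': 'cat', 'b': 2, 'a': 2j} the constant
+        # key tuple is ('a', 'a', 'b', 'a'), so the enumeration below leaves
+        # value_indexes == {'a': 3, 'b': 2}, i.e. the value bound last to a
+        # given key is the one that is used.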
+        value_indexes = {}
+        for i, k in enumerate(keytup):
+            value_indexes[k] = i
+
+        expr = ir.Expr.build_map(items=items,
+                                 size=2,
+                                 literal_value=literal_dict,
+                                 value_indexes=value_indexes,
+                                 loc=self.loc)
+
+        self.store(expr, res)
+
+    def op_GET_ITER(self, inst, value, res):
+        expr = ir.Expr.getiter(value=self.get(value), loc=self.loc)
+        self.store(expr, res)
+
+    def op_FOR_ITER(self, inst, iterator, pair, indval, pred):
+        """
+        Assign a new block for this instruction.
+        """
+        assert inst.offset in self.blocks, "FOR_ITER must be block head"
+
+        # Emit code
+        val = self.get(iterator)
+
+        pairval = ir.Expr.iternext(value=val, loc=self.loc)
+        self.store(pairval, pair)
+
+        iternext = ir.Expr.pair_first(value=self.get(pair), loc=self.loc)
+        self.store(iternext, indval)
+
+        isvalid = ir.Expr.pair_second(value=self.get(pair), loc=self.loc)
+        self.store(isvalid, pred)
+
+        # Conditional jump
+        br = ir.Branch(cond=self.get(pred), truebr=inst.next,
+                       falsebr=inst.get_jump_target(),
+                       loc=self.loc)
+        self.current_block.append(br)
+
+    def op_BINARY_SUBSCR(self, inst, target, index, res):
+        index = self.get(index)
+        target = self.get(target)
+        expr = ir.Expr.getitem(target, index=index, loc=self.loc)
+        self.store(expr, res)
+
+    def op_STORE_SUBSCR(self, inst, target, index, value):
+        index = self.get(index)
+        target = self.get(target)
+        value = self.get(value)
+        stmt = ir.SetItem(target=target, index=index, value=value,
+                          loc=self.loc)
+        self.current_block.append(stmt)
+
+    def op_DELETE_SUBSCR(self, inst, target, index):
+        index = self.get(index)
+        target = self.get(target)
+        stmt = ir.DelItem(target=target, index=index, loc=self.loc)
+        self.current_block.append(stmt)
+
+    def op_BUILD_TUPLE(self, inst, items, res):
+        expr = ir.Expr.build_tuple(items=[self.get(x) for x in items],
+                                   loc=self.loc)
+        self.store(expr, res)
+
+    def op_BUILD_LIST(self, inst, items, res):
+        expr = ir.Expr.build_list(items=[self.get(x) for x in items],
+                                  loc=self.loc)
+        self.store(expr, res)
+
+    def op_BUILD_SET(self, inst, items, res):
+        expr = ir.Expr.build_set(items=[self.get(x) for x in items],
+                                 loc=self.loc)
+        self.store(expr, res)
+
+    def op_SET_UPDATE(self, inst, target, value, updatevar, res):
+        target = self.get(target)
+        value = self.get(value)
+        updateattr = ir.Expr.getattr(target, 'update', loc=self.loc)
+        self.store(value=updateattr, name=updatevar)
+        updateinst = ir.Expr.call(self.get(updatevar), (value,), (),
+                                  loc=self.loc)
+        self.store(value=updateinst, name=res)
+
+    def op_DICT_UPDATE(self, inst, target, value, updatevar, res):
+        target = self.get(target)
+        value = self.get(value)
+        # We generate _update_from_bytecode instead of update so we can
+        # differentiate between user .update() calls and those from the
+        # bytecode. This is then used to recombine dictionaries in peephole
+        # optimizations. See the discussion in this PR about why:
+        # https://github.com/numba/numba/pull/7964/files#r868229306
+        updateattr = ir.Expr.getattr(
+            target, '_update_from_bytecode', loc=self.loc
+        )
+        self.store(value=updateattr, name=updatevar)
+        updateinst = ir.Expr.call(self.get(updatevar), (value,), (),
+                                  loc=self.loc)
+        self.store(value=updateinst, name=res)
+
+    def op_BUILD_MAP(self, inst, items, size, res):
+        got_items = [(self.get(k), self.get(v)) for k, v in items]
+
+        # sort out literal values, this is a bit contrived but is to handle
+        # situations like `{1: 10, 1: 10}` where the size of the literal dict
+        # is smaller than the definition
+        def get_literals(target):
+            literal_items = []
+            values = [self.get(v.name) for v in target]
+            for v in values:
+                defns = self.definitions[v.name]
+                if len(defns) != 1:
+                    break
+                defn = defns[0]
+                if not isinstance(defn, ir.Const):
+                    break
+                literal_items.append(defn.value)
+            return literal_items
+
+        literal_keys = get_literals(x[0] for x in got_items)
+        literal_values = get_literals(x[1] for x in got_items)
+
+        has_literal_keys = len(literal_keys) == len(got_items)
+        has_literal_values = len(literal_values) == len(got_items)
+
+        value_indexes = {}
+        if not has_literal_keys and not has_literal_values:
+            literal_dict = None
+        elif has_literal_keys and not has_literal_values:
+            literal_dict = {x: _UNKNOWN_VALUE(y[1]) for x, y in
+                            zip(literal_keys, got_items)}
+            for i, k in enumerate(literal_keys):
+                value_indexes[k] = i
+        else:
+            literal_dict = {x: y for x, y in zip(literal_keys, literal_values)}
+            for i, k in enumerate(literal_keys):
+                value_indexes[k] = i
+
+        expr = ir.Expr.build_map(items=got_items, size=size,
+                                 literal_value=literal_dict,
+                                 value_indexes=value_indexes,
+                                 loc=self.loc)
+        self.store(expr, res)
+
+    def op_STORE_MAP(self, inst, dct, key, value):
+        stmt = ir.StoreMap(dct=self.get(dct), key=self.get(key),
+                           value=self.get(value), loc=self.loc)
+        self.current_block.append(stmt)
+
+    def op_UNARY_NEGATIVE(self, inst, value, res):
+        value = self.get(value)
+        expr = ir.Expr.unary('-', value=value, loc=self.loc)
+        return self.store(expr, res)
+
+    def op_UNARY_POSITIVE(self, inst, value, res):
+        value = self.get(value)
+        expr = ir.Expr.unary('+', value=value, loc=self.loc)
+        return self.store(expr, res)
+
+    def op_UNARY_INVERT(self, inst, value, res):
+        value = self.get(value)
+        expr = ir.Expr.unary('~', value=value, loc=self.loc)
+        return self.store(expr, res)
+
+    def op_UNARY_NOT(self, inst, value, res):
+        value = self.get(value)
+        expr = ir.Expr.unary('not', value=value, loc=self.loc)
+        return self.store(expr, res)
+
+    def _binop(self, op, lhs, rhs, res):
+        op = BINOPS_TO_OPERATORS[op]
+        lhs = self.get(lhs)
+        rhs = self.get(rhs)
+        expr = ir.Expr.binop(op, lhs=lhs, rhs=rhs, loc=self.loc)
+        self.store(expr, res)
+
+    def _inplace_binop(self, op, lhs, rhs, res):
+        immuop = BINOPS_TO_OPERATORS[op]
+        op = INPLACE_BINOPS_TO_OPERATORS[op + '=']
+        lhs = self.get(lhs)
+        rhs = self.get(rhs)
+        expr = ir.Expr.inplace_binop(op, immuop, lhs=lhs, rhs=rhs,
+                                     loc=self.loc)
+        self.store(expr, res)
+
+    def op_BINARY_ADD(self, inst, lhs, rhs, res):
+        self._binop('+', lhs, rhs, res)
+
+    def op_BINARY_SUBTRACT(self, inst, lhs, rhs, res):
+        self._binop('-', lhs, rhs, res)
+
+    def op_BINARY_MULTIPLY(self, inst, lhs, rhs, res):
+        self._binop('*', lhs, rhs, res)
+
+    def op_BINARY_DIVIDE(self, inst, lhs, rhs, res):
+        self._binop('/?', lhs, rhs, res)
+
+    def op_BINARY_TRUE_DIVIDE(self, inst, lhs, rhs, res):
+        self._binop('/', lhs, rhs, res)
+
+    def 
op_BINARY_FLOOR_DIVIDE(self, inst, lhs, rhs, res): + self._binop('//', lhs, rhs, res) + + def op_BINARY_MODULO(self, inst, lhs, rhs, res): + self._binop('%', lhs, rhs, res) + + def op_BINARY_POWER(self, inst, lhs, rhs, res): + self._binop('**', lhs, rhs, res) + + def op_BINARY_MATRIX_MULTIPLY(self, inst, lhs, rhs, res): + self._binop('@', lhs, rhs, res) + + def op_BINARY_LSHIFT(self, inst, lhs, rhs, res): + self._binop('<<', lhs, rhs, res) + + def op_BINARY_RSHIFT(self, inst, lhs, rhs, res): + self._binop('>>', lhs, rhs, res) + + def op_BINARY_AND(self, inst, lhs, rhs, res): + self._binop('&', lhs, rhs, res) + + def op_BINARY_OR(self, inst, lhs, rhs, res): + self._binop('|', lhs, rhs, res) + + def op_BINARY_XOR(self, inst, lhs, rhs, res): + self._binop('^', lhs, rhs, res) + + def op_INPLACE_ADD(self, inst, lhs, rhs, res): + self._inplace_binop('+', lhs, rhs, res) + + def op_INPLACE_SUBTRACT(self, inst, lhs, rhs, res): + self._inplace_binop('-', lhs, rhs, res) + + def op_INPLACE_MULTIPLY(self, inst, lhs, rhs, res): + self._inplace_binop('*', lhs, rhs, res) + + def op_INPLACE_DIVIDE(self, inst, lhs, rhs, res): + self._inplace_binop('/?', lhs, rhs, res) + + def op_INPLACE_TRUE_DIVIDE(self, inst, lhs, rhs, res): + self._inplace_binop('/', lhs, rhs, res) + + def op_INPLACE_FLOOR_DIVIDE(self, inst, lhs, rhs, res): + self._inplace_binop('//', lhs, rhs, res) + + def op_INPLACE_MODULO(self, inst, lhs, rhs, res): + self._inplace_binop('%', lhs, rhs, res) + + def op_INPLACE_POWER(self, inst, lhs, rhs, res): + self._inplace_binop('**', lhs, rhs, res) + + def op_INPLACE_MATRIX_MULTIPLY(self, inst, lhs, rhs, res): + self._inplace_binop('@', lhs, rhs, res) + + def op_INPLACE_LSHIFT(self, inst, lhs, rhs, res): + self._inplace_binop('<<', lhs, rhs, res) + + def op_INPLACE_RSHIFT(self, inst, lhs, rhs, res): + self._inplace_binop('>>', lhs, rhs, res) + + def op_INPLACE_AND(self, inst, lhs, rhs, res): + self._inplace_binop('&', lhs, rhs, res) + + def op_INPLACE_OR(self, inst, lhs, rhs, res): + self._inplace_binop('|', lhs, rhs, res) + + def op_INPLACE_XOR(self, inst, lhs, rhs, res): + self._inplace_binop('^', lhs, rhs, res) + + def op_JUMP_ABSOLUTE(self, inst): + jmp = ir.Jump(inst.get_jump_target(), loc=self.loc) + self.current_block.append(jmp) + + def op_JUMP_FORWARD(self, inst): + jmp = ir.Jump(inst.get_jump_target(), loc=self.loc) + self.current_block.append(jmp) + + def op_POP_BLOCK(self, inst, kind=None): + if kind is None: + self.syntax_blocks.pop() + elif kind == 'with': + d = ir.PopBlock(loc=self.loc) + self.current_block.append(d) + elif kind == 'try': + self._insert_try_block_end() + + def op_RETURN_VALUE(self, inst, retval, castval): + self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval) + ret = ir.Return(self.get(castval), loc=self.loc) + self.current_block.append(ret) + + def op_COMPARE_OP(self, inst, lhs, rhs, res): + op = dis.cmp_op[inst.arg] + if op == 'in' or op == 'not in': + lhs, rhs = rhs, lhs + + if op == 'not in': + self._binop('in', lhs, rhs, res) + tmp = self.get(res) + out = ir.Expr.unary('not', value=tmp, loc=self.loc) + self.store(out, res) + elif op == 'exception match': + gv_fn = ir.Global( + "exception_match", eh.exception_match, loc=self.loc, + ) + exc_match_name = '$exc_match' + self.store(value=gv_fn, name=exc_match_name, redefine=True) + lhs = self.get(lhs) + rhs = self.get(rhs) + exc = ir.Expr.call( + self.get(exc_match_name), args=(lhs, rhs), kws=(), loc=self.loc, + ) + self.store(exc, res) + else: + self._binop(op, lhs, rhs, res) + + def op_IS_OP(self, inst, 
lhs, rhs, res):
+        # invert if op case is 1
+        op = 'is not' if inst.arg == 1 else 'is'
+        self._binop(op, lhs, rhs, res)
+
+    def op_CONTAINS_OP(self, inst, lhs, rhs, res):
+        lhs, rhs = rhs, lhs
+        self._binop('in', lhs, rhs, res)
+        # invert if op case is 1
+        if inst.arg == 1:
+            tmp = self.get(res)
+            out = ir.Expr.unary('not', value=tmp, loc=self.loc)
+            self.store(out, res)
+
+    def op_BREAK_LOOP(self, inst, end=None):
+        if end is None:
+            loop = self.syntax_blocks[-1]
+            assert isinstance(loop, ir.Loop)
+            end = loop.exit
+        jmp = ir.Jump(target=end, loc=self.loc)
+        self.current_block.append(jmp)
+
+    def _op_JUMP_IF(self, inst, pred, iftrue):
+        brs = {
+            True: inst.get_jump_target(),
+            False: inst.next,
+        }
+        truebr = brs[iftrue]
+        falsebr = brs[not iftrue]
+
+        name = "bool%s" % (inst.offset)
+        gv_fn = ir.Global("bool", bool, loc=self.loc)
+        self.store(value=gv_fn, name=name)
+
+        callres = ir.Expr.call(self.get(name), (self.get(pred),), (),
+                               loc=self.loc)
+
+        pname = "$%spred" % (inst.offset)
+        predicate = self.store(value=callres, name=pname)
+        bra = ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
+                        loc=self.loc)
+        self.current_block.append(bra)
+
+    def op_JUMP_IF_FALSE(self, inst, pred):
+        self._op_JUMP_IF(inst, pred=pred, iftrue=False)
+
+    def op_JUMP_IF_TRUE(self, inst, pred):
+        self._op_JUMP_IF(inst, pred=pred, iftrue=True)
+
+    def op_POP_JUMP_IF_FALSE(self, inst, pred):
+        self._op_JUMP_IF(inst, pred=pred, iftrue=False)
+
+    def op_POP_JUMP_IF_TRUE(self, inst, pred):
+        self._op_JUMP_IF(inst, pred=pred, iftrue=True)
+
+    def op_JUMP_IF_FALSE_OR_POP(self, inst, pred):
+        self._op_JUMP_IF(inst, pred=pred, iftrue=False)
+
+    def op_JUMP_IF_TRUE_OR_POP(self, inst, pred):
+        self._op_JUMP_IF(inst, pred=pred, iftrue=True)
+
+    def op_JUMP_IF_NOT_EXC_MATCH(self, inst, pred, tos, tos1):
+        truebr = inst.next
+        falsebr = inst.get_jump_target()
+        gv_fn = ir.Global(
+            "exception_match", eh.exception_match, loc=self.loc,
+        )
+        exc_match_name = '$exc_match'
+        self.store(value=gv_fn, name=exc_match_name, redefine=True)
+        lhs = self.get(tos1)
+        rhs = self.get(tos)
+        exc = ir.Expr.call(
+            self.get(exc_match_name), args=(lhs, rhs), kws=(), loc=self.loc,
+        )
+        predicate = self.store(exc, pred)
+        bra = ir.Branch(cond=predicate, truebr=truebr, falsebr=falsebr,
+                        loc=self.loc)
+        self.current_block.append(bra)
+
+    def op_RERAISE(self, inst, exc):
+        # Numba can't handle this case and it's caught elsewhere; this is a
+        # runtime guard in case this is reached by unknown means.
+        msg = (f"Unreachable condition reached (op code RERAISE executed)"
+               f"{error_extras['reportable']}")
+        stmt = ir.StaticRaise(AssertionError, (msg,), self.loc)
+        self.current_block.append(stmt)
+
+    def op_RAISE_VARARGS(self, inst, exc):
+        if exc is not None:
+            exc = self.get(exc)
+        tryblk = self.dfainfo.active_try_block
+        if tryblk is not None:
+            # In a try block
+            stmt = ir.TryRaise(exception=exc, loc=self.loc)
+            self.current_block.append(stmt)
+            self._insert_try_block_end()
+            self.current_block.append(ir.Jump(tryblk['end'], loc=self.loc))
+        else:
+            # Not in a try block
+            stmt = ir.Raise(exception=exc, loc=self.loc)
+            self.current_block.append(stmt)
+
+    def op_YIELD_VALUE(self, inst, value, res):
+        # initialize index to None; it is set later, in post-processing
+        index = None
+        inst = ir.Yield(value=self.get(value), index=index, loc=self.loc)
+        return self.store(inst, res)
+
+    def op_MAKE_FUNCTION(self, inst, name, code, closure, annotations,
+                         kwdefaults, defaults, res):
+        # annotations are ignored by numba but useful for static analysis
+        # re. https://github.com/numba/numba/issues/7269
+        if kwdefaults is not None:
+            msg = "op_MAKE_FUNCTION with kwdefaults is not implemented"
+            raise NotImplementedError(msg)
+        if defaults:
+            if isinstance(defaults, tuple):
+                defaults = tuple([self.get(name) for name in defaults])
+            else:
+                defaults = self.get(defaults)
+
+        assume_code_const = self.definitions[code][0]
+        if not isinstance(assume_code_const, ir.Const):
+            msg = (
+                "Unsupported use of closure. "
+                "Probably caused by complex control-flow constructs; "
+                "e.g. try-except"
+            )
+            raise errors.UnsupportedError(msg, loc=self.loc)
+        fcode = assume_code_const.value
+        if name:
+            name = self.get(name)
+        if closure:
+            closure = self.get(closure)
+        expr = ir.Expr.make_function(name, fcode, closure, defaults, self.loc)
+        self.store(expr, res)
+
+    def op_MAKE_CLOSURE(self, inst, name, code, closure, annotations,
+                        kwdefaults, defaults, res):
+        self.op_MAKE_FUNCTION(inst, name, code, closure, annotations,
+                              kwdefaults, defaults, res)
+
+    def op_LOAD_CLOSURE(self, inst, res):
+        n_cellvars = len(self.code_cellvars)
+        if inst.arg < n_cellvars:
+            name = self.code_cellvars[inst.arg]
+            try:
+                gl = self.get(name)
+            except NotDefinedError:
+                msg = "Unsupported use of op_LOAD_CLOSURE encountered"
+                raise NotImplementedError(msg)
+        else:
+            idx = inst.arg - n_cellvars
+            name = self.code_freevars[idx]
+            value = self.get_closure_value(idx)
+            gl = ir.FreeVar(idx, name, value, loc=self.loc)
+        self.store(gl, res)
+
+    def op_LIST_APPEND(self, inst, target, value, appendvar, res):
+        target = self.get(target)
+        value = self.get(value)
+        appendattr = ir.Expr.getattr(target, 'append', loc=self.loc)
+        self.store(value=appendattr, name=appendvar)
+        appendinst = ir.Expr.call(self.get(appendvar), (value,), (),
+                                  loc=self.loc)
+        self.store(value=appendinst, name=res)
+
+    def op_LIST_EXTEND(self, inst, target, value, extendvar, res):
+        target = self.get(target)
+        value = self.get(value)
+        # If the statements between the current instruction and the target
+        # are N * consts followed by build_tuple AND the target has no items,
+        # it's a situation where a list is being statically initialised;
+        # rewrite the build_tuple as a build_list, drop the extend, and wire
+        # up the target as the result from the build_tuple that's been
+        # rewritten.
+
+        # See if this is the first statement in a block; if so it's probably
+        # from control flow in a tuple unpack like:
+        # `(*(1, (2,) if predicate else (3,)))`
+        # this cannot be handled at present so raise
+        msg = ("An unsupported bytecode sequence has been encountered: "
+               "op_LIST_EXTEND at the start of a block.\n\nThis could be "
+               "due to the use of a branch in a tuple unpacking statement.")
+        if not self.current_block.body:
+            raise errors.UnsupportedError(msg)
+
+        # is last emitted statement a build_tuple?
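+        # E.g. for `x = [1, 2, 3]` CPython 3.9+ emits BUILD_LIST(0),
+        # LOAD_CONST((1, 2, 3)), LIST_EXTEND(1), so at this point the block
+        # tail is expected to be an empty build_list for the target, const
+        # definitions and a build_tuple holding the initializer values.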
+        stmt = self.current_block.body[-1]
+        ok = isinstance(stmt.value, ir.Expr) and stmt.value.op == "build_tuple"
+        # check statements from self.current_block.body[-1] through to target,
+        # make sure they are consts
+        build_empty_list = None
+        if ok:
+            for stmt in reversed(self.current_block.body[:-1]):
+                if not isinstance(stmt, ir.Assign):
+                    ok = False
+                    break
+                # if it's not a const, it needs to be the `build_list` for the
+                # target, else it's something else we don't know about so just
+                # bail
+                if isinstance(stmt.value, ir.Const):
+                    continue
+
+                # it's not a const, check for target
+                elif isinstance(stmt.value, ir.Expr) and stmt.target == target:
+                    build_empty_list = stmt
+                    # it's only ok to do this if the target has no initializer
+                    # already
+                    ok = not stmt.value.items
+                    break
+                else:
+                    ok = False
+                    break
+        if ok and build_empty_list is None:
+            raise errors.UnsupportedError(msg)
+        if ok:
+            stmts = self.current_block.body
+            build_tuple_asgn = self.current_block.body[-1]
+            # move build list to last issued statement
+            stmts.append(stmts.pop(stmts.index(build_empty_list)))
+            # fix the build list
+            build_tuple = build_tuple_asgn.value
+            build_list = build_empty_list.value
+            build_list.items = build_tuple.items
+        else:
+            # it's just a list extend with no static init, let it be
+            extendattr = ir.Expr.getattr(target, 'extend', loc=self.loc)
+            self.store(value=extendattr, name=extendvar)
+            extendinst = ir.Expr.call(self.get(extendvar), (value,), (),
+                                      loc=self.loc)
+            self.store(value=extendinst, name=res)
+
+    def op_MAP_ADD(self, inst, target, key, value, setitemvar, res):
+        target = self.get(target)
+        key = self.get(key)
+        value = self.get(value)
+        setitemattr = ir.Expr.getattr(target, '__setitem__', loc=self.loc)
+        self.store(value=setitemattr, name=setitemvar)
+        appendinst = ir.Expr.call(self.get(setitemvar), (key, value,), (),
+                                  loc=self.loc)
+        self.store(value=appendinst, name=res)
+
+    def op_LOAD_ASSERTION_ERROR(self, inst, res):
+        gv_fn = ir.Global("AssertionError", AssertionError, loc=self.loc)
+        self.store(value=gv_fn, name=res)
+
+    # NOTE: The LOAD_METHOD opcode is implemented as a LOAD_ATTR for ease,
+    # however this means a new object (the bound-method instance) could be
+    # created. Conversely, with a pure LOAD_METHOD no intermediary is
+    # present; it is essentially a pointer grab forwarded to CALL_METHOD.
+    # The net outcome is that the implementation in Numba produces the same
+    # result, but in object mode it may run more slowly than it would in
+    # CPython.
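The behaviour described in the note above is easy to observe: a method call lowers to exactly the same IR as an attribute load followed by a call. A minimal sketch (assuming the vendored tree is importable as `numba`; `run_frontend` is the compiler helper that drives the `Interpreter` defined in this file, and `call_method` is a hypothetical test function):

    from numba.core import compiler, ir

    def call_method(xs):
        return xs.sum()

    func_ir = compiler.run_frontend(call_method)  # bytecode -> FunctionIR
    for blk in func_ir.blocks.values():
        for stmt in blk.body:
            if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr):
                # LOAD_METHOD/CALL_METHOD surface as 'getattr' then 'call'
                print(stmt.target.name, stmt.value.op)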
+
+    def op_LOAD_METHOD(self, *args, **kws):
+        self.op_LOAD_ATTR(*args, **kws)
+
+    def op_CALL_METHOD(self, *args, **kws):
+        self.op_CALL_FUNCTION(*args, **kws)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/intrinsics.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/intrinsics.py
new file mode 100644
index 000000000..8e85bb354
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/intrinsics.py
@@ -0,0 +1,101 @@
+"""
+LLVM pass that converts intrinsics into other math calls
+"""
+
+from llvmlite import ir
+
+
+class _DivmodFixer(ir.Visitor):
+    def visit_Instruction(self, instr):
+        if instr.type == ir.IntType(64):
+            if instr.opname in ['srem', 'urem', 'sdiv', 'udiv']:
+                name = 'numba_{op}'.format(op=instr.opname)
+                fn = self.module.globals.get(name)
+                # Declare the function if it doesn't already exist
+                if fn is None:
+                    opty = instr.type
+                    sdivfnty = ir.FunctionType(opty, [opty, opty])
+                    fn = ir.Function(self.module, sdivfnty, name=name)
+                # Replace the operation with a call to the builtin
+                repl = ir.CallInstr(parent=instr.parent, func=fn,
+                                    args=instr.operands, name=instr.name)
+                instr.parent.replace(instr, repl)
+
+
+def fix_divmod(mod):
+    """Replace division and remainder instructions with calls to builtins
+    """
+    _DivmodFixer().visit(mod)
+
+
+INTR_TO_CMATH = {
+    "llvm.pow.f32": "powf",
+    "llvm.pow.f64": "pow",
+
+    "llvm.sin.f32": "sinf",
+    "llvm.sin.f64": "sin",
+
+    "llvm.cos.f32": "cosf",
+    "llvm.cos.f64": "cos",
+
+    "llvm.sqrt.f32": "sqrtf",
+    "llvm.sqrt.f64": "sqrt",
+
+    "llvm.exp.f32": "expf",
+    "llvm.exp.f64": "exp",
+
+    "llvm.log.f32": "logf",
+    "llvm.log.f64": "log",
+
+    "llvm.log10.f32": "log10f",
+    "llvm.log10.f64": "log10",
+
+    "llvm.fabs.f32": "fabsf",
+    "llvm.fabs.f64": "fabs",
+
+    "llvm.floor.f32": "floorf",
+    "llvm.floor.f64": "floor",
+
+    "llvm.ceil.f32": "ceilf",
+    "llvm.ceil.f64": "ceil",
+
+    "llvm.trunc.f32": "truncf",
+    "llvm.trunc.f64": "trunc",
+}
+
+OTHER_CMATHS = '''
+tan
+tanf
+sinh
+sinhf
+cosh
+coshf
+tanh
+tanhf
+asin
+asinf
+acos
+acosf
+atan
+atanf
+atan2
+atan2f
+asinh
+asinhf
+acosh
+acoshf
+atanh
+atanhf
+expm1
+expm1f
+log1p
+log1pf
+log10
+log10f
+fmod
+fmodf
+round
+roundf
+'''.split()
+
+INTR_MATH = frozenset(INTR_TO_CMATH.values()) | frozenset(OTHER_CMATHS)
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir.py
new file mode 100644
index 000000000..d2b546996
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir.py
@@ -0,0 +1,1630 @@
+from collections import defaultdict
+import copy
+import itertools
+import os
+import linecache
+import pprint
+import re
+import sys
+import operator
+from types import FunctionType, BuiltinFunctionType
+from functools import total_ordering
+from io import StringIO
+
+from numba.core import errors, config
+from numba.core.utils import (BINOPS_TO_OPERATORS, INPLACE_BINOPS_TO_OPERATORS,
+                              UNARY_BUITINS_TO_OPERATORS, OPERATORS_TO_BUILTINS)
+from numba.core.errors import (NotDefinedError, RedefinedError,
+                               VerificationError, ConstantInferenceError)
+from numba.core import consts
+
+# terminal color markup
+_termcolor = errors.termcolor()
+
+
+class Loc(object):
+    """Source location
+
+    """
+    _defmatcher = re.compile(r'def\s+(\w+)\(.*')
+
+    def __init__(self, filename, line, col=None, maybe_decorator=False):
+        """ Arguments:
+        filename - name of the file
+        line - line in file
+        col - column
+        maybe_decorator - Set to True if location is likely a jit decorator
+        """
+        
self.filename = filename + self.line = line + self.col = col + self.lines = None # the source lines from the linecache + self.maybe_decorator = maybe_decorator + + def __eq__(self, other): + # equivalence is solely based on filename, line and col + if type(self) is not type(other): return False + if self.filename != other.filename: return False + if self.line != other.line: return False + if self.col != other.col: return False + return True + + def __ne__(self, other): + return not self.__eq__(other) + + @classmethod + def from_function_id(cls, func_id): + return cls(func_id.filename, func_id.firstlineno, maybe_decorator=True) + + def __repr__(self): + return "Loc(filename=%s, line=%s, col=%s)" % (self.filename, + self.line, self.col) + + def __str__(self): + if self.col is not None: + return "%s (%s:%s)" % (self.filename, self.line, self.col) + else: + return "%s (%s)" % (self.filename, self.line) + + def _find_definition(self): + # try and find a def, go backwards from error line + fn_name = None + lines = self.get_lines() + for x in reversed(lines[:self.line - 1]): + # the strip and startswith is to handle user code with commented out + # 'def' or use of 'def' in a docstring. + if x.strip().startswith('def '): + fn_name = x + break + + return fn_name + + def _raw_function_name(self): + defn = self._find_definition() + if defn: + return self._defmatcher.match(defn.strip()).groups()[0] + else: + # Probably exec() or REPL. + return None + + def get_lines(self): + if self.lines is None: + + self.lines = linecache.getlines(self._get_path()) + + return self.lines + + def _get_path(self): + path = None + try: + # Try to get a relative path + # ipython/jupyter input just returns as self.filename + path = os.path.relpath(self.filename) + except ValueError: + # Fallback to absolute path if error occurred in getting the + # relative path. + # This may happen on windows if the drive is different + path = os.path.abspath(self.filename) + return path + + + def strformat(self, nlines_up=2): + + lines = self.get_lines() + + use_line = self.line + + if self.maybe_decorator: + # try and sort out a better `loc`, if it's suspected that this loc + # points at a jit decorator by virtue of + # `__code__.co_firstlineno` + + # get lines, add a dummy entry at the start as lines count from + # 1 but list index counts from 0 + tmplines = [''] + lines + + if lines and use_line and 'def ' not in tmplines[use_line]: + # look forward 10 lines, unlikely anyone managed to stretch + # a jit call declaration over >10 lines?! + min_line = max(0, use_line) + max_line = use_line + 10 + selected = tmplines[min_line : max_line] + index = 0 + for idx, x in enumerate(selected): + if 'def ' in x: + index = idx + break + use_line = use_line + index + + + ret = [] # accumulates output + if lines and use_line: + + def count_spaces(string): + spaces = 0 + for x in itertools.takewhile(str.isspace, str(string)): + spaces += 1 + return spaces + + # A few places in the code still use no `loc` or default to line 1 + # this is often in places where exceptions are used for the purposes + # of flow control. 
As a result max is in use to prevent slice from + # `[negative: positive]` + selected = lines[max(0, use_line - nlines_up):use_line] + + # see if selected contains a definition + def_found = False + for x in selected: + if 'def ' in x: + def_found = True + + # no definition found, try and find one + if not def_found: + # try and find a def, go backwards from error line + fn_name = None + for x in reversed(lines[:use_line - 1]): + if 'def ' in x: + fn_name = x + break + if fn_name: + ret.append(fn_name) + spaces = count_spaces(x) + ret.append(' '*(4 + spaces) + '\n') + + if selected: + ret.extend(selected[:-1]) + ret.append(_termcolor.highlight(selected[-1])) + + # point at the problem with a caret + spaces = count_spaces(selected[-1]) + ret.append(' '*(spaces) + _termcolor.indicate("^")) + + # if in the REPL source may not be available + if not ret: + ret = "" + + err = _termcolor.filename('\nFile "%s", line %d:')+'\n%s' + tmp = err % (self._get_path(), use_line, _termcolor.code(''.join(ret))) + return tmp + + def with_lineno(self, line, col=None): + """ + Return a new Loc with this line number. + """ + return type(self)(self.filename, line, col) + + def short(self): + """ + Returns a short string + """ + shortfilename = os.path.basename(self.filename) + return "%s:%s" % (shortfilename, self.line) + + +# Used for annotating errors when source location is unknown. +unknown_loc = Loc("unknown location", 0, 0) + + +@total_ordering +class SlotEqualityCheckMixin(object): + # some ir nodes are __dict__ free using __slots__ instead, this mixin + # should not trigger the unintended creation of __dict__. + __slots__ = tuple() + + def __eq__(self, other): + if type(self) is type(other): + for name in self.__slots__: + if getattr(self, name) != getattr(other, name): + return False + else: + return True + return False + + def __le__(self, other): + return str(self) <= str(other) + + def __hash__(self): + return id(self) + + +@total_ordering +class EqualityCheckMixin(object): + """ Mixin for basic equality checking """ + + def __eq__(self, other): + if type(self) is type(other): + def fixup(adict): + bad = ('loc', 'scope') + d = dict(adict) + for x in bad: + d.pop(x, None) + return d + d1 = fixup(self.__dict__) + d2 = fixup(other.__dict__) + if d1 == d2: + return True + return False + + def __le__(self, other): + return str(self) < str(other) + + def __hash__(self): + return id(self) + + +class VarMap(object): + def __init__(self): + self._con = {} + + def define(self, name, var): + if name in self._con: + raise RedefinedError(name) + else: + self._con[name] = var + + def get(self, name): + try: + return self._con[name] + except KeyError: + raise NotDefinedError(name) + + def __contains__(self, name): + return name in self._con + + def __len__(self): + return len(self._con) + + def __repr__(self): + return pprint.pformat(self._con) + + def __hash__(self): + return hash(self.name) + + def __iter__(self): + return self._con.iterkeys() + + def __eq__(self, other): + if type(self) is type(other): + # check keys only, else __eq__ ref cycles, scope -> varmap -> var + return self._con.keys() == other._con.keys() + return False + + def __ne__(self, other): + return not self.__eq__(other) + + +class AbstractRHS(object): + """Abstract base class for anything that can be the RHS of an assignment. + This class **does not** define any methods. + """ + + +class Inst(EqualityCheckMixin, AbstractRHS): + """ + Base class for all IR instructions. 
+ """ + + def list_vars(self): + """ + List the variables used (read or written) by the instruction. + """ + raise NotImplementedError + + def _rec_list_vars(self, val): + """ + A recursive helper used to implement list_vars() in subclasses. + """ + if isinstance(val, Var): + return [val] + elif isinstance(val, Inst): + return val.list_vars() + elif isinstance(val, (list, tuple)): + lst = [] + for v in val: + lst.extend(self._rec_list_vars(v)) + return lst + elif isinstance(val, dict): + lst = [] + for v in val.values(): + lst.extend(self._rec_list_vars(v)) + return lst + else: + return [] + + +class Stmt(Inst): + """ + Base class for IR statements (instructions which can appear on their + own in a Block). + """ + # Whether this statement ends its basic block (i.e. it will either jump + # to another block or exit the function). + is_terminator = False + # Whether this statement exits the function. + is_exit = False + + def list_vars(self): + return self._rec_list_vars(self.__dict__) + + +class Terminator(Stmt): + """ + IR statements that are terminators: the last statement in a block. + A terminator must either: + - exit the function + - jump to a block + + All subclass of Terminator must override `.get_targets()` to return a list + of jump targets. + """ + is_terminator = True + + def get_targets(self): + raise NotImplementedError(type(self)) + + +class Expr(Inst): + """ + An IR expression (an instruction which can only be part of a larger + statement). + """ + + def __init__(self, op, loc, **kws): + assert isinstance(op, str) + assert isinstance(loc, Loc) + self.op = op + self.loc = loc + self._kws = kws + + def __getattr__(self, name): + if name.startswith('_'): + return Inst.__getattr__(self, name) + return self._kws[name] + + def __setattr__(self, name, value): + if name in ('op', 'loc', '_kws'): + self.__dict__[name] = value + else: + self._kws[name] = value + + @classmethod + def binop(cls, fn, lhs, rhs, loc): + assert isinstance(fn, BuiltinFunctionType) + assert isinstance(lhs, Var) + assert isinstance(rhs, Var) + assert isinstance(loc, Loc) + op = 'binop' + return cls(op=op, loc=loc, fn=fn, lhs=lhs, rhs=rhs, + static_lhs=UNDEFINED, static_rhs=UNDEFINED) + + @classmethod + def inplace_binop(cls, fn, immutable_fn, lhs, rhs, loc): + assert isinstance(fn, BuiltinFunctionType) + assert isinstance(immutable_fn, BuiltinFunctionType) + assert isinstance(lhs, Var) + assert isinstance(rhs, Var) + assert isinstance(loc, Loc) + op = 'inplace_binop' + return cls(op=op, loc=loc, fn=fn, immutable_fn=immutable_fn, + lhs=lhs, rhs=rhs, + static_lhs=UNDEFINED, static_rhs=UNDEFINED) + + @classmethod + def unary(cls, fn, value, loc): + assert isinstance(value, (str, Var, FunctionType)) + assert isinstance(loc, Loc) + op = 'unary' + fn = UNARY_BUITINS_TO_OPERATORS.get(fn, fn) + return cls(op=op, loc=loc, fn=fn, value=value) + + @classmethod + def call(cls, func, args, kws, loc, vararg=None, varkwarg=None, target=None): + assert isinstance(func, Var) + assert isinstance(loc, Loc) + op = 'call' + return cls(op=op, loc=loc, func=func, args=args, kws=kws, + vararg=vararg, varkwarg=varkwarg, target=target) + + @classmethod + def build_tuple(cls, items, loc): + assert isinstance(loc, Loc) + op = 'build_tuple' + return cls(op=op, loc=loc, items=items) + + @classmethod + def build_list(cls, items, loc): + assert isinstance(loc, Loc) + op = 'build_list' + return cls(op=op, loc=loc, items=items) + + @classmethod + def build_set(cls, items, loc): + assert isinstance(loc, Loc) + op = 'build_set' + return 
cls(op=op, loc=loc, items=items) + + @classmethod + def build_map(cls, items, size, literal_value, value_indexes, loc): + assert isinstance(loc, Loc) + op = 'build_map' + return cls(op=op, loc=loc, items=items, size=size, + literal_value=literal_value, value_indexes=value_indexes) + + @classmethod + def pair_first(cls, value, loc): + assert isinstance(value, Var) + op = 'pair_first' + return cls(op=op, loc=loc, value=value) + + @classmethod + def pair_second(cls, value, loc): + assert isinstance(value, Var) + assert isinstance(loc, Loc) + op = 'pair_second' + return cls(op=op, loc=loc, value=value) + + @classmethod + def getiter(cls, value, loc): + assert isinstance(value, Var) + assert isinstance(loc, Loc) + op = 'getiter' + return cls(op=op, loc=loc, value=value) + + @classmethod + def iternext(cls, value, loc): + assert isinstance(value, Var) + assert isinstance(loc, Loc) + op = 'iternext' + return cls(op=op, loc=loc, value=value) + + @classmethod + def exhaust_iter(cls, value, count, loc): + assert isinstance(value, Var) + assert isinstance(count, int) + assert isinstance(loc, Loc) + op = 'exhaust_iter' + return cls(op=op, loc=loc, value=value, count=count) + + @classmethod + def getattr(cls, value, attr, loc): + assert isinstance(value, Var) + assert isinstance(attr, str) + assert isinstance(loc, Loc) + op = 'getattr' + return cls(op=op, loc=loc, value=value, attr=attr) + + @classmethod + def getitem(cls, value, index, loc): + assert isinstance(value, Var) + assert isinstance(index, Var) + assert isinstance(loc, Loc) + op = 'getitem' + fn = operator.getitem + return cls(op=op, loc=loc, value=value, index=index, fn=fn) + + @classmethod + def typed_getitem(cls, value, dtype, index, loc): + assert isinstance(value, Var) + assert isinstance(loc, Loc) + op = 'typed_getitem' + return cls(op=op, loc=loc, value=value, dtype=dtype, + index=index) + + @classmethod + def static_getitem(cls, value, index, index_var, loc): + assert isinstance(value, Var) + assert index_var is None or isinstance(index_var, Var) + assert isinstance(loc, Loc) + op = 'static_getitem' + fn = operator.getitem + return cls(op=op, loc=loc, value=value, index=index, + index_var=index_var, fn=fn) + + @classmethod + def cast(cls, value, loc): + """ + A node for implicit casting at the return statement + """ + assert isinstance(value, Var) + assert isinstance(loc, Loc) + op = 'cast' + return cls(op=op, value=value, loc=loc) + + @classmethod + def phi(cls, loc): + """Phi node + """ + assert isinstance(loc, Loc) + return cls(op='phi', incoming_values=[], incoming_blocks=[], loc=loc) + + @classmethod + def make_function(cls, name, code, closure, defaults, loc): + """ + A node for making a function object. + """ + assert isinstance(loc, Loc) + op = 'make_function' + return cls(op=op, name=name, code=code, closure=closure, defaults=defaults, loc=loc) + + @classmethod + def null(cls, loc): + """ + A node for null value. + + This node is not handled by type inference. It is only added by + post-typing passes. + """ + assert isinstance(loc, Loc) + op = 'null' + return cls(op=op, loc=loc) + + @classmethod + def dummy(cls, op, info, loc): + """ + A node for a dummy value. + + This node is a place holder for carrying information through to a point + where it is rewritten into something valid. This node is not handled + by type inference or lowering. It's presence outside of the interpreter + renders IR as illegal. 
+ """ + assert isinstance(loc, Loc) + assert isinstance(op, str) + return cls(op=op, info=info, loc=loc) + + def __repr__(self): + if self.op == 'call': + args = ', '.join(str(a) for a in self.args) + pres_order = self._kws.items() if config.DIFF_IR == 0 else sorted(self._kws.items()) + kws = ', '.join('%s=%s' % (k, v) for k, v in pres_order) + vararg = '*%s' % (self.vararg,) if self.vararg is not None else '' + arglist = ', '.join(filter(None, [args, vararg, kws])) + return 'call %s(%s)' % (self.func, arglist) + elif self.op == 'binop': + lhs, rhs = self.lhs, self.rhs + if self.fn == operator.contains: + lhs, rhs = rhs, lhs + fn = OPERATORS_TO_BUILTINS.get(self.fn, self.fn) + return '%s %s %s' % (lhs, fn, rhs) + else: + pres_order = self._kws.items() if config.DIFF_IR == 0 else sorted(self._kws.items()) + args = ('%s=%s' % (k, v) for k, v in pres_order) + return '%s(%s)' % (self.op, ', '.join(args)) + + def list_vars(self): + return self._rec_list_vars(self._kws) + + def infer_constant(self): + raise ConstantInferenceError('%s' % self, loc=self.loc) + + +class SetItem(Stmt): + """ + target[index] = value + """ + + def __init__(self, target, index, value, loc): + assert isinstance(target, Var) + assert isinstance(index, Var) + assert isinstance(value, Var) + assert isinstance(loc, Loc) + self.target = target + self.index = index + self.value = value + self.loc = loc + + def __repr__(self): + return '%s[%s] = %s' % (self.target, self.index, self.value) + + +class StaticSetItem(Stmt): + """ + target[constant index] = value + """ + + def __init__(self, target, index, index_var, value, loc): + assert isinstance(target, Var) + assert not isinstance(index, Var) + assert isinstance(index_var, Var) + assert isinstance(value, Var) + assert isinstance(loc, Loc) + self.target = target + self.index = index + self.index_var = index_var + self.value = value + self.loc = loc + + def __repr__(self): + return '%s[%r] = %s' % (self.target, self.index, self.value) + + +class DelItem(Stmt): + """ + del target[index] + """ + + def __init__(self, target, index, loc): + assert isinstance(target, Var) + assert isinstance(index, Var) + assert isinstance(loc, Loc) + self.target = target + self.index = index + self.loc = loc + + def __repr__(self): + return 'del %s[%s]' % (self.target, self.index) + + +class SetAttr(Stmt): + def __init__(self, target, attr, value, loc): + assert isinstance(target, Var) + assert isinstance(attr, str) + assert isinstance(value, Var) + assert isinstance(loc, Loc) + self.target = target + self.attr = attr + self.value = value + self.loc = loc + + def __repr__(self): + return '(%s).%s = %s' % (self.target, self.attr, self.value) + + +class DelAttr(Stmt): + def __init__(self, target, attr, loc): + assert isinstance(target, Var) + assert isinstance(attr, str) + assert isinstance(loc, Loc) + self.target = target + self.attr = attr + self.loc = loc + + def __repr__(self): + return 'del (%s).%s' % (self.target, self.attr) + + +class StoreMap(Stmt): + def __init__(self, dct, key, value, loc): + assert isinstance(dct, Var) + assert isinstance(key, Var) + assert isinstance(value, Var) + assert isinstance(loc, Loc) + self.dct = dct + self.key = key + self.value = value + self.loc = loc + + def __repr__(self): + return '%s[%s] = %s' % (self.dct, self.key, self.value) + + +class Del(Stmt): + def __init__(self, value, loc): + assert isinstance(value, str) + assert isinstance(loc, Loc) + self.value = value + self.loc = loc + + def __str__(self): + return "del %s" % self.value + + +class 
Raise(Terminator):
+    is_exit = True
+
+    def __init__(self, exception, loc):
+        assert exception is None or isinstance(exception, Var)
+        assert isinstance(loc, Loc)
+        self.exception = exception
+        self.loc = loc
+
+    def __str__(self):
+        return "raise %s" % self.exception
+
+    def get_targets(self):
+        return []
+
+
+class StaticRaise(Terminator):
+    """
+    Raise an exception class and arguments known at compile-time.
+    Note that if *exc_class* is None, a bare "raise" statement is implied
+    (i.e. re-raise the current exception).
+    """
+    is_exit = True
+
+    def __init__(self, exc_class, exc_args, loc):
+        assert exc_class is None or isinstance(exc_class, type)
+        assert isinstance(loc, Loc)
+        assert exc_args is None or isinstance(exc_args, tuple)
+        self.exc_class = exc_class
+        self.exc_args = exc_args
+        self.loc = loc
+
+    def __str__(self):
+        if self.exc_class is None:
+            return "<static> raise"
+        elif self.exc_args is None:
+            return "<static> raise %s" % (self.exc_class,)
+        else:
+            return "<static> raise %s(%s)" % (self.exc_class,
+                                              ", ".join(map(repr, self.exc_args)))
+
+    def get_targets(self):
+        return []
+
+
+class TryRaise(Stmt):
+    """A raise statement inside a try-block
+    Similar to ``Raise`` but does not terminate.
+    """
+    def __init__(self, exception, loc):
+        assert exception is None or isinstance(exception, Var)
+        assert isinstance(loc, Loc)
+        self.exception = exception
+        self.loc = loc
+
+    def __str__(self):
+        return "try_raise %s" % self.exception
+
+
+class StaticTryRaise(Stmt):
+    """A raise statement inside a try-block.
+    Similar to ``StaticRaise`` but does not terminate.
+    """
+
+    def __init__(self, exc_class, exc_args, loc):
+        assert exc_class is None or isinstance(exc_class, type)
+        assert isinstance(loc, Loc)
+        assert exc_args is None or isinstance(exc_args, tuple)
+        self.exc_class = exc_class
+        self.exc_args = exc_args
+        self.loc = loc
+
+    def __str__(self):
+        if self.exc_class is None:
+            return "static_try_raise"
+        elif self.exc_args is None:
+            return "static_try_raise %s" % (self.exc_class,)
+        else:
+            return "static_try_raise %s(%s)" % (self.exc_class,
+                                                ", ".join(map(repr, self.exc_args)))
+
+
+class Return(Terminator):
+    """
+    Return to caller.
+    """
+    is_exit = True
+
+    def __init__(self, value, loc):
+        assert isinstance(value, Var), type(value)
+        assert isinstance(loc, Loc)
+        self.value = value
+        self.loc = loc
+
+    def __str__(self):
+        return 'return %s' % self.value
+
+    def get_targets(self):
+        return []
+
+
+class Jump(Terminator):
+    """
+    Unconditional branch.
+    """
+
+    def __init__(self, target, loc):
+        assert isinstance(loc, Loc)
+        self.target = target
+        self.loc = loc
+
+    def __str__(self):
+        return 'jump %s' % self.target
+
+    def get_targets(self):
+        return [self.target]
+
+
+class Branch(Terminator):
+    """
+    Conditional branch.
+    """
+
+    def __init__(self, cond, truebr, falsebr, loc):
+        assert isinstance(cond, Var)
+        assert isinstance(loc, Loc)
+        self.cond = cond
+        self.truebr = truebr
+        self.falsebr = falsebr
+        self.loc = loc
+
+    def __str__(self):
+        return 'branch %s, %s, %s' % (self.cond, self.truebr, self.falsebr)
+
+    def get_targets(self):
+        return [self.truebr, self.falsebr]
+
+
+class Assign(Stmt):
+    """
+    Assign to a variable.
+    """
+    def __init__(self, value, target, loc):
+        assert isinstance(value, AbstractRHS)
+        assert isinstance(target, Var)
+        assert isinstance(loc, Loc)
+        self.value = value
+        self.target = target
+        self.loc = loc
+
+    def __str__(self):
+        return '%s = %s' % (self.target, self.value)
+
+
+class Print(Stmt):
+    """
+    Print some values.
+ """ + def __init__(self, args, vararg, loc): + assert all(isinstance(x, Var) for x in args) + assert vararg is None or isinstance(vararg, Var) + assert isinstance(loc, Loc) + self.args = tuple(args) + self.vararg = vararg + # Constant-inferred arguments + self.consts = {} + self.loc = loc + + def __str__(self): + return 'print(%s)' % ', '.join(str(v) for v in self.args) + + +class Yield(Inst): + def __init__(self, value, loc, index): + assert isinstance(value, Var) + assert isinstance(loc, Loc) + self.value = value + self.loc = loc + self.index = index + + def __str__(self): + return 'yield %s' % (self.value,) + + def list_vars(self): + return [self.value] + + +class EnterWith(Stmt): + """Enter a "with" context + """ + def __init__(self, contextmanager, begin, end, loc): + """ + Parameters + ---------- + contextmanager : IR value + begin, end : int + The beginning and the ending offset of the with-body. + loc : ir.Loc instance + Source location + """ + assert isinstance(contextmanager, Var) + assert isinstance(loc, Loc) + self.contextmanager = contextmanager + self.begin = begin + self.end = end + self.loc = loc + + def __str__(self): + return 'enter_with {}'.format(self.contextmanager) + + def list_vars(self): + return [self.contextmanager] + + +class PopBlock(Stmt): + """Marker statement for a pop block op code""" + def __init__(self, loc): + assert isinstance(loc, Loc) + self.loc = loc + + def __str__(self): + return 'pop_block' + + +class Arg(EqualityCheckMixin, AbstractRHS): + def __init__(self, name, index, loc): + assert isinstance(name, str) + assert isinstance(index, int) + assert isinstance(loc, Loc) + self.name = name + self.index = index + self.loc = loc + + def __repr__(self): + return 'arg(%d, name=%s)' % (self.index, self.name) + + def infer_constant(self): + raise ConstantInferenceError('%s' % self, loc=self.loc) + + +class Const(EqualityCheckMixin, AbstractRHS): + def __init__(self, value, loc, use_literal_type=True): + assert isinstance(loc, Loc) + self.value = value + self.loc = loc + # Note: need better way to tell if this is a literal or not. + self.use_literal_type = use_literal_type + + def __repr__(self): + return 'const(%s, %s)' % (type(self.value).__name__, self.value) + + def infer_constant(self): + return self.value + + def __deepcopy__(self, memo): + # Override to not copy constant values in code + return Const( + value=self.value, loc=self.loc, + use_literal_type=self.use_literal_type, + ) + + +class Global(EqualityCheckMixin, AbstractRHS): + def __init__(self, name, value, loc): + assert isinstance(loc, Loc) + self.name = name + self.value = value + self.loc = loc + + def __str__(self): + return 'global(%s: %s)' % (self.name, self.value) + + def infer_constant(self): + return self.value + + def __deepcopy__(self, memo): + # don't copy value since it can fail (e.g. modules) + # value is readonly and doesn't need copying + return Global(self.name, self.value, copy.deepcopy(self.loc)) + + +class FreeVar(EqualityCheckMixin, AbstractRHS): + """ + A freevar, as loaded by LOAD_DECREF. + (i.e. 
a variable defined in an enclosing non-global scope) + """ + + def __init__(self, index, name, value, loc): + assert isinstance(index, int) + assert isinstance(name, str) + assert isinstance(loc, Loc) + # index inside __code__.co_freevars + self.index = index + # variable name + self.name = name + # frozen value + self.value = value + self.loc = loc + + def __str__(self): + return 'freevar(%s: %s)' % (self.name, self.value) + + def infer_constant(self): + return self.value + + def __deepcopy__(self, memo): + # Override to not copy constant values in code + return FreeVar(index=self.index, name=self.name, value=self.value, + loc=self.loc) + + + +class Var(EqualityCheckMixin, AbstractRHS): + """ + Attributes + ----------- + - scope: Scope + + - name: str + + - loc: Loc + Definition location + """ + + def __init__(self, scope, name, loc): + # NOTE: Use of scope=None should be removed. + assert scope is None or isinstance(scope, Scope) + assert isinstance(name, str) + assert isinstance(loc, Loc) + self.scope = scope + self.name = name + self.loc = loc + + def __repr__(self): + return 'Var(%s, %s)' % (self.name, self.loc.short()) + + def __str__(self): + return self.name + + @property + def is_temp(self): + return self.name.startswith("$") + + @property + def unversioned_name(self): + """The unversioned name of this variable, i.e. SSA renaming removed + """ + for k, redef_set in self.scope.var_redefinitions.items(): + if self.name in redef_set: + return k + return self.name + + @property + def versioned_names(self): + """Known versioned names for this variable, i.e. known variable names in + the scope that have been formed from applying SSA to this variable + """ + return self.scope.get_versions_of(self.unversioned_name) + + @property + def all_names(self): + """All known versioned and unversioned names for this variable + """ + return self.versioned_names | {self.unversioned_name,} + + +class Scope(EqualityCheckMixin): + """ + Attributes + ----------- + - parent: Scope + Parent scope + + - localvars: VarMap + Scope-local variable map + + - loc: Loc + Start of scope location + + """ + + def __init__(self, parent, loc): + assert parent is None or isinstance(parent, Scope) + assert isinstance(loc, Loc) + self.parent = parent + self.localvars = VarMap() + self.loc = loc + self.redefined = defaultdict(int) + self.var_redefinitions = defaultdict(set) + + def define(self, name, loc): + """ + Define a variable + """ + v = Var(scope=self, name=name, loc=loc) + self.localvars.define(v.name, v) + return v + + def get(self, name): + """ + Refer to a variable. Returns the latest version. + """ + if name in self.redefined: + name = "%s.%d" % (name, self.redefined[name]) + return self.get_exact(name) + + def get_exact(self, name): + """ + Refer to a variable. The returned variable has the exact + name (exact variable version). + """ + try: + return self.localvars.get(name) + except NotDefinedError: + if self.has_parent: + return self.parent.get(name) + else: + raise + + def get_or_define(self, name, loc): + if name in self.redefined: + name = "%s.%d" % (name, self.redefined[name]) + + if name not in self.localvars: + return self.define(name, loc) + else: + return self.localvars.get(name) + + def redefine(self, name, loc, rename=True): + """ + Redefine if the name is already defined + """ + if name not in self.localvars: + return self.define(name, loc) + elif not rename: + # Must use the same name if the variable is a cellvar, which + # means it could be captured in a closure. 
+ return self.localvars.get(name) + else: + while True: + ct = self.redefined[name] + self.redefined[name] = ct + 1 + newname = "%s.%d" % (name, ct + 1) + try: + res = self.define(newname, loc) + except RedefinedError: + continue + else: + self.var_redefinitions[name].add(newname) + return res + + def get_versions_of(self, name): + """ + Gets all known versions of a given name + """ + vers = set() + def walk(thename): + redefs = self.var_redefinitions.get(thename, None) + if redefs: + for v in redefs: + vers.add(v) + walk(v) + walk(name) + return vers + + def make_temp(self, loc): + n = len(self.localvars) + v = Var(scope=self, name='$%d' % n, loc=loc) + self.localvars.define(v.name, v) + return v + + @property + def has_parent(self): + return self.parent is not None + + def __repr__(self): + return "Scope(has_parent=%r, num_vars=%d, %s)" % (self.has_parent, + len(self.localvars), + self.loc) + + +class Block(EqualityCheckMixin): + """A code block + + """ + + def __init__(self, scope, loc): + assert isinstance(scope, Scope) + assert isinstance(loc, Loc) + self.scope = scope + self.body = [] + self.loc = loc + + def copy(self): + block = Block(self.scope, self.loc) + block.body = self.body[:] + return block + + def find_exprs(self, op=None): + """ + Iterate over exprs of the given *op* in this block. + """ + for inst in self.body: + if isinstance(inst, Assign): + expr = inst.value + if isinstance(expr, Expr): + if op is None or expr.op == op: + yield expr + + def find_insts(self, cls=None): + """ + Iterate over insts of the given class in this block. + """ + for inst in self.body: + if isinstance(inst, cls): + yield inst + + def find_variable_assignment(self, name): + """ + Returns the assignment inst associated with variable "name", None if + it cannot be found. + """ + for x in self.find_insts(cls=Assign): + if x.target.name == name: + return x + return None + + def prepend(self, inst): + assert isinstance(inst, Stmt) + self.body.insert(0, inst) + + def append(self, inst): + assert isinstance(inst, Stmt) + self.body.append(inst) + + def remove(self, inst): + assert isinstance(inst, Stmt) + del self.body[self.body.index(inst)] + + def clear(self): + del self.body[:] + + def dump(self, file=None): + # Avoid early bind of sys.stdout as default value + file = file or sys.stdout + for inst in self.body: + if hasattr(inst, 'dump'): + inst.dump(file) + else: + inst_vars = sorted(str(v) for v in inst.list_vars()) + print(' %-40s %s' % (inst, inst_vars), file=file) + + @property + def terminator(self): + return self.body[-1] + + @property + def is_terminated(self): + return self.body and self.body[-1].is_terminator + + def verify(self): + if not self.is_terminated: + raise VerificationError("Missing block terminator") + # Only the last instruction can be a terminator + for inst in self.body[:-1]: + if inst.is_terminator: + raise VerificationError("Terminator before the last " + "instruction") + + def insert_after(self, stmt, other): + """ + Insert *stmt* after *other*. 
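+
+        For example (illustrative; ``new_assign`` and ``old_assign`` are
+        assumed to be statements the caller already holds)::
+
+            block.insert_after(new_assign, old_assign)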
+ """ + index = self.body.index(other) + self.body.insert(index + 1, stmt) + + def insert_before_terminator(self, stmt): + assert isinstance(stmt, Stmt) + assert self.is_terminated + self.body.insert(-1, stmt) + + def __repr__(self): + return "" % (self.loc,) + + +class Loop(SlotEqualityCheckMixin): + """Describes a loop-block + """ + __slots__ = "entry", "exit" + + def __init__(self, entry, exit): + self.entry = entry + self.exit = exit + + def __repr__(self): + args = self.entry, self.exit + return "Loop(entry=%s, exit=%s)" % args + + +class With(SlotEqualityCheckMixin): + """Describes a with-block + """ + __slots__ = "entry", "exit" + + def __init__(self, entry, exit): + self.entry = entry + self.exit = exit + + def __repr__(self): + args = self.entry, self.exit + return "With(entry=%s, exit=%s)" % args + + +class FunctionIR(object): + + def __init__(self, blocks, is_generator, func_id, loc, + definitions, arg_count, arg_names): + self.blocks = blocks + self.is_generator = is_generator + self.func_id = func_id + self.loc = loc + self.arg_count = arg_count + self.arg_names = arg_names + + self._definitions = definitions + + self._reset_analysis_variables() + + def equal_ir(self, other): + """ Checks that the IR contained within is equal to the IR in other. + Equality is defined by being equal in fundamental structure (blocks, + labels, IR node type and the order in which they are defined) and the + IR nodes being equal. IR node equality essentially comes down to + ensuring a node's `.__dict__` or `.__slots__` is equal, with the + exception of ignoring 'loc' and 'scope' entries. The upshot is that the + comparison is essentially location and scope invariant, but otherwise + behaves as unsurprisingly as possible. + """ + if type(self) is type(other): + return self.blocks == other.blocks + return False + + def diff_str(self, other): + """ + Compute a human readable difference in the IR, returns a formatted + string ready for printing. + """ + msg = [] + for label, block in self.blocks.items(): + other_blk = other.blocks.get(label, None) + if other_blk is not None: + if block != other_blk: + msg.append(("Block %s differs" % label).center(80, '-')) + # see if the instructions are just a permutation + block_del = [x for x in block.body if isinstance(x, Del)] + oth_del = [x for x in other_blk.body if isinstance(x, Del)] + if block_del != oth_del: + # this is a common issue, dels are all present, but + # order shuffled. 
+ if sorted(block_del) == sorted(oth_del): + msg.append(("Block %s contains the same dels but " + "their order is different") % label) + if len(block.body) > len(other_blk.body): + msg.append("This block contains more statements") + elif len(block.body) < len(other_blk.body): + msg.append("Other block contains more statements") + + # find the indexes where they don't match + tmp = [] + for idx, stmts in enumerate(zip(block.body, + other_blk.body)): + b_s, o_s = stmts + if b_s != o_s: + tmp.append(idx) + + def get_pad(ablock, l): + pointer = '-> ' + sp = len(pointer) * ' ' + pad = [] + nstmt = len(ablock) + for i in range(nstmt): + if i in tmp: + item = pointer + elif i >= l: + item = pointer + else: + item = sp + pad.append(item) + return pad + + min_stmt_len = min(len(block.body), len(other_blk.body)) + + with StringIO() as buf: + it = [("self", block), ("other", other_blk)] + for name, _block in it: + buf.truncate(0) + _block.dump(file=buf) + stmts = buf.getvalue().splitlines() + pad = get_pad(_block.body, min_stmt_len) + title = ("%s: block %s" % (name, label)) + msg.append(title.center(80, '-')) + msg.extend(["{0}{1}".format(a, b) for a, b in + zip(pad, stmts)]) + if msg == []: + msg.append("IR is considered equivalent.") + return '\n'.join(msg) + + def _reset_analysis_variables(self): + + self._consts = consts.ConstantInference(self) + + # Will be computed by PostProcessor + self.generator_info = None + self.variable_lifetime = None + # { ir.Block: { variable names (potentially) alive at start of block } } + self.block_entry_vars = {} + + def derive(self, blocks, arg_count=None, arg_names=None, + force_non_generator=False): + """ + Derive a new function IR from this one, using the given blocks, + and possibly modifying the argument count and generator flag. + + Post-processing will have to be run again on the new IR. + """ + firstblock = blocks[min(blocks)] + + new_ir = copy.copy(self) + new_ir.blocks = blocks + new_ir.loc = firstblock.loc + if force_non_generator: + new_ir.is_generator = False + if arg_count is not None: + new_ir.arg_count = arg_count + if arg_names is not None: + new_ir.arg_names = arg_names + new_ir._reset_analysis_variables() + # Make fresh func_id + new_ir.func_id = new_ir.func_id.derive() + return new_ir + + def copy(self): + new_ir = copy.copy(self) + blocks = {} + block_entry_vars = {} + for label, block in self.blocks.items(): + new_block = block.copy() + blocks[label] = new_block + if block in self.block_entry_vars: + block_entry_vars[new_block] = self.block_entry_vars[block] + new_ir.blocks = blocks + new_ir.block_entry_vars = block_entry_vars + return new_ir + + def get_block_entry_vars(self, block): + """ + Return a set of variable names possibly alive at the beginning of + the block. + """ + return self.block_entry_vars[block] + + def infer_constant(self, name): + """ + Try to infer the constant value of a given variable. + """ + if isinstance(name, Var): + name = name.name + return self._consts.infer_constant(name) + + def get_definition(self, value, lhs_only=False): + """ + Get the definition site for the given variable name or instance. + A Expr instance is returned by default, but if lhs_only is set + to True, the left-hand-side variable is returned instead. 
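+
+        For example (illustrative; assumes ``c`` has exactly one reaching
+        definition, say the expression ``a + b``)::
+
+            expr = func_ir.get_definition("c")   # the 'binop' Expr node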
+ """ + lhs = value + while True: + if isinstance(value, Var): + lhs = value + name = value.name + elif isinstance(value, str): + lhs = value + name = value + else: + return lhs if lhs_only else value + defs = self._definitions[name] + if len(defs) == 0: + raise KeyError("no definition for %r" + % (name,)) + if len(defs) > 1: + raise KeyError("more than one definition for %r" + % (name,)) + value = defs[0] + + def get_assignee(self, rhs_value, in_blocks=None): + """ + Finds the assignee for a given RHS value. If in_blocks is given the + search will be limited to the specified blocks. + """ + if in_blocks is None: + blocks = self.blocks.values() + elif isinstance(in_blocks, int): + blocks = [self.blocks[in_blocks]] + else: + blocks = [self.blocks[blk] for blk in list(in_blocks)] + + assert isinstance(rhs_value, AbstractRHS) + + for blk in blocks: + for assign in blk.find_insts(Assign): + if assign.value == rhs_value: + return assign.target + + raise ValueError("Could not find an assignee for %s" % rhs_value) + + + def dump(self, file=None): + nofile = file is None + # Avoid early bind of sys.stdout as default value + file = file or StringIO() + for offset, block in sorted(self.blocks.items()): + print('label %s:' % (offset,), file=file) + block.dump(file=file) + if nofile: + text = file.getvalue() + if config.HIGHLIGHT_DUMPS: + try: + import pygments + except ImportError: + msg = "Please install pygments to see highlighted dumps" + raise ValueError(msg) + else: + from pygments import highlight + from numba.misc.dump_style import NumbaIRLexer as lexer + from numba.misc.dump_style import by_colorscheme + from pygments.formatters import Terminal256Formatter + print(highlight(text, lexer(), Terminal256Formatter( + style=by_colorscheme()))) + else: + print(text) + + + def dump_to_string(self): + with StringIO() as sb: + self.dump(file=sb) + return sb.getvalue() + + def dump_generator_info(self, file=None): + file = file or sys.stdout + gi = self.generator_info + print("generator state variables:", sorted(gi.state_vars), file=file) + for index, yp in sorted(gi.yield_points.items()): + print("yield point #%d: live variables = %s, weak live variables = %s" + % (index, sorted(yp.live_vars), sorted(yp.weak_live_vars)), + file=file) + + def render_dot(self, filename_prefix="numba_ir", include_ir=True): + """Render the CFG of the IR with GraphViz DOT via the + ``graphviz`` python binding. + + Returns + ------- + g : graphviz.Digraph + Use `g.view()` to open the graph in the default PDF application. + """ + + try: + import graphviz as gv + except ImportError: + raise ImportError( + "The feature requires `graphviz` but it is not available. 
" + "Please install with `pip install graphviz`" + ) + g = gv.Digraph( + filename="{}{}.dot".format( + filename_prefix, + self.func_id.unique_name, + ) + ) + # Populate the nodes + for k, blk in self.blocks.items(): + with StringIO() as sb: + blk.dump(sb) + label = sb.getvalue() + if include_ir: + label = ''.join( + [r' {}\l'.format(x) for x in label.splitlines()], + ) + label = r"block {}\l".format(k) + label + g.node(str(k), label=label, shape='rect') + else: + label = r"{}\l".format(k) + g.node(str(k), label=label, shape='circle') + # Populate the edges + for src, blk in self.blocks.items(): + for dst in blk.terminator.get_targets(): + g.edge(str(src), str(dst)) + return g + + +# A stub for undefined global reference +class UndefinedType(EqualityCheckMixin): + + _singleton = None + + def __new__(cls): + obj = cls._singleton + if obj is not None: + return obj + else: + obj = object.__new__(cls) + cls._singleton = obj + return obj + + def __repr__(self): + return "Undefined" + + +UNDEFINED = UndefinedType() diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir_utils.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir_utils.py new file mode 100644 index 000000000..9d58bc5da --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/ir_utils.py @@ -0,0 +1,2350 @@ +# +# Copyright (c) 2017 Intel Corporation +# SPDX-License-Identifier: BSD-2-Clause +# + +import numpy + +import types as pytypes +import collections +import warnings + +import numba +from numba.core.extending import _Intrinsic +from numba.core import types, typing, ir, analysis, postproc, rewrites, config +from numba.core.typing.templates import signature +from numba.core.analysis import (compute_live_map, compute_use_defs, + compute_cfg_from_blocks) +from numba.core.errors import (TypingError, UnsupportedError, + NumbaPendingDeprecationWarning, + CompilerError) + +import copy + +_unique_var_count = 0 + + +def mk_unique_var(prefix): + global _unique_var_count + var = prefix + "." + str(_unique_var_count) + _unique_var_count = _unique_var_count + 1 + return var + + +class _MaxLabel: + def __init__(self, value=0): + self._value = value + + def next(self): + self._value += 1 + return self._value + + def update(self, newval): + self._value = max(newval, self._value) + + +_the_max_label = _MaxLabel() +del _MaxLabel + + +def get_unused_var_name(prefix, var_table): + """ Get a new var name with a given prefix and + make sure it is unused in the given variable table. + """ + cur = 0 + while True: + var = prefix + str(cur) + if var not in var_table: + return var + cur += 1 + + +def next_label(): + return _the_max_label.next() + + +def mk_alloc(typingctx, typemap, calltypes, lhs, size_var, dtype, scope, loc, + lhs_typ): + """generate an array allocation with np.empty() and return list of nodes. + size_var can be an int variable or tuple of int variables. + lhs_typ is the type of the array being allocated. 
+ """ + out = [] + ndims = 1 + size_typ = types.intp + if isinstance(size_var, tuple): + if len(size_var) == 1: + size_var = size_var[0] + size_var = convert_size_to_var(size_var, typemap, scope, loc, out) + else: + # tuple_var = build_tuple([size_var...]) + ndims = len(size_var) + tuple_var = ir.Var(scope, mk_unique_var("$tuple_var"), loc) + if typemap: + typemap[tuple_var.name] = types.containers.UniTuple( + types.intp, ndims) + # constant sizes need to be assigned to vars + new_sizes = [convert_size_to_var(s, typemap, scope, loc, out) + for s in size_var] + tuple_call = ir.Expr.build_tuple(new_sizes, loc) + tuple_assign = ir.Assign(tuple_call, tuple_var, loc) + out.append(tuple_assign) + size_var = tuple_var + size_typ = types.containers.UniTuple(types.intp, ndims) + # g_np_var = Global(numpy) + g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) + if typemap: + typemap[g_np_var.name] = types.misc.Module(numpy) + g_np = ir.Global('np', numpy, loc) + g_np_assign = ir.Assign(g_np, g_np_var, loc) + # attr call: empty_attr = getattr(g_np_var, empty) + empty_attr_call = ir.Expr.getattr(g_np_var, "empty", loc) + attr_var = ir.Var(scope, mk_unique_var("$empty_attr_attr"), loc) + if typemap: + typemap[attr_var.name] = get_np_ufunc_typ(numpy.empty) + attr_assign = ir.Assign(empty_attr_call, attr_var, loc) + # Assume str(dtype) returns a valid type + dtype_str = str(dtype) + # alloc call: lhs = empty_attr(size_var, typ_var) + typ_var = ir.Var(scope, mk_unique_var("$np_typ_var"), loc) + if typemap: + typemap[typ_var.name] = types.functions.NumberClass(dtype) + # If dtype is a datetime/timedelta with a unit, + # then it won't return a valid type and instead can be created + # with a string. i.e. "datetime64[ns]") + if (isinstance(dtype, (types.NPDatetime, types.NPTimedelta)) and + dtype.unit != ''): + typename_const = ir.Const(dtype_str, loc) + typ_var_assign = ir.Assign(typename_const, typ_var, loc) + else: + if dtype_str=='bool': + # empty doesn't like 'bool' sometimes (e.g. kmeans example) + dtype_str = 'bool_' + np_typ_getattr = ir.Expr.getattr(g_np_var, dtype_str, loc) + typ_var_assign = ir.Assign(np_typ_getattr, typ_var, loc) + alloc_call = ir.Expr.call(attr_var, [size_var, typ_var], (), loc) + + if calltypes: + cac = typemap[attr_var.name].get_call_type( + typingctx, [size_typ, types.functions.NumberClass(dtype)], {}) + # By default, all calls to "empty" are typed as returning a standard + # NumPy ndarray. If we are allocating a ndarray subclass here then + # just change the return type to be that of the subclass. 
+ cac._return_type = (lhs_typ.copy(layout='C') + if lhs_typ.layout == 'F' + else lhs_typ) + calltypes[alloc_call] = cac + if lhs_typ.layout == 'F': + empty_c_typ = lhs_typ.copy(layout='C') + empty_c_var = ir.Var(scope, mk_unique_var("$empty_c_var"), loc) + if typemap: + typemap[empty_c_var.name] = lhs_typ.copy(layout='C') + empty_c_assign = ir.Assign(alloc_call, empty_c_var, loc) + + # attr call: asfortranarray = getattr(g_np_var, asfortranarray) + asfortranarray_attr_call = ir.Expr.getattr(g_np_var, "asfortranarray", loc) + afa_attr_var = ir.Var(scope, mk_unique_var("$asfortran_array_attr"), loc) + if typemap: + typemap[afa_attr_var.name] = get_np_ufunc_typ(numpy.asfortranarray) + afa_attr_assign = ir.Assign(asfortranarray_attr_call, afa_attr_var, loc) + # call asfortranarray + asfortranarray_call = ir.Expr.call(afa_attr_var, [empty_c_var], (), loc) + if calltypes: + calltypes[asfortranarray_call] = typemap[afa_attr_var.name].get_call_type( + typingctx, [empty_c_typ], {}) + + asfortranarray_assign = ir.Assign(asfortranarray_call, lhs, loc) + + out.extend([g_np_assign, attr_assign, typ_var_assign, empty_c_assign, + afa_attr_assign, asfortranarray_assign]) + else: + alloc_assign = ir.Assign(alloc_call, lhs, loc) + out.extend([g_np_assign, attr_assign, typ_var_assign, alloc_assign]) + + return out + + +def convert_size_to_var(size_var, typemap, scope, loc, nodes): + if isinstance(size_var, int): + new_size = ir.Var(scope, mk_unique_var("$alloc_size"), loc) + if typemap: + typemap[new_size.name] = types.intp + size_assign = ir.Assign(ir.Const(size_var, loc), new_size, loc) + nodes.append(size_assign) + return new_size + assert isinstance(size_var, ir.Var) + return size_var + + +def get_np_ufunc_typ(func): + """get type of the incoming function from builtin registry""" + for (k, v) in typing.npydecl.registry.globals: + if k == func: + return v + for (k, v) in typing.templates.builtin_registry.globals: + if k == func: + return v + raise RuntimeError("type for func ", func, " not found") + + +def mk_range_block(typemap, start, stop, step, calltypes, scope, loc): + """make a block that initializes loop range and iteration variables. + target label in jump needs to be set. 
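+
+    Illustrative use (``header_label`` is assumed to be the label of a loop
+    header block built separately, e.g. with ``mk_loop_header``)::
+
+        range_block = mk_range_block(typemap, 0, stop_var, 1, calltypes,
+                                     scope, loc)
+        range_block.body[-1].target = header_label   # patch the ir.Jump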
+ """ + # g_range_var = Global(range) + g_range_var = ir.Var(scope, mk_unique_var("$range_g_var"), loc) + typemap[g_range_var.name] = get_global_func_typ(range) + g_range = ir.Global('range', range, loc) + g_range_assign = ir.Assign(g_range, g_range_var, loc) + arg_nodes, args = _mk_range_args(typemap, start, stop, step, scope, loc) + # range_call_var = call g_range_var(start, stop, step) + range_call = ir.Expr.call(g_range_var, args, (), loc) + calltypes[range_call] = typemap[g_range_var.name].get_call_type( + typing.Context(), [types.intp] * len(args), {}) + #signature(types.range_state64_type, types.intp) + range_call_var = ir.Var(scope, mk_unique_var("$range_c_var"), loc) + typemap[range_call_var.name] = types.iterators.RangeType(types.intp) + range_call_assign = ir.Assign(range_call, range_call_var, loc) + # iter_var = getiter(range_call_var) + iter_call = ir.Expr.getiter(range_call_var, loc) + calltypes[iter_call] = signature(types.range_iter64_type, + types.range_state64_type) + iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc) + typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp) + iter_call_assign = ir.Assign(iter_call, iter_var, loc) + # $phi = iter_var + phi_var = ir.Var(scope, mk_unique_var("$phi"), loc) + typemap[phi_var.name] = types.iterators.RangeIteratorType(types.intp) + phi_assign = ir.Assign(iter_var, phi_var, loc) + # jump to header + jump_header = ir.Jump(-1, loc) + range_block = ir.Block(scope, loc) + range_block.body = arg_nodes + [g_range_assign, range_call_assign, + iter_call_assign, phi_assign, jump_header] + return range_block + + +def _mk_range_args(typemap, start, stop, step, scope, loc): + nodes = [] + if isinstance(stop, ir.Var): + g_stop_var = stop + else: + assert isinstance(stop, int) + g_stop_var = ir.Var(scope, mk_unique_var("$range_stop"), loc) + if typemap: + typemap[g_stop_var.name] = types.intp + stop_assign = ir.Assign(ir.Const(stop, loc), g_stop_var, loc) + nodes.append(stop_assign) + if start == 0 and step == 1: + return nodes, [g_stop_var] + + if isinstance(start, ir.Var): + g_start_var = start + else: + assert isinstance(start, int) + g_start_var = ir.Var(scope, mk_unique_var("$range_start"), loc) + if typemap: + typemap[g_start_var.name] = types.intp + start_assign = ir.Assign(ir.Const(start, loc), g_start_var, loc) + nodes.append(start_assign) + if step == 1: + return nodes, [g_start_var, g_stop_var] + + if isinstance(step, ir.Var): + g_step_var = step + else: + assert isinstance(step, int) + g_step_var = ir.Var(scope, mk_unique_var("$range_step"), loc) + if typemap: + typemap[g_step_var.name] = types.intp + step_assign = ir.Assign(ir.Const(step, loc), g_step_var, loc) + nodes.append(step_assign) + + return nodes, [g_start_var, g_stop_var, g_step_var] + + +def get_global_func_typ(func): + """get type variable for func() from builtin registry""" + for (k, v) in typing.templates.builtin_registry.globals: + if k == func: + return v + raise RuntimeError("func type not found {}".format(func)) + + +def mk_loop_header(typemap, phi_var, calltypes, scope, loc): + """make a block that is a loop header updating iteration variables. + target labels in branch need to be set. 
+ """ + # iternext_var = iternext(phi_var) + iternext_var = ir.Var(scope, mk_unique_var("$iternext_var"), loc) + typemap[iternext_var.name] = types.containers.Pair( + types.intp, types.boolean) + iternext_call = ir.Expr.iternext(phi_var, loc) + calltypes[iternext_call] = signature( + types.containers.Pair( + types.intp, + types.boolean), + types.range_iter64_type) + iternext_assign = ir.Assign(iternext_call, iternext_var, loc) + # pair_first_var = pair_first(iternext_var) + pair_first_var = ir.Var(scope, mk_unique_var("$pair_first_var"), loc) + typemap[pair_first_var.name] = types.intp + pair_first_call = ir.Expr.pair_first(iternext_var, loc) + pair_first_assign = ir.Assign(pair_first_call, pair_first_var, loc) + # pair_second_var = pair_second(iternext_var) + pair_second_var = ir.Var(scope, mk_unique_var("$pair_second_var"), loc) + typemap[pair_second_var.name] = types.boolean + pair_second_call = ir.Expr.pair_second(iternext_var, loc) + pair_second_assign = ir.Assign(pair_second_call, pair_second_var, loc) + # phi_b_var = pair_first_var + phi_b_var = ir.Var(scope, mk_unique_var("$phi"), loc) + typemap[phi_b_var.name] = types.intp + phi_b_assign = ir.Assign(pair_first_var, phi_b_var, loc) + # branch pair_second_var body_block out_block + branch = ir.Branch(pair_second_var, -1, -1, loc) + header_block = ir.Block(scope, loc) + header_block.body = [iternext_assign, pair_first_assign, + pair_second_assign, phi_b_assign, branch] + return header_block + + +def legalize_names(varnames): + """returns a dictionary for conversion of variable names to legal + parameter names. + """ + var_map = {} + for var in varnames: + new_name = var.replace("_", "__").replace("$", "_").replace(".", "_") + assert new_name not in var_map + var_map[var] = new_name + return var_map + + +def get_name_var_table(blocks): + """create a mapping from variable names to their ir.Var objects""" + def get_name_var_visit(var, namevar): + namevar[var.name] = var + return var + namevar = {} + visit_vars(blocks, get_name_var_visit, namevar) + return namevar + + +def replace_var_names(blocks, namedict): + """replace variables (ir.Var to ir.Var) from dictionary (name -> name)""" + # remove identity values to avoid infinite loop + new_namedict = {} + for l, r in namedict.items(): + if l != r: + new_namedict[l] = r + + def replace_name(var, namedict): + assert isinstance(var, ir.Var) + while var.name in namedict: + var = ir.Var(var.scope, namedict[var.name], var.loc) + return var + visit_vars(blocks, replace_name, new_namedict) + + +def replace_var_callback(var, vardict): + assert isinstance(var, ir.Var) + while var.name in vardict.keys(): + assert(vardict[var.name].name != var.name) + new_var = vardict[var.name] + var = ir.Var(new_var.scope, new_var.name, new_var.loc) + return var + + +def replace_vars(blocks, vardict): + """replace variables (ir.Var to ir.Var) from dictionary (name -> ir.Var)""" + # remove identity values to avoid infinite loop + new_vardict = {} + for l, r in vardict.items(): + if l != r.name: + new_vardict[l] = r + visit_vars(blocks, replace_var_callback, new_vardict) + + +def replace_vars_stmt(stmt, vardict): + visit_vars_stmt(stmt, replace_var_callback, vardict) + + +def replace_vars_inner(node, vardict): + return visit_vars_inner(node, replace_var_callback, vardict) + + +# other packages that define new nodes add calls to visit variables in them +# format: {type:function} +visit_vars_extensions = {} + + +def visit_vars(blocks, callback, cbdata): + """go over statements of block bodies and replace variable names 
with + dictionary. + """ + for block in blocks.values(): + for stmt in block.body: + visit_vars_stmt(stmt, callback, cbdata) + return + + +def visit_vars_stmt(stmt, callback, cbdata): + # let external calls handle stmt if type matches + for t, f in visit_vars_extensions.items(): + if isinstance(stmt, t): + f(stmt, callback, cbdata) + return + if isinstance(stmt, ir.Assign): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + stmt.value = visit_vars_inner(stmt.value, callback, cbdata) + elif isinstance(stmt, ir.Arg): + stmt.name = visit_vars_inner(stmt.name, callback, cbdata) + elif isinstance(stmt, ir.Return): + stmt.value = visit_vars_inner(stmt.value, callback, cbdata) + elif isinstance(stmt, ir.Raise): + stmt.exception = visit_vars_inner(stmt.exception, callback, cbdata) + elif isinstance(stmt, ir.Branch): + stmt.cond = visit_vars_inner(stmt.cond, callback, cbdata) + elif isinstance(stmt, ir.Jump): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + elif isinstance(stmt, ir.Del): + # Because Del takes only a var name, we make up by + # constructing a temporary variable. + var = ir.Var(None, stmt.value, stmt.loc) + var = visit_vars_inner(var, callback, cbdata) + stmt.value = var.name + elif isinstance(stmt, ir.DelAttr): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + stmt.attr = visit_vars_inner(stmt.attr, callback, cbdata) + elif isinstance(stmt, ir.SetAttr): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + stmt.attr = visit_vars_inner(stmt.attr, callback, cbdata) + stmt.value = visit_vars_inner(stmt.value, callback, cbdata) + elif isinstance(stmt, ir.DelItem): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + stmt.index = visit_vars_inner(stmt.index, callback, cbdata) + elif isinstance(stmt, ir.StaticSetItem): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + stmt.index_var = visit_vars_inner(stmt.index_var, callback, cbdata) + stmt.value = visit_vars_inner(stmt.value, callback, cbdata) + elif isinstance(stmt, ir.SetItem): + stmt.target = visit_vars_inner(stmt.target, callback, cbdata) + stmt.index = visit_vars_inner(stmt.index, callback, cbdata) + stmt.value = visit_vars_inner(stmt.value, callback, cbdata) + elif isinstance(stmt, ir.Print): + stmt.args = [visit_vars_inner(x, callback, cbdata) for x in stmt.args] + else: + # TODO: raise NotImplementedError("no replacement for IR node: ", stmt) + pass + return + + +def visit_vars_inner(node, callback, cbdata): + if isinstance(node, ir.Var): + return callback(node, cbdata) + elif isinstance(node, list): + return [visit_vars_inner(n, callback, cbdata) for n in node] + elif isinstance(node, tuple): + return tuple([visit_vars_inner(n, callback, cbdata) for n in node]) + elif isinstance(node, ir.Expr): + # if node.op in ['binop', 'inplace_binop']: + # lhs = node.lhs.name + # rhs = node.rhs.name + # node.lhs.name = callback, cbdata.get(lhs, lhs) + # node.rhs.name = callback, cbdata.get(rhs, rhs) + for arg in node._kws.keys(): + node._kws[arg] = visit_vars_inner(node._kws[arg], callback, cbdata) + elif isinstance(node, ir.Yield): + node.value = visit_vars_inner(node.value, callback, cbdata) + return node + + +add_offset_to_labels_extensions = {} + + +def add_offset_to_labels(blocks, offset): + """add an offset to all block labels and jump/branch targets + """ + new_blocks = {} + for l, b in blocks.items(): + # some parfor last blocks might be empty + term = None + if b.body: + term = b.body[-1] + for inst in b.body: + for T, f in 
add_offset_to_labels_extensions.items(): + if isinstance(inst, T): + f_max = f(inst, offset) + if isinstance(term, ir.Jump): + b.body[-1] = ir.Jump(term.target + offset, term.loc) + if isinstance(term, ir.Branch): + b.body[-1] = ir.Branch(term.cond, term.truebr + offset, + term.falsebr + offset, term.loc) + new_blocks[l + offset] = b + return new_blocks + + +find_max_label_extensions = {} + + +def find_max_label(blocks): + max_label = 0 + for l, b in blocks.items(): + term = None + if b.body: + term = b.body[-1] + for inst in b.body: + for T, f in find_max_label_extensions.items(): + if isinstance(inst, T): + f_max = f(inst) + if f_max > max_label: + max_label = f_max + if l > max_label: + max_label = l + return max_label + + +def flatten_labels(blocks): + """makes the labels in range(0, len(blocks)), useful to compare CFGs + """ + # first bulk move the labels out of the rewrite range + blocks = add_offset_to_labels(blocks, find_max_label(blocks) + 1) + # order them in topo order because it's easier to read + new_blocks = {} + topo_order = find_topo_order(blocks) + l_map = dict() + idx = 0 + for x in topo_order: + l_map[x] = idx + idx += 1 + + for t_node in topo_order: + b = blocks[t_node] + # some parfor last blocks might be empty + term = None + if b.body: + term = b.body[-1] + if isinstance(term, ir.Jump): + b.body[-1] = ir.Jump(l_map[term.target], term.loc) + if isinstance(term, ir.Branch): + b.body[-1] = ir.Branch(term.cond, l_map[term.truebr], + l_map[term.falsebr], term.loc) + new_blocks[l_map[t_node]] = b + return new_blocks + + +def remove_dels(blocks): + """remove ir.Del nodes""" + for block in blocks.values(): + new_body = [] + for stmt in block.body: + if not isinstance(stmt, ir.Del): + new_body.append(stmt) + block.body = new_body + return + + +def remove_args(blocks): + """remove ir.Arg nodes""" + for block in blocks.values(): + new_body = [] + for stmt in block.body: + if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): + continue + new_body.append(stmt) + block.body = new_body + return + + +def dead_code_elimination(func_ir, typemap=None, alias_map=None, + arg_aliases=None): + """ Performs dead code elimination and leaves the IR in a valid state on + exit + """ + do_post_proc = False + while (remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap, + alias_map, arg_aliases)): + do_post_proc = True + + if do_post_proc: + post_proc = postproc.PostProcessor(func_ir) + post_proc.run() + + +def remove_dead(blocks, args, func_ir, typemap=None, alias_map=None, arg_aliases=None): + """dead code elimination using liveness and CFG info. + Returns True if something has been removed, or False if nothing is removed. 
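+
+    Callers typically iterate to a fixpoint, as ``dead_code_elimination``
+    above does::
+
+        while remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap):
+            pass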
+ """ + cfg = compute_cfg_from_blocks(blocks) + usedefs = compute_use_defs(blocks) + live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap) + call_table, _ = get_call_table(blocks) + if alias_map is None or arg_aliases is None: + alias_map, arg_aliases = find_potential_aliases(blocks, args, typemap, + func_ir) + if config.DEBUG_ARRAY_OPT >= 1: + print("args:", args) + print("alias map:", alias_map) + print("arg_aliases:", arg_aliases) + print("live_map:", live_map) + print("usemap:", usedefs.usemap) + print("defmap:", usedefs.defmap) + # keep set for easier search + alias_set = set(alias_map.keys()) + + removed = False + for label, block in blocks.items(): + # find live variables at each statement to delete dead assignment + lives = {v.name for v in block.terminator.list_vars()} + if config.DEBUG_ARRAY_OPT >= 2: + print("remove_dead processing block", label, lives) + # find live variables at the end of block + for out_blk, _data in cfg.successors(label): + if config.DEBUG_ARRAY_OPT >= 2: + print("succ live_map", out_blk, live_map[out_blk]) + lives |= live_map[out_blk] + removed |= remove_dead_block(block, lives, call_table, arg_aliases, + alias_map, alias_set, func_ir, typemap) + + return removed + + +# other packages that define new nodes add calls to remove dead code in them +# format: {type:function} +remove_dead_extensions = {} + + +def remove_dead_block(block, lives, call_table, arg_aliases, alias_map, + alias_set, func_ir, typemap): + """remove dead code using liveness info. + Mutable arguments (e.g. arrays) that are not definitely assigned are live + after return of function. + """ + # TODO: find mutable args that are not definitely assigned instead of + # assuming all args are live after return + removed = False + + # add statements in reverse order + new_body = [block.terminator] + # for each statement in reverse order, excluding terminator + for stmt in reversed(block.body[:-1]): + if config.DEBUG_ARRAY_OPT >= 2: + print("remove_dead_block", stmt) + # aliases of lives are also live + alias_lives = set() + init_alias_lives = lives & alias_set + for v in init_alias_lives: + alias_lives |= alias_map[v] + lives_n_aliases = lives | alias_lives | arg_aliases + + # let external calls handle stmt if type matches + if type(stmt) in remove_dead_extensions: + f = remove_dead_extensions[type(stmt)] + stmt = f(stmt, lives, lives_n_aliases, arg_aliases, alias_map, func_ir, + typemap) + if stmt is None: + if config.DEBUG_ARRAY_OPT >= 2: + print("Statement was removed.") + removed = True + continue + + # ignore assignments that their lhs is not live or lhs==rhs + if isinstance(stmt, ir.Assign): + lhs = stmt.target + rhs = stmt.value + if lhs.name not in lives and has_no_side_effect( + rhs, lives_n_aliases, call_table): + if config.DEBUG_ARRAY_OPT >= 2: + print("Statement was removed.") + removed = True + continue + if isinstance(rhs, ir.Var) and lhs.name == rhs.name: + if config.DEBUG_ARRAY_OPT >= 2: + print("Statement was removed.") + removed = True + continue + # TODO: remove other nodes like SetItem etc. 
+ + if isinstance(stmt, ir.Del): + if stmt.value not in lives: + if config.DEBUG_ARRAY_OPT >= 2: + print("Statement was removed.") + removed = True + continue + + if isinstance(stmt, ir.SetItem): + name = stmt.target.name + if name not in lives_n_aliases: + if config.DEBUG_ARRAY_OPT >= 2: + print("Statement was removed.") + continue + + if type(stmt) in analysis.ir_extension_usedefs: + def_func = analysis.ir_extension_usedefs[type(stmt)] + uses, defs = def_func(stmt) + lives -= defs + lives |= uses + else: + lives |= {v.name for v in stmt.list_vars()} + if isinstance(stmt, ir.Assign): + # make sure lhs is not used in rhs, e.g. a = g(a) + if isinstance(stmt.value, ir.Expr): + rhs_vars = {v.name for v in stmt.value.list_vars()} + if lhs.name not in rhs_vars: + lives.remove(lhs.name) + else: + lives.remove(lhs.name) + + new_body.append(stmt) + new_body.reverse() + block.body = new_body + return removed + +# list of functions +remove_call_handlers = [] + +def remove_dead_random_call(rhs, lives, call_list): + if len(call_list) == 3 and call_list[1:] == ['random', numpy]: + return call_list[0] not in {'seed', 'shuffle'} + return False + +remove_call_handlers.append(remove_dead_random_call) + +def has_no_side_effect(rhs, lives, call_table): + """ Returns True if this expression has no side effects that + would prevent re-ordering. + """ + from numba.parfors import array_analysis, parfor + from numba.misc.special import prange + if isinstance(rhs, ir.Expr) and rhs.op == 'call': + func_name = rhs.func.name + if func_name not in call_table or call_table[func_name] == []: + return False + call_list = call_table[func_name] + if (call_list == ['empty', numpy] or + call_list == [slice] or + call_list == ['stencil', numba] or + call_list == ['log', numpy] or + call_list == ['dtype', numpy] or + call_list == [array_analysis.wrap_index] or + call_list == [prange] or + call_list == ['prange', numba] or + call_list == [parfor.internal_prange]): + return True + elif (isinstance(call_list[0], _Intrinsic) and + (call_list[0]._name == 'empty_inferred' or + call_list[0]._name == 'unsafe_empty_inferred')): + return True + from numba.core.registry import CPUDispatcher + from numba.np.linalg import dot_3_mv_check_args + if isinstance(call_list[0], CPUDispatcher): + py_func = call_list[0].py_func + if py_func == dot_3_mv_check_args: + return True + for f in remove_call_handlers: + if f(rhs, lives, call_list): + return True + return False + if isinstance(rhs, ir.Expr) and rhs.op == 'inplace_binop': + return rhs.lhs.name not in lives + if isinstance(rhs, ir.Yield): + return False + if isinstance(rhs, ir.Expr) and rhs.op == 'pair_first': + # don't remove pair_first since prange looks for it + return False + return True + +is_pure_extensions = [] + +def is_pure(rhs, lives, call_table): + """ Returns True if every time this expression is evaluated it + returns the same result. This is not the case for things + like calls to numpy.random. 
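+    For example, calls resolving to ``numpy.empty``, ``numpy.log`` or ``slice``
+    are treated as pure here, while ``getiter``/``iternext`` expressions and
+    ``yield`` values are not.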
+ """ + if isinstance(rhs, ir.Expr): + if rhs.op == 'call': + func_name = rhs.func.name + if func_name not in call_table or call_table[func_name] == []: + return False + call_list = call_table[func_name] + if (call_list == [slice] or + call_list == ['log', numpy] or + call_list == ['empty', numpy]): + return True + for f in is_pure_extensions: + if f(rhs, lives, call_list): + return True + return False + elif rhs.op == 'getiter' or rhs.op == 'iternext': + return False + if isinstance(rhs, ir.Yield): + return False + return True + +def is_const_call(module_name, func_name): + # Returns True if there is no state in the given module changed by the given function. + if module_name == 'numpy': + if func_name in ['empty']: + return True + return False + +alias_analysis_extensions = {} +alias_func_extensions = {} + +def get_canonical_alias(v, alias_map): + if v not in alias_map: + return v + + v_aliases = sorted(list(alias_map[v])) + return v_aliases[0] + +def find_potential_aliases(blocks, args, typemap, func_ir, alias_map=None, + arg_aliases=None): + "find all array aliases and argument aliases to avoid remove as dead" + if alias_map is None: + alias_map = {} + if arg_aliases is None: + arg_aliases = set(a for a in args if not is_immutable_type(a, typemap)) + + # update definitions since they are not guaranteed to be up-to-date + # FIXME keep definitions up-to-date to avoid the need for rebuilding + func_ir._definitions = build_definitions(func_ir.blocks) + np_alias_funcs = ['ravel', 'transpose', 'reshape'] + + for bl in blocks.values(): + for instr in bl.body: + if type(instr) in alias_analysis_extensions: + f = alias_analysis_extensions[type(instr)] + f(instr, args, typemap, func_ir, alias_map, arg_aliases) + if isinstance(instr, ir.Assign): + expr = instr.value + lhs = instr.target.name + # only mutable types can alias + if is_immutable_type(lhs, typemap): + continue + if isinstance(expr, ir.Var) and lhs!=expr.name: + _add_alias(lhs, expr.name, alias_map, arg_aliases) + # subarrays like A = B[0] for 2D B + if (isinstance(expr, ir.Expr) and (expr.op == 'cast' or + expr.op in ['getitem', 'static_getitem'])): + _add_alias(lhs, expr.value.name, alias_map, arg_aliases) + if isinstance(expr, ir.Expr) and expr.op == 'inplace_binop': + _add_alias(lhs, expr.lhs.name, alias_map, arg_aliases) + # array attributes like A.T + if (isinstance(expr, ir.Expr) and expr.op == 'getattr' + and expr.attr in ['T', 'ctypes', 'flat']): + _add_alias(lhs, expr.value.name, alias_map, arg_aliases) + # a = b.c. a should alias b + if (isinstance(expr, ir.Expr) and expr.op == 'getattr' + and expr.attr not in ['shape'] + and expr.value.name in arg_aliases): + _add_alias(lhs, expr.value.name, alias_map, arg_aliases) + # calls that can create aliases such as B = A.ravel() + if isinstance(expr, ir.Expr) and expr.op == 'call': + fdef = guard(find_callname, func_ir, expr, typemap) + # TODO: sometimes gufunc backend creates duplicate code + # causing find_callname to fail. 
Example: test_argmax + # ignored here since those cases don't create aliases + # but should be fixed in general + if fdef is None: + continue + fname, fmod = fdef + if fdef in alias_func_extensions: + alias_func = alias_func_extensions[fdef] + alias_func(lhs, expr.args, alias_map, arg_aliases) + if fmod == 'numpy' and fname in np_alias_funcs: + _add_alias(lhs, expr.args[0].name, alias_map, arg_aliases) + if isinstance(fmod, ir.Var) and fname in np_alias_funcs: + _add_alias(lhs, fmod.name, alias_map, arg_aliases) + + # copy to avoid changing size during iteration + old_alias_map = copy.deepcopy(alias_map) + # combine all aliases transitively + for v in old_alias_map: + for w in old_alias_map[v]: + alias_map[v] |= alias_map[w] + for w in old_alias_map[v]: + alias_map[w] = alias_map[v] + + return alias_map, arg_aliases + +def _add_alias(lhs, rhs, alias_map, arg_aliases): + if rhs in arg_aliases: + arg_aliases.add(lhs) + else: + if rhs not in alias_map: + alias_map[rhs] = set() + if lhs not in alias_map: + alias_map[lhs] = set() + alias_map[rhs].add(lhs) + alias_map[lhs].add(rhs) + return + +def is_immutable_type(var, typemap): + # Conservatively, assume mutable if type not available + if typemap is None or var not in typemap: + return False + typ = typemap[var] + # TODO: add more immutable types + if isinstance(typ, (types.Number, types.scalars._NPDatetimeBase, + types.iterators.RangeType)): + return True + if typ==types.string: + return True + # conservatively, assume mutable + return False + +def copy_propagate(blocks, typemap): + """compute copy propagation information for each block using fixed-point + iteration on data flow equations: + in_b = intersect(predec(B)) + out_b = gen_b | (in_b - kill_b) + """ + cfg = compute_cfg_from_blocks(blocks) + entry = cfg.entry_point() + + # format: dict of block labels to copies as tuples + # label -> (l,r) + c_data = init_copy_propagate_data(blocks, entry, typemap) + (gen_copies, all_copies, kill_copies, in_copies, out_copies) = c_data + + old_point = None + new_point = copy.deepcopy(out_copies) + # comparison works since dictionary of built-in types + while old_point != new_point: + for label in blocks.keys(): + if label == entry: + continue + predecs = [i for i, _d in cfg.predecessors(label)] + # in_b = intersect(predec(B)) + in_copies[label] = out_copies[predecs[0]].copy() + for p in predecs: + in_copies[label] &= out_copies[p] + + # out_b = gen_b | (in_b - kill_b) + out_copies[label] = (gen_copies[label] + | (in_copies[label] - kill_copies[label])) + old_point = new_point + new_point = copy.deepcopy(out_copies) + if config.DEBUG_ARRAY_OPT >= 1: + print("copy propagate out_copies:", out_copies) + return in_copies, out_copies + + +def init_copy_propagate_data(blocks, entry, typemap): + """get initial condition of copy propagation data flow for each block. 
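+
+    For illustration with the equations above: if a block generates the
+    copy (b, a) and its successor neither regenerates nor kills it, the
+    fixed point gives in_succ = out_pred, which includes (b, a), so 'b'
+    can be replaced by 'a' in the successor.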
+ """ + # gen is all definite copies, extra_kill is additional ones that may hit + # for example, parfors can have control flow so they may hit extra copies + gen_copies, extra_kill = get_block_copies(blocks, typemap) + # set of all program copies + all_copies = set() + for l, s in gen_copies.items(): + all_copies |= gen_copies[l] + kill_copies = {} + for label, gen_set in gen_copies.items(): + kill_copies[label] = set() + for lhs, rhs in all_copies: + if lhs in extra_kill[label] or rhs in extra_kill[label]: + kill_copies[label].add((lhs, rhs)) + # a copy is killed if it is not in this block and lhs or rhs are + # assigned in this block + assigned = {lhs for lhs, rhs in gen_set} + if ((lhs, rhs) not in gen_set + and (lhs in assigned or rhs in assigned)): + kill_copies[label].add((lhs, rhs)) + # set initial values + # all copies are in for all blocks except entry + in_copies = {l: all_copies.copy() for l in blocks.keys()} + in_copies[entry] = set() + out_copies = {} + for label in blocks.keys(): + # out_b = gen_b | (in_b - kill_b) + out_copies[label] = (gen_copies[label] + | (in_copies[label] - kill_copies[label])) + out_copies[entry] = gen_copies[entry] + return (gen_copies, all_copies, kill_copies, in_copies, out_copies) + + +# other packages that define new nodes add calls to get copies in them +# format: {type:function} +copy_propagate_extensions = {} + + +def get_block_copies(blocks, typemap): + """get copies generated and killed by each block + """ + block_copies = {} + extra_kill = {} + for label, block in blocks.items(): + assign_dict = {} + extra_kill[label] = set() + # assignments as dict to replace with latest value + for stmt in block.body: + for T, f in copy_propagate_extensions.items(): + if isinstance(stmt, T): + gen_set, kill_set = f(stmt, typemap) + for lhs, rhs in gen_set: + assign_dict[lhs] = rhs + # if a=b is in dict and b is killed, a is also killed + new_assign_dict = {} + for l, r in assign_dict.items(): + if l not in kill_set and r not in kill_set: + new_assign_dict[l] = r + if r in kill_set: + extra_kill[label].add(l) + assign_dict = new_assign_dict + extra_kill[label] |= kill_set + if isinstance(stmt, ir.Assign): + lhs = stmt.target.name + if isinstance(stmt.value, ir.Var): + rhs = stmt.value.name + # copy is valid only if same type (see + # TestCFunc.test_locals) + # Some transformations can produce assignments of the + # form A = A. We don't put these mapping in the + # copy propagation set because then you get cycles and + # infinite loops in the replacement phase. 
+ if typemap[lhs] == typemap[rhs] and lhs != rhs: + assign_dict[lhs] = rhs + continue + if isinstance(stmt.value, + ir.Expr) and stmt.value.op == 'inplace_binop': + in1_var = stmt.value.lhs.name + in1_typ = typemap[in1_var] + # inplace_binop assigns first operand if mutable + if not (isinstance(in1_typ, types.Number) + or in1_typ == types.string): + extra_kill[label].add(in1_var) + # if a=b is in dict and b is killed, a is also killed + new_assign_dict = {} + for l, r in assign_dict.items(): + if l != in1_var and r != in1_var: + new_assign_dict[l] = r + if r == in1_var: + extra_kill[label].add(l) + assign_dict = new_assign_dict + extra_kill[label].add(lhs) + block_cps = set(assign_dict.items()) + block_copies[label] = block_cps + return block_copies, extra_kill + + +# other packages that define new nodes add calls to apply copy propagate in them +# format: {type:function} +apply_copy_propagate_extensions = {} + + +def apply_copy_propagate(blocks, in_copies, name_var_table, typemap, calltypes, + save_copies=None): + """apply copy propagation to IR: replace variables when copies available""" + # save_copies keeps an approximation of the copies that were applied, so + # that the variable names of removed user variables can be recovered to some + # extent. + if save_copies is None: + save_copies = [] + + for label, block in blocks.items(): + var_dict = {l: name_var_table[r] for l, r in in_copies[label]} + # assignments as dict to replace with latest value + for stmt in block.body: + if type(stmt) in apply_copy_propagate_extensions: + f = apply_copy_propagate_extensions[type(stmt)] + f(stmt, var_dict, name_var_table, + typemap, calltypes, save_copies) + # only rhs of assignments should be replaced + # e.g. if x=y is available, x in x=z shouldn't be replaced + elif isinstance(stmt, ir.Assign): + stmt.value = replace_vars_inner(stmt.value, var_dict) + else: + replace_vars_stmt(stmt, var_dict) + fix_setitem_type(stmt, typemap, calltypes) + for T, f in copy_propagate_extensions.items(): + if isinstance(stmt, T): + gen_set, kill_set = f(stmt, typemap) + for lhs, rhs in gen_set: + if rhs in name_var_table: + var_dict[lhs] = name_var_table[rhs] + for l, r in var_dict.copy().items(): + if l in kill_set or r.name in kill_set: + var_dict.pop(l) + if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Var): + lhs = stmt.target.name + rhs = stmt.value.name + # rhs could be replaced with lhs from previous copies + if lhs != rhs: + # copy is valid only if same type (see + # TestCFunc.test_locals) + if typemap[lhs] == typemap[rhs] and rhs in name_var_table: + var_dict[lhs] = name_var_table[rhs] + else: + var_dict.pop(lhs, None) + # a=b kills previous t=a + lhs_kill = [] + for k, v in var_dict.items(): + if v.name == lhs: + lhs_kill.append(k) + for k in lhs_kill: + var_dict.pop(k, None) + if (isinstance(stmt, ir.Assign) + and not isinstance(stmt.value, ir.Var)): + lhs = stmt.target.name + var_dict.pop(lhs, None) + # previous t=a is killed if a is killed + lhs_kill = [] + for k, v in var_dict.items(): + if v.name == lhs: + lhs_kill.append(k) + for k in lhs_kill: + var_dict.pop(k, None) + save_copies.extend(var_dict.items()) + + return save_copies + +def fix_setitem_type(stmt, typemap, calltypes): + """Copy propagation can replace setitem target variable, which can be array + with 'A' layout. 
The replaced variable can be 'C' or 'F', so we update + setitem call type reflect this (from matrix power test) + """ + if not isinstance(stmt, (ir.SetItem, ir.StaticSetItem)): + return + t_typ = typemap[stmt.target.name] + s_typ = calltypes[stmt].args[0] + # test_optional t_typ can be Optional with array + if not isinstance( + s_typ, + types.npytypes.Array) or not isinstance( + t_typ, + types.npytypes.Array): + return + if s_typ.layout == 'A' and t_typ.layout != 'A': + new_s_typ = s_typ.copy(layout=t_typ.layout) + calltypes[stmt].args = ( + new_s_typ, + calltypes[stmt].args[1], + calltypes[stmt].args[2]) + return + + +def dprint_func_ir(func_ir, title, blocks=None): + """Debug print function IR, with an optional blocks argument + that may differ from the IR's original blocks. + """ + if config.DEBUG_ARRAY_OPT >= 1: + ir_blocks = func_ir.blocks + func_ir.blocks = ir_blocks if blocks == None else blocks + name = func_ir.func_id.func_qualname + print(("IR %s: %s" % (title, name)).center(80, "-")) + func_ir.dump() + print("-" * 40) + func_ir.blocks = ir_blocks + + +def find_topo_order(blocks, cfg = None): + """find topological order of blocks such that true branches are visited + first (e.g. for_break test in test_dataflow). + """ + if cfg is None: + cfg = compute_cfg_from_blocks(blocks) + post_order = [] + seen = set() + + def _dfs_rec(node): + if node not in seen: + seen.add(node) + succs = cfg._succs[node] + last_inst = blocks[node].body[-1] + if isinstance(last_inst, ir.Branch): + succs = [last_inst.falsebr, last_inst.truebr] + for dest in succs: + if (node, dest) not in cfg._back_edges: + _dfs_rec(dest) + post_order.append(node) + + _dfs_rec(cfg.entry_point()) + post_order.reverse() + return post_order + + +# other packages that define new nodes add calls to get call table +# format: {type:function} +call_table_extensions = {} + + +def get_call_table(blocks, call_table=None, reverse_call_table=None, topological_ordering=True): + """returns a dictionary of call variables and their references. 
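+
+    For illustration, for IR equivalent to (variable names
+    hypothetical)::
+
+        $g = global(np)           # ir.Global
+        $f = getattr($g, zeros)   # ir.Expr.getattr
+        c  = call $f(n)           # ir.Expr.call
+
+    the reverse scan first records '$f' at the call site, the getattr
+    then appends 'zeros', and the global appends the numpy module,
+    yielding call_table = {'$f': ['zeros', <module numpy>]}.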
+ """ + # call_table example: c = np.zeros becomes c:["zeroes", np] + # reverse_call_table example: c = np.zeros becomes np_var:c + if call_table is None: + call_table = {} + if reverse_call_table is None: + reverse_call_table = {} + + if topological_ordering: + order = find_topo_order(blocks) + else: + order = list(blocks.keys()) + + for label in reversed(order): + for inst in reversed(blocks[label].body): + if isinstance(inst, ir.Assign): + lhs = inst.target.name + rhs = inst.value + if isinstance(rhs, ir.Expr) and rhs.op == 'call': + call_table[rhs.func.name] = [] + if isinstance(rhs, ir.Expr) and rhs.op == 'getattr': + if lhs in call_table: + call_table[lhs].append(rhs.attr) + reverse_call_table[rhs.value.name] = lhs + if lhs in reverse_call_table: + call_var = reverse_call_table[lhs] + call_table[call_var].append(rhs.attr) + reverse_call_table[rhs.value.name] = call_var + if isinstance(rhs, ir.Global): + if lhs in call_table: + call_table[lhs].append(rhs.value) + if lhs in reverse_call_table: + call_var = reverse_call_table[lhs] + call_table[call_var].append(rhs.value) + if isinstance(rhs, ir.FreeVar): + if lhs in call_table: + call_table[lhs].append(rhs.value) + if lhs in reverse_call_table: + call_var = reverse_call_table[lhs] + call_table[call_var].append(rhs.value) + if isinstance(rhs, ir.Var): + if lhs in call_table: + call_table[lhs].append(rhs.name) + reverse_call_table[rhs.name] = lhs + if lhs in reverse_call_table: + call_var = reverse_call_table[lhs] + call_table[call_var].append(rhs.name) + for T, f in call_table_extensions.items(): + if isinstance(inst, T): + f(inst, call_table, reverse_call_table) + return call_table, reverse_call_table + + +# other packages that define new nodes add calls to get tuple table +# format: {type:function} +tuple_table_extensions = {} + + +def get_tuple_table(blocks, tuple_table=None): + """returns a dictionary of tuple variables and their values. + """ + if tuple_table is None: + tuple_table = {} + + for block in blocks.values(): + for inst in block.body: + if isinstance(inst, ir.Assign): + lhs = inst.target.name + rhs = inst.value + if isinstance(rhs, ir.Expr) and rhs.op == 'build_tuple': + tuple_table[lhs] = rhs.items + if isinstance(rhs, ir.Const) and isinstance(rhs.value, tuple): + tuple_table[lhs] = rhs.value + for T, f in tuple_table_extensions.items(): + if isinstance(inst, T): + f(inst, tuple_table) + return tuple_table + + +def get_stmt_writes(stmt): + writes = set() + if isinstance(stmt, (ir.Assign, ir.SetItem, ir.StaticSetItem)): + writes.add(stmt.target.name) + return writes + + +def rename_labels(blocks): + """rename labels of function body blocks according to topological sort. + The set of labels of these blocks will remain unchanged. 
+ """ + topo_order = find_topo_order(blocks) + + # make a block with return last if available (just for readability) + return_label = -1 + for l, b in blocks.items(): + if isinstance(b.body[-1], ir.Return): + return_label = l + # some cases like generators can have no return blocks + if return_label != -1: + topo_order.remove(return_label) + topo_order.append(return_label) + + label_map = {} + all_labels = sorted(topo_order, reverse=True) + for label in topo_order: + label_map[label] = all_labels.pop() + # update target labels in jumps/branches + for b in blocks.values(): + term = b.terminator + if isinstance(term, ir.Jump): + term.target = label_map[term.target] + if isinstance(term, ir.Branch): + term.truebr = label_map[term.truebr] + term.falsebr = label_map[term.falsebr] + # update blocks dictionary keys + new_blocks = {} + for k, b in blocks.items(): + new_label = label_map[k] + new_blocks[new_label] = b + + return new_blocks + + +def simplify_CFG(blocks): + """transform chains of blocks that have no loop into a single block""" + # first, inline single-branch-block to its predecessors + cfg = compute_cfg_from_blocks(blocks) + def find_single_branch(label): + block = blocks[label] + return len(block.body) == 1 and isinstance(block.body[0], ir.Branch) + single_branch_blocks = list(filter(find_single_branch, blocks.keys())) + marked_for_del = set() + for label in single_branch_blocks: + inst = blocks[label].body[0] + predecessors = cfg.predecessors(label) + delete_block = True + for (p, q) in predecessors: + block = blocks[p] + if isinstance(block.body[-1], ir.Jump): + block.body[-1] = copy.copy(inst) + else: + delete_block = False + if delete_block: + marked_for_del.add(label) + # Delete marked labels + for label in marked_for_del: + del blocks[label] + merge_adjacent_blocks(blocks) + return rename_labels(blocks) + + +arr_math = ['min', 'max', 'sum', 'prod', 'mean', 'var', 'std', + 'cumsum', 'cumprod', 'argmax', 'argmin', 'argsort', + 'nonzero', 'ravel'] + + +def canonicalize_array_math(func_ir, typemap, calltypes, typingctx): + # save array arg to call + # call_varname -> array + blocks = func_ir.blocks + saved_arr_arg = {} + topo_order = find_topo_order(blocks) + for label in topo_order: + block = blocks[label] + new_body = [] + for stmt in block.body: + if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Expr): + lhs = stmt.target.name + rhs = stmt.value + # replace A.func with np.func, and save A in saved_arr_arg + if (rhs.op == 'getattr' and rhs.attr in arr_math + and isinstance( + typemap[rhs.value.name], types.npytypes.Array)): + rhs = stmt.value + arr = rhs.value + saved_arr_arg[lhs] = arr + scope = arr.scope + loc = arr.loc + # g_np_var = Global(numpy) + g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) + typemap[g_np_var.name] = types.misc.Module(numpy) + g_np = ir.Global('np', numpy, loc) + g_np_assign = ir.Assign(g_np, g_np_var, loc) + rhs.value = g_np_var + new_body.append(g_np_assign) + func_ir._definitions[g_np_var.name] = [g_np] + # update func var type + func = getattr(numpy, rhs.attr) + func_typ = get_np_ufunc_typ(func) + typemap.pop(lhs) + typemap[lhs] = func_typ + if rhs.op == 'call' and rhs.func.name in saved_arr_arg: + # add array as first arg + arr = saved_arr_arg[rhs.func.name] + # update call type signature to include array arg + old_sig = calltypes.pop(rhs) + # argsort requires kws for typing so sig.args can't be used + # reusing sig.args since some types become Const in sig + argtyps = old_sig.args[:len(rhs.args)] + kwtyps = {name: 
typemap[v.name] for name, v in rhs.kws} + calltypes[rhs] = typemap[rhs.func.name].get_call_type( + typingctx, [typemap[arr.name]] + list(argtyps), kwtyps) + rhs.args = [arr] + rhs.args + + new_body.append(stmt) + block.body = new_body + return + + +# format: {type:function} +array_accesses_extensions = {} + + +def get_array_accesses(blocks, accesses=None): + """returns a set of arrays accessed and their indices. + """ + if accesses is None: + accesses = set() + + for block in blocks.values(): + for inst in block.body: + if isinstance(inst, ir.SetItem): + accesses.add((inst.target.name, inst.index.name)) + if isinstance(inst, ir.StaticSetItem): + accesses.add((inst.target.name, inst.index_var.name)) + if isinstance(inst, ir.Assign): + lhs = inst.target.name + rhs = inst.value + if isinstance(rhs, ir.Expr) and rhs.op == 'getitem': + accesses.add((rhs.value.name, rhs.index.name)) + if isinstance(rhs, ir.Expr) and rhs.op == 'static_getitem': + index = rhs.index + # slice is unhashable, so just keep the variable + if index is None or is_slice_index(index): + index = rhs.index_var.name + accesses.add((rhs.value.name, index)) + for T, f in array_accesses_extensions.items(): + if isinstance(inst, T): + f(inst, accesses) + return accesses + +def is_slice_index(index): + """see if index is a slice index or has slice in it""" + if isinstance(index, slice): + return True + if isinstance(index, tuple): + for i in index: + if isinstance(i, slice): + return True + return False + +def merge_adjacent_blocks(blocks): + cfg = compute_cfg_from_blocks(blocks) + # merge adjacent blocks + removed = set() + for label in list(blocks.keys()): + if label in removed: + continue + block = blocks[label] + succs = list(cfg.successors(label)) + while True: + if len(succs) != 1: + break + next_label = succs[0][0] + if next_label in removed: + break + preds = list(cfg.predecessors(next_label)) + succs = list(cfg.successors(next_label)) + if len(preds) != 1 or preds[0][0] != label: + break + next_block = blocks[next_label] + # XXX: commented out since scope objects are not consistent + # throughout the compiler. for example, pieces of code are compiled + # and inlined on the fly without proper scope merge. 
+ # if block.scope != next_block.scope: + # break + # merge + block.body.pop() # remove Jump + block.body += next_block.body + del blocks[next_label] + removed.add(next_label) + label = next_label + + +def restore_copy_var_names(blocks, save_copies, typemap): + """ + restores variable names of user variables after applying copy propagation + """ + if not save_copies: + return {} + + rename_dict = {} + var_rename_map = {} + for (a, b) in save_copies: + # a is string name, b is variable + # if a is user variable and b is generated temporary and b is not + # already renamed + if (not a.startswith('$') and b.name.startswith('$') + and b.name not in rename_dict): + new_name = mk_unique_var('${}'.format(a)); + rename_dict[b.name] = new_name + var_rename_map[new_name] = a + typ = typemap.pop(b.name) + typemap[new_name] = typ + + replace_var_names(blocks, rename_dict) + return var_rename_map + + +def simplify(func_ir, typemap, calltypes, metadata): + # get copies in to blocks and out from blocks + in_cps, _ = copy_propagate(func_ir.blocks, typemap) + # table mapping variable names to ir.Var objects to help replacement + name_var_table = get_name_var_table(func_ir.blocks) + save_copies = apply_copy_propagate( + func_ir.blocks, + in_cps, + name_var_table, + typemap, + calltypes) + var_rename_map = restore_copy_var_names(func_ir.blocks, save_copies, typemap) + if "var_rename_map" not in metadata: + metadata["var_rename_map"] = {} + metadata["var_rename_map"].update(var_rename_map) + # remove dead code to enable fusion + if config.DEBUG_ARRAY_OPT >= 1: + dprint_func_ir(func_ir, "after copy prop") + remove_dead(func_ir.blocks, func_ir.arg_names, func_ir, typemap) + func_ir.blocks = simplify_CFG(func_ir.blocks) + if config.DEBUG_ARRAY_OPT >= 1: + dprint_func_ir(func_ir, "after simplify") + + +class GuardException(Exception): + pass + + +def require(cond): + """ + Raise GuardException if the given condition is False. + """ + if not cond: + raise GuardException + +def guard(func, *args, **kwargs): + """ + Run a function with given set of arguments, and guard against + any GuardException raised by the function by returning None, + or the expected return results if no such exception was raised. + """ + try: + return func(*args, **kwargs) + except GuardException: + return None + +def get_definition(func_ir, name, **kwargs): + """ + Same as func_ir.get_definition(name), but raise GuardException if + exception KeyError is caught. + """ + try: + return func_ir.get_definition(name, **kwargs) + except KeyError: + raise GuardException + +def build_definitions(blocks, definitions=None): + """Build the definitions table of the given blocks by scanning + through all blocks and instructions, useful when the definitions + table is out-of-sync. + Will return a new definition table if one is not passed. + """ + if definitions is None: + definitions = collections.defaultdict(list) + + for block in blocks.values(): + for inst in block.body: + if isinstance(inst, ir.Assign): + name = inst.target.name + definition = definitions.get(name, []) + if definition == []: + definitions[name] = definition + definition.append(inst.value) + if type(inst) in build_defs_extensions: + f = build_defs_extensions[type(inst)] + f(inst, definitions) + + return definitions + +build_defs_extensions = {} + +def find_callname(func_ir, expr, typemap=None, definition_finder=get_definition): + """Try to find a call expression's function and module names and return + them as strings for unbounded calls. 
If the call is a bounded call, return + the self object instead of module name. Raise GuardException if failed. + + Providing typemap can make the call matching more accurate in corner cases + such as bounded call on an object which is inside another object. + """ + require(isinstance(expr, ir.Expr) and expr.op == 'call') + callee = expr.func + callee_def = definition_finder(func_ir, callee) + attrs = [] + obj = None + while True: + if isinstance(callee_def, (ir.Global, ir.FreeVar)): + # require(callee_def.value == numpy) + # these checks support modules like numpy, numpy.random as well as + # calls like len() and intrinsics like assertEquiv + keys = ['name', '_name', '__name__'] + value = None + for key in keys: + if hasattr(callee_def.value, key): + value = getattr(callee_def.value, key) + break + if not value or not isinstance(value, str): + raise GuardException + attrs.append(value) + def_val = callee_def.value + # get the underlying definition of Intrinsic object to be able to + # find the module effectively. + # Otherwise, it will return numba.extending + if isinstance(def_val, _Intrinsic): + def_val = def_val._defn + if hasattr(def_val, '__module__'): + mod_name = def_val.__module__ + # The reason for first checking if the function is in NumPy's + # top level name space by module is that some functions are + # deprecated in NumPy but the functions' names are aliased with + # other common names. This prevents deprecation warnings on + # e.g. getattr(numpy, 'bool') were a bool the target. + # For context see #6175, impacts NumPy>=1.20. + mod_not_none = mod_name is not None + numpy_toplevel = (mod_not_none and + (mod_name == 'numpy' + or mod_name.startswith('numpy.'))) + # it might be a numpy function imported directly + if (numpy_toplevel and hasattr(numpy, value) + and def_val == getattr(numpy, value)): + attrs += ['numpy'] + # it might be a np.random function imported directly + elif (hasattr(numpy.random, value) + and def_val == getattr(numpy.random, value)): + attrs += ['random', 'numpy'] + elif mod_not_none: + attrs.append(mod_name) + else: + class_name = def_val.__class__.__name__ + if class_name == 'builtin_function_or_method': + class_name = 'builtin' + if class_name != 'module': + attrs.append(class_name) + break + elif isinstance(callee_def, ir.Expr) and callee_def.op == 'getattr': + obj = callee_def.value + attrs.append(callee_def.attr) + if typemap and obj.name in typemap: + typ = typemap[obj.name] + if not isinstance(typ, types.Module): + return attrs[0], obj + callee_def = definition_finder(func_ir, obj) + else: + # obj.func calls where obj is not np array + if obj is not None: + return '.'.join(reversed(attrs)), obj + raise GuardException + return attrs[0], '.'.join(reversed(attrs[1:])) + +def find_build_sequence(func_ir, var): + """Check if a variable is constructed via build_tuple or + build_list or build_set, and return the sequence and the + operator, or raise GuardException otherwise. + Note: only build_tuple is immutable, so use with care. + """ + require(isinstance(var, ir.Var)) + var_def = get_definition(func_ir, var) + require(isinstance(var_def, ir.Expr)) + build_ops = ['build_tuple', 'build_list', 'build_set'] + require(var_def.op in build_ops) + return var_def.items, var_def.op + +def find_const(func_ir, var): + """Check if a variable is defined as constant, and return + the constant value, or raise GuardException otherwise. 
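+
+    Typically used under guard(), e.g. (axis_var is hypothetical)::
+
+        axis = guard(find_const, func_ir, axis_var)
+        if axis is None:
+            pass  # not a compile-time constant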
+ """ + require(isinstance(var, ir.Var)) + var_def = get_definition(func_ir, var) + require(isinstance(var_def, (ir.Const, ir.Global, ir.FreeVar))) + return var_def.value + +def compile_to_numba_ir(mk_func, glbls, typingctx=None, targetctx=None, + arg_typs=None, typemap=None, calltypes=None): + """ + Compile a function or a make_function node to Numba IR. + + Rename variables and + labels to avoid conflict if inlined somewhere else. Perform type inference + if typingctx and other typing inputs are available and update typemap and + calltypes. + """ + from numba.core import typed_passes + # mk_func can be actual function or make_function node, or a njit function + if hasattr(mk_func, 'code'): + code = mk_func.code + elif hasattr(mk_func, '__code__'): + code = mk_func.__code__ + else: + raise NotImplementedError("function type not recognized {}".format(mk_func)) + f_ir = get_ir_of_code(glbls, code) + remove_dels(f_ir.blocks) + + # relabel by adding an offset + f_ir.blocks = add_offset_to_labels(f_ir.blocks, _the_max_label.next()) + max_label = max(f_ir.blocks.keys()) + _the_max_label.update(max_label) + + # rename all variables to avoid conflict + var_table = get_name_var_table(f_ir.blocks) + new_var_dict = {} + for name, var in var_table.items(): + new_var_dict[name] = mk_unique_var(name) + replace_var_names(f_ir.blocks, new_var_dict) + + # perform type inference if typingctx is available and update type + # data structures typemap and calltypes + if typingctx: + f_typemap, f_return_type, f_calltypes, _ = typed_passes.type_inference_stage( + typingctx, targetctx, f_ir, arg_typs, None) + # remove argument entries like arg.a from typemap + arg_names = [vname for vname in f_typemap if vname.startswith("arg.")] + for a in arg_names: + f_typemap.pop(a) + typemap.update(f_typemap) + calltypes.update(f_calltypes) + return f_ir + +def _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, glbls): + """ + Creates a function from a code object. Args: + * fcode - the code object + * func_env - string for the freevar placeholders + * func_arg - string for the function args (e.g. "a, b, c, d=None") + * func_clo - string for the closure args + * glbls - the function globals + """ + sanitized_co_name = fcode.co_name.replace('<', '_').replace('>', '_') + func_text = (f"def closure():\n{func_env}\n" + f"\tdef {sanitized_co_name}({func_arg}):\n" + f"\t\treturn ({func_clo})\n" + f"\treturn {sanitized_co_name}") + loc = {} + exec(func_text, glbls, loc) + + f = loc['closure']() + # replace the code body + f.__code__ = fcode + f.__name__ = fcode.co_name + return f + +def get_ir_of_code(glbls, fcode): + """ + Compile a code object to get its IR, ir.Del nodes are emitted + """ + nfree = len(fcode.co_freevars) + func_env = "\n".join(["\tc_%d = None" % i for i in range(nfree)]) + func_clo = ",".join(["c_%d" % i for i in range(nfree)]) + func_arg = ",".join(["x_%d" % i for i in range(fcode.co_argcount)]) + + f = _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, + glbls) + + from numba.core import compiler + ir = compiler.run_frontend(f) + # we need to run the before inference rewrite pass to normalize the IR + # XXX: check rewrite pass flag? 
+ # for example, Raise nodes need to become StaticRaise before type inference + class DummyPipeline(object): + def __init__(self, f_ir): + self.state = compiler.StateDict() + self.state.typingctx = None + self.state.targetctx = None + self.state.args = None + self.state.func_ir = f_ir + self.state.typemap = None + self.state.return_type = None + self.state.calltypes = None + state = DummyPipeline(ir).state + rewrites.rewrite_registry.apply('before-inference', state) + # call inline pass to handle cases like stencils and comprehensions + swapped = {} # TODO: get this from diagnostics store + import numba.core.inline_closurecall + inline_pass = numba.core.inline_closurecall.InlineClosureCallPass( + ir, numba.core.cpu.ParallelOptions(False), swapped) + inline_pass.run() + + # TODO: DO NOT ADD MORE THINGS HERE! + # If adding more things here is being contemplated, it really is time to + # retire this function and work on getting the InlineWorker class from + # numba.core.inline_closurecall into sufficient shape as a replacement. + # The issue with `get_ir_of_code` is that it doesn't run a full compilation + # pipeline and as a result various additional things keep needing to be + # added to create valid IR. + + # rebuild IR in SSA form + from numba.core.untyped_passes import ReconstructSSA + from numba.core.typed_passes import PreLowerStripPhis + reconstruct_ssa = ReconstructSSA() + phistrip = PreLowerStripPhis() + reconstruct_ssa.run_pass(state) + phistrip.run_pass(state) + + post_proc = postproc.PostProcessor(ir) + post_proc.run(True) + return ir + +def replace_arg_nodes(block, args): + """ + Replace ir.Arg(...) with variables + """ + for stmt in block.body: + if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg): + idx = stmt.value.index + assert(idx < len(args)) + stmt.value = args[idx] + return + + +def replace_returns(blocks, target, return_label): + """ + Return return statement by assigning directly to target, and a jump. 
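+
+    For illustration, a block ending in::
+
+        $r = cast(value=$x)
+        return $r
+
+    ends instead with an assignment of $x to the given target variable
+    and a jump to return_label.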
+ """ + for block in blocks.values(): + # some blocks may be empty during transformations + if not block.body: + continue + stmt = block.terminator + if isinstance(stmt, ir.Return): + block.body.pop() # remove return + cast_stmt = block.body.pop() + assert (isinstance(cast_stmt, ir.Assign) + and isinstance(cast_stmt.value, ir.Expr) + and cast_stmt.value.op == 'cast'), "invalid return cast" + block.body.append(ir.Assign(cast_stmt.value.value, target, stmt.loc)) + block.body.append(ir.Jump(return_label, stmt.loc)) + + +def gen_np_call(func_as_str, func, lhs, args, typingctx, typemap, calltypes): + scope = args[0].scope + loc = args[0].loc + + # g_np_var = Global(numpy) + g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc) + typemap[g_np_var.name] = types.misc.Module(numpy) + g_np = ir.Global('np', numpy, loc) + g_np_assign = ir.Assign(g_np, g_np_var, loc) + # attr call: _attr = getattr(g_np_var, func_as_str) + np_attr_call = ir.Expr.getattr(g_np_var, func_as_str, loc) + attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc) + func_var_typ = get_np_ufunc_typ(func) + typemap[attr_var.name] = func_var_typ + attr_assign = ir.Assign(np_attr_call, attr_var, loc) + # np call: lhs = np_attr(*args) + np_call = ir.Expr.call(attr_var, args, (), loc) + arg_types = [typemap[x.name] for x in args] + func_typ = func_var_typ.get_call_type(typingctx, arg_types, {}) + calltypes[np_call] = func_typ + np_assign = ir.Assign(np_call, lhs, loc) + return [g_np_assign, attr_assign, np_assign] + +def dump_blocks(blocks): + for label, block in blocks.items(): + print(label, ":") + for stmt in block.body: + print(" ", stmt) + +def is_operator_or_getitem(expr): + """true if expr is unary or binary operator or getitem""" + return (isinstance(expr, ir.Expr) + and getattr(expr, 'op', False) + and expr.op in ['unary', 'binop', 'inplace_binop', 'getitem', 'static_getitem']) + +def is_get_setitem(stmt): + """stmt is getitem assignment or setitem (and static cases)""" + return is_getitem(stmt) or is_setitem(stmt) + + +def is_getitem(stmt): + """true if stmt is a getitem or static_getitem assignment""" + return (isinstance(stmt, ir.Assign) + and isinstance(stmt.value, ir.Expr) + and stmt.value.op in ['getitem', 'static_getitem']) + +def is_setitem(stmt): + """true if stmt is a SetItem or StaticSetItem node""" + return isinstance(stmt, (ir.SetItem, ir.StaticSetItem)) + +def index_var_of_get_setitem(stmt): + """get index variable for getitem/setitem nodes (and static cases)""" + if is_getitem(stmt): + if stmt.value.op == 'getitem': + return stmt.value.index + else: + return stmt.value.index_var + + if is_setitem(stmt): + if isinstance(stmt, ir.SetItem): + return stmt.index + else: + return stmt.index_var + + return None + +def set_index_var_of_get_setitem(stmt, new_index): + if is_getitem(stmt): + if stmt.value.op == 'getitem': + stmt.value.index = new_index + else: + stmt.value.index_var = new_index + elif is_setitem(stmt): + if isinstance(stmt, ir.SetItem): + stmt.index = new_index + else: + stmt.index_var = new_index + else: + raise ValueError("getitem or setitem node expected but received {}".format( + stmt)) + + +def is_namedtuple_class(c): + """check if c is a namedtuple class""" + if not isinstance(c, type): + return False + # should have only tuple as superclass + bases = c.__bases__ + if len(bases) != 1 or bases[0] != tuple: + return False + # should have _make method + if not hasattr(c, '_make'): + return False + # should have _fields that is all string + fields = getattr(c, '_fields', None) + if not 
isinstance(fields, tuple): + return False + return all(isinstance(f, str) for f in fields) + + +def fill_block_with_call(newblock, callee, label_next, inputs, outputs): + """Fill *newblock* to call *callee* with arguments listed in *inputs*. + The returned values are unwrapped into variables in *outputs*. + The block would then jump to *label_next*. + """ + scope = newblock.scope + loc = newblock.loc + + fn = ir.Const(value=callee, loc=loc) + fnvar = scope.make_temp(loc=loc) + newblock.append(ir.Assign(target=fnvar, value=fn, loc=loc)) + # call + args = [scope.get_exact(name) for name in inputs] + callexpr = ir.Expr.call(func=fnvar, args=args, kws=(), loc=loc) + callres = scope.make_temp(loc=loc) + newblock.append(ir.Assign(target=callres, value=callexpr, loc=loc)) + # unpack return value + for i, out in enumerate(outputs): + target = scope.get_exact(out) + getitem = ir.Expr.static_getitem(value=callres, index=i, + index_var=None, loc=loc) + newblock.append(ir.Assign(target=target, value=getitem, loc=loc)) + # jump to next block + newblock.append(ir.Jump(target=label_next, loc=loc)) + return newblock + + +def fill_callee_prologue(block, inputs, label_next): + """ + Fill a new block *block* that unwraps arguments using names in *inputs* and + then jumps to *label_next*. + + Expected to use with *fill_block_with_call()* + """ + scope = block.scope + loc = block.loc + # load args + args = [ir.Arg(name=k, index=i, loc=loc) + for i, k in enumerate(inputs)] + for aname, aval in zip(inputs, args): + tmp = ir.Var(scope=scope, name=aname, loc=loc) + block.append(ir.Assign(target=tmp, value=aval, loc=loc)) + # jump to loop entry + block.append(ir.Jump(target=label_next, loc=loc)) + return block + + +def fill_callee_epilogue(block, outputs): + """ + Fill a new block *block* to prepare the return values. + This block is the last block of the function. + + Expected to use with *fill_block_with_call()* + """ + scope = block.scope + loc = block.loc + # prepare tuples to return + vals = [scope.get_exact(name=name) for name in outputs] + tupexpr = ir.Expr.build_tuple(items=vals, loc=loc) + tup = scope.make_temp(loc=loc) + block.append(ir.Assign(target=tup, value=tupexpr, loc=loc)) + # return + block.append(ir.Return(value=tup, loc=loc)) + return block + + +def find_outer_value(func_ir, var): + """Check if a variable is a global value, and return the value, + or raise GuardException otherwise. + """ + dfn = get_definition(func_ir, var) + if isinstance(dfn, (ir.Global, ir.FreeVar)): + return dfn.value + + if isinstance(dfn, ir.Expr) and dfn.op == 'getattr': + prev_val = find_outer_value(func_ir, dfn.value) + try: + val = getattr(prev_val, dfn.attr) + return val + except AttributeError: + raise GuardException + + raise GuardException + + +def raise_on_unsupported_feature(func_ir, typemap): + """ + Helper function to walk IR and raise if it finds op codes + that are unsupported. Could be extended to cover IR sequences + as well as op codes. Intended use is to call it as a pipeline + stage just prior to lowering to prevent LoweringErrors for known + unsupported features. + """ + gdb_calls = [] # accumulate calls to gdb/gdb_init + + # issue 2195: check for excessively large tuples + for arg_name in func_ir.arg_names: + if arg_name in typemap and \ + isinstance(typemap[arg_name], types.containers.UniTuple) and \ + typemap[arg_name].count > 1000: + # Raise an exception when len(tuple) > 1000. 
The choice of this number (1000) + # was entirely arbitrary + msg = ("Tuple '{}' length must be smaller than 1000.\n" + "Large tuples lead to the generation of a prohibitively large " + "LLVM IR which causes excessive memory pressure " + "and large compile times.\n" + "As an alternative, the use of a 'list' is recommended in " + "place of a 'tuple' as lists do not suffer from this problem.".format(arg_name)) + raise UnsupportedError(msg, func_ir.loc) + + for blk in func_ir.blocks.values(): + for stmt in blk.find_insts(ir.Assign): + # This raises on finding `make_function` + if isinstance(stmt.value, ir.Expr): + if stmt.value.op == 'make_function': + val = stmt.value + + # See if the construct name can be refined + code = getattr(val, 'code', None) + if code is not None: + # check if this is a closure, the co_name will + # be the captured function name which is not + # useful so be explicit + if getattr(val, 'closure', None) is not None: + use = '' + expr = '' + else: + use = code.co_name + expr = '(%s) ' % use + else: + use = '' + expr = '' + + msg = ("Numba encountered the use of a language " + "feature it does not support in this context: " + "%s (op code: make_function not supported). If " + "the feature is explicitly supported it is " + "likely that the result of the expression %s" + "is being used in an unsupported manner.") % \ + (use, expr) + raise UnsupportedError(msg, stmt.value.loc) + + # this checks for gdb initialization calls, only one is permitted + if isinstance(stmt.value, (ir.Global, ir.FreeVar)): + val = stmt.value + val = getattr(val, 'value', None) + if val is None: + continue + + # check global function + found = False + if isinstance(val, pytypes.FunctionType): + found = val in {numba.gdb, numba.gdb_init} + if not found: # freevar bind to intrinsic + found = getattr(val, '_name', "") == "gdb_internal" + if found: + gdb_calls.append(stmt.loc) # report last seen location + + # this checks that np. was called if view is called + if isinstance(stmt.value, ir.Expr): + if stmt.value.op == 'getattr' and stmt.value.attr == 'view': + var = stmt.value.value.name + if isinstance(typemap[var], types.Array): + continue + df = func_ir.get_definition(var) + cn = guard(find_callname, func_ir, df) + if cn and cn[1] == 'numpy': + ty = getattr(numpy, cn[0]) + if (numpy.issubdtype(ty, numpy.integer) or + numpy.issubdtype(ty, numpy.floating)): + continue + + vardescr = '' if var.startswith('$') else "'{}' ".format(var) + raise TypingError( + "'view' can only be called on NumPy dtypes, " + "try wrapping the variable {}with 'np.()'". + format(vardescr), loc=stmt.loc) + + # checks for globals that are also reflected + if isinstance(stmt.value, ir.Global): + ty = typemap[stmt.target.name] + msg = ("The use of a %s type, assigned to variable '%s' in " + "globals, is not supported as globals are considered " + "compile-time constants and there is no known way to " + "compile a %s type as a constant.") + if (getattr(ty, 'reflected', False) or + isinstance(ty, (types.DictType, types.ListType))): + raise TypingError(msg % (ty, stmt.value.name, ty), loc=stmt.loc) + + # checks for generator expressions (yield in use when func_ir has + # not been identified as a generator). + if isinstance(stmt.value, ir.Yield) and not func_ir.is_generator: + msg = "The use of generator expressions is unsupported." 
+ raise UnsupportedError(msg, loc=stmt.loc) + + # There is more than one call to function gdb/gdb_init + if len(gdb_calls) > 1: + msg = ("Calling either numba.gdb() or numba.gdb_init() more than once " + "in a function is unsupported (strange things happen!), use " + "numba.gdb_breakpoint() to create additional breakpoints " + "instead.\n\nRelevant documentation is available here:\n" + "https://numba.readthedocs.io/en/stable/user/troubleshoot.html" + "#using-numba-s-direct-gdb-bindings-in-nopython-mode\n\n" + "Conflicting calls found at:\n %s") + buf = '\n'.join([x.strformat() for x in gdb_calls]) + raise UnsupportedError(msg % buf) + + +def warn_deprecated(func_ir, typemap): + # first pass, just walk the type map + for name, ty in typemap.items(): + # the Type Metaclass has a reflected member + if ty.reflected: + # if its an arg, report function call + if name.startswith('arg.'): + loc = func_ir.loc + arg = name.split('.')[1] + fname = func_ir.func_id.func_qualname + tyname = 'list' if isinstance(ty, types.List) else 'set' + url = ("https://numba.readthedocs.io/en/stable/reference/" + "deprecation.html#deprecation-of-reflection-for-list-and" + "-set-types") + msg = ("\nEncountered the use of a type that is scheduled for " + "deprecation: type 'reflected %s' found for argument " + "'%s' of function '%s'.\n\nFor more information visit " + "%s" % (tyname, arg, fname, url)) + warnings.warn(NumbaPendingDeprecationWarning(msg, loc=loc)) + + +def resolve_func_from_module(func_ir, node): + """ + This returns the python function that is being getattr'd from a module in + some IR, it resolves import chains/submodules recursively. Should it not be + possible to find the python function being called None will be returned. + + func_ir - the FunctionIR object + node - the IR node from which to start resolving (should be a `getattr`). + """ + getattr_chain = [] + def resolve_mod(mod): + if getattr(mod, 'op', False) == 'getattr': + getattr_chain.insert(0, mod.attr) + try: + mod = func_ir.get_definition(mod.value) + except KeyError: # multiple definitions + return None + return resolve_mod(mod) + elif isinstance(mod, (ir.Global, ir.FreeVar)): + if isinstance(mod.value, pytypes.ModuleType): + return mod + return None + + mod = resolve_mod(node) + if mod is not None: + defn = mod.value + for x in getattr_chain: + defn = getattr(defn, x, False) + if not defn: + break + else: + return defn + else: + return None + + +def enforce_no_dels(func_ir): + """ + Enforce there being no ir.Del nodes in the IR. + """ + for blk in func_ir.blocks.values(): + dels = [x for x in blk.find_insts(ir.Del)] + if dels: + msg = "Illegal IR, del found at: %s" % dels[0] + raise CompilerError(msg, loc=dels[0].loc) + +def enforce_no_phis(func_ir): + """ + Enforce there being no ir.Expr.phi nodes in the IR. + """ + for blk in func_ir.blocks.values(): + phis = [x for x in blk.find_exprs(op='phi')] + if phis: + msg = "Illegal IR, phi found at: %s" % phis[0] + raise CompilerError(msg, loc=phis[0].loc) + + +def legalize_single_scope(blocks): + """Check the given mapping of ir.Block for containing a single scope. 
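+
+    One plausible use (illustrative only, not a pattern taken from this
+    codebase)::
+
+        if not legalize_single_scope(func_ir.blocks):
+            fixup_var_define_in_scope(func_ir.blocks)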
+ """ + return len({blk.scope for blk in blocks.values()}) == 1 + + +def check_and_legalize_ir(func_ir, flags: "numba.core.compiler.Flags"): + """ + This checks that the IR presented is legal + """ + enforce_no_phis(func_ir) + enforce_no_dels(func_ir) + # postprocess and emit ir.Dels + post_proc = postproc.PostProcessor(func_ir) + post_proc.run(True, extend_lifetimes=flags.dbg_extend_lifetimes) + +def convert_code_obj_to_function(code_obj, caller_ir): + """ + Converts a code object from a `make_function.code` attr in the IR into a + python function, caller_ir is the FunctionIR of the caller and is used for + the resolution of freevars. + """ + fcode = code_obj.code + nfree = len(fcode.co_freevars) + + # try and resolve freevars if they are consts in the caller's IR + # these can be baked into the new function + freevars = [] + for x in fcode.co_freevars: + # not using guard here to differentiate between multiple definition and + # non-const variable + try: + freevar_def = caller_ir.get_definition(x) + except KeyError: + msg = ("Cannot capture a constant value for variable '%s' as there " + "are multiple definitions present." % x) + raise TypingError(msg, loc=code_obj.loc) + if isinstance(freevar_def, ir.Const): + freevars.append(freevar_def.value) + else: + msg = ("Cannot capture the non-constant value associated with " + "variable '%s' in a function that will escape." % x) + raise TypingError(msg, loc=code_obj.loc) + + func_env = "\n".join(["\tc_%d = %s" % (i, x) for i, x in enumerate(freevars)]) + func_clo = ",".join(["c_%d" % i for i in range(nfree)]) + co_varnames = list(fcode.co_varnames) + + # This is horrible. The code object knows about the number of args present + # it also knows the name of the args but these are bundled in with other + # vars in `co_varnames`. The make_function IR node knows what the defaults + # are, they are defined in the IR as consts. The following finds the total + # number of args (args + kwargs with defaults), finds the default values + # and infers the number of "kwargs with defaults" from this and then infers + # the number of actual arguments from that. + n_kwargs = 0 + n_allargs = fcode.co_argcount + kwarg_defaults = caller_ir.get_definition(code_obj.defaults) + if kwarg_defaults is not None: + if isinstance(kwarg_defaults, tuple): + d = [caller_ir.get_definition(x).value for x in kwarg_defaults] + kwarg_defaults_tup = tuple(d) + else: + d = [caller_ir.get_definition(x).value + for x in kwarg_defaults.items] + kwarg_defaults_tup = tuple(d) + n_kwargs = len(kwarg_defaults_tup) + nargs = n_allargs - n_kwargs + + func_arg = ",".join(["%s" % (co_varnames[i]) for i in range(nargs)]) + if n_kwargs: + kw_const = ["%s = %s" % (co_varnames[i + nargs], kwarg_defaults_tup[i]) + for i in range(n_kwargs)] + func_arg += ", " + func_arg += ", ".join(kw_const) + + # globals are the same as those in the caller + glbls = caller_ir.func_id.func.__globals__ + + # create the function and return it + return _create_function_from_code_obj(fcode, func_env, func_arg, func_clo, + glbls) + + +def fixup_var_define_in_scope(blocks): + """Fixes the mapping of ir.Block to ensure all referenced ir.Var are + defined in every scope used by the function. Such that looking up a variable + from any scope in this function will not fail. + + Note: This is a workaround. Ideally, all the blocks should refer to the + same ir.Scope, but that property is not maintained by all the passes. 
+ """ + # Scan for all used variables + used_var = {} + for blk in blocks.values(): + scope = blk.scope + for inst in blk.body: + for var in inst.list_vars(): + used_var[var] = inst + # Note: not all blocks share a single scope even though they should. + # Ensure the scope of each block defines all used variables. + for blk in blocks.values(): + scope = blk.scope + for var, inst in used_var.items(): + # add this variable if it's not in scope + if var.name not in scope.localvars: + # Note: using a internal method to reuse the same + scope.localvars.define(var.name, var) + + +def transfer_scope(block, scope): + """Transfer the ir.Block to use the given ir.Scope. + """ + old_scope = block.scope + if old_scope is scope: + # bypass if the block is already using the given scope + return block + # Ensure variables are defined in the new scope + for var in old_scope.localvars._con.values(): + if var.name not in scope.localvars: + scope.localvars.define(var.name, var) + # replace scope + block.scope = scope + return block + + +def is_setup_with(stmt): + return isinstance(stmt, ir.EnterWith) + + +def is_terminator(stmt): + return isinstance(stmt, ir.Terminator) + + +def is_raise(stmt): + return isinstance(stmt, ir.Raise) + + +def is_return(stmt): + return isinstance(stmt, ir.Return) + + +def is_pop_block(stmt): + return isinstance(stmt, ir.PopBlock) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/itanium_mangler.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/itanium_mangler.py new file mode 100644 index 000000000..963ee2318 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/itanium_mangler.py @@ -0,0 +1,205 @@ +""" +Itanium CXX ABI Mangler + +Reference: http://mentorembedded.github.io/cxx-abi/abi.html + +The basics of the mangling scheme. + +We are hijacking the CXX mangling scheme for our use. We map Python modules +into CXX namespace. A `module1.submodule2.foo` is mapped to +`module1::submodule2::foo`. For parameterized numba types, we treat them as +templated types; for example, `array(int64, 1d, C)` becomes an +`array`. + +All mangled names are prefixed with "_Z". It is followed by the name of the +entity. A name contains one or more identifiers. Each identifier is encoded +as "". If the name is namespaced and, therefore, +has multiple identifiers, the entire name is encoded as "NE". + +For functions, arguments types follow. There are condensed encodings for basic +built-in types; e.g. "i" for int, "f" for float. For other types, the +previously mentioned name encoding should be used. + +For templated types, the template parameters are encoded immediately after the +name. If it is namespaced, it should be within the 'N' 'E' marker. Template +parameters are encoded in "IE", where each parameter is encoded using +the mentioned name encoding scheme. Template parameters can contain literal +values like the '1' in the array type shown earlier. There is special encoding +scheme for them to avoid leading digits. +""" + + +import re + +from numba.core import types + + +# According the scheme, valid characters for mangled names are [a-zA-Z0-9_]. +# We borrow the '_' as the escape character to encode invalid char into +# '_xx' where 'xx' is the hex codepoint. +_re_invalid_char = re.compile(r'[^a-z0-9_]', re.I) + +PREFIX = "_Z" + +# Numba types to mangled type code. 
These correspond with the codes listed in +# https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin +N2CODE = { + types.void: 'v', + types.boolean: 'b', + types.uint8: 'h', + types.int8: 'a', + types.uint16: 't', + types.int16: 's', + types.uint32: 'j', + types.int32: 'i', + types.uint64: 'y', + types.int64: 'x', + types.float16: 'Dh', + types.float32: 'f', + types.float64: 'd' +} + + +def _escape_string(text): + """Escape the given string so that it only contains ASCII characters + of [a-zA-Z0-9_$]. + + The dollar symbol ($) and other invalid characters are escaped into + the string sequence of "$xx" where "xx" is the hex codepoint of the char. + + Multibyte characters are encoded into utf8 and converted into the above + hex format. + """ + + def repl(m): + return ''.join(('_%02x' % ch) + for ch in m.group(0).encode('utf8')) + ret = re.sub(_re_invalid_char, repl, text) + # Return str if we got a unicode (for py2) + if not isinstance(ret, str): + return ret.encode('ascii') + return ret + + +def _fix_lead_digit(text): + """ + Fix text with leading digit + """ + if text and text[0].isdigit(): + return '_' + text + else: + return text + + +def _len_encoded(string): + """ + Prefix string with digit indicating the length. + Add underscore if string is prefixed with digits. + """ + string = _fix_lead_digit(string) + return '%u%s' % (len(string), string) + + +def mangle_abi_tag(abi_tag: str) -> str: + return "B" + _len_encoded(_escape_string(abi_tag)) + + +def mangle_identifier(ident, template_params='', *, abi_tags=(), uid=None): + """ + Mangle the identifier with optional template parameters and abi_tags. + + Note: + + This treats '.' as '::' in C++. + """ + if uid is not None: + # Add uid to abi-tags + abi_tags = (f"v{uid}", *abi_tags) + parts = [_len_encoded(_escape_string(x)) for x in ident.split('.')] + enc_abi_tags = list(map(mangle_abi_tag, abi_tags)) + extras = template_params + ''.join(enc_abi_tags) + if len(parts) > 1: + return 'N%s%sE' % (''.join(parts), extras) + else: + return '%s%s' % (parts[0], extras) + + +def mangle_type_or_value(typ): + """ + Mangle type parameter and arbitrary value. + """ + # Handle numba types + if isinstance(typ, types.Type): + if typ in N2CODE: + return N2CODE[typ] + else: + return mangle_templated_ident(*typ.mangling_args) + # Handle integer literal + elif isinstance(typ, int): + return 'Li%dE' % typ + # Handle str as identifier + elif isinstance(typ, str): + return mangle_identifier(typ) + # Otherwise + else: + enc = _escape_string(str(typ)) + return _len_encoded(enc) + + +# Alias +mangle_type = mangle_type_or_value +mangle_value = mangle_type_or_value + + +def mangle_templated_ident(identifier, parameters): + """ + Mangle templated identifier. + """ + template_params = ('I%sE' % ''.join(map(mangle_type_or_value, parameters)) + if parameters else '') + return mangle_identifier(identifier, template_params) + + +def mangle_args(argtys): + """ + Mangle sequence of Numba type objects and arbitrary values. + """ + return ''.join([mangle_type_or_value(t) for t in argtys]) + + +def mangle(ident, argtys, *, abi_tags=(), uid=None): + """ + Mangle identifier with Numba type objects and abi-tags. + """ + return ''.join([PREFIX, + mangle_identifier(ident, abi_tags=abi_tags, uid=uid), + mangle_args(argtys)]) + + +def prepend_namespace(mangled, ns): + """ + Prepend namespace to mangled name. 
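+
+    For example, per the scheme described above, '_Z3foov' (i.e. foo())
+    prepended with namespace 'ns' becomes '_ZN2ns3fooEv', i.e. ns::foo().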
+ """ + if not mangled.startswith(PREFIX): + raise ValueError('input is not a mangled name') + elif mangled.startswith(PREFIX + 'N'): + # nested + remaining = mangled[3:] + ret = PREFIX + 'N' + mangle_identifier(ns) + remaining + else: + # non-nested + remaining = mangled[2:] + head, tail = _split_mangled_ident(remaining) + ret = PREFIX + 'N' + mangle_identifier(ns) + head + 'E' + tail + return ret + + +def _split_mangled_ident(mangled): + """ + Returns `(head, tail)` where `head` is the ` + ` encoded + identifier and `tail` is the remaining. + """ + ct = int(mangled) + ctlen = len(str(ct)) + at = ctlen + ct + return mangled[:at], mangled[at:] diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/llvm_bindings.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/llvm_bindings.py new file mode 100644 index 000000000..fa94ad428 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/llvm_bindings.py @@ -0,0 +1,46 @@ +""" +Useful options to debug LLVM passes + +llvm.set_option("test", "-debug-pass=Details") +llvm.set_option("test", "-debug-pass=Executions") +llvm.set_option("test", "-debug-pass=Arguments") +llvm.set_option("test", "-debug-pass=Structure") +llvm.set_option("test", "-debug-only=loop-vectorize") +llvm.set_option("test", "-help-hidden") + +""" + +from llvmlite import binding as llvm + + +def _inlining_threshold(optlevel, sizelevel=0): + """ + Compute the inlining threshold for the desired optimisation level + + Refer to http://llvm.org/docs/doxygen/html/InlineSimple_8cpp_source.html + """ + if optlevel > 2: + return 275 + + # -Os + if sizelevel == 1: + return 75 + + # -Oz + if sizelevel == 2: + return 25 + + return 225 + + +def create_pass_manager_builder(opt=2, loop_vectorize=False, + slp_vectorize=False): + """ + Create an LLVM pass manager with the desired optimisation level and options. 
+ """ + pmb = llvm.create_pass_manager_builder() + pmb.opt_level = opt + pmb.loop_vectorize = loop_vectorize + pmb.slp_vectorize = slp_vectorize + pmb.inlining_threshold = _inlining_threshold(opt) + return pmb diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/lowering.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/lowering.py new file mode 100644 index 000000000..84ce3514c --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/lowering.py @@ -0,0 +1,1536 @@ +from collections import namedtuple, defaultdict +import operator +import warnings +from functools import partial + +import llvmlite.ir +from llvmlite.ir import Constant, IRBuilder + +from numba.core import (typing, utils, types, ir, debuginfo, funcdesc, + generators, config, ir_utils, cgutils, removerefctpass, + targetconfig) +from numba.core.errors import (LoweringError, new_error_context, TypingError, + LiteralTypingError, UnsupportedError, + NumbaDebugInfoWarning) +from numba.core.funcdesc import default_mangler +from numba.core.environment import Environment +from numba.core.analysis import compute_use_defs, must_use_alloca +from numba.misc.firstlinefinder import get_func_body_first_lineno + + +_VarArgItem = namedtuple("_VarArgItem", ("vararg", "index")) + + +class BaseLower(object): + """ + Lower IR to LLVM + """ + + def __init__(self, context, library, fndesc, func_ir, metadata=None): + self.library = library + self.fndesc = fndesc + self.blocks = utils.SortedMap(func_ir.blocks.items()) + self.func_ir = func_ir + self.call_conv = context.call_conv + self.generator_info = func_ir.generator_info + self.metadata = metadata + self.flags = targetconfig.ConfigStack.top_or_none() + + # Initialize LLVM + self.module = self.library.create_ir_module(self.fndesc.unique_name) + + # Python execution environment (will be available to the compiled + # function). + self.env = Environment.from_fndesc(self.fndesc) + + # Internal states + self.blkmap = {} + self.pending_phis = {} + self.varmap = {} + self.firstblk = min(self.blocks.keys()) + self.loc = -1 + + # Specializes the target context as seen inside the Lowerer + # This adds: + # - environment: the python execution environment + self.context = context.subtarget(environment=self.env, + fndesc=self.fndesc) + + # Debuginfo + dibuildercls = (self.context.DIBuilder + if self.context.enable_debuginfo + else debuginfo.DummyDIBuilder) + + # debuginfo def location + self.defn_loc = self._compute_def_location() + + self.debuginfo = dibuildercls(module=self.module, + filepath=func_ir.loc.filename, + cgctx=context) + + # Subclass initialization + self.init() + + def init(self): + pass + + def init_pyapi(self): + """ + Init the Python API and Environment Manager for the function being + lowered. + """ + if self.pyapi is not None: + return + self.pyapi = self.context.get_python_api(self.builder) + + # Store environment argument for later use + self.env_manager = self.context.get_env_manager(self.builder) + self.env_body = self.env_manager.env_body + self.envarg = self.env_manager.env_ptr + + def _compute_def_location(self): + # Debuginfo requires source to be accurate. Find it and warn if not + # found. If it's not found, use the func_ir line + 1, this assumes that + # the function definition is decorated with a 1 line jit decorator. 
+ defn_loc = self.func_ir.loc.with_lineno(self.func_ir.loc.line + 1) + if self.context.enable_debuginfo: + fn = self.func_ir.func_id.func + optional_lno = get_func_body_first_lineno(fn) + if optional_lno is not None: + # -1 as lines start at 1 and this is an offset. + offset = optional_lno - 1 + defn_loc = self.func_ir.loc.with_lineno(offset) + else: + msg = ("Could not find source for function: " + f"{self.func_ir.func_id.func}. Debug line information " + "may be inaccurate.") + warnings.warn(NumbaDebugInfoWarning(msg)) + return defn_loc + + def pre_lower(self): + """ + Called before lowering all blocks. + """ + # A given Lower object can be used for several LL functions + # (for generators) and it's important to use a new API and + # EnvironmentManager. + self.pyapi = None + self.debuginfo.mark_subprogram(function=self.builder.function, + qualname=self.fndesc.qualname, + argnames=self.fndesc.args, + argtypes=self.fndesc.argtypes, + line=self.defn_loc.line) + + def post_lower(self): + """ + Called after all blocks are lowered + """ + self.debuginfo.finalize() + + def pre_block(self, block): + """ + Called before lowering a block. + """ + + def post_block(self, block): + """ + Called after lowering a block. + """ + + def return_exception(self, exc_class, exc_args=None, loc=None): + """Propagate exception to the caller. + """ + self.call_conv.return_user_exc( + self.builder, exc_class, exc_args, + loc=loc, func_name=self.func_ir.func_id.func_name, + ) + + def set_exception(self, exc_class, exc_args=None, loc=None): + """Set exception state in the current function. + """ + self.call_conv.set_static_user_exc( + self.builder, exc_class, exc_args, + loc=loc, func_name=self.func_ir.func_id.func_name, + ) + + def emit_environment_object(self): + """Emit a pointer to hold the Environment object. + """ + # Define global for the environment and initialize it to NULL + envname = self.context.get_env_name(self.fndesc) + self.context.declare_env_global(self.module, envname) + + def lower(self): + # Emit the Env into the module + self.emit_environment_object() + if self.generator_info is None: + self.genlower = None + self.lower_normal_function(self.fndesc) + else: + self.genlower = self.GeneratorLower(self) + self.gentype = self.genlower.gentype + + self.genlower.lower_init_func(self) + self.genlower.lower_next_func(self) + if self.gentype.has_finalizer: + self.genlower.lower_finalize_func(self) + + if config.DUMP_LLVM: + print(("LLVM DUMP %s" % self.fndesc).center(80, '-')) + if config.HIGHLIGHT_DUMPS: + try: + from pygments import highlight + from pygments.lexers import LlvmLexer as lexer + from pygments.formatters import Terminal256Formatter + from numba.misc.dump_style import by_colorscheme + print(highlight(self.module.__repr__(), lexer(), + Terminal256Formatter( + style=by_colorscheme()))) + except ImportError: + msg = "Please install pygments to see highlighted dumps" + raise ValueError(msg) + else: + print(self.module) + print('=' * 80) + + # Special optimization to remove NRT on functions that do not need it. 
+ if self.context.enable_nrt and self.generator_info is None: + removerefctpass.remove_unnecessary_nrt_usage(self.function, + context=self.context, + fndesc=self.fndesc) + + # Run target specific post lowering transformation + self.context.post_lowering(self.module, self.library) + + # Materialize LLVM Module + self.library.add_ir_module(self.module) + + def extract_function_arguments(self): + self.fnargs = self.call_conv.decode_arguments(self.builder, + self.fndesc.argtypes, + self.function) + return self.fnargs + + def lower_normal_function(self, fndesc): + """ + Lower non-generator *fndesc*. + """ + self.setup_function(fndesc) + + # Init argument values + self.extract_function_arguments() + entry_block_tail = self.lower_function_body() + + # Close tail of entry block, do not emit debug metadata else the + # unconditional jump gets associated with the metadata from the function + # body end. + with debuginfo.suspend_emission(self.builder): + self.builder.position_at_end(entry_block_tail) + self.builder.branch(self.blkmap[self.firstblk]) + + def lower_function_body(self): + """ + Lower the current function's body, and return the entry block. + """ + # Init Python blocks + for offset in self.blocks: + bname = "B%s" % offset + self.blkmap[offset] = self.function.append_basic_block(bname) + + self.pre_lower() + # pre_lower() may have changed the current basic block + entry_block_tail = self.builder.basic_block + + self.debug_print("# function begin: {0}".format( + self.fndesc.unique_name)) + + # Lower all blocks + for offset, block in sorted(self.blocks.items()): + bb = self.blkmap[offset] + self.builder.position_at_end(bb) + self.lower_block(block) + self.post_lower() + return entry_block_tail + + def lower_block(self, block): + """ + Lower the given block. + """ + self.pre_block(block) + for inst in block.body: + self.loc = inst.loc + defaulterrcls = partial(LoweringError, loc=self.loc) + with new_error_context('lowering "{inst}" at {loc}', inst=inst, + loc=self.loc, errcls_=defaulterrcls): + self.lower_inst(inst) + self.post_block(block) + + def create_cpython_wrapper(self, release_gil=False): + """ + Create CPython wrapper(s) around this function (or generator). + """ + if self.genlower: + self.context.create_cpython_wrapper(self.library, + self.genlower.gendesc, + self.env, self.call_helper, + release_gil=release_gil) + self.context.create_cpython_wrapper(self.library, self.fndesc, + self.env, self.call_helper, + release_gil=release_gil) + + def create_cfunc_wrapper(self): + """ + Create C wrapper around this function. + """ + if self.genlower: + raise UnsupportedError('generator as a first-class function type') + self.context.create_cfunc_wrapper(self.library, self.fndesc, + self.env, self.call_helper) + + def setup_function(self, fndesc): + # Setup function + self.function = self.context.declare_function(self.module, fndesc) + if self.flags.dbg_optnone: + attrset = self.function.attributes + if "alwaysinline" not in attrset: + attrset.add("optnone") + attrset.add("noinline") + self.entry_block = self.function.append_basic_block('entry') + self.builder = IRBuilder(self.entry_block) + self.call_helper = self.call_conv.init_call_helper(self.builder) + + def typeof(self, varname): + return self.fndesc.typemap[varname] + + def debug_print(self, msg): + if config.DEBUG_JIT: + self.context.debug_print(self.builder, "DEBUGJIT: {0}".format(msg)) + + def print_variable(self, msg, varname): + """Helper to emit ``print(msg, varname)`` for debugging. 
+ + Parameters + ---------- + msg : str + Literal string to be printed. + varname : str + A variable name whose value will be printed. + """ + argtys = ( + types.literal(msg), + self.fndesc.typemap[varname] + ) + args = ( + self.context.get_dummy_value(), + self.loadvar(varname), + ) + sig = typing.signature(types.none, *argtys) + + impl = self.context.get_function(print, sig) + impl(self.builder, args) + + +class Lower(BaseLower): + GeneratorLower = generators.GeneratorLower + + def init(self): + super().init() + # find all singly assigned variables + self._find_singly_assigned_variable() + + @property + def _disable_sroa_like_opt(self): + """Flags that the SROA like optimisation that Numba performs (which + prevent alloca and subsequent load/store for locals) should be disabled. + Currently, this is conditional solely on the presence of a request for + the emission of debug information.""" + return False if self.flags is None else self.flags.debuginfo + + def _find_singly_assigned_variable(self): + func_ir = self.func_ir + blocks = func_ir.blocks + + sav = set() + + if not self.func_ir.func_id.is_generator: + use_defs = compute_use_defs(blocks) + alloca_vars = must_use_alloca(blocks) + + # Compute where variables are defined + var_assign_map = defaultdict(set) + for blk, vl in use_defs.defmap.items(): + for var in vl: + var_assign_map[var].add(blk) + + # Compute where variables are used + var_use_map = defaultdict(set) + for blk, vl in use_defs.usemap.items(): + for var in vl: + var_use_map[var].add(blk) + + # Keep only variables that are defined locally and used locally + for var in var_assign_map: + if var not in alloca_vars and len(var_assign_map[var]) == 1: + # Usemap does not keep locally defined variables. + if len(var_use_map[var]) == 0: + # Ensure that the variable is not defined multiple times + # in the block + [defblk] = var_assign_map[var] + assign_stmts = self.blocks[defblk].find_insts(ir.Assign) + assigns = [stmt for stmt in assign_stmts + if stmt.target.name == var] + if len(assigns) == 1: + sav.add(var) + + self._singly_assigned_vars = sav + self._blk_local_varmap = {} + + def pre_block(self, block): + from numba.core.unsafe import eh + + super(Lower, self).pre_block(block) + self._cur_ir_block = block + + if block == self.firstblk: + # create slots for all the vars, irrespective of whether they are + # initialized, SSA will pick this up and warn users about using + # uninitialized variables. Slots are added as alloca in the first + # block + bb = self.blkmap[self.firstblk] + self.builder.position_at_end(bb) + all_names = set() + for block in self.blocks.values(): + for x in block.find_insts(ir.Del): + if x.value not in all_names: + all_names.add(x.value) + for name in all_names: + fetype = self.typeof(name) + self._alloca_var(name, fetype) + + # Detect if we are in a TRY block by looking for a call to + # `eh.exception_check`. + for call in block.find_exprs(op='call'): + defn = ir_utils.guard( + ir_utils.get_definition, self.func_ir, call.func, + ) + if defn is not None and isinstance(defn, ir.Global): + if defn.value is eh.exception_check: + if isinstance(block.terminator, ir.Branch): + targetblk = self.blkmap[block.terminator.truebr] + # NOTE: This hacks in an attribute for call_conv to + # pick up. This hack is no longer needed when + # all old-style implementations are gone. 
+ self.builder._in_try_block = {'target': targetblk} + break + + def post_block(self, block): + # Clean-up + try: + del self.builder._in_try_block + except AttributeError: + pass + + def lower_inst(self, inst): + # Set debug location for all subsequent LL instructions + self.debuginfo.mark_location(self.builder, self.loc.line) + self.debug_print(str(inst)) + if isinstance(inst, ir.Assign): + ty = self.typeof(inst.target.name) + val = self.lower_assign(ty, inst) + argidx = None + # If this is a store from an arg, like x = arg.x then tell debuginfo + # that this is the arg + if isinstance(inst.value, ir.Arg): + # NOTE: debug location is the `def ` line + self.debuginfo.mark_location(self.builder, self.defn_loc.line) + argidx = inst.value.index + 1 # args start at 1 + self.storevar(val, inst.target.name, argidx=argidx) + + elif isinstance(inst, ir.Branch): + cond = self.loadvar(inst.cond.name) + tr = self.blkmap[inst.truebr] + fl = self.blkmap[inst.falsebr] + + condty = self.typeof(inst.cond.name) + pred = self.context.cast(self.builder, cond, condty, types.boolean) + assert pred.type == llvmlite.ir.IntType(1),\ + ("cond is not i1: %s" % pred.type) + self.builder.cbranch(pred, tr, fl) + + elif isinstance(inst, ir.Jump): + target = self.blkmap[inst.target] + self.builder.branch(target) + + elif isinstance(inst, ir.Return): + if self.generator_info: + # StopIteration + self.genlower.return_from_generator(self) + return + val = self.loadvar(inst.value.name) + oty = self.typeof(inst.value.name) + ty = self.fndesc.restype + if isinstance(ty, types.Optional): + # If returning an optional type + self.call_conv.return_optional_value(self.builder, ty, oty, val) + return + assert ty == oty, ( + "type '{}' does not match return type '{}'".format(oty, ty)) + retval = self.context.get_return_value(self.builder, ty, val) + self.call_conv.return_value(self.builder, retval) + + elif isinstance(inst, ir.PopBlock): + pass # this is just a marker + + elif isinstance(inst, ir.StaticSetItem): + signature = self.fndesc.calltypes[inst] + assert signature is not None + try: + impl = self.context.get_function('static_setitem', signature) + except NotImplementedError: + return self.lower_setitem(inst.target, inst.index_var, + inst.value, signature) + else: + target = self.loadvar(inst.target.name) + value = self.loadvar(inst.value.name) + valuety = self.typeof(inst.value.name) + value = self.context.cast(self.builder, value, valuety, + signature.args[2]) + return impl(self.builder, (target, inst.index, value)) + + elif isinstance(inst, ir.Print): + self.lower_print(inst) + + elif isinstance(inst, ir.SetItem): + signature = self.fndesc.calltypes[inst] + assert signature is not None + return self.lower_setitem(inst.target, inst.index, inst.value, + signature) + + elif isinstance(inst, ir.StoreMap): + signature = self.fndesc.calltypes[inst] + assert signature is not None + return self.lower_setitem(inst.dct, inst.key, inst.value, signature) + + elif isinstance(inst, ir.DelItem): + target = self.loadvar(inst.target.name) + index = self.loadvar(inst.index.name) + + targetty = self.typeof(inst.target.name) + indexty = self.typeof(inst.index.name) + + signature = self.fndesc.calltypes[inst] + assert signature is not None + + op = operator.delitem + fnop = self.context.typing_context.resolve_value_type(op) + callsig = fnop.get_call_type( + self.context.typing_context, signature.args, {}, + ) + impl = self.context.get_function(fnop, callsig) + + assert targetty == signature.args[0] + index = self.context.cast(self.builder, 
index, indexty, + signature.args[1]) + + return impl(self.builder, (target, index)) + + elif isinstance(inst, ir.Del): + self.delvar(inst.value) + + elif isinstance(inst, ir.SetAttr): + target = self.loadvar(inst.target.name) + value = self.loadvar(inst.value.name) + signature = self.fndesc.calltypes[inst] + + targetty = self.typeof(inst.target.name) + valuety = self.typeof(inst.value.name) + assert signature is not None + assert signature.args[0] == targetty + impl = self.context.get_setattr(inst.attr, signature) + + # Convert argument to match + value = self.context.cast(self.builder, value, valuety, + signature.args[1]) + + return impl(self.builder, (target, value)) + + elif isinstance(inst, ir.StaticRaise): + self.lower_static_raise(inst) + + elif isinstance(inst, ir.StaticTryRaise): + self.lower_static_try_raise(inst) + + else: + if hasattr(self.context, "lower_extensions"): + for _class, func in self.context.lower_extensions.items(): + if isinstance(inst, _class): + func(self, inst) + return + raise NotImplementedError(type(inst)) + + def lower_setitem(self, target_var, index_var, value_var, signature): + target = self.loadvar(target_var.name) + value = self.loadvar(value_var.name) + index = self.loadvar(index_var.name) + + targetty = self.typeof(target_var.name) + valuety = self.typeof(value_var.name) + indexty = self.typeof(index_var.name) + + op = operator.setitem + fnop = self.context.typing_context.resolve_value_type(op) + callsig = fnop.get_call_type( + self.context.typing_context, signature.args, {}, + ) + impl = self.context.get_function(fnop, callsig) + + # Convert argument to match + if isinstance(targetty, types.Optional): + target = self.context.cast(self.builder, target, targetty, + targetty.type) + else: + ul = types.unliteral + assert ul(targetty) == ul(signature.args[0]) + + index = self.context.cast(self.builder, index, indexty, + signature.args[1]) + value = self.context.cast(self.builder, value, valuety, + signature.args[2]) + + return impl(self.builder, (target, index, value)) + + def lower_static_raise(self, inst): + if inst.exc_class is None: + # Reraise + self.return_exception(None, loc=self.loc) + else: + self.return_exception(inst.exc_class, inst.exc_args, loc=self.loc) + + def lower_static_try_raise(self, inst): + if inst.exc_class is None: + # Reraise + self.set_exception(None, loc=self.loc) + else: + self.set_exception(inst.exc_class, inst.exc_args, loc=self.loc) + + def lower_assign(self, ty, inst): + value = inst.value + # In nopython mode, closure vars are frozen like globals + if isinstance(value, (ir.Const, ir.Global, ir.FreeVar)): + res = self.context.get_constant_generic(self.builder, ty, + value.value) + self.incref(ty, res) + return res + + elif isinstance(value, ir.Expr): + return self.lower_expr(ty, value) + + elif isinstance(value, ir.Var): + val = self.loadvar(value.name) + oty = self.typeof(value.name) + res = self.context.cast(self.builder, val, oty, ty) + self.incref(ty, res) + return res + + elif isinstance(value, ir.Arg): + # Suspend debug info else all the arg repacking ends up being + # associated with some line or other and it's actually just a detail + # of Numba's CC. + with debuginfo.suspend_emission(self.builder): + # Cast from the argument type to the local variable type + # (note the "arg.FOO" convention as used in typeinfer) + argty = self.typeof("arg." 
+ value.name) + if isinstance(argty, types.Omitted): + pyval = argty.value + tyctx = self.context.typing_context + valty = tyctx.resolve_value_type_prefer_literal(pyval) + # use the type of the constant value + const = self.context.get_constant_generic( + self.builder, valty, pyval, + ) + # cast it to the variable type + res = self.context.cast(self.builder, const, valty, ty) + else: + val = self.fnargs[value.index] + res = self.context.cast(self.builder, val, argty, ty) + self.incref(ty, res) + return res + + elif isinstance(value, ir.Yield): + res = self.lower_yield(ty, value) + self.incref(ty, res) + return res + + raise NotImplementedError(type(value), value) + + def lower_yield(self, retty, inst): + yp = self.generator_info.yield_points[inst.index] + assert yp.inst is inst + y = generators.LowerYield(self, yp, yp.live_vars) + y.lower_yield_suspend() + # Yield to caller + val = self.loadvar(inst.value.name) + typ = self.typeof(inst.value.name) + actual_rettyp = self.gentype.yield_type + + # cast the local val to the type yielded + yret = self.context.cast(self.builder, val, typ, actual_rettyp) + + # get the return repr of yielded value + retval = self.context.get_return_value( + self.builder, actual_rettyp, yret, + ) + + # return + self.call_conv.return_value(self.builder, retval) + + # Resumption point + y.lower_yield_resume() + # None is returned by the yield expression + return self.context.get_constant_generic(self.builder, retty, None) + + def lower_binop(self, resty, expr, op): + # if op in utils.OPERATORS_TO_BUILTINS: + # map operator.the_op => the corresponding types.Function() + # TODO: is this looks dodgy ... + op = self.context.typing_context.resolve_value_type(op) + + lhs = expr.lhs + rhs = expr.rhs + static_lhs = expr.static_lhs + static_rhs = expr.static_rhs + lty = self.typeof(lhs.name) + rty = self.typeof(rhs.name) + lhs = self.loadvar(lhs.name) + rhs = self.loadvar(rhs.name) + + # Convert argument to match + signature = self.fndesc.calltypes[expr] + lhs = self.context.cast(self.builder, lhs, lty, signature.args[0]) + rhs = self.context.cast(self.builder, rhs, rty, signature.args[1]) + + def cast_result(res): + return self.context.cast(self.builder, res, + signature.return_type, resty) + + # First try with static operands, if known + def try_static_impl(tys, args): + if any(a is ir.UNDEFINED for a in args): + return None + try: + if isinstance(op, types.Function): + static_sig = op.get_call_type(self.context.typing_context, + tys, {}) + else: + static_sig = typing.signature(signature.return_type, *tys) + except TypingError: + return None + try: + static_impl = self.context.get_function(op, static_sig) + return static_impl(self.builder, args) + except NotImplementedError: + return None + + res = try_static_impl( + (_lit_or_omitted(static_lhs), _lit_or_omitted(static_rhs)), + (static_lhs, static_rhs), + ) + if res is not None: + return cast_result(res) + + res = try_static_impl( + (_lit_or_omitted(static_lhs), rty), + (static_lhs, rhs), + ) + if res is not None: + return cast_result(res) + + res = try_static_impl( + (lty, _lit_or_omitted(static_rhs)), + (lhs, static_rhs), + ) + if res is not None: + return cast_result(res) + + # Normal implementation for generic arguments + + sig = op.get_call_type(self.context.typing_context, signature.args, {}) + impl = self.context.get_function(op, sig) + res = impl(self.builder, (lhs, rhs)) + return cast_result(res) + + def lower_getitem(self, resty, expr, value, index, signature): + baseval = self.loadvar(value.name) + indexval = 
self.loadvar(index.name) + # Get implementation of getitem + op = operator.getitem + fnop = self.context.typing_context.resolve_value_type(op) + callsig = fnop.get_call_type( + self.context.typing_context, signature.args, {}, + ) + impl = self.context.get_function(fnop, callsig) + + argvals = (baseval, indexval) + argtyps = (self.typeof(value.name), + self.typeof(index.name)) + castvals = [self.context.cast(self.builder, av, at, ft) + for av, at, ft in zip(argvals, argtyps, + signature.args)] + res = impl(self.builder, castvals) + return self.context.cast(self.builder, res, + signature.return_type, + resty) + + def _cast_var(self, var, ty): + """ + Cast a Numba IR variable to the given Numba type, returning a + low-level value. + """ + if isinstance(var, _VarArgItem): + varty = self.typeof(var.vararg.name)[var.index] + val = self.builder.extract_value(self.loadvar(var.vararg.name), + var.index) + else: + varty = self.typeof(var.name) + val = self.loadvar(var.name) + return self.context.cast(self.builder, val, varty, ty) + + def fold_call_args(self, fnty, signature, pos_args, vararg, kw_args): + if vararg: + # Inject *args from function call + # The lowering will be done in _cast_var() above. + tp_vararg = self.typeof(vararg.name) + assert isinstance(tp_vararg, types.BaseTuple) + pos_args = pos_args + [_VarArgItem(vararg, i) + for i in range(len(tp_vararg))] + + # Fold keyword arguments and resolve default argument values + pysig = signature.pysig + if pysig is None: + if kw_args: + raise NotImplementedError("unsupported keyword arguments " + "when calling %s" % (fnty,)) + argvals = [self._cast_var(var, sigty) + for var, sigty in zip(pos_args, signature.args)] + else: + def normal_handler(index, param, var): + return self._cast_var(var, signature.args[index]) + + def default_handler(index, param, default): + return self.context.get_constant_generic( + self.builder, signature.args[index], default) + + def stararg_handler(index, param, vars): + stararg_ty = signature.args[index] + assert isinstance(stararg_ty, types.BaseTuple), stararg_ty + values = [self._cast_var(var, sigty) + for var, sigty in zip(vars, stararg_ty)] + return cgutils.make_anonymous_struct(self.builder, values) + + argvals = typing.fold_arguments(pysig, + pos_args, dict(kw_args), + normal_handler, + default_handler, + stararg_handler) + return argvals + + def lower_print(self, inst): + """ + Lower a ir.Print() + """ + # We handle this, as far as possible, as a normal call to built-in + # print(). This will make it easy to undo the special ir.Print + # rewrite when it becomes unnecessary (e.g. when we have native + # strings). 
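In user terms (a sketch, assuming numba is installed), this path is what a plain `print` inside a jitted function goes through: the front end has already rewritten the call into an `ir.Print` node, and constant strings are recovered below via `inst.consts`:

    from numba import njit

    @njit
    def show(x):
        # lowered via ir.Print; "x =" is a constant-inferred string, so its
        # positional type is fixed up to a string literal in the code below
        print("x =", x)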
+ sig = self.fndesc.calltypes[inst] + assert sig.return_type == types.none + fnty = self.context.typing_context.resolve_value_type(print) + + # Fix the call signature to inject any constant-inferred + # string argument + pos_tys = list(sig.args) + pos_args = list(inst.args) + for i in range(len(pos_args)): + if i in inst.consts: + pyval = inst.consts[i] + if isinstance(pyval, str): + pos_tys[i] = types.literal(pyval) + + fixed_sig = typing.signature(sig.return_type, *pos_tys) + fixed_sig = fixed_sig.replace(pysig=sig.pysig) + + argvals = self.fold_call_args(fnty, sig, pos_args, inst.vararg, {}) + impl = self.context.get_function(print, fixed_sig) + impl(self.builder, argvals) + + def lower_call(self, resty, expr): + signature = self.fndesc.calltypes[expr] + self.debug_print("# lower_call: expr = {0}".format(expr)) + if isinstance(signature.return_type, types.Phantom): + return self.context.get_dummy_value() + + fnty = self.typeof(expr.func.name) + + if isinstance(fnty, types.ObjModeDispatcher): + res = self._lower_call_ObjModeDispatcher(fnty, expr, signature) + + elif isinstance(fnty, types.ExternalFunction): + res = self._lower_call_ExternalFunction(fnty, expr, signature) + + elif isinstance(fnty, types.ExternalFunctionPointer): + res = self._lower_call_ExternalFunctionPointer( + fnty, expr, signature) + + elif isinstance(fnty, types.RecursiveCall): + res = self._lower_call_RecursiveCall(fnty, expr, signature) + + elif isinstance(fnty, types.FunctionType): + res = self._lower_call_FunctionType(fnty, expr, signature) + + else: + res = self._lower_call_normal(fnty, expr, signature) + + # If lowering the call returned None, interpret that as returning dummy + # value if the return type of the function is void, otherwise there is + # a problem + if res is None: + if signature.return_type == types.void: + res = self.context.get_dummy_value() + else: + raise LoweringError( + msg="non-void function returns None from implementation", + loc=self.loc + ) + + return self.context.cast(self.builder, res, signature.return_type, + resty) + + def _lower_call_ObjModeDispatcher(self, fnty, expr, signature): + from numba.core.pythonapi import ObjModeUtils + + self.init_pyapi() + # Acquire the GIL + gil_state = self.pyapi.gil_ensure() + # Fix types + argnames = [a.name for a in expr.args] + argtypes = [self.typeof(a) for a in argnames] + argvalues = [self.loadvar(a) for a in argnames] + for v, ty in zip(argvalues, argtypes): + # Because .from_native_value steal the reference + self.incref(ty, v) + + argobjs = [self.pyapi.from_native_value(atyp, aval, + self.env_manager) + for atyp, aval in zip(argtypes, argvalues)] + + # Load objmode dispatcher + callee = ObjModeUtils(self.pyapi).load_dispatcher(fnty, argtypes) + # Make Call + ret_obj = self.pyapi.call_function_objargs(callee, argobjs) + has_exception = cgutils.is_null(self.builder, ret_obj) + with self. 
builder.if_else(has_exception) as (then, orelse): + # Handles exception + # This branch must exit the function + with then: + # Clean arg + for obj in argobjs: + self.pyapi.decref(obj) + + # Release the GIL + self.pyapi.gil_release(gil_state) + + # Return and signal exception + self.call_conv.return_exc(self.builder) + + # Handles normal return + with orelse: + # Fix output value + native = self.pyapi.to_native_value( + fnty.dispatcher.output_types, + ret_obj, + ) + output = native.value + + # Release objs + self.pyapi.decref(ret_obj) + for obj in argobjs: + self.pyapi.decref(obj) + + # cleanup output + if callable(native.cleanup): + native.cleanup() + + # Release the GIL + self.pyapi.gil_release(gil_state) + + # Error during unboxing + with self.builder.if_then(native.is_error): + self.call_conv.return_exc(self.builder) + + return output + + def _lower_call_ExternalFunction(self, fnty, expr, signature): + # Handle a named external function + self.debug_print("# external function") + argvals = self.fold_call_args( + fnty, signature, expr.args, expr.vararg, expr.kws, + ) + fndesc = funcdesc.ExternalFunctionDescriptor( + fnty.symbol, fnty.sig.return_type, fnty.sig.args) + func = self.context.declare_external_function( + self.builder.module, fndesc) + return self.context.call_external_function( + self.builder, func, fndesc.argtypes, argvals, + ) + + def _lower_call_ExternalFunctionPointer(self, fnty, expr, signature): + # Handle a C function pointer + self.debug_print("# calling external function pointer") + argvals = self.fold_call_args( + fnty, signature, expr.args, expr.vararg, expr.kws, + ) + pointer = self.loadvar(expr.func.name) + # If the external function pointer uses libpython + if fnty.requires_gil: + self.init_pyapi() + # Acquire the GIL + gil_state = self.pyapi.gil_ensure() + # Make PyObjects + newargvals = [] + pyvals = [] + for exptyp, gottyp, aval in zip(fnty.sig.args, signature.args, + argvals): + # Adjust argument values to pyobjects + if exptyp == types.ffi_forced_object: + self.incref(gottyp, aval) + obj = self.pyapi.from_native_value( + gottyp, aval, self.env_manager, + ) + newargvals.append(obj) + pyvals.append(obj) + else: + newargvals.append(aval) + + # Call external function + res = self.context.call_function_pointer( + self.builder, pointer, newargvals, fnty.cconv, + ) + # Release PyObjects + for obj in pyvals: + self.pyapi.decref(obj) + + # Release the GIL + self.pyapi.gil_release(gil_state) + # If the external function pointer does NOT use libpython + else: + res = self.context.call_function_pointer( + self.builder, pointer, argvals, fnty.cconv, + ) + return res + + def _lower_call_RecursiveCall(self, fnty, expr, signature): + # Recursive call + argvals = self.fold_call_args( + fnty, signature, expr.args, expr.vararg, expr.kws, + ) + rec_ov = fnty.get_overloads(signature.args) + mangler = self.context.mangler or default_mangler + abi_tags = self.fndesc.abi_tags + mangled_name = mangler(rec_ov.qualname, signature.args, + abi_tags=abi_tags, uid=rec_ov.uid) + # special case self recursion + if self.builder.function.name.startswith(mangled_name): + res = self.context.call_internal( + self.builder, self.fndesc, signature, argvals, + ) + else: + res = self.context.call_unresolved( + self.builder, mangled_name, signature, argvals, + ) + return res + + def _lower_call_FunctionType(self, fnty, expr, signature): + self.debug_print("# calling first-class function type") + sig = types.unliteral(signature) + if not fnty.check_signature(signature): + # value dependent 
polymorphism? + raise UnsupportedError( + f'mismatch of function types:' + f' expected {fnty} but got {types.FunctionType(sig)}') + ftype = fnty.ftype + argvals = self.fold_call_args( + fnty, sig, expr.args, expr.vararg, expr.kws, + ) + func_ptr = self.__get_function_pointer(ftype, expr.func.name, sig=sig) + res = self.builder.call(func_ptr, argvals, cconv=fnty.cconv) + return res + + def __get_function_pointer(self, ftype, fname, sig=None): + from numba.experimental.function_type import lower_get_wrapper_address + + llty = self.context.get_value_type(ftype) + fstruct = self.loadvar(fname) + addr = self.builder.extract_value(fstruct, 0, + name='addr_of_%s' % (fname)) + + fptr = cgutils.alloca_once(self.builder, llty, + name="fptr_of_%s" % (fname)) + with self.builder.if_else( + cgutils.is_null(self.builder, addr), + likely=False) as (then, orelse): + with then: + self.init_pyapi() + # Acquire the GIL + gil_state = self.pyapi.gil_ensure() + pyaddr = self.builder.extract_value( + fstruct, 1, + name='pyaddr_of_%s' % (fname)) + # try to recover the function address, see + # test_zero_address BadToGood example in + # test_function_type.py + addr1 = lower_get_wrapper_address( + self.context, self.builder, pyaddr, sig, + failure_mode='ignore') + with self.builder.if_then( + cgutils.is_null(self.builder, addr1), likely=False): + self.return_exception( + RuntimeError, + exc_args=(f"{ftype} function address is null",), + loc=self.loc) + addr2 = self.pyapi.long_as_voidptr(addr1) + self.builder.store(self.builder.bitcast(addr2, llty), fptr) + self.pyapi.decref(addr1) + self.pyapi.gil_release(gil_state) + with orelse: + self.builder.store(self.builder.bitcast(addr, llty), fptr) + return self.builder.load(fptr) + + def _lower_call_normal(self, fnty, expr, signature): + # Normal function resolution + self.debug_print("# calling normal function: {0}".format(fnty)) + self.debug_print("# signature: {0}".format(signature)) + if isinstance(fnty, types.ObjModeDispatcher): + argvals = expr.func.args + else: + argvals = self.fold_call_args( + fnty, signature, expr.args, expr.vararg, expr.kws, + ) + tname = expr.target + if tname is not None: + from numba.core.target_extension import resolve_dispatcher_from_str + disp = resolve_dispatcher_from_str(tname) + hw_ctx = disp.targetdescr.target_context + impl = hw_ctx.get_function(fnty, signature) + else: + impl = self.context.get_function(fnty, signature) + if signature.recvr: + # The "self" object is passed as the function object + # for bounded function + the_self = self.loadvar(expr.func.name) + # Prepend the self reference + argvals = [the_self] + list(argvals) + + res = impl(self.builder, argvals, self.loc) + return res + + def lower_expr(self, resty, expr): + if expr.op == 'binop': + return self.lower_binop(resty, expr, expr.fn) + elif expr.op == 'inplace_binop': + lty = self.typeof(expr.lhs.name) + if lty.mutable: + return self.lower_binop(resty, expr, expr.fn) + else: + # inplace operators on non-mutable types reuse the same + # definition as the corresponding copying operators.) 
+ return self.lower_binop(resty, expr, expr.immutable_fn) + elif expr.op == 'unary': + val = self.loadvar(expr.value.name) + typ = self.typeof(expr.value.name) + func_ty = self.context.typing_context.resolve_value_type(expr.fn) + # Get function + signature = self.fndesc.calltypes[expr] + impl = self.context.get_function(func_ty, signature) + # Convert argument to match + val = self.context.cast(self.builder, val, typ, signature.args[0]) + res = impl(self.builder, [val]) + res = self.context.cast(self.builder, res, + signature.return_type, resty) + return res + + elif expr.op == 'call': + res = self.lower_call(resty, expr) + return res + + elif expr.op == 'pair_first': + val = self.loadvar(expr.value.name) + ty = self.typeof(expr.value.name) + res = self.context.pair_first(self.builder, val, ty) + self.incref(resty, res) + return res + + elif expr.op == 'pair_second': + val = self.loadvar(expr.value.name) + ty = self.typeof(expr.value.name) + res = self.context.pair_second(self.builder, val, ty) + self.incref(resty, res) + return res + + elif expr.op in ('getiter', 'iternext'): + val = self.loadvar(expr.value.name) + ty = self.typeof(expr.value.name) + signature = self.fndesc.calltypes[expr] + impl = self.context.get_function(expr.op, signature) + [fty] = signature.args + castval = self.context.cast(self.builder, val, ty, fty) + res = impl(self.builder, (castval,)) + res = self.context.cast(self.builder, res, signature.return_type, + resty) + return res + + elif expr.op == 'exhaust_iter': + val = self.loadvar(expr.value.name) + ty = self.typeof(expr.value.name) + # Unpack optional + if isinstance(ty, types.Optional): + val = self.context.cast(self.builder, val, ty, ty.type) + ty = ty.type + + # If we have a tuple, we needn't do anything + # (and we can't iterate over the heterogeneous ones). + if isinstance(ty, types.BaseTuple): + assert ty == resty + self.incref(ty, val) + return val + + itemty = ty.iterator_type.yield_type + tup = self.context.get_constant_undef(resty) + pairty = types.Pair(itemty, types.boolean) + getiter_sig = typing.signature(ty.iterator_type, ty) + getiter_impl = self.context.get_function('getiter', + getiter_sig) + iternext_sig = typing.signature(pairty, ty.iterator_type) + iternext_impl = self.context.get_function('iternext', + iternext_sig) + iterobj = getiter_impl(self.builder, (val,)) + # We call iternext() as many times as desired (`expr.count`). + for i in range(expr.count): + pair = iternext_impl(self.builder, (iterobj,)) + is_valid = self.context.pair_second(self.builder, + pair, pairty) + with cgutils.if_unlikely(self.builder, + self.builder.not_(is_valid)): + self.return_exception(ValueError, loc=self.loc) + item = self.context.pair_first(self.builder, + pair, pairty) + tup = self.builder.insert_value(tup, item, i) + + # Call iternext() once more to check that the iterator + # is exhausted. 
+ pair = iternext_impl(self.builder, (iterobj,)) + is_valid = self.context.pair_second(self.builder, + pair, pairty) + with cgutils.if_unlikely(self.builder, is_valid): + self.return_exception(ValueError, loc=self.loc) + + self.decref(ty.iterator_type, iterobj) + return tup + + elif expr.op == "getattr": + val = self.loadvar(expr.value.name) + ty = self.typeof(expr.value.name) + + if isinstance(resty, types.BoundFunction): + # if we are getting out a method, assume we have typed this + # properly and just build a bound function object + casted = self.context.cast(self.builder, val, ty, resty.this) + res = self.context.get_bound_function(self.builder, casted, + resty.this) + self.incref(resty, res) + return res + else: + impl = self.context.get_getattr(ty, expr.attr) + attrty = self.context.typing_context.resolve_getattr(ty, + expr.attr) + + if impl is None: + # ignore the attribute + return self.context.get_dummy_value() + else: + res = impl(self.context, self.builder, ty, val, expr.attr) + + # Cast the attribute type to the expected output type + res = self.context.cast(self.builder, res, attrty, resty) + return res + + elif expr.op == "static_getitem": + signature = typing.signature( + resty, + self.typeof(expr.value.name), + _lit_or_omitted(expr.index), + ) + try: + # Both get_function() and the returned implementation can + # raise NotImplementedError if the types aren't supported + impl = self.context.get_function("static_getitem", signature) + return impl(self.builder, + (self.loadvar(expr.value.name), expr.index)) + except NotImplementedError: + if expr.index_var is None: + raise + # Fall back on the generic getitem() implementation + # for this type. + signature = self.fndesc.calltypes[expr] + return self.lower_getitem(resty, expr, expr.value, + expr.index_var, signature) + elif expr.op == "typed_getitem": + signature = typing.signature( + resty, + self.typeof(expr.value.name), + self.typeof(expr.index.name), + ) + impl = self.context.get_function("typed_getitem", signature) + return impl(self.builder, (self.loadvar(expr.value.name), + self.loadvar(expr.index.name))) + elif expr.op == "getitem": + signature = self.fndesc.calltypes[expr] + return self.lower_getitem(resty, expr, expr.value, expr.index, + signature) + + elif expr.op == "build_tuple": + itemvals = [self.loadvar(i.name) for i in expr.items] + itemtys = [self.typeof(i.name) for i in expr.items] + castvals = [self.context.cast(self.builder, val, fromty, toty) + for val, toty, fromty in zip(itemvals, resty, itemtys)] + tup = self.context.make_tuple(self.builder, resty, castvals) + self.incref(resty, tup) + return tup + + elif expr.op == "build_list": + itemvals = [self.loadvar(i.name) for i in expr.items] + itemtys = [self.typeof(i.name) for i in expr.items] + if isinstance(resty, types.LiteralList): + castvals = [self.context.cast(self.builder, val, fromty, toty) + for val, toty, fromty in zip(itemvals, resty.types, + itemtys)] + tup = self.context.make_tuple(self.builder, + types.Tuple(resty.types), + castvals) + self.incref(resty, tup) + return tup + else: + castvals = [self.context.cast(self.builder, val, fromty, + resty.dtype) + for val, fromty in zip(itemvals, itemtys)] + return self.context.build_list(self.builder, resty, castvals) + + elif expr.op == "build_set": + # Insert in reverse order, as Python does + items = expr.items[::-1] + itemvals = [self.loadvar(i.name) for i in items] + itemtys = [self.typeof(i.name) for i in items] + castvals = [self.context.cast(self.builder, val, fromty, + resty.dtype) + for val, 
fromty in zip(itemvals, itemtys)] + return self.context.build_set(self.builder, resty, castvals) + + elif expr.op == "build_map": + items = expr.items + keys, values = [], [] + key_types, value_types = [], [] + for k, v in items: + key = self.loadvar(k.name) + keytype = self.typeof(k.name) + val = self.loadvar(v.name) + valtype = self.typeof(v.name) + keys.append(key) + values.append(val) + key_types.append(keytype) + value_types.append(valtype) + return self.context.build_map(self.builder, resty, + list(zip(key_types, value_types)), + list(zip(keys, values))) + + elif expr.op == "cast": + val = self.loadvar(expr.value.name) + ty = self.typeof(expr.value.name) + castval = self.context.cast(self.builder, val, ty, resty) + self.incref(resty, castval) + return castval + + elif expr.op == "phi": + raise LoweringError("PHI not stripped") + + elif expr.op == 'null': + return self.context.get_constant_null(resty) + + elif expr.op in self.context.special_ops: + res = self.context.special_ops[expr.op](self, expr) + return res + + raise NotImplementedError(expr) + + def _alloca_var(self, name, fetype): + """ + Ensure the given variable has an allocated stack slot (if needed). + """ + if name in self.varmap: + # quit early + return + + # If the name is used in multiple blocks or lowering with debuginfo... + if ((name not in self._singly_assigned_vars) or + self._disable_sroa_like_opt): + # If not already defined, allocate it + ptr = self.alloca(name, fetype) + # Remember the pointer + self.varmap[name] = ptr + + def getvar(self, name): + """ + Get a pointer to the given variable's slot. + """ + if not self._disable_sroa_like_opt: + assert name not in self._blk_local_varmap + assert name not in self._singly_assigned_vars + return self.varmap[name] + + def loadvar(self, name): + """ + Load the given variable's value. + """ + if name in self._blk_local_varmap and not self._disable_sroa_like_opt: + return self._blk_local_varmap[name] + ptr = self.getvar(name) + + # Don't associate debuginfo with the load for a function arg else it + # creates instructions ahead of the first source line of the + # function which then causes problems with breaking on the function + # symbol (it hits the symbol, not the first line). + if name in self.func_ir.arg_names: + with debuginfo.suspend_emission(self.builder): + return self.builder.load(ptr) + else: + return self.builder.load(ptr) + + def storevar(self, value, name, argidx=None): + """ + Store the value into the given variable. + """ + fetype = self.typeof(name) + # Define if not already + self._alloca_var(name, fetype) + + # Store variable + if (name in self._singly_assigned_vars and + not self._disable_sroa_like_opt): + self._blk_local_varmap[name] = value + else: + if argidx is None: + # Clean up existing value stored in the variable, not needed + # if it's an arg + old = self.loadvar(name) + self.decref(fetype, old) + + # stack stored variable + ptr = self.getvar(name) + if value.type != ptr.type.pointee: + msg = ("Storing {value.type} to ptr of {ptr.type.pointee} " + "('{name}'). FE type {fetype}").format(value=value, + ptr=ptr, + fetype=fetype, + name=name) + raise AssertionError(msg) + + # If this store is associated with an argument to the function (i.e. + # store following reassemble from CC splatting structs as many args + # to the function) then mark this variable as such. 
+ if argidx is not None: + with debuginfo.suspend_emission(self.builder): + self.builder.store(value, ptr) + loc = self.defn_loc # the line with `def ` + lltype = self.context.get_value_type(fetype) + sizeof = self.context.get_abi_sizeof(lltype) + datamodel = self.context.data_model_manager[fetype] + self.debuginfo.mark_variable(self.builder, ptr, name=name, + lltype=lltype, size=sizeof, + line=loc.line, datamodel=datamodel, + argidx=argidx) + else: + self.builder.store(value, ptr) + + def delvar(self, name): + """ + Delete the given variable. + """ + fetype = self.typeof(name) + + # Out-of-order + if (name not in self._blk_local_varmap and + not self._disable_sroa_like_opt): + if name in self._singly_assigned_vars: + self._singly_assigned_vars.discard(name) + + # Define if not already (may happen if the variable is deleted + # at the beginning of a loop, but only set later in the loop) + self._alloca_var(name, fetype) + + if name in self._blk_local_varmap and not self._disable_sroa_like_opt: + llval = self._blk_local_varmap[name] + self.decref(fetype, llval) + else: + ptr = self.getvar(name) + self.decref(fetype, self.builder.load(ptr)) + # Zero-fill variable to avoid double frees on subsequent dels + self.builder.store(Constant(ptr.type.pointee, None), ptr) + + def alloca(self, name, type): + lltype = self.context.get_value_type(type) + datamodel = self.context.data_model_manager[type] + return self.alloca_lltype(name, lltype, datamodel=datamodel) + + def alloca_lltype(self, name, lltype, datamodel=None): + # Is user variable? + is_uservar = not name.startswith('$') + # Allocate space for variable + aptr = cgutils.alloca_once(self.builder, lltype, + name=name, zfill=False) + + # Emit debug info for user variable + if is_uservar: + # Don't associate debuginfo with the alloca for a function arg, this + # is handled by the first store to the alloca so that repacking the + # splatted args from the CC is dealt with. + if name not in self.func_ir.arg_names: + sizeof = self.context.get_abi_sizeof(lltype) + self.debuginfo.mark_variable(self.builder, aptr, name=name, + lltype=lltype, size=sizeof, + line=self.loc.line, + datamodel=datamodel,) + return aptr + + def incref(self, typ, val): + if not self.context.enable_nrt: + return + + self.context.nrt.incref(self.builder, typ, val) + + def decref(self, typ, val): + if not self.context.enable_nrt: + return + + # do not associate decref with "use", it creates "jumpy" line info as + # the decrefs are usually where the ir.Del nodes are, which is at the + # end of the block. + with debuginfo.suspend_emission(self.builder): + self.context.nrt.decref(self.builder, typ, val) + + +def _lit_or_omitted(value): + """Returns a Literal instance if the type of value is supported; + otherwise, return `Omitted(value)`. 
+ """ + try: + return types.literal(value) + except LiteralTypingError: + return types.Omitted(value) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/object_mode_passes.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/object_mode_passes.py new file mode 100644 index 000000000..f5cd52383 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/object_mode_passes.py @@ -0,0 +1,169 @@ +import warnings +from numba.core import (errors, types, typing, funcdesc, config, pylowering, + transforms) +from numba.core.compiler_machinery import (FunctionPass, LoweringPass, + register_pass) +from collections import defaultdict + + +@register_pass(mutates_CFG=True, analysis_only=False) +class ObjectModeFrontEnd(FunctionPass): + _name = "object_mode_front_end" + + def __init__(self): + FunctionPass.__init__(self) + + def _frontend_looplift(self, state): + """ + Loop lifting analysis and transformation + """ + loop_flags = state.flags.copy() + outer_flags = state.flags.copy() + # Do not recursively loop lift + outer_flags.enable_looplift = False + loop_flags.enable_looplift = False + if not state.flags.enable_pyobject_looplift: + loop_flags.enable_pyobject = False + loop_flags.enable_ssa = False + + main, loops = transforms.loop_lifting(state.func_ir, + typingctx=state.typingctx, + targetctx=state.targetctx, + locals=state.locals, + flags=loop_flags) + if loops: + # Some loops were extracted + if config.DEBUG_FRONTEND or config.DEBUG: + for loop in loops: + print("Lifting loop", loop.get_source_location()) + from numba.core.compiler import compile_ir + cres = compile_ir(state.typingctx, state.targetctx, main, + state.args, state.return_type, + outer_flags, state.locals, + lifted=tuple(loops), lifted_from=None, + is_lifted_loop=True) + return cres + + def run_pass(self, state): + from numba.core.compiler import _EarlyPipelineCompletion + # NOTE: That so much stuff, including going back into the compiler, is + # captured in a single pass is not ideal. 
+ if state.flags.enable_looplift: + assert not state.lifted + cres = self._frontend_looplift(state) + if cres is not None: + raise _EarlyPipelineCompletion(cres) + + # Fallback typing: everything is a python object + state.typemap = defaultdict(lambda: types.pyobject) + state.calltypes = defaultdict(lambda: types.pyobject) + state.return_type = types.pyobject + return True + + +@register_pass(mutates_CFG=True, analysis_only=False) +class ObjectModeBackEnd(LoweringPass): + + _name = "object_mode_back_end" + + def __init__(self): + LoweringPass.__init__(self) + + def _py_lowering_stage(self, targetctx, library, interp, flags): + fndesc = funcdesc.PythonFunctionDescriptor.from_object_mode_function( + interp + ) + with targetctx.push_code_library(library): + lower = pylowering.PyLower(targetctx, library, fndesc, interp) + lower.lower() + if not flags.no_cpython_wrapper: + lower.create_cpython_wrapper() + env = lower.env + call_helper = lower.call_helper + del lower + from numba.core.compiler import _LowerResult # TODO: move this + if flags.no_compile: + return _LowerResult(fndesc, call_helper, cfunc=None, env=env) + else: + # Prepare for execution + cfunc = targetctx.get_executable(library, fndesc, env) + return _LowerResult(fndesc, call_helper, cfunc=cfunc, env=env) + + def run_pass(self, state): + """ + Lowering for object mode + """ + + if state.library is None: + codegen = state.targetctx.codegen() + state.library = codegen.create_library(state.func_id.func_qualname) + # Enable object caching upfront, so that the library can + # be later serialized. + state.library.enable_object_caching() + + def backend_object_mode(): + """ + Object mode compilation + """ + if len(state.args) != state.nargs: + # append missing + # BUG?: What's going on with nargs here? + # check state.nargs vs self.nargs on original code + state.args = (tuple(state.args) + (types.pyobject,) * + (state.nargs - len(state.args))) + + return self._py_lowering_stage(state.targetctx, + state.library, + state.func_ir, + state.flags) + + lowered = backend_object_mode() + signature = typing.signature(state.return_type, *state.args) + from numba.core.compiler import compile_result + state.cr = compile_result( + typing_context=state.typingctx, + target_context=state.targetctx, + entry_point=lowered.cfunc, + typing_error=state.status.fail_reason, + type_annotation=state.type_annotation, + library=state.library, + call_helper=lowered.call_helper, + signature=signature, + objectmode=True, + lifted=state.lifted, + fndesc=lowered.fndesc, + environment=lowered.env, + metadata=state.metadata, + reload_init=state.reload_init, + ) + + # Warn, deprecated behaviour, code compiled in objmode without + # force_pyobject indicates fallback from nopython mode + if not state.flags.force_pyobject: + # first warn about object mode and yes/no to lifted loops + if len(state.lifted) > 0: + warn_msg = ('Function "%s" was compiled in object mode without' + ' forceobj=True, but has lifted loops.' % + (state.func_id.func_name,)) + else: + warn_msg = ('Function "%s" was compiled in object mode without' + ' forceobj=True.' 
+                            % (state.func_id.func_name,))
+            warnings.warn(errors.NumbaWarning(warn_msg,
+                                              state.func_ir.loc))
+
+            url = ("https://numba.readthedocs.io/en/stable/reference/"
+                   "deprecation.html#deprecation-of-object-mode-fall-"
+                   "back-behaviour-when-using-jit")
+            msg = ("\nFall-back from the nopython compilation path to the "
+                   "object mode compilation path has been detected, this is "
+                   "deprecated behaviour.\n\nFor more information visit %s" %
+                   url)
+            warnings.warn(errors.NumbaDeprecationWarning(msg,
+                                                         state.func_ir.loc))
+        if state.flags.release_gil:
+            warn_msg = ("Code running in object mode won't allow parallel"
+                        " execution despite nogil=True.")
+            warnings.warn_explicit(warn_msg, errors.NumbaWarning,
+                                   state.func_id.filename,
+                                   state.func_id.firstlineno)
+        return True
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/optional.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/optional.py
new file mode 100644
index 000000000..16dcb5236
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/optional.py
@@ -0,0 +1,121 @@
+import operator
+
+from numba.core import types, typing, cgutils
+
+from numba.core.imputils import (lower_cast, lower_builtin,
+                                 lower_getattr_generic, impl_ret_untracked,
+                                 lower_setattr_generic)
+
+
+def always_return_true_impl(context, builder, sig, args):
+    return cgutils.true_bit
+
+
+def always_return_false_impl(context, builder, sig, args):
+    return cgutils.false_bit
+
+
+def optional_is_none(context, builder, sig, args):
+    """
+    Check if an Optional value is invalid
+    """
+    [lty, rty] = sig.args
+    [lval, rval] = args
+
+    # Make sure None is on the right
+    if lty == types.none:
+        lty, rty = rty, lty
+        lval, rval = rval, lval
+
+    opt_type = lty
+    opt_val = lval
+
+    opt = context.make_helper(builder, opt_type, opt_val)
+    res = builder.not_(cgutils.as_bool_bit(builder, opt.valid))
+    return impl_ret_untracked(context, builder, sig.return_type, res)
+
+
+# None is/not None
+lower_builtin(operator.is_, types.none, types.none)(always_return_true_impl)
+
+# Optional is None
+lower_builtin(operator.is_, types.Optional, types.none)(optional_is_none)
+lower_builtin(operator.is_, types.none, types.Optional)(optional_is_none)
+
+
+@lower_getattr_generic(types.Optional)
+def optional_getattr(context, builder, typ, value, attr):
+    """
+    Optional.__getattr__ => redirect to the wrapped type.
+    """
+    inner_type = typ.type
+    val = context.cast(builder, value, typ, inner_type)
+    imp = context.get_getattr(inner_type, attr)
+    return imp(context, builder, inner_type, val, attr)
+
+
+@lower_setattr_generic(types.Optional)
+def optional_setattr(context, builder, sig, args, attr):
+    """
+    Optional.__setattr__ => redirect to the wrapped type.
+    """
+    basety, valty = sig.args
+    target, val = args
+    target_type = basety.type
+    target = context.cast(builder, target, basety, target_type)
+
+    newsig = typing.signature(sig.return_type, target_type, valty)
+    imp = context.get_setattr(attr, newsig)
+    return imp(builder, (target, val))
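At the user level, this machinery is what makes None-checks on optional values work. A small sketch (assuming numba is installed) where type inference produces an `Optional(int64)`; the `is None` test lowers through `optional_is_none` above, and the unwrap uses the Optional casts defined next:

    from numba import njit

    @njit
    def first_positive(arr):
        for v in arr:
            if v > 0:
                return v   # inferred return type: Optional(int64)
        return None

    @njit
    def caller(arr):
        r = first_positive(arr)
        if r is None:      # lowered via optional_is_none
            return -1
        return r           # unwraps via the Optional -> int64 cast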
+ """ + optval = context.make_helper(builder, fromty, value=val) + validbit = cgutils.as_bool_bit(builder, optval.valid) + # Create uninitialized optional value + outoptval = context.make_helper(builder, toty) + + with builder.if_else(validbit) as (is_valid, is_not_valid): + with is_valid: + # Cast internal value + outoptval.valid = cgutils.true_bit + outoptval.data = context.cast(builder, optval.data, + fromty.type, toty.type) + + with is_not_valid: + # Store None to result + outoptval.valid = cgutils.false_bit + outoptval.data = cgutils.get_null_value( + outoptval.data.type) + + return outoptval._getvalue() + + +@lower_cast(types.Any, types.Optional) +def any_to_optional(context, builder, fromty, toty, val): + if fromty == types.none: + return context.make_optional_none(builder, toty.type) + else: + val = context.cast(builder, val, fromty, toty.type) + return context.make_optional_value(builder, toty.type, val) + + +@lower_cast(types.Optional, types.Any) +@lower_cast(types.Optional, types.Boolean) +def optional_to_any(context, builder, fromty, toty, val): + optval = context.make_helper(builder, fromty, value=val) + validbit = cgutils.as_bool_bit(builder, optval.valid) + with builder.if_then(builder.not_(validbit), likely=False): + msg = "expected %s, got None" % (fromty.type,) + context.call_conv.return_user_exc(builder, TypeError, (msg,)) + + return context.cast(builder, optval.data, fromty.type, toty) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/options.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/options.py new file mode 100644 index 000000000..3ca89a8e1 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/options.py @@ -0,0 +1,109 @@ +""" +Target Options +""" +import operator + +from numba.core import config, utils +from numba.core.targetconfig import TargetConfig, Option + + +class TargetOptions: + """Target options maps user options from decorators to the + ``numba.core.compiler.Flags`` used by lowering and target context. + """ + class Mapping: + def __init__(self, flag_name, apply=lambda x: x): + self.flag_name = flag_name + self.apply = apply + + def finalize(self, flags, options): + """Subclasses can override this method to make target specific + customizations of default flags. + + Parameters + ---------- + flags : Flags + options : dict + """ + pass + + @classmethod + def parse_as_flags(cls, flags, options): + """Parse target options defined in ``options`` and set ``flags`` + accordingly. + + Parameters + ---------- + flags : Flags + options : dict + """ + opt = cls() + opt._apply(flags, options) + opt.finalize(flags, options) + return flags + + def _apply(self, flags, options): + # Find all Mapping instances in the class + mappings = {} + cls = type(self) + for k in dir(cls): + v = getattr(cls, k) + if isinstance(v, cls.Mapping): + mappings[k] = v + + used = set() + for k, mapping in mappings.items(): + if k in options: + v = mapping.apply(options[k]) + setattr(flags, mapping.flag_name, v) + used.add(k) + + unused = set(options) - used + if unused: + # Unread options? + m = (f"Unrecognized options: {unused}. " + f"Known options are {mappings.keys()}") + raise KeyError(m) + + +_mapping = TargetOptions.Mapping + + +class DefaultOptions: + """Defines how user-level target options are mapped to the target flags. 
+ """ + nopython = _mapping("enable_pyobject", operator.not_) + forceobj = _mapping("force_pyobject") + looplift = _mapping("enable_looplift") + _nrt = _mapping("nrt") + debug = _mapping("debuginfo") + boundscheck = _mapping("boundscheck") + nogil = _mapping("release_gil") + + no_rewrites = _mapping("no_rewrites") + no_cpython_wrapper = _mapping("no_cpython_wrapper") + no_cfunc_wrapper = _mapping("no_cfunc_wrapper") + + parallel = _mapping("auto_parallel") + fastmath = _mapping("fastmath") + error_model = _mapping("error_model") + inline = _mapping("inline") + forceinline = _mapping("forceinline") + + target_backend = _mapping("target_backend") + + _dbg_extend_lifetimes = _mapping("dbg_extend_lifetimes") + _dbg_optnone = _mapping("dbg_optnone") + + +def include_default_options(*args): + """Returns a mixin class with a subset of the options + + Parameters + ---------- + *args : str + Option names to include. + """ + glbs = {k: getattr(DefaultOptions, k) for k in args} + + return type("OptionMixins", (), glbs) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/overload_glue.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/overload_glue.py new file mode 100644 index 000000000..03eb97bfb --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/overload_glue.py @@ -0,0 +1,308 @@ +""" +Provides wrapper functions for "glueing" together Numba implementations that are +written in the "old" style of a separate typing and lowering implementation. +""" +import types as pytypes +import textwrap +from threading import RLock +from collections import defaultdict + +from numba.core import errors + + +class _OverloadWrapper(object): + """This class does all the work of assembling and registering wrapped split + implementations. + """ + + def __init__(self, function, typing_key=None): + assert function is not None + self._function = function + self._typing_key = typing_key + self._BIND_TYPES = dict() + self._selector = None + self._TYPER = None + # run to register overload, the intrinsic sorts out the binding to the + # registered impls at the point the overload is evaluated, i.e. this + # is all lazy. 
+ self._build() + + def _stub_generator(self, body_func, varnames): + """This generates a function based on the argnames provided in + "varnames", the "body_func" is the function that'll type the overloaded + function and then work out which lowering to return""" + def stub(tyctx): + # body is supplied when the function is magic'd into life via glbls + return body(tyctx) # noqa: F821 + + stub_code = stub.__code__ + new_varnames = [*stub_code.co_varnames] + new_varnames.extend(varnames) + co_argcount = len(new_varnames) + co_args = [co_argcount] + additional_co_nlocals = len(varnames) + + from numba.core import utils + if utils.PYVERSION >= (3, 8): + co_args.append(stub_code.co_posonlyargcount) + co_args.append(stub_code.co_kwonlyargcount) + co_args.extend([stub_code.co_nlocals + additional_co_nlocals, + stub_code.co_stacksize, + stub_code.co_flags, + stub_code.co_code, + stub_code.co_consts, + stub_code.co_names, + tuple(new_varnames), + stub_code.co_filename, + stub_code.co_name, + stub_code.co_firstlineno, + stub_code.co_lnotab, + stub_code.co_freevars, + stub_code.co_cellvars + ]) + + new_code = pytypes.CodeType(*co_args) + + # get function + new_func = pytypes.FunctionType(new_code, {'body': body_func}) + return new_func + + def wrap_typing(self): + """ + Use this to replace @infer_global, it records the decorated function + as a typer for the argument `concrete_function`. + """ + if self._typing_key is None: + key = self._function + else: + key = self._typing_key + + def inner(typing_class): + # Note that two templates could be used for the same function, to + # avoid @infer_global etc the typing template is copied. This is to + # ensure there's a 1:1 relationship between the typing templates and + # their keys. + clazz_dict = dict(typing_class.__dict__) + clazz_dict['key'] = key + cloned = type(f"cloned_template_for_{key}", typing_class.__bases__, + clazz_dict) + self._TYPER = cloned + _overload_glue.add_no_defer(key) + self._build() + return typing_class + return inner + + def wrap_impl(self, *args): + """ + Use this to replace @lower*, it records the decorated function as the + lowering implementation + """ + assert self._TYPER is not None + + def inner(lowerer): + self._BIND_TYPES[args] = lowerer + return lowerer + return inner + + def _assemble(self): + """Assembles the OverloadSelector definitions from the registered + typing to lowering map. 
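+
+        Called lazily from ``_build``, the first time the generated overload
+        is typed and a concrete lowering has to be selected.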
+ """ + from numba.core.base import OverloadSelector + + if self._typing_key is None: + key = self._function + else: + key = self._typing_key + + _overload_glue.flush_deferred_lowering(key) + + self._selector = OverloadSelector() + msg = f"No entries in the typing->lowering map for {self._function}" + assert self._BIND_TYPES, msg + for sig, impl in self._BIND_TYPES.items(): + self._selector.append(impl, sig) + + def _build(self): + from numba.core.extending import overload, intrinsic + + @overload(self._function, strict=False, + jit_options={'forceinline': True}) + def ol_generated(*ol_args, **ol_kwargs): + def body(tyctx): + msg = f"No typer registered for {self._function}" + if self._TYPER is None: + raise errors.InternalError(msg) + typing = self._TYPER(tyctx) + sig = typing.apply(ol_args, ol_kwargs) + if sig is None: + # this follows convention of something not typeable + # returning None + return None + if self._selector is None: + self._assemble() + lowering = self._selector.find(sig.args) + msg = (f"Could not find implementation to lower {sig} for ", + f"{self._function}") + if lowering is None: + raise errors.InternalError(msg) + return sig, lowering + + # Need a typing context now so as to get a signature and a binding + # for the kwarg order. + from numba.core.target_extension import (dispatcher_registry, + resolve_target_str, + current_target) + disp = dispatcher_registry[resolve_target_str(current_target())] + typing_context = disp.targetdescr.typing_context + typing = self._TYPER(typing_context) + sig = typing.apply(ol_args, ol_kwargs) + if not sig: + # No signature is a typing error, there's no match, so report it + raise errors.TypingError("No match") + + # The following code branches based on whether the signature has a + # "pysig", if it does, it's from a CallableTemplate and + # specialisation is required based on precise arg/kwarg names and + # default values, if it does not, then it just requires + # specialisation based on the arg count. + # + # The "gen_var_names" function is defined to generate the variable + # names at the call site of the intrinsic. + # + # The "call_str_specific" is the list of args to the function + # returned by the @overload, it has to have matching arg names and + # kwargs names/defaults if the underlying typing template supports + # it (CallableTemplate), else it has to have a matching number of + # arguments (AbstractTemplate). The "call_str" is the list of args + # that will be passed to the intrinsic that deals with typing and + # selection of the lowering etc, so it just needs to be a list of + # the argument names. + + if sig.pysig: # CallableTemplate, has pysig + pysig_params = sig.pysig.parameters + + # Define the var names + gen_var_names = [x for x in pysig_params.keys()] + # CallableTemplate, pysig is present so generate the exact thing + # this is to permit calling with positional args specified by + # name. + buf = [] + for k, v in pysig_params.items(): + if v.default is v.empty: # no default ~= positional arg + buf.append(k) + else: # is kwarg, wire in default + buf.append(f'{k} = {v.default}') + call_str_specific = ', '.join(buf) + call_str = ', '.join(pysig_params.keys()) + else: # AbstractTemplate, need to bind 1:1 vars to the arg count + # Define the var names + gen_var_names = [f'tmp{x}' for x in range(len(ol_args))] + # Everything is just passed by position, there should be no + # kwargs. 
+ assert not ol_kwargs + call_str_specific = ', '.join(gen_var_names) + call_str = call_str_specific + + stub = self._stub_generator(body, gen_var_names) + intrin = intrinsic(stub) + + # NOTE: The jit_wrapper functions cannot take `*args` + # albeit this an obvious choice for accepting an unknown number + # of arguments. If this is done, `*args` ends up as a cascade of + # Tuple assembling in the IR which ends up with literal + # information being lost. As a result the _exact_ argument list + # is generated to match the number of arguments and kwargs. + name = str(self._function) + # This is to name the function with something vaguely identifiable + name = ''.join([x if x not in {'>','<',' ','-','.'} else '_' + for x in name]) + gen = textwrap.dedent((""" + def jit_wrapper_{}({}): + return intrin({}) + """)).format(name, call_str_specific, call_str) + l = {} + g = {'intrin': intrin} + exec(gen, g, l) + return l['jit_wrapper_{}'.format(name)] + + +class _Gluer: + """This is a helper class to make sure that each concrete overload has only + one wrapper as the code relies on the wrapper being a singleton.""" + def __init__(self): + self._registered = dict() + self._lock = RLock() + # `_no_defer` stores keys that should not defer lowering because typing + # is already provided. + self._no_defer = set() + # `_deferred` stores lowering that must be deferred because the typing + # has not been provided. + self._deferred = defaultdict(list) + + def __call__(self, func, typing_key=None): + with self._lock: + if typing_key is None: + key = func + else: + key = typing_key + if key in self._registered: + return self._registered[key] + else: + wrapper = _OverloadWrapper(func, typing_key=typing_key) + self._registered[key] = wrapper + return wrapper + + def defer_lowering(self, key, lower_fn): + """Defer lowering of the given key and lowering function. + """ + with self._lock: + if key in self._no_defer: + # Key is marked as no defer, register lowering now + lower_fn() + else: + # Defer + self._deferred[key].append(lower_fn) + + def add_no_defer(self, key): + """Stop lowering to be deferred for the given key. + """ + with self._lock: + self._no_defer.add(key) + + def flush_deferred_lowering(self, key): + """Flush the deferred lowering for the given key. + """ + with self._lock: + deferred = self._deferred.pop(key, []) + for cb in deferred: + cb() + + +_overload_glue = _Gluer() +del _Gluer + + +def glue_typing(concrete_function, typing_key=None): + """This is a decorator for wrapping the typing part for a concrete function + 'concrete_function', it's a text-only replacement for '@infer_global'""" + return _overload_glue(concrete_function, + typing_key=typing_key).wrap_typing() + + +def glue_lowering(*args): + """This is a decorator for wrapping the implementation (lowering) part for + a concrete function. 'args[0]' is the concrete_function, 'args[1:]' are the + types the lowering will accept. 
This acts as a text-only replacement for + '@lower/@lower_builtin'""" + + def wrap(fn): + key = args[0] + + def real_call(): + glue = _overload_glue(args[0], typing_key=key) + return glue.wrap_impl(*args[1:])(fn) + + _overload_glue.defer_lowering(key, real_call) + return fn + return wrap diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/postproc.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/postproc.py new file mode 100644 index 000000000..a43c601df --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/postproc.py @@ -0,0 +1,238 @@ +from numba.core import utils, ir, analysis, transforms, ir_utils + + +class YieldPoint(object): + + def __init__(self, block, inst): + assert isinstance(block, ir.Block) + assert isinstance(inst, ir.Yield) + self.block = block + self.inst = inst + self.live_vars = None + self.weak_live_vars = None + + +class GeneratorInfo(object): + + def __init__(self): + # { index: YieldPoint } + self.yield_points = {} + # Ordered list of variable names + self.state_vars = [] + + def get_yield_points(self): + """ + Return an iterable of YieldPoint instances. + """ + return self.yield_points.values() + + +class VariableLifetime(object): + """ + For lazily building information of variable lifetime + """ + def __init__(self, blocks): + self._blocks = blocks + + @utils.cached_property + def cfg(self): + return analysis.compute_cfg_from_blocks(self._blocks) + + @utils.cached_property + def usedefs(self): + return analysis.compute_use_defs(self._blocks) + + @utils.cached_property + def livemap(self): + return analysis.compute_live_map(self.cfg, self._blocks, + self.usedefs.usemap, + self.usedefs.defmap) + + @utils.cached_property + def deadmaps(self): + return analysis.compute_dead_maps(self.cfg, self._blocks, self.livemap, + self.usedefs.defmap) + + +# other packages that define new nodes add calls for inserting dels +# format: {type:function} +ir_extension_insert_dels = {} + + +class PostProcessor(object): + """ + A post-processor for Numba IR. + """ + + def __init__(self, func_ir): + self.func_ir = func_ir + + def run(self, emit_dels: bool = False, extend_lifetimes: bool = False): + """ + Run the following passes over Numba IR: + - canonicalize the CFG + - emit explicit `del` instructions for variables + - compute lifetime of variables + - compute generator info (if function is a generator function) + """ + self.func_ir.blocks = transforms.canonicalize_cfg(self.func_ir.blocks) + vlt = VariableLifetime(self.func_ir.blocks) + self.func_ir.variable_lifetime = vlt + + bev = analysis.compute_live_variables(vlt.cfg, self.func_ir.blocks, + vlt.usedefs.defmap, + vlt.deadmaps.combined) + for offset, ir_block in self.func_ir.blocks.items(): + self.func_ir.block_entry_vars[ir_block] = bev[offset] + + if self.func_ir.is_generator: + self.func_ir.generator_info = GeneratorInfo() + self._compute_generator_info() + else: + self.func_ir.generator_info = None + + # Emit del nodes, do this last as the generator info parsing generates + # and then strips dels as part of its analysis. + if emit_dels: + self._insert_var_dels(extend_lifetimes=extend_lifetimes) + + def _populate_generator_info(self): + """ + Fill `index` for the Yield instruction and create YieldPoints. 
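+
+        Indices are assigned starting from 1 (``len(dct) + 1`` below),
+        which appears to leave index 0 for the generator's initial resume
+        state.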
+ """ + dct = self.func_ir.generator_info.yield_points + assert not dct, 'rerunning _populate_generator_info' + for block in self.func_ir.blocks.values(): + for inst in block.body: + if isinstance(inst, ir.Assign): + yieldinst = inst.value + if isinstance(yieldinst, ir.Yield): + index = len(dct) + 1 + yieldinst.index = index + yp = YieldPoint(block, yieldinst) + dct[yieldinst.index] = yp + + def _compute_generator_info(self): + """ + Compute the generator's state variables as the union of live variables + at all yield points. + """ + # generate del info, it's used in analysis here, strip it out at the end + self._insert_var_dels() + self._populate_generator_info() + gi = self.func_ir.generator_info + for yp in gi.get_yield_points(): + live_vars = set(self.func_ir.get_block_entry_vars(yp.block)) + weak_live_vars = set() + stmts = iter(yp.block.body) + for stmt in stmts: + if isinstance(stmt, ir.Assign): + if stmt.value is yp.inst: + break + live_vars.add(stmt.target.name) + elif isinstance(stmt, ir.Del): + live_vars.remove(stmt.value) + else: + assert 0, "couldn't find yield point" + # Try to optimize out any live vars that are deleted immediately + # after the yield point. + for stmt in stmts: + if isinstance(stmt, ir.Del): + name = stmt.value + if name in live_vars: + live_vars.remove(name) + weak_live_vars.add(name) + else: + break + yp.live_vars = live_vars + yp.weak_live_vars = weak_live_vars + + st = set() + for yp in gi.get_yield_points(): + st |= yp.live_vars + st |= yp.weak_live_vars + gi.state_vars = sorted(st) + self.remove_dels() + + def _insert_var_dels(self, extend_lifetimes=False): + """ + Insert del statements for each variable. + Returns a 2-tuple of (variable definition map, variable deletion map) + which indicates variables defined and deleted in each block. + + The algorithm avoids relying on explicit knowledge on loops and + distinguish between variables that are defined locally vs variables that + come from incoming blocks. + We start with simple usage (variable reference) and definition (variable + creation) maps on each block. Propagate the liveness info to predecessor + blocks until it stabilize, at which point we know which variables must + exist before entering each block. Then, we compute the end of variable + lives and insert del statements accordingly. Variables are deleted after + the last use. Variable referenced by terminators (e.g. conditional + branch and return) are deleted by the successors or the caller. 
+ """ + vlt = self.func_ir.variable_lifetime + self._patch_var_dels(vlt.deadmaps.internal, vlt.deadmaps.escaping, + extend_lifetimes=extend_lifetimes) + + def _patch_var_dels(self, internal_dead_map, escaping_dead_map, + extend_lifetimes=False): + """ + Insert delete in each block + """ + for offset, ir_block in self.func_ir.blocks.items(): + # for each internal var, insert delete after the last use + internal_dead_set = internal_dead_map[offset].copy() + delete_pts = [] + # for each statement in reverse order + for stmt in reversed(ir_block.body[:-1]): + # internal vars that are used here + live_set = set(v.name for v in stmt.list_vars()) + dead_set = live_set & internal_dead_set + for T, def_func in ir_extension_insert_dels.items(): + if isinstance(stmt, T): + done_dels = def_func(stmt, dead_set) + dead_set -= done_dels + internal_dead_set -= done_dels + # used here but not afterwards + delete_pts.append((stmt, dead_set)) + internal_dead_set -= dead_set + + # rewrite body and insert dels + body = [] + lastloc = ir_block.loc + del_store = [] + for stmt, delete_set in reversed(delete_pts): + # If using extended lifetimes then the Dels are all put at the + # block end just ahead of the terminator, so associate their + # location with the terminator. + if extend_lifetimes: + lastloc = ir_block.body[-1].loc + else: + lastloc = stmt.loc + # Ignore dels (assuming no user inserted deletes) + if not isinstance(stmt, ir.Del): + body.append(stmt) + # note: the reverse sort is not necessary for correctness + # it is just to minimize changes to test for now + for var_name in sorted(delete_set, reverse=True): + delnode = ir.Del(var_name, loc=lastloc) + if extend_lifetimes: + del_store.append(delnode) + else: + body.append(delnode) + if extend_lifetimes: + body.extend(del_store) + body.append(ir_block.body[-1]) # terminator + ir_block.body = body + + # vars to delete at the start + escape_dead_set = escaping_dead_map[offset] + for var_name in sorted(escape_dead_set): + ir_block.prepend(ir.Del(var_name, loc=ir_block.body[0].loc)) + + def remove_dels(self): + """ + Strips the IR of Del nodes + """ + ir_utils.remove_dels(self.func_ir.blocks) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pylowering.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pylowering.py new file mode 100644 index 000000000..017124c63 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pylowering.py @@ -0,0 +1,655 @@ +""" +Lowering implementation for object mode. +""" + + +import builtins +import operator +import inspect + +import llvmlite.ir + +from numba.core import types, utils, ir, generators, cgutils +from numba.core.errors import (ForbiddenConstruct, LoweringError, + NumbaNotImplementedError) +from numba.core.lowering import BaseLower + + +# Issue #475: locals() is unsupported as calling it naively would give +# out wrong results. 
+_unsupported_builtins = set([locals]) + +# Map operators to methods on the PythonAPI class +PYTHON_BINOPMAP = { + operator.add: ("number_add", False), + operator.sub: ("number_subtract", False), + operator.mul: ("number_multiply", False), + operator.truediv: ("number_truedivide", False), + operator.floordiv: ("number_floordivide", False), + operator.mod: ("number_remainder", False), + operator.pow: ("number_power", False), + operator.lshift: ("number_lshift", False), + operator.rshift: ("number_rshift", False), + operator.and_: ("number_and", False), + operator.or_: ("number_or", False), + operator.xor: ("number_xor", False), + # inplace operators + operator.iadd: ("number_add", True), + operator.isub: ("number_subtract", True), + operator.imul: ("number_multiply", True), + operator.itruediv: ("number_truedivide", True), + operator.ifloordiv: ("number_floordivide", True), + operator.imod: ("number_remainder", True), + operator.ipow: ("number_power", True), + operator.ilshift: ("number_lshift", True), + operator.irshift: ("number_rshift", True), + operator.iand: ("number_and", True), + operator.ior: ("number_or", True), + operator.ixor: ("number_xor", True), +} + +PYTHON_BINOPMAP[operator.matmul] = ("number_matrix_multiply", False) +PYTHON_BINOPMAP[operator.imatmul] = ("number_matrix_multiply", True) + +PYTHON_COMPAREOPMAP = { + operator.eq: '==', + operator.ne: '!=', + operator.lt: '<', + operator.le: '<=', + operator.gt: '>', + operator.ge: '>=', + operator.is_: 'is', + operator.is_not: 'is not', + operator.contains: 'in' +} + +class PyLower(BaseLower): + + GeneratorLower = generators.PyGeneratorLower + + def init(self): + # Strings to be frozen into the Environment object + self._frozen_strings = set() + + self._live_vars = set() + + def pre_lower(self): + super(PyLower, self).pre_lower() + self.init_pyapi() + + def post_lower(self): + pass + + def pre_block(self, block): + self.init_vars(block) + + def lower_inst(self, inst): + if isinstance(inst, ir.Assign): + value = self.lower_assign(inst) + self.storevar(value, inst.target.name) + + elif isinstance(inst, ir.SetItem): + target = self.loadvar(inst.target.name) + index = self.loadvar(inst.index.name) + value = self.loadvar(inst.value.name) + ok = self.pyapi.object_setitem(target, index, value) + self.check_int_status(ok) + + elif isinstance(inst, ir.DelItem): + target = self.loadvar(inst.target.name) + index = self.loadvar(inst.index.name) + ok = self.pyapi.object_delitem(target, index) + self.check_int_status(ok) + + elif isinstance(inst, ir.SetAttr): + target = self.loadvar(inst.target.name) + value = self.loadvar(inst.value.name) + ok = self.pyapi.object_setattr(target, + self._freeze_string(inst.attr), + value) + self.check_int_status(ok) + + elif isinstance(inst, ir.DelAttr): + target = self.loadvar(inst.target.name) + ok = self.pyapi.object_delattr(target, + self._freeze_string(inst.attr)) + self.check_int_status(ok) + + elif isinstance(inst, ir.StoreMap): + dct = self.loadvar(inst.dct.name) + key = self.loadvar(inst.key.name) + value = self.loadvar(inst.value.name) + ok = self.pyapi.dict_setitem(dct, key, value) + self.check_int_status(ok) + + elif isinstance(inst, ir.Return): + retval = self.loadvar(inst.value.name) + if self.generator_info: + # StopIteration + # We own a reference to the "return value", but we + # don't return it. + self.pyapi.decref(retval) + self.genlower.return_from_generator(self) + return + # No need to incref() as the reference is already owned. 
+ self.call_conv.return_value(self.builder, retval) + + elif isinstance(inst, ir.Branch): + cond = self.loadvar(inst.cond.name) + if cond.type == llvmlite.ir.IntType(1): + istrue = cond + else: + istrue = self.pyapi.object_istrue(cond) + zero = llvmlite.ir.Constant(istrue.type, None) + pred = self.builder.icmp_unsigned('!=', istrue, zero) + tr = self.blkmap[inst.truebr] + fl = self.blkmap[inst.falsebr] + self.builder.cbranch(pred, tr, fl) + + elif isinstance(inst, ir.Jump): + target = self.blkmap[inst.target] + self.builder.branch(target) + + elif isinstance(inst, ir.Del): + self.delvar(inst.value) + + elif isinstance(inst, ir.PopBlock): + pass # this is just a marker + + elif isinstance(inst, ir.Raise): + if inst.exception is not None: + exc = self.loadvar(inst.exception.name) + # A reference will be stolen by raise_object() and another + # by return_exception_raised(). + self.incref(exc) + else: + exc = None + self.pyapi.raise_object(exc) + self.return_exception_raised() + + else: + msg = f"{type(inst)}, {inst}" + raise NumbaNotImplementedError(msg) + + @utils.cached_property + def _omitted_typobj(self): + """Return a `OmittedArg` type instance as a LLVM value suitable for + testing at runtime. + """ + from numba.core.dispatcher import OmittedArg + return self.pyapi.unserialize( + self.pyapi.serialize_object(OmittedArg)) + + def lower_assign(self, inst): + """ + The returned object must have a new reference + """ + value = inst.value + if isinstance(value, (ir.Const, ir.FreeVar)): + return self.lower_const(value.value) + elif isinstance(value, ir.Var): + val = self.loadvar(value.name) + self.incref(val) + return val + elif isinstance(value, ir.Expr): + return self.lower_expr(value) + elif isinstance(value, ir.Global): + return self.lower_global(value.name, value.value) + elif isinstance(value, ir.Yield): + return self.lower_yield(value) + elif isinstance(value, ir.Arg): + param = self.func_ir.func_id.pysig.parameters.get(value.name) + + obj = self.fnargs[value.index] + slot = cgutils.alloca_once_value(self.builder, obj) + # Don't check for OmittedArg unless the argument has a default + if param is not None and param.default is inspect.Parameter.empty: + self.incref(obj) + self.builder.store(obj, slot) + else: + # When an argument is omitted, the dispatcher hands it as + # _OmittedArg() + typobj = self.pyapi.get_type(obj) + is_omitted = self.builder.icmp_unsigned('==', typobj, + self._omitted_typobj) + with self.builder.if_else(is_omitted, likely=False) as (omitted, present): + with present: + self.incref(obj) + self.builder.store(obj, slot) + with omitted: + # The argument is omitted => get the default value + obj = self.pyapi.object_getattr_string(obj, 'value') + self.builder.store(obj, slot) + + return self.builder.load(slot) + else: + raise NotImplementedError(type(value), value) + + def lower_yield(self, inst): + yp = self.generator_info.yield_points[inst.index] + assert yp.inst is inst + self.genlower.init_generator_state(self) + + # Save live vars in state + # We also need to save live vars that are del'ed afterwards. 
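+        # (The "weak" live vars are those deleted immediately after the
+        # yield point; see PostProcessor._compute_generator_info in
+        # postproc.py.)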
+ y = generators.LowerYield(self, yp, yp.live_vars | yp.weak_live_vars) + y.lower_yield_suspend() + # Yield to caller + val = self.loadvar(inst.value.name) + # Let caller own the reference + self.pyapi.incref(val) + self.call_conv.return_value(self.builder, val) + + # Resumption point + y.lower_yield_resume() + # None is returned by the yield expression + return self.pyapi.make_none() + + def lower_binop(self, expr, op, inplace=False): + lhs = self.loadvar(expr.lhs.name) + rhs = self.loadvar(expr.rhs.name) + assert not isinstance(op, str) + if op in PYTHON_BINOPMAP: + fname, inplace = PYTHON_BINOPMAP[op] + fn = getattr(self.pyapi, fname) + res = fn(lhs, rhs, inplace=inplace) + else: + # Assumed to be rich comparison + fn = PYTHON_COMPAREOPMAP.get(expr.fn, expr.fn) + if fn == 'in': # 'in' and operator.contains have args reversed + lhs, rhs = rhs, lhs + res = self.pyapi.object_richcompare(lhs, rhs, fn) + self.check_error(res) + return res + + def lower_expr(self, expr): + if expr.op == 'binop': + return self.lower_binop(expr, expr.fn, inplace=False) + elif expr.op == 'inplace_binop': + return self.lower_binop(expr, expr.fn, inplace=True) + elif expr.op == 'unary': + value = self.loadvar(expr.value.name) + if expr.fn == operator.neg: + res = self.pyapi.number_negative(value) + elif expr.fn == operator.pos: + res = self.pyapi.number_positive(value) + elif expr.fn == operator.not_: + res = self.pyapi.object_not(value) + self.check_int_status(res) + res = self.pyapi.bool_from_bool(res) + elif expr.fn == operator.invert: + res = self.pyapi.number_invert(value) + else: + raise NotImplementedError(expr) + self.check_error(res) + return res + elif expr.op == 'call': + argvals = [self.loadvar(a.name) for a in expr.args] + fn = self.loadvar(expr.func.name) + args = self.pyapi.tuple_pack(argvals) + if expr.vararg: + # Expand *args + varargs = self.pyapi.sequence_tuple( + self.loadvar(expr.vararg.name)) + new_args = self.pyapi.sequence_concat(args, varargs) + self.decref(varargs) + self.decref(args) + args = new_args + if not expr.kws: + # No named arguments + ret = self.pyapi.call(fn, args, None) + else: + # Named arguments + keyvalues = [(k, self.loadvar(v.name)) for k, v in expr.kws] + kws = self.pyapi.dict_pack(keyvalues) + ret = self.pyapi.call(fn, args, kws) + self.decref(kws) + self.decref(args) + self.check_error(ret) + return ret + elif expr.op == 'getattr': + obj = self.loadvar(expr.value.name) + res = self.pyapi.object_getattr(obj, self._freeze_string(expr.attr)) + self.check_error(res) + return res + elif expr.op == 'build_tuple': + items = [self.loadvar(it.name) for it in expr.items] + res = self.pyapi.tuple_pack(items) + self.check_error(res) + return res + elif expr.op == 'build_list': + items = [self.loadvar(it.name) for it in expr.items] + res = self.pyapi.list_pack(items) + self.check_error(res) + return res + elif expr.op == 'build_map': + res = self.pyapi.dict_new(expr.size) + self.check_error(res) + for k, v in expr.items: + key = self.loadvar(k.name) + value = self.loadvar(v.name) + ok = self.pyapi.dict_setitem(res, key, value) + self.check_int_status(ok) + return res + elif expr.op == 'build_set': + items = [self.loadvar(it.name) for it in expr.items] + res = self.pyapi.set_new() + self.check_error(res) + for it in items: + ok = self.pyapi.set_add(res, it) + self.check_int_status(ok) + return res + elif expr.op == 'getiter': + obj = self.loadvar(expr.value.name) + res = self.pyapi.object_getiter(obj) + self.check_error(res) + return res + elif expr.op == 'iternext': + iterobj = 
self.loadvar(expr.value.name) + item = self.pyapi.iter_next(iterobj) + is_valid = cgutils.is_not_null(self.builder, item) + pair = self.pyapi.tuple_new(2) + with self.builder.if_else(is_valid) as (then, otherwise): + with then: + self.pyapi.tuple_setitem(pair, 0, item) + with otherwise: + self.check_occurred() + # Make the tuple valid by inserting None as dummy + # iteration "result" (it will be ignored). + self.pyapi.tuple_setitem(pair, 0, self.pyapi.make_none()) + self.pyapi.tuple_setitem(pair, 1, self.pyapi.bool_from_bool(is_valid)) + return pair + elif expr.op == 'pair_first': + pair = self.loadvar(expr.value.name) + first = self.pyapi.tuple_getitem(pair, 0) + self.incref(first) + return first + elif expr.op == 'pair_second': + pair = self.loadvar(expr.value.name) + second = self.pyapi.tuple_getitem(pair, 1) + self.incref(second) + return second + elif expr.op == 'exhaust_iter': + iterobj = self.loadvar(expr.value.name) + tup = self.pyapi.sequence_tuple(iterobj) + self.check_error(tup) + # Check tuple size is as expected + tup_size = self.pyapi.tuple_size(tup) + expected_size = self.context.get_constant(types.intp, expr.count) + has_wrong_size = self.builder.icmp_unsigned('!=', + tup_size, expected_size) + with cgutils.if_unlikely(self.builder, has_wrong_size): + self.return_exception(ValueError) + return tup + elif expr.op == 'getitem': + value = self.loadvar(expr.value.name) + index = self.loadvar(expr.index.name) + res = self.pyapi.object_getitem(value, index) + self.check_error(res) + return res + elif expr.op == 'static_getitem': + value = self.loadvar(expr.value.name) + index = self.context.get_constant(types.intp, expr.index) + indexobj = self.pyapi.long_from_ssize_t(index) + self.check_error(indexobj) + res = self.pyapi.object_getitem(value, indexobj) + self.decref(indexobj) + self.check_error(res) + return res + elif expr.op == 'getslice': + target = self.loadvar(expr.target.name) + start = self.loadvar(expr.start.name) + stop = self.loadvar(expr.stop.name) + + slicefn = self.get_builtin_obj("slice") + sliceobj = self.pyapi.call_function_objargs(slicefn, (start, stop)) + self.decref(slicefn) + self.check_error(sliceobj) + + res = self.pyapi.object_getitem(target, sliceobj) + self.check_error(res) + + return res + + elif expr.op == 'cast': + val = self.loadvar(expr.value.name) + self.incref(val) + return val + elif expr.op == 'phi': + raise LoweringError("PHI not stripped") + + elif expr.op == 'null': + # Make null value + return cgutils.get_null_value(self.pyapi.pyobj) + + else: + raise NotImplementedError(expr) + + def lower_const(self, const): + # All constants are frozen inside the environment + index = self.env_manager.add_const(const) + ret = self.env_manager.read_const(index) + self.check_error(ret) + self.incref(ret) + return ret + + def lower_global(self, name, value): + """ + 1) Check global scope dictionary. + 2) Check __builtins__. 
+ 2a) is it a dictionary (for non __main__ module) + 2b) is it a module (for __main__ module) + """ + moddict = self.get_module_dict() + obj = self.pyapi.dict_getitem(moddict, self._freeze_string(name)) + self.incref(obj) # obj is borrowed + + try: + if value in _unsupported_builtins: + raise ForbiddenConstruct("builtins %s() is not supported" + % name, loc=self.loc) + except TypeError: + # `value` is unhashable, ignore + pass + + if hasattr(builtins, name): + obj_is_null = self.is_null(obj) + bbelse = self.builder.basic_block + + with self.builder.if_then(obj_is_null): + mod = self.pyapi.dict_getitem(moddict, + self._freeze_string("__builtins__")) + builtin = self.builtin_lookup(mod, name) + bbif = self.builder.basic_block + + retval = self.builder.phi(self.pyapi.pyobj) + retval.add_incoming(obj, bbelse) + retval.add_incoming(builtin, bbif) + + else: + retval = obj + with cgutils.if_unlikely(self.builder, self.is_null(retval)): + self.pyapi.raise_missing_global_error(name) + self.return_exception_raised() + + return retval + + # ------------------------------------------------------------------------- + + def get_module_dict(self): + return self.env_body.globals + + def get_builtin_obj(self, name): + # XXX The builtins dict could be bound into the environment + moddict = self.get_module_dict() + mod = self.pyapi.dict_getitem(moddict, + self._freeze_string("__builtins__")) + return self.builtin_lookup(mod, name) + + def builtin_lookup(self, mod, name): + """ + Args + ---- + mod: + The __builtins__ dictionary or module, as looked up in + a module's globals. + name: str + The object to lookup + """ + fromdict = self.pyapi.dict_getitem(mod, self._freeze_string(name)) + self.incref(fromdict) # fromdict is borrowed + bbifdict = self.builder.basic_block + + with cgutils.if_unlikely(self.builder, self.is_null(fromdict)): + # This happen if we are using the __main__ module + frommod = self.pyapi.object_getattr(mod, self._freeze_string(name)) + + with cgutils.if_unlikely(self.builder, self.is_null(frommod)): + self.pyapi.raise_missing_global_error(name) + self.return_exception_raised() + + bbifmod = self.builder.basic_block + + builtin = self.builder.phi(self.pyapi.pyobj) + builtin.add_incoming(fromdict, bbifdict) + builtin.add_incoming(frommod, bbifmod) + + return builtin + + def check_occurred(self): + """ + Return if an exception occurred. + """ + err_occurred = cgutils.is_not_null(self.builder, + self.pyapi.err_occurred()) + + with cgutils.if_unlikely(self.builder, err_occurred): + self.return_exception_raised() + + def check_error(self, obj): + """ + Return if *obj* is NULL. + """ + with cgutils.if_unlikely(self.builder, self.is_null(obj)): + self.return_exception_raised() + + return obj + + def check_int_status(self, num, ok_value=0): + """ + Raise an exception if *num* is smaller than *ok_value*. + """ + ok = llvmlite.ir.Constant(num.type, ok_value) + pred = self.builder.icmp_signed('<', num, ok) + with cgutils.if_unlikely(self.builder, pred): + self.return_exception_raised() + + def is_null(self, obj): + return cgutils.is_null(self.builder, obj) + + def return_exception_raised(self): + """ + Return with the currently raised exception. + """ + self.cleanup_vars() + self.call_conv.return_exc(self.builder) + + def init_vars(self, block): + """ + Initialize live variables for *block*. 
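+
+        The set comes from the block-entry variable analysis that the
+        post-processor attaches to the IR (``get_block_entry_vars``).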
+ """ + self._live_vars = set(self.func_ir.get_block_entry_vars(block)) + + def _getvar(self, name, ltype=None): + if name not in self.varmap: + self.varmap[name] = self.alloca(name, ltype=ltype) + return self.varmap[name] + + def loadvar(self, name): + """ + Load the llvm value of the variable named *name*. + """ + # If this raises then the live variables analysis is wrong + assert name in self._live_vars, name + ptr = self.varmap[name] + val = self.builder.load(ptr) + with cgutils.if_unlikely(self.builder, self.is_null(val)): + self.pyapi.raise_missing_name_error(name) + self.return_exception_raised() + return val + + def delvar(self, name): + """ + Delete the variable slot with the given name. This will decref + the corresponding Python object. + """ + # If this raises then the live variables analysis is wrong + self._live_vars.remove(name) + ptr = self._getvar(name) # initializes `name` if not already + self.decref(self.builder.load(ptr)) + # This is a safety guard against double decref's, but really + # the IR should be correct and have only one Del per variable + # and code path. + self.builder.store(cgutils.get_null_value(ptr.type.pointee), ptr) + + def storevar(self, value, name, clobber=False): + """ + Stores a llvm value and allocate stack slot if necessary. + The llvm value can be of arbitrary type. + """ + is_redefine = name in self._live_vars and not clobber + ptr = self._getvar(name, ltype=value.type) + if is_redefine: + old = self.builder.load(ptr) + else: + self._live_vars.add(name) + assert value.type == ptr.type.pointee, (str(value.type), + str(ptr.type.pointee)) + self.builder.store(value, ptr) + # Safe to call decref even on non python object + if is_redefine: + self.decref(old) + + def cleanup_vars(self): + """ + Cleanup live variables. + """ + for name in self._live_vars: + ptr = self._getvar(name) + self.decref(self.builder.load(ptr)) + + def alloca(self, name, ltype=None): + """ + Allocate a stack slot and initialize it to NULL. + The default is to allocate a pyobject pointer. + Use ``ltype`` to override. + """ + if ltype is None: + ltype = self.context.get_value_type(types.pyobject) + with self.builder.goto_block(self.entry_block): + ptr = self.builder.alloca(ltype, name=name) + self.builder.store(cgutils.get_null_value(ltype), ptr) + return ptr + + def _alloca_var(self, name, fetype): + # This is here for API compatibility with lowering.py::Lower. + # NOTE: fetype is unused + return self.alloca(name) + + def incref(self, value): + self.pyapi.incref(value) + + def decref(self, value): + """ + This is allow to be called on non pyobject pointer, in which case + no code is inserted. + """ + lpyobj = self.context.get_value_type(types.pyobject) + if value.type == lpyobj: + self.pyapi.decref(value) + + def _freeze_string(self, string): + """ + Freeze a Python string object into the code. 
+ """ + return self.lower_const(string) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pythonapi.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pythonapi.py new file mode 100644 index 000000000..5ea6dc222 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/pythonapi.py @@ -0,0 +1,1686 @@ +from collections import namedtuple +import contextlib +import pickle +import hashlib +import sys + +from llvmlite import ir +from llvmlite.ir import Constant + +import ctypes +from numba import _helperlib +from numba.core import ( + types, utils, config, lowering, cgutils, imputils, serialize, +) + +PY_UNICODE_1BYTE_KIND = _helperlib.py_unicode_1byte_kind +PY_UNICODE_2BYTE_KIND = _helperlib.py_unicode_2byte_kind +PY_UNICODE_4BYTE_KIND = _helperlib.py_unicode_4byte_kind +PY_UNICODE_WCHAR_KIND = _helperlib.py_unicode_wchar_kind + + +class _Registry(object): + + def __init__(self): + self.functions = {} + + def register(self, typeclass): + assert issubclass(typeclass, types.Type) + def decorator(func): + if typeclass in self.functions: + raise KeyError("duplicate registration for %s" % (typeclass,)) + self.functions[typeclass] = func + return func + return decorator + + def lookup(self, typeclass, default=None): + assert issubclass(typeclass, types.Type) + for cls in typeclass.__mro__: + func = self.functions.get(cls) + if func is not None: + return func + return default + +# Registries of boxing / unboxing implementations +_boxers = _Registry() +_unboxers = _Registry() +_reflectors = _Registry() + +box = _boxers.register +unbox = _unboxers.register +reflect = _reflectors.register + +class _BoxContext(namedtuple("_BoxContext", + ("context", "builder", "pyapi", "env_manager"))): + """ + The facilities required by boxing implementations. + """ + __slots__ = () + + def box(self, typ, val): + return self.pyapi.from_native_value(typ, val, self.env_manager) + + +class _UnboxContext(namedtuple("_UnboxContext", + ("context", "builder", "pyapi"))): + """ + The facilities required by unboxing implementations. + """ + __slots__ = () + + def unbox(self, typ, obj): + return self.pyapi.to_native_value(typ, obj) + + +class _ReflectContext(namedtuple("_ReflectContext", + ("context", "builder", "pyapi", "env_manager", + "is_error"))): + """ + The facilities required by reflection implementations. + """ + __slots__ = () + + # XXX the error bit is currently unused by consumers (e.g. PyCallWrapper) + def set_error(self): + self.builder.store(self.is_error, cgutils.true_bit) + + def box(self, typ, val): + return self.pyapi.from_native_value(typ, val, self.env_manager) + + def reflect(self, typ, val): + return self.pyapi.reflect_native_value(typ, val, self.env_manager) + + +class NativeValue(object): + """ + Encapsulate the result of converting a Python object to a native value, + recording whether the conversion was successful and how to cleanup. + """ + + def __init__(self, value, is_error=None, cleanup=None): + self.value = value + self.is_error = is_error if is_error is not None else cgutils.false_bit + self.cleanup = cleanup + + +class EnvironmentManager(object): + + def __init__(self, pyapi, env, env_body, env_ptr): + assert isinstance(env, lowering.Environment) + self.pyapi = pyapi + self.env = env + self.env_body = env_body + self.env_ptr = env_ptr + + def add_const(self, const): + """ + Add a constant to the environment, return its index. 
+ """ + # All constants are frozen inside the environment + if isinstance(const, str): + const = sys.intern(const) + for index, val in enumerate(self.env.consts): + if val is const: + break + else: + index = len(self.env.consts) + self.env.consts.append(const) + return index + + def read_const(self, index): + """ + Look up constant number *index* inside the environment body. + A borrowed reference is returned. + + The returned LLVM value may have NULL value at runtime which indicates + an error at runtime. + """ + assert index < len(self.env.consts) + + builder = self.pyapi.builder + consts = self.env_body.consts + ret = cgutils.alloca_once(builder, self.pyapi.pyobj, zfill=True) + with builder.if_else(cgutils.is_not_null(builder, consts)) as \ + (br_not_null, br_null): + with br_not_null: + getitem = self.pyapi.list_getitem(consts, index) + builder.store(getitem, ret) + with br_null: + # This can happen when the Environment is accidentally released + # and has subsequently been garbage collected. + self.pyapi.err_set_string( + "PyExc_RuntimeError", + "`env.consts` is NULL in `read_const`", + ) + return builder.load(ret) + + +_IteratorLoop = namedtuple('_IteratorLoop', ('value', 'do_break')) + + +class PythonAPI(object): + """ + Code generation facilities to call into the CPython C API (and related + helpers). + """ + + def __init__(self, context, builder): + """ + Note: Maybe called multiple times when lowering a function + """ + self.context = context + self.builder = builder + + self.module = builder.basic_block.function.module + # A unique mapping of serialized objects in this module + try: + self.module.__serialized + except AttributeError: + self.module.__serialized = {} + + # Initialize types + self.pyobj = self.context.get_argument_type(types.pyobject) + self.pyobjptr = self.pyobj.as_pointer() + self.voidptr = ir.PointerType(ir.IntType(8)) + self.long = ir.IntType(ctypes.sizeof(ctypes.c_long) * 8) + self.ulong = self.long + self.longlong = ir.IntType(ctypes.sizeof(ctypes.c_ulonglong) * 8) + self.ulonglong = self.longlong + self.double = ir.DoubleType() + self.py_ssize_t = self.context.get_value_type(types.intp) + self.cstring = ir.PointerType(ir.IntType(8)) + self.gil_state = ir.IntType(_helperlib.py_gil_state_size * 8) + self.py_buffer_t = ir.ArrayType(ir.IntType(8), _helperlib.py_buffer_size) + self.py_hash_t = self.py_ssize_t + self.py_unicode_1byte_kind = _helperlib.py_unicode_1byte_kind + self.py_unicode_2byte_kind = _helperlib.py_unicode_2byte_kind + self.py_unicode_4byte_kind = _helperlib.py_unicode_4byte_kind + self.py_unicode_wchar_kind = _helperlib.py_unicode_wchar_kind + + def get_env_manager(self, env, env_body, env_ptr): + return EnvironmentManager(self, env, env_body, env_ptr) + + def emit_environment_sentry(self, envptr, return_pyobject=False, + debug_msg=''): + """Emits LLVM code to ensure the `envptr` is not NULL + """ + is_null = cgutils.is_null(self.builder, envptr) + with cgutils.if_unlikely(self.builder, is_null): + if return_pyobject: + fnty = self.builder.function.type.pointee + assert fnty.return_type == self.pyobj + self.err_set_string( + "PyExc_RuntimeError", f"missing Environment: {debug_msg}", + ) + self.builder.ret(self.get_null_object()) + else: + self.context.call_conv.return_user_exc( + self.builder, RuntimeError, + (f"missing Environment: {debug_msg}",), + ) + + # ------ Python API ----- + + # + # Basic object API + # + + def incref(self, obj): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="Py_IncRef") + 
self.builder.call(fn, [obj]) + + def decref(self, obj): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="Py_DecRef") + self.builder.call(fn, [obj]) + + def get_type(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="numba_py_type") + return self.builder.call(fn, [obj]) + + # + # Argument unpacking + # + + def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs): + charptr = ir.PointerType(ir.IntType(8)) + charptrary = ir.PointerType(charptr) + argtypes = [self.pyobj, self.pyobj, charptr, charptrary] + fnty = ir.FunctionType(ir.IntType(32), argtypes, var_arg=True) + fn = self._get_function(fnty, name="PyArg_ParseTupleAndKeywords") + return self.builder.call(fn, [args, kws, fmt, keywords] + list(objs)) + + def parse_tuple(self, args, fmt, *objs): + charptr = ir.PointerType(ir.IntType(8)) + argtypes = [self.pyobj, charptr] + fnty = ir.FunctionType(ir.IntType(32), argtypes, var_arg=True) + fn = self._get_function(fnty, name="PyArg_ParseTuple") + return self.builder.call(fn, [args, fmt] + list(objs)) + + def unpack_tuple(self, args, name, n_min, n_max, *objs): + charptr = ir.PointerType(ir.IntType(8)) + argtypes = [self.pyobj, charptr, self.py_ssize_t, self.py_ssize_t] + fnty = ir.FunctionType(ir.IntType(32), argtypes, var_arg=True) + fn = self._get_function(fnty, name="PyArg_UnpackTuple") + n_min = Constant(self.py_ssize_t, int(n_min)) + n_max = Constant(self.py_ssize_t, int(n_max)) + if isinstance(name, str): + name = self.context.insert_const_string(self.builder.module, name) + return self.builder.call(fn, [args, name, n_min, n_max] + list(objs)) + + # + # Exception and errors + # + + def err_occurred(self): + fnty = ir.FunctionType(self.pyobj, ()) + fn = self._get_function(fnty, name="PyErr_Occurred") + return self.builder.call(fn, ()) + + def err_clear(self): + fnty = ir.FunctionType(ir.VoidType(), ()) + fn = self._get_function(fnty, name="PyErr_Clear") + return self.builder.call(fn, ()) + + def err_set_string(self, exctype, msg): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj, self.cstring]) + fn = self._get_function(fnty, name="PyErr_SetString") + if isinstance(exctype, str): + exctype = self.get_c_object(exctype) + if isinstance(msg, str): + msg = self.context.insert_const_string(self.module, msg) + return self.builder.call(fn, (exctype, msg)) + + def err_format(self, exctype, msg, *format_args): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj, self.cstring], var_arg=True) + fn = self._get_function(fnty, name="PyErr_Format") + if isinstance(exctype, str): + exctype = self.get_c_object(exctype) + if isinstance(msg, str): + msg = self.context.insert_const_string(self.module, msg) + return self.builder.call(fn, (exctype, msg) + tuple(format_args)) + + def raise_object(self, exc=None): + """ + Raise an arbitrary exception (type or value or (type, args) + or None - if reraising). A reference to the argument is consumed. 
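+
+        The heavy lifting is done by the ``numba_do_raise`` C helper; when
+        *exc* is None, a fresh ``None`` object is passed, which the helper
+        treats as a re-raise.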
+ """ + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="numba_do_raise") + if exc is None: + exc = self.make_none() + return self.builder.call(fn, (exc,)) + + def err_set_object(self, exctype, excval): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyErr_SetObject") + if isinstance(exctype, str): + exctype = self.get_c_object(exctype) + return self.builder.call(fn, (exctype, excval)) + + def err_set_none(self, exctype): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="PyErr_SetNone") + if isinstance(exctype, str): + exctype = self.get_c_object(exctype) + return self.builder.call(fn, (exctype,)) + + def err_write_unraisable(self, obj): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="PyErr_WriteUnraisable") + return self.builder.call(fn, (obj,)) + + def err_fetch(self, pty, pval, ptb): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobjptr] * 3) + fn = self._get_function(fnty, name="PyErr_Fetch") + return self.builder.call(fn, (pty, pval, ptb)) + + def err_restore(self, ty, val, tb): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj] * 3) + fn = self._get_function(fnty, name="PyErr_Restore") + return self.builder.call(fn, (ty, val, tb)) + + @contextlib.contextmanager + def err_push(self, keep_new=False): + """ + Temporarily push the current error indicator while the code + block is executed. If *keep_new* is True and the code block + raises a new error, the new error is kept, otherwise the old + error indicator is restored at the end of the block. + """ + pty, pval, ptb = [cgutils.alloca_once(self.builder, self.pyobj) + for i in range(3)] + self.err_fetch(pty, pval, ptb) + yield + ty = self.builder.load(pty) + val = self.builder.load(pval) + tb = self.builder.load(ptb) + if keep_new: + new_error = cgutils.is_not_null(self.builder, self.err_occurred()) + with self.builder.if_else(new_error, likely=False) as (if_error, if_ok): + with if_error: + # Code block raised an error, keep it + self.decref(ty) + self.decref(val) + self.decref(tb) + with if_ok: + # Restore previous error + self.err_restore(ty, val, tb) + else: + self.err_restore(ty, val, tb) + + def get_c_object(self, name): + """ + Get a Python object through its C-accessible *name* + (e.g. "PyExc_ValueError"). The underlying variable must be + a `PyObject *`, and the value of that pointer is returned. + """ + # A LLVM global variable is implicitly a pointer to the declared + # type, so fix up by using pyobj.pointee. 
+ return self.context.get_c_value(self.builder, self.pyobj.pointee, name, + dllimport=True) + + def raise_missing_global_error(self, name): + msg = "global name '%s' is not defined" % name + cstr = self.context.insert_const_string(self.module, msg) + self.err_set_string("PyExc_NameError", cstr) + + def raise_missing_name_error(self, name): + msg = "name '%s' is not defined" % name + cstr = self.context.insert_const_string(self.module, msg) + self.err_set_string("PyExc_NameError", cstr) + + def fatal_error(self, msg): + fnty = ir.FunctionType(ir.VoidType(), [self.cstring]) + fn = self._get_function(fnty, name="Py_FatalError") + fn.attributes.add("noreturn") + cstr = self.context.insert_const_string(self.module, msg) + self.builder.call(fn, (cstr,)) + + # + # Concrete dict API + # + + def dict_getitem_string(self, dic, name): + """Lookup name inside dict + + Returns a borrowed reference + """ + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.cstring]) + fn = self._get_function(fnty, name="PyDict_GetItemString") + cstr = self.context.insert_const_string(self.module, name) + return self.builder.call(fn, [dic, cstr]) + + def dict_getitem(self, dic, name): + """Lookup name inside dict + + Returns a borrowed reference + """ + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyDict_GetItem") + return self.builder.call(fn, [dic, name]) + + def dict_new(self, presize=0): + if presize == 0: + fnty = ir.FunctionType(self.pyobj, ()) + fn = self._get_function(fnty, name="PyDict_New") + return self.builder.call(fn, ()) + else: + fnty = ir.FunctionType(self.pyobj, [self.py_ssize_t]) + fn = self._get_function(fnty, name="_PyDict_NewPresized") + return self.builder.call(fn, + [Constant(self.py_ssize_t, int(presize))]) + + def dict_setitem(self, dictobj, nameobj, valobj): + fnty = ir.FunctionType(ir.IntType(32), (self.pyobj, self.pyobj, + self.pyobj)) + fn = self._get_function(fnty, name="PyDict_SetItem") + return self.builder.call(fn, (dictobj, nameobj, valobj)) + + def dict_setitem_string(self, dictobj, name, valobj): + fnty = ir.FunctionType(ir.IntType(32), (self.pyobj, self.cstring, + self.pyobj)) + fn = self._get_function(fnty, name="PyDict_SetItemString") + cstr = self.context.insert_const_string(self.module, name) + return self.builder.call(fn, (dictobj, cstr, valobj)) + + def dict_pack(self, keyvalues): + """ + Args + ----- + keyvalues: iterable of (str, llvm.Value of PyObject*) + """ + dictobj = self.dict_new() + with self.if_object_ok(dictobj): + for k, v in keyvalues: + self.dict_setitem_string(dictobj, k, v) + return dictobj + + # + # Concrete number APIs + # + + def float_from_double(self, fval): + fnty = ir.FunctionType(self.pyobj, [self.double]) + fn = self._get_function(fnty, name="PyFloat_FromDouble") + return self.builder.call(fn, [fval]) + + def number_as_ssize_t(self, numobj): + fnty = ir.FunctionType(self.py_ssize_t, [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_AsSsize_t") + # We don't want any clipping, so pass OverflowError as the 2nd arg + exc_class = self.get_c_object("PyExc_OverflowError") + return self.builder.call(fn, [numobj, exc_class]) + + def number_long(self, numobj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_Long") + return self.builder.call(fn, [numobj]) + + def long_as_ulonglong(self, numobj): + fnty = ir.FunctionType(self.ulonglong, [self.pyobj]) + fn = self._get_function(fnty, name="PyLong_AsUnsignedLongLong") + return 
self.builder.call(fn, [numobj]) + + def long_as_longlong(self, numobj): + fnty = ir.FunctionType(self.ulonglong, [self.pyobj]) + fn = self._get_function(fnty, name="PyLong_AsLongLong") + return self.builder.call(fn, [numobj]) + + def long_as_voidptr(self, numobj): + """ + Convert the given Python integer to a void*. This is recommended + over number_as_ssize_t as it isn't affected by signedness. + """ + fnty = ir.FunctionType(self.voidptr, [self.pyobj]) + fn = self._get_function(fnty, name="PyLong_AsVoidPtr") + return self.builder.call(fn, [numobj]) + + def _long_from_native_int(self, ival, func_name, native_int_type, + signed): + fnty = ir.FunctionType(self.pyobj, [native_int_type]) + fn = self._get_function(fnty, name=func_name) + resptr = cgutils.alloca_once(self.builder, self.pyobj) + fn = self._get_function(fnty, name=func_name) + self.builder.store(self.builder.call(fn, [ival]), resptr) + + return self.builder.load(resptr) + + def long_from_long(self, ival): + func_name = "PyLong_FromLong" + fnty = ir.FunctionType(self.pyobj, [self.long]) + fn = self._get_function(fnty, name=func_name) + return self.builder.call(fn, [ival]) + + def long_from_ulong(self, ival): + return self._long_from_native_int(ival, "PyLong_FromUnsignedLong", + self.long, signed=False) + + def long_from_ssize_t(self, ival): + return self._long_from_native_int(ival, "PyLong_FromSsize_t", + self.py_ssize_t, signed=True) + + def long_from_longlong(self, ival): + return self._long_from_native_int(ival, "PyLong_FromLongLong", + self.longlong, signed=True) + + def long_from_ulonglong(self, ival): + return self._long_from_native_int(ival, "PyLong_FromUnsignedLongLong", + self.ulonglong, signed=False) + + def long_from_signed_int(self, ival): + """ + Return a Python integer from any native integer value. + """ + bits = ival.type.width + if bits <= self.long.width: + return self.long_from_long(self.builder.sext(ival, self.long)) + elif bits <= self.longlong.width: + return self.long_from_longlong(self.builder.sext(ival, self.longlong)) + else: + raise OverflowError("integer too big (%d bits)" % (bits)) + + def long_from_unsigned_int(self, ival): + """ + Same as long_from_signed_int, but for unsigned values. 
+ """ + bits = ival.type.width + if bits <= self.ulong.width: + return self.long_from_ulong(self.builder.zext(ival, self.ulong)) + elif bits <= self.ulonglong.width: + return self.long_from_ulonglong(self.builder.zext(ival, self.ulonglong)) + else: + raise OverflowError("integer too big (%d bits)" % (bits)) + + def _get_number_operator(self, name): + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_%s" % name) + return fn + + def _call_number_operator(self, name, lhs, rhs, inplace=False): + if inplace: + name = "InPlace" + name + fn = self._get_number_operator(name) + return self.builder.call(fn, [lhs, rhs]) + + def number_add(self, lhs, rhs, inplace=False): + return self._call_number_operator("Add", lhs, rhs, inplace=inplace) + + def number_subtract(self, lhs, rhs, inplace=False): + return self._call_number_operator("Subtract", lhs, rhs, inplace=inplace) + + def number_multiply(self, lhs, rhs, inplace=False): + return self._call_number_operator("Multiply", lhs, rhs, inplace=inplace) + + def number_truedivide(self, lhs, rhs, inplace=False): + return self._call_number_operator("TrueDivide", lhs, rhs, inplace=inplace) + + def number_floordivide(self, lhs, rhs, inplace=False): + return self._call_number_operator("FloorDivide", lhs, rhs, inplace=inplace) + + def number_remainder(self, lhs, rhs, inplace=False): + return self._call_number_operator("Remainder", lhs, rhs, inplace=inplace) + + def number_matrix_multiply(self, lhs, rhs, inplace=False): + return self._call_number_operator("MatrixMultiply", lhs, rhs, inplace=inplace) + + def number_lshift(self, lhs, rhs, inplace=False): + return self._call_number_operator("Lshift", lhs, rhs, inplace=inplace) + + def number_rshift(self, lhs, rhs, inplace=False): + return self._call_number_operator("Rshift", lhs, rhs, inplace=inplace) + + def number_and(self, lhs, rhs, inplace=False): + return self._call_number_operator("And", lhs, rhs, inplace=inplace) + + def number_or(self, lhs, rhs, inplace=False): + return self._call_number_operator("Or", lhs, rhs, inplace=inplace) + + def number_xor(self, lhs, rhs, inplace=False): + return self._call_number_operator("Xor", lhs, rhs, inplace=inplace) + + def number_power(self, lhs, rhs, inplace=False): + fnty = ir.FunctionType(self.pyobj, [self.pyobj] * 3) + fname = "PyNumber_InPlacePower" if inplace else "PyNumber_Power" + fn = self._get_function(fnty, fname) + return self.builder.call(fn, [lhs, rhs, self.borrow_none()]) + + def number_negative(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_Negative") + return self.builder.call(fn, (obj,)) + + def number_positive(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_Positive") + return self.builder.call(fn, (obj,)) + + def number_float(self, val): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_Float") + return self.builder.call(fn, [val]) + + def number_invert(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyNumber_Invert") + return self.builder.call(fn, (obj,)) + + def float_as_double(self, fobj): + fnty = ir.FunctionType(self.double, [self.pyobj]) + fn = self._get_function(fnty, name="PyFloat_AsDouble") + return self.builder.call(fn, [fobj]) + + def bool_from_bool(self, bval): + """ + Get a Python bool from a LLVM boolean. 
+ """ + longval = self.builder.zext(bval, self.long) + return self.bool_from_long(longval) + + def bool_from_long(self, ival): + fnty = ir.FunctionType(self.pyobj, [self.long]) + fn = self._get_function(fnty, name="PyBool_FromLong") + return self.builder.call(fn, [ival]) + + def complex_from_doubles(self, realval, imagval): + fnty = ir.FunctionType(self.pyobj, [ir.DoubleType(), ir.DoubleType()]) + fn = self._get_function(fnty, name="PyComplex_FromDoubles") + return self.builder.call(fn, [realval, imagval]) + + def complex_real_as_double(self, cobj): + fnty = ir.FunctionType(ir.DoubleType(), [self.pyobj]) + fn = self._get_function(fnty, name="PyComplex_RealAsDouble") + return self.builder.call(fn, [cobj]) + + def complex_imag_as_double(self, cobj): + fnty = ir.FunctionType(ir.DoubleType(), [self.pyobj]) + fn = self._get_function(fnty, name="PyComplex_ImagAsDouble") + return self.builder.call(fn, [cobj]) + + # + # Concrete slice API + # + def slice_as_ints(self, obj): + """ + Read the members of a slice of integers. + + Returns a (ok, start, stop, step) tuple where ok is a boolean and + the following members are pointer-sized ints. + """ + pstart = cgutils.alloca_once(self.builder, self.py_ssize_t) + pstop = cgutils.alloca_once(self.builder, self.py_ssize_t) + pstep = cgutils.alloca_once(self.builder, self.py_ssize_t) + fnty = ir.FunctionType(ir.IntType(32), + [self.pyobj] + [self.py_ssize_t.as_pointer()] * 3) + fn = self._get_function(fnty, name="numba_unpack_slice") + res = self.builder.call(fn, (obj, pstart, pstop, pstep)) + start = self.builder.load(pstart) + stop = self.builder.load(pstop) + step = self.builder.load(pstep) + return cgutils.is_null(self.builder, res), start, stop, step + + # + # List and sequence APIs + # + + def sequence_getslice(self, obj, start, stop): + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.py_ssize_t, + self.py_ssize_t]) + fn = self._get_function(fnty, name="PySequence_GetSlice") + return self.builder.call(fn, (obj, start, stop)) + + def sequence_tuple(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PySequence_Tuple") + return self.builder.call(fn, [obj]) + + def sequence_concat(self, obj1, obj2): + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PySequence_Concat") + return self.builder.call(fn, [obj1, obj2]) + + def list_new(self, szval): + fnty = ir.FunctionType(self.pyobj, [self.py_ssize_t]) + fn = self._get_function(fnty, name="PyList_New") + return self.builder.call(fn, [szval]) + + def list_size(self, lst): + fnty = ir.FunctionType(self.py_ssize_t, [self.pyobj]) + fn = self._get_function(fnty, name="PyList_Size") + return self.builder.call(fn, [lst]) + + def list_append(self, lst, val): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyList_Append") + return self.builder.call(fn, [lst, val]) + + def list_setitem(self, lst, idx, val): + """ + Warning: Steals reference to ``val`` + """ + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.py_ssize_t, + self.pyobj]) + fn = self._get_function(fnty, name="PyList_SetItem") + return self.builder.call(fn, [lst, idx, val]) + + def list_getitem(self, lst, idx): + """ + Returns a borrowed reference. 
+ """ + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.py_ssize_t]) + fn = self._get_function(fnty, name="PyList_GetItem") + if isinstance(idx, int): + idx = self.context.get_constant(types.intp, idx) + return self.builder.call(fn, [lst, idx]) + + def list_setslice(self, lst, start, stop, obj): + if obj is None: + obj = self.get_null_object() + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.py_ssize_t, + self.py_ssize_t, self.pyobj]) + fn = self._get_function(fnty, name="PyList_SetSlice") + return self.builder.call(fn, (lst, start, stop, obj)) + + + # + # Concrete tuple API + # + + def tuple_getitem(self, tup, idx): + """ + Borrow reference + """ + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.py_ssize_t]) + fn = self._get_function(fnty, name="PyTuple_GetItem") + idx = self.context.get_constant(types.intp, idx) + return self.builder.call(fn, [tup, idx]) + + def tuple_pack(self, items): + fnty = ir.FunctionType(self.pyobj, [self.py_ssize_t], var_arg=True) + fn = self._get_function(fnty, name="PyTuple_Pack") + n = self.context.get_constant(types.intp, len(items)) + args = [n] + args.extend(items) + return self.builder.call(fn, args) + + def tuple_size(self, tup): + fnty = ir.FunctionType(self.py_ssize_t, [self.pyobj]) + fn = self._get_function(fnty, name="PyTuple_Size") + return self.builder.call(fn, [tup]) + + def tuple_new(self, count): + fnty = ir.FunctionType(self.pyobj, [ir.IntType(32)]) + fn = self._get_function(fnty, name='PyTuple_New') + return self.builder.call(fn, [self.context.get_constant(types.int32, + count)]) + + def tuple_setitem(self, tuple_val, index, item): + """ + Steals a reference to `item`. + """ + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, ir.IntType(32), self.pyobj]) + setitem_fn = self._get_function(fnty, name='PyTuple_SetItem') + index = self.context.get_constant(types.int32, index) + self.builder.call(setitem_fn, [tuple_val, index, item]) + + # + # Concrete set API + # + + def set_new(self, iterable=None): + if iterable is None: + iterable = self.get_null_object() + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PySet_New") + return self.builder.call(fn, [iterable]) + + def set_add(self, set, value): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PySet_Add") + return self.builder.call(fn, [set, value]) + + def set_clear(self, set): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj]) + fn = self._get_function(fnty, name="PySet_Clear") + return self.builder.call(fn, [set]) + + def set_size(self, set): + fnty = ir.FunctionType(self.py_ssize_t, [self.pyobj]) + fn = self._get_function(fnty, name="PySet_Size") + return self.builder.call(fn, [set]) + + def set_update(self, set, iterable): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="_PySet_Update") + return self.builder.call(fn, [set, iterable]) + + def set_next_entry(self, set, posptr, keyptr, hashptr): + fnty = ir.FunctionType(ir.IntType(32), + [self.pyobj, self.py_ssize_t.as_pointer(), + self.pyobj.as_pointer(), self.py_hash_t.as_pointer()]) + fn = self._get_function(fnty, name="_PySet_NextEntry") + return self.builder.call(fn, (set, posptr, keyptr, hashptr)) + + @contextlib.contextmanager + def set_iterate(self, set): + builder = self.builder + + hashptr = cgutils.alloca_once(builder, self.py_hash_t, name="hashptr") + keyptr = cgutils.alloca_once(builder, self.pyobj, name="keyptr") + posptr = cgutils.alloca_once_value(builder, 
+ Constant(self.py_ssize_t, 0), + name="posptr") + + bb_body = builder.append_basic_block("bb_body") + bb_end = builder.append_basic_block("bb_end") + + builder.branch(bb_body) + def do_break(): + builder.branch(bb_end) + + with builder.goto_block(bb_body): + r = self.set_next_entry(set, posptr, keyptr, hashptr) + finished = cgutils.is_null(builder, r) + with builder.if_then(finished, likely=False): + builder.branch(bb_end) + yield _IteratorLoop(builder.load(keyptr), do_break) + builder.branch(bb_body) + + builder.position_at_end(bb_end) + + # + # GIL APIs + # + + def gil_ensure(self): + """ + Ensure the GIL is acquired. + The returned value must be consumed by gil_release(). + """ + gilptrty = ir.PointerType(self.gil_state) + fnty = ir.FunctionType(ir.VoidType(), [gilptrty]) + fn = self._get_function(fnty, "numba_gil_ensure") + gilptr = cgutils.alloca_once(self.builder, self.gil_state) + self.builder.call(fn, [gilptr]) + return gilptr + + def gil_release(self, gil): + """ + Release the acquired GIL by gil_ensure(). + Must be paired with a gil_ensure(). + """ + gilptrty = ir.PointerType(self.gil_state) + fnty = ir.FunctionType(ir.VoidType(), [gilptrty]) + fn = self._get_function(fnty, "numba_gil_release") + return self.builder.call(fn, [gil]) + + def save_thread(self): + """ + Release the GIL and return the former thread state + (an opaque non-NULL pointer). + """ + fnty = ir.FunctionType(self.voidptr, []) + fn = self._get_function(fnty, name="PyEval_SaveThread") + return self.builder.call(fn, []) + + def restore_thread(self, thread_state): + """ + Restore the given thread state by reacquiring the GIL. + """ + fnty = ir.FunctionType(ir.VoidType(), [self.voidptr]) + fn = self._get_function(fnty, name="PyEval_RestoreThread") + self.builder.call(fn, [thread_state]) + + # + # Generic object private data (a way of associating an arbitrary void * + # pointer to an arbitrary Python object). + # + + def object_get_private_data(self, obj): + fnty = ir.FunctionType(self.voidptr, [self.pyobj]) + fn = self._get_function(fnty, name="numba_get_pyobject_private_data") + return self.builder.call(fn, (obj,)) + + def object_set_private_data(self, obj, ptr): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj, self.voidptr]) + fn = self._get_function(fnty, name="numba_set_pyobject_private_data") + return self.builder.call(fn, (obj, ptr)) + + def object_reset_private_data(self, obj): + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="numba_reset_pyobject_private_data") + return self.builder.call(fn, (obj,)) + + + # + # Other APIs (organize them better!) 
+ # + + def import_module_noblock(self, modname): + fnty = ir.FunctionType(self.pyobj, [self.cstring]) + fn = self._get_function(fnty, name="PyImport_ImportModuleNoBlock") + return self.builder.call(fn, [modname]) + + def call_function_objargs(self, callee, objargs): + fnty = ir.FunctionType(self.pyobj, [self.pyobj], var_arg=True) + fn = self._get_function(fnty, name="PyObject_CallFunctionObjArgs") + args = [callee] + list(objargs) + args.append(self.context.get_constant_null(types.pyobject)) + return self.builder.call(fn, args) + + def call_method(self, callee, method, objargs=()): + cname = self.context.insert_const_string(self.module, method) + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.cstring, self.cstring], + var_arg=True) + fn = self._get_function(fnty, name="PyObject_CallMethod") + fmt = 'O' * len(objargs) + cfmt = self.context.insert_const_string(self.module, fmt) + args = [callee, cname, cfmt] + if objargs: + args.extend(objargs) + args.append(self.context.get_constant_null(types.pyobject)) + return self.builder.call(fn, args) + + def call(self, callee, args=None, kws=None): + if args is None: + args = self.get_null_object() + if kws is None: + kws = self.get_null_object() + fnty = ir.FunctionType(self.pyobj, [self.pyobj] * 3) + fn = self._get_function(fnty, name="PyObject_Call") + return self.builder.call(fn, (callee, args, kws)) + + def object_type(self, obj): + """Emit a call to ``PyObject_Type(obj)`` to get the type of ``obj``. + """ + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyObject_Type") + return self.builder.call(fn, (obj,)) + + def object_istrue(self, obj): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj]) + fn = self._get_function(fnty, name="PyObject_IsTrue") + return self.builder.call(fn, [obj]) + + def object_not(self, obj): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj]) + fn = self._get_function(fnty, name="PyObject_Not") + return self.builder.call(fn, [obj]) + + def object_richcompare(self, lhs, rhs, opstr): + """ + Refer to Python source Include/object.h for macros definition + of the opid. 
+ """ + ops = ['<', '<=', '==', '!=', '>', '>='] + if opstr in ops: + opid = ops.index(opstr) + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj, ir.IntType(32)]) + fn = self._get_function(fnty, name="PyObject_RichCompare") + lopid = self.context.get_constant(types.int32, opid) + return self.builder.call(fn, (lhs, rhs, lopid)) + elif opstr == 'is': + bitflag = self.builder.icmp_unsigned('==', lhs, rhs) + return self.bool_from_bool(bitflag) + elif opstr == 'is not': + bitflag = self.builder.icmp_unsigned('!=', lhs, rhs) + return self.bool_from_bool(bitflag) + elif opstr in ('in', 'not in'): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PySequence_Contains") + status = self.builder.call(fn, (rhs, lhs)) + negone = self.context.get_constant(types.int32, -1) + is_good = self.builder.icmp_unsigned('!=', status, negone) + # Stack allocate output and initialize to Null + outptr = cgutils.alloca_once_value(self.builder, + Constant(self.pyobj, None)) + # If PySequence_Contains returns non-error value + with cgutils.if_likely(self.builder, is_good): + if opstr == 'not in': + status = self.builder.not_(status) + # Store the status as a boolean object + truncated = self.builder.trunc(status, ir.IntType(1)) + self.builder.store(self.bool_from_bool(truncated), + outptr) + + return self.builder.load(outptr) + else: + raise NotImplementedError("Unknown operator {op!r}".format( + op=opstr)) + + def iter_next(self, iterobj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyIter_Next") + return self.builder.call(fn, [iterobj]) + + def object_getiter(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyObject_GetIter") + return self.builder.call(fn, [obj]) + + def object_getattr_string(self, obj, attr): + cstr = self.context.insert_const_string(self.module, attr) + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.cstring]) + fn = self._get_function(fnty, name="PyObject_GetAttrString") + return self.builder.call(fn, [obj, cstr]) + + def object_getattr(self, obj, attr): + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyObject_GetAttr") + return self.builder.call(fn, [obj, attr]) + + def object_setattr_string(self, obj, attr, val): + cstr = self.context.insert_const_string(self.module, attr) + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.cstring, self.pyobj]) + fn = self._get_function(fnty, name="PyObject_SetAttrString") + return self.builder.call(fn, [obj, cstr, val]) + + def object_setattr(self, obj, attr, val): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyObject_SetAttr") + return self.builder.call(fn, [obj, attr, val]) + + def object_delattr_string(self, obj, attr): + # PyObject_DelAttrString() is actually a C macro calling + # PyObject_SetAttrString() with value == NULL. + return self.object_setattr_string(obj, attr, self.get_null_object()) + + def object_delattr(self, obj, attr): + # PyObject_DelAttr() is actually a C macro calling + # PyObject_SetAttr() with value == NULL. 
+ return self.object_setattr(obj, attr, self.get_null_object()) + + def object_getitem(self, obj, key): + """ + Return obj[key] + """ + fnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyObject_GetItem") + return self.builder.call(fn, (obj, key)) + + def object_setitem(self, obj, key, val): + """ + obj[key] = val + """ + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyObject_SetItem") + return self.builder.call(fn, (obj, key, val)) + + def object_delitem(self, obj, key): + """ + del obj[key] + """ + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.pyobj]) + fn = self._get_function(fnty, name="PyObject_DelItem") + return self.builder.call(fn, (obj, key)) + + def string_as_string(self, strobj): + fnty = ir.FunctionType(self.cstring, [self.pyobj]) + fname = "PyUnicode_AsUTF8" + fn = self._get_function(fnty, name=fname) + return self.builder.call(fn, [strobj]) + + def string_as_string_and_size(self, strobj): + """ + Returns a tuple of ``(ok, buffer, length)``. + The ``ok`` is i1 value that is set if ok. + The ``buffer`` is a i8* of the output buffer. + The ``length`` is a i32/i64 (py_ssize_t) of the length of the buffer. + """ + + p_length = cgutils.alloca_once(self.builder, self.py_ssize_t) + fnty = ir.FunctionType(self.cstring, [self.pyobj, + self.py_ssize_t.as_pointer()]) + fname = "PyUnicode_AsUTF8AndSize" + fn = self._get_function(fnty, name=fname) + + buffer = self.builder.call(fn, [strobj, p_length]) + ok = self.builder.icmp_unsigned('!=', + Constant(buffer.type, None), + buffer) + return (ok, buffer, self.builder.load(p_length)) + + def string_as_string_size_and_kind(self, strobj): + """ + Returns a tuple of ``(ok, buffer, length, kind)``. + The ``ok`` is i1 value that is set if ok. + The ``buffer`` is a i8* of the output buffer. + The ``length`` is a i32/i64 (py_ssize_t) of the length of the buffer. 
+ The ``kind`` is a i32 (int32) of the Unicode kind constant + The ``hash`` is a long/uint64_t (py_hash_t) of the Unicode constant hash + """ + p_length = cgutils.alloca_once(self.builder, self.py_ssize_t) + p_kind = cgutils.alloca_once(self.builder, ir.IntType(32)) + p_ascii = cgutils.alloca_once(self.builder, ir.IntType(32)) + p_hash = cgutils.alloca_once(self.builder, self.py_hash_t) + fnty = ir.FunctionType(self.cstring, [self.pyobj, + self.py_ssize_t.as_pointer(), + ir.IntType(32).as_pointer(), + ir.IntType(32).as_pointer(), + self.py_hash_t.as_pointer()]) + fname = "numba_extract_unicode" + fn = self._get_function(fnty, name=fname) + + buffer = self.builder.call( + fn, [strobj, p_length, p_kind, p_ascii, p_hash]) + ok = self.builder.icmp_unsigned('!=', + Constant(buffer.type, None), + buffer) + return (ok, buffer, self.builder.load(p_length), + self.builder.load(p_kind), self.builder.load(p_ascii), + self.builder.load(p_hash)) + + def string_from_string_and_size(self, string, size): + fnty = ir.FunctionType(self.pyobj, [self.cstring, self.py_ssize_t]) + fname = "PyString_FromStringAndSize" + fn = self._get_function(fnty, name=fname) + return self.builder.call(fn, [string, size]) + + def string_from_string(self, string): + fnty = ir.FunctionType(self.pyobj, [self.cstring]) + fname = "PyUnicode_FromString" + fn = self._get_function(fnty, name=fname) + return self.builder.call(fn, [string]) + + def string_from_kind_and_data(self, kind, string, size): + fnty = ir.FunctionType(self.pyobj, [ir.IntType(32), self.cstring, self.py_ssize_t]) + fname = "PyUnicode_FromKindAndData" + fn = self._get_function(fnty, name=fname) + return self.builder.call(fn, [kind, string, size]) + + def bytes_from_string_and_size(self, string, size): + fnty = ir.FunctionType(self.pyobj, [self.cstring, self.py_ssize_t]) + fname = "PyBytes_FromStringAndSize" + fn = self._get_function(fnty, name=fname) + return self.builder.call(fn, [string, size]) + + def object_hash(self, obj): + fnty = ir.FunctionType(self.py_hash_t, [self.pyobj, ]) + fname = "PyObject_Hash" + fn = self._get_function(fnty, name=fname) + return self.builder.call(fn, [obj,]) + + def object_str(self, obj): + fnty = ir.FunctionType(self.pyobj, [self.pyobj]) + fn = self._get_function(fnty, name="PyObject_Str") + return self.builder.call(fn, [obj]) + + def make_none(self): + obj = self.borrow_none() + self.incref(obj) + return obj + + def borrow_none(self): + return self.get_c_object("_Py_NoneStruct") + + def sys_write_stdout(self, fmt, *args): + fnty = ir.FunctionType(ir.VoidType(), [self.cstring], var_arg=True) + fn = self._get_function(fnty, name="PySys_FormatStdout") + return self.builder.call(fn, (fmt,) + args) + + def object_dump(self, obj): + """ + Dump a Python object on C stderr. For debugging purposes. + """ + fnty = ir.FunctionType(ir.VoidType(), [self.pyobj]) + fn = self._get_function(fnty, name="_PyObject_Dump") + return self.builder.call(fn, (obj,)) + + # + # NRT (Numba runtime) APIs + # + + def nrt_adapt_ndarray_to_python(self, aryty, ary, dtypeptr): + assert self.context.enable_nrt, "NRT required" + + intty = ir.IntType(32) + # Embed the Python type of the array (maybe subclass) in the LLVM IR. 
+ serial_aryty_pytype = self.unserialize(self.serialize_object(aryty.box_type)) + + fnty = ir.FunctionType(self.pyobj, + [self.voidptr, self.pyobj, intty, intty, self.pyobj]) + fn = self._get_function(fnty, name="NRT_adapt_ndarray_to_python_acqref") + fn.args[0].add_attribute('nocapture') + + ndim = self.context.get_constant(types.int32, aryty.ndim) + writable = self.context.get_constant(types.int32, int(aryty.mutable)) + + aryptr = cgutils.alloca_once_value(self.builder, ary) + return self.builder.call(fn, [self.builder.bitcast(aryptr, + self.voidptr), + serial_aryty_pytype, + ndim, writable, dtypeptr]) + + def nrt_meminfo_new_from_pyobject(self, data, pyobj): + """ + Allocate a new MemInfo with data payload borrowed from a python + object. + """ + mod = self.builder.module + fnty = ir.FunctionType( + cgutils.voidptr_t, + [cgutils.voidptr_t, cgutils.voidptr_t], + ) + fn = cgutils.get_or_insert_function( + mod, + fnty, + "NRT_meminfo_new_from_pyobject", + ) + fn.args[0].add_attribute('nocapture') + fn.args[1].add_attribute('nocapture') + fn.return_value.add_attribute("noalias") + return self.builder.call(fn, [data, pyobj]) + + def nrt_meminfo_as_pyobject(self, miptr): + mod = self.builder.module + fnty = ir.FunctionType( + self.pyobj, + [cgutils.voidptr_t] + ) + fn = cgutils.get_or_insert_function( + mod, + fnty, + 'NRT_meminfo_as_pyobject', + ) + fn.return_value.add_attribute("noalias") + return self.builder.call(fn, [miptr]) + + def nrt_meminfo_from_pyobject(self, miobj): + mod = self.builder.module + fnty = ir.FunctionType( + cgutils.voidptr_t, + [self.pyobj] + ) + fn = cgutils.get_or_insert_function( + mod, + fnty, + 'NRT_meminfo_from_pyobject', + ) + fn.return_value.add_attribute("noalias") + return self.builder.call(fn, [miobj]) + + def nrt_adapt_ndarray_from_python(self, ary, ptr): + assert self.context.enable_nrt + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.voidptr]) + fn = self._get_function(fnty, name="NRT_adapt_ndarray_from_python") + fn.args[0].add_attribute('nocapture') + fn.args[1].add_attribute('nocapture') + return self.builder.call(fn, (ary, ptr)) + + def nrt_adapt_buffer_from_python(self, buf, ptr): + assert self.context.enable_nrt + fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(self.py_buffer_t), + self.voidptr]) + fn = self._get_function(fnty, name="NRT_adapt_buffer_from_python") + fn.args[0].add_attribute('nocapture') + fn.args[1].add_attribute('nocapture') + return self.builder.call(fn, (buf, ptr)) + + # ------ utils ----- + + def _get_function(self, fnty, name): + return cgutils.get_or_insert_function(self.module, fnty, name) + + def alloca_obj(self): + return self.builder.alloca(self.pyobj) + + def alloca_buffer(self): + """ + Return a pointer to a stack-allocated, zero-initialized Py_buffer. 
+ """ + # Treat the buffer as an opaque array of bytes + ptr = cgutils.alloca_once_value(self.builder, + Constant(self.py_buffer_t, None)) + return ptr + + @contextlib.contextmanager + def if_object_ok(self, obj): + with cgutils.if_likely(self.builder, + cgutils.is_not_null(self.builder, obj)): + yield + + def print_object(self, obj): + strobj = self.object_str(obj) + cstr = self.string_as_string(strobj) + fmt = self.context.insert_const_string(self.module, "%s") + self.sys_write_stdout(fmt, cstr) + self.decref(strobj) + + def print_string(self, text): + fmt = self.context.insert_const_string(self.module, text) + self.sys_write_stdout(fmt) + + def get_null_object(self): + return Constant(self.pyobj, None) + + def return_none(self): + none = self.make_none() + self.builder.ret(none) + + def list_pack(self, items): + n = len(items) + seq = self.list_new(self.context.get_constant(types.intp, n)) + with self.if_object_ok(seq): + for i in range(n): + idx = self.context.get_constant(types.intp, i) + self.incref(items[i]) + self.list_setitem(seq, idx, items[i]) + return seq + + def unserialize(self, structptr): + """ + Unserialize some data. *structptr* should be a pointer to + a {i8* data, i32 length} structure. + """ + fnty = ir.FunctionType(self.pyobj, + (self.voidptr, ir.IntType(32), self.voidptr)) + fn = self._get_function(fnty, name="numba_unpickle") + ptr = self.builder.extract_value(self.builder.load(structptr), 0) + n = self.builder.extract_value(self.builder.load(structptr), 1) + hashed = self.builder.extract_value(self.builder.load(structptr), 2) + return self.builder.call(fn, (ptr, n, hashed)) + + def serialize_uncached(self, obj): + """ + Same as serialize_object(), but don't create a global variable, + simply return a literal {i8* data, i32 length, i8* hashbuf} structure. + """ + # First make the array constant + data = serialize.dumps(obj) + assert len(data) < 2**31 + name = ".const.pickledata.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR") + bdata = cgutils.make_bytearray(data) + # Make SHA1 hash on the pickled content + # NOTE: update buffer size in numba_unpickle() when changing the + # hash algorithm. + hashed = cgutils.make_bytearray(hashlib.sha1(data).digest()) + arr = self.context.insert_unique_const(self.module, name, bdata) + hasharr = self.context.insert_unique_const( + self.module, f"{name}.sha1", hashed, + ) + # Then populate the structure constant + struct = Constant.literal_struct([ + arr.bitcast(self.voidptr), + Constant(ir.IntType(32), arr.type.pointee.count), + hasharr.bitcast(self.voidptr), + ]) + return struct + + def serialize_object(self, obj): + """ + Serialize the given object in the bitcode, and return it + as a pointer to a {i8* data, i32 length}, structure constant + (suitable for passing to unserialize()). + """ + try: + gv = self.module.__serialized[obj] + except KeyError: + struct = self.serialize_uncached(obj) + name = ".const.picklebuf.%s" % (id(obj) if config.DIFF_IR == 0 else "DIFF_IR") + gv = self.context.insert_unique_const(self.module, name, struct) + # Make the id() (and hence the name) unique while populating the module. + self.module.__serialized[obj] = gv + return gv + + def c_api_error(self): + return cgutils.is_not_null(self.builder, self.err_occurred()) + + def to_native_value(self, typ, obj): + """ + Unbox the Python object as the given Numba type. + A NativeValue instance is returned. 
+ """ + from numba.core.boxing import unbox_unsupported + + impl = _unboxers.lookup(typ.__class__, unbox_unsupported) + c = _UnboxContext(self.context, self.builder, self) + return impl(typ, obj, c) + + def from_native_return(self, typ, val, env_manager): + assert not isinstance(typ, types.Optional), "callconv should have " \ + "prevented the return of " \ + "optional value" + out = self.from_native_value(typ, val, env_manager) + return out + + def from_native_value(self, typ, val, env_manager=None): + """ + Box the native value of the given Numba type. A Python object + pointer is returned (NULL if an error occurred). + This method steals any native (NRT) reference embedded in *val*. + """ + from numba.core.boxing import box_unsupported + + impl = _boxers.lookup(typ.__class__, box_unsupported) + + c = _BoxContext(self.context, self.builder, self, env_manager) + return impl(typ, val, c) + + def reflect_native_value(self, typ, val, env_manager=None): + """ + Reflect the native value onto its Python original, if any. + An error bit (as an LLVM value) is returned. + """ + impl = _reflectors.lookup(typ.__class__) + if impl is None: + # Reflection isn't needed for most types + return cgutils.false_bit + + is_error = cgutils.alloca_once_value(self.builder, cgutils.false_bit) + c = _ReflectContext(self.context, self.builder, self, env_manager, + is_error) + impl(typ, val, c) + return self.builder.load(c.is_error) + + def to_native_generator(self, obj, typ): + """ + Extract the generator structure pointer from a generator *obj* + (a _dynfunc.Generator instance). + """ + gen_ptr_ty = ir.PointerType(self.context.get_data_type(typ)) + value = self.context.get_generator_state(self.builder, obj, gen_ptr_ty) + return NativeValue(value) + + def from_native_generator(self, val, typ, env=None): + """ + Make a Numba generator (a _dynfunc.Generator instance) from a + generator structure pointer *val*. + *env* is an optional _dynfunc.Environment instance to be wrapped + in the generator. 
+ """ + llty = self.context.get_data_type(typ) + assert not llty.is_pointer + gen_struct_size = self.context.get_abi_sizeof(llty) + + gendesc = self.context.get_generator_desc(typ) + + # This is the PyCFunctionWithKeywords generated by PyCallWrapper + genfnty = ir.FunctionType(self.pyobj, [self.pyobj, self.pyobj, self.pyobj]) + genfn = self._get_function(genfnty, name=gendesc.llvm_cpython_wrapper_name) + + # This is the raw finalizer generated by _lower_generator_finalize_func() + finalizerty = ir.FunctionType(ir.VoidType(), [self.voidptr]) + if typ.has_finalizer: + finalizer = self._get_function(finalizerty, name=gendesc.llvm_finalizer_name) + else: + finalizer = Constant(ir.PointerType(finalizerty), None) + + # PyObject *numba_make_generator(state_size, initial_state, nextfunc, finalizer, env) + fnty = ir.FunctionType(self.pyobj, [self.py_ssize_t, + self.voidptr, + ir.PointerType(genfnty), + ir.PointerType(finalizerty), + self.voidptr]) + fn = self._get_function(fnty, name="numba_make_generator") + + state_size = Constant(self.py_ssize_t, gen_struct_size) + initial_state = self.builder.bitcast(val, self.voidptr) + if env is None: + env = self.get_null_object() + env = self.builder.bitcast(env, self.voidptr) + + return self.builder.call(fn, + (state_size, initial_state, genfn, finalizer, env)) + + def numba_array_adaptor(self, ary, ptr): + assert not self.context.enable_nrt + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, self.voidptr]) + fn = self._get_function(fnty, name="numba_adapt_ndarray") + fn.args[0].add_attribute('nocapture') + fn.args[1].add_attribute('nocapture') + return self.builder.call(fn, (ary, ptr)) + + def numba_buffer_adaptor(self, buf, ptr): + fnty = ir.FunctionType(ir.VoidType(), + [ir.PointerType(self.py_buffer_t), self.voidptr]) + fn = self._get_function(fnty, name="numba_adapt_buffer") + fn.args[0].add_attribute('nocapture') + fn.args[1].add_attribute('nocapture') + return self.builder.call(fn, (buf, ptr)) + + def complex_adaptor(self, cobj, cmplx): + fnty = ir.FunctionType(ir.IntType(32), [self.pyobj, cmplx.type]) + fn = self._get_function(fnty, name="numba_complex_adaptor") + return self.builder.call(fn, [cobj, cmplx]) + + def extract_record_data(self, obj, pbuf): + fnty = ir.FunctionType(self.voidptr, + [self.pyobj, ir.PointerType(self.py_buffer_t)]) + fn = self._get_function(fnty, name="numba_extract_record_data") + return self.builder.call(fn, [obj, pbuf]) + + def get_buffer(self, obj, pbuf): + fnty = ir.FunctionType(ir.IntType(32), + [self.pyobj, ir.PointerType(self.py_buffer_t)]) + fn = self._get_function(fnty, name="numba_get_buffer") + return self.builder.call(fn, [obj, pbuf]) + + def release_buffer(self, pbuf): + fnty = ir.FunctionType(ir.VoidType(), [ir.PointerType(self.py_buffer_t)]) + fn = self._get_function(fnty, name="numba_release_buffer") + return self.builder.call(fn, [pbuf]) + + def extract_np_datetime(self, obj): + fnty = ir.FunctionType(ir.IntType(64), [self.pyobj]) + fn = self._get_function(fnty, name="numba_extract_np_datetime") + return self.builder.call(fn, [obj]) + + def extract_np_timedelta(self, obj): + fnty = ir.FunctionType(ir.IntType(64), [self.pyobj]) + fn = self._get_function(fnty, name="numba_extract_np_timedelta") + return self.builder.call(fn, [obj]) + + def create_np_datetime(self, val, unit_code): + unit_code = Constant(ir.IntType(32), int(unit_code)) + fnty = ir.FunctionType(self.pyobj, [ir.IntType(64), ir.IntType(32)]) + fn = self._get_function(fnty, name="numba_create_np_datetime") + return self.builder.call(fn, [val, 
unit_code]) + + def create_np_timedelta(self, val, unit_code): + unit_code = Constant(ir.IntType(32), int(unit_code)) + fnty = ir.FunctionType(self.pyobj, [ir.IntType(64), ir.IntType(32)]) + fn = self._get_function(fnty, name="numba_create_np_timedelta") + return self.builder.call(fn, [val, unit_code]) + + def recreate_record(self, pdata, size, dtype, env_manager): + fnty = ir.FunctionType(self.pyobj, [ir.PointerType(ir.IntType(8)), + ir.IntType(32), self.pyobj]) + fn = self._get_function(fnty, name="numba_recreate_record") + dtypeaddr = env_manager.read_const(env_manager.add_const(dtype)) + return self.builder.call(fn, [pdata, size, dtypeaddr]) + + def string_from_constant_string(self, string): + cstr = self.context.insert_const_string(self.module, string) + sz = self.context.get_constant(types.intp, len(string)) + return self.string_from_string_and_size(cstr, sz) + + def call_jit_code(self, func, sig, args): + """Calls into Numba jitted code and propagate error using the Python + calling convention. + + Parameters + ---------- + func : function + The Python function to be compiled. This function is compiled + in nopython-mode. + sig : numba.typing.Signature + The function signature for *func*. + args : Sequence[llvmlite.binding.Value] + LLVM values to use as arguments. + + Returns + ------- + (is_error, res) : 2-tuple of llvmlite.binding.Value. + is_error : true iff *func* raised an exception. + res : Returned value from *func* iff *is_error* is false. + + If *is_error* is true, this method will adapt the nopython exception + into a Python exception. Caller should return NULL to Python to + indicate an error. + """ + # Compile *func* + builder = self.builder + cres = self.context.compile_subroutine(builder, func, sig) + got_retty = cres.signature.return_type + retty = sig.return_type + if got_retty != retty: + # This error indicates an error in *func* or the caller of this + # method. + raise errors.LoweringError( + f'mismatching signature {got_retty} != {retty}.\n' + ) + # Call into *func* + status, res = self.context.call_internal_no_propagate( + builder, cres.fndesc, sig, args, + ) + # Post-call handling for *func* + is_error_ptr = cgutils.alloca_once(builder, cgutils.bool_t, zfill=True) + res_type = self.context.get_value_type(sig.return_type) + res_ptr = cgutils.alloca_once(builder, res_type, zfill=True) + + # Handle error and adapt the nopython exception into cpython exception + with builder.if_else(status.is_error) as (has_err, no_err): + with has_err: + builder.store(status.is_error, is_error_ptr) + # Set error state in the Python interpreter + self.context.call_conv.raise_error(builder, self, status) + with no_err: + # Handle returned value + res = imputils.fix_returning_optional( + self.context, builder, sig, status, res, + ) + builder.store(res, res_ptr) + + is_error = builder.load(is_error_ptr) + res = builder.load(res_ptr) + return is_error, res + + +class ObjModeUtils: + """Internal utils for calling objmode dispatcher from within NPM code. 
+ """ + def __init__(self, pyapi): + self.pyapi = pyapi + + def load_dispatcher(self, fnty, argtypes): + builder = self.pyapi.builder + tyctx = self.pyapi.context + m = builder.module + + # Add a global variable to cache the objmode dispatcher + gv = ir.GlobalVariable( + m, self.pyapi.pyobj, + name=m.get_unique_name("cached_objmode_dispatcher"), + ) + gv.initializer = gv.type.pointee(None) + gv.linkage = 'internal' + + # Make a basic-block to common exit + bb_end = builder.append_basic_block("bb_end") + + if serialize.is_serialiable(fnty.dispatcher): + serialized_dispatcher = self.pyapi.serialize_object( + (fnty.dispatcher, tuple(argtypes)), + ) + compile_args = self.pyapi.unserialize(serialized_dispatcher) + # unserialize (unpickling) can fail + failed_unser = cgutils.is_null(builder, compile_args) + with builder.if_then(failed_unser): + # early exit. `gv` is still null. + builder.branch(bb_end) + + cached = builder.load(gv) + with builder.if_then(cgutils.is_null(builder, cached)): + if serialize.is_serialiable(fnty.dispatcher): + cls = type(self) + compiler = self.pyapi.unserialize( + self.pyapi.serialize_object(cls._call_objmode_dispatcher) + ) + callee = self.pyapi.call_function_objargs( + compiler, [compile_args], + ) + # Clean up + self.pyapi.decref(compiler) + self.pyapi.decref(compile_args) + else: + entry_pt = fnty.dispatcher.compile(tuple(argtypes)) + callee = tyctx.add_dynamic_addr( + builder, id(entry_pt), info="with_objectmode", + ) + # Incref the dispatcher and cache it + self.pyapi.incref(callee) + builder.store(callee, gv) + # Jump to the exit block + builder.branch(bb_end) + # Define the exit block + builder.position_at_end(bb_end) + callee = builder.load(gv) + return callee + + @staticmethod + def _call_objmode_dispatcher(compile_args): + dispatcher, argtypes = compile_args + entrypt = dispatcher.compile(argtypes) + return entrypt diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/registry.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/registry.py new file mode 100644 index 000000000..eef492c40 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/registry.py @@ -0,0 +1,112 @@ +import contextlib + +from numba.core.descriptors import TargetDescriptor +from numba.core import utils, typing, dispatcher, cpu + +# ----------------------------------------------------------------------------- +# Default CPU target descriptors + +class _NestedContext(object): + _typing_context = None + _target_context = None + + @contextlib.contextmanager + def nested(self, typing_context, target_context): + old_nested = self._typing_context, self._target_context + try: + self._typing_context = typing_context + self._target_context = target_context + yield + finally: + self._typing_context, self._target_context = old_nested + + +class CPUTarget(TargetDescriptor): + options = cpu.CPUTargetOptions + _nested = _NestedContext() + + @utils.cached_property + def _toplevel_target_context(self): + # Lazily-initialized top-level target context, for all threads + return cpu.CPUContext(self.typing_context, self._target_name) + + @utils.cached_property + def _toplevel_typing_context(self): + # Lazily-initialized top-level typing context, for all threads + return typing.Context() + + @property + def target_context(self): + """ + The target context for CPU targets. 
+ """ + nested = self._nested._target_context + if nested is not None: + return nested + else: + return self._toplevel_target_context + + @property + def typing_context(self): + """ + The typing context for CPU targets. + """ + nested = self._nested._typing_context + if nested is not None: + return nested + else: + return self._toplevel_typing_context + + def nested_context(self, typing_context, target_context): + """ + A context manager temporarily replacing the contexts with the + given ones, for the current thread of execution. + """ + return self._nested.nested(typing_context, target_context) + + +# The global CPU target +cpu_target = CPUTarget('cpu') + + +class CPUDispatcher(dispatcher.Dispatcher): + targetdescr = cpu_target + + +class DelayedRegistry(utils.UniqueDict): + """ + A unique dictionary but with deferred initialisation of the values. + + Attributes + ---------- + ondemand: + + A dictionary of key -> value, where value is executed + the first time it is is used. It is used for part of a deferred + initialization strategy. + """ + def __init__(self, *args, **kws): + self.ondemand = utils.UniqueDict() + self.key_type = kws.pop('key_type', None) + self.value_type = kws.pop('value_type', None) + self._type_check = self.key_type or self.value_type + super(DelayedRegistry, self).__init__(*args, **kws) + + def __getitem__(self, item): + if item in self.ondemand: + self[item] = self.ondemand[item]() + del self.ondemand[item] + return super(DelayedRegistry, self).__getitem__(item) + + def __setitem__(self, key, value): + if self._type_check: + def check(x, ty_x): + if isinstance(ty_x, type): + assert ty_x in x.__mro__, (x, ty_x) + else: + assert isinstance(x, ty_x), (x, ty_x) + if self.key_type is not None: + check(key, self.key_type) + if self.value_type is not None: + check(value, self.value_type) + return super(DelayedRegistry, self).__setitem__(key, value) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/removerefctpass.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/removerefctpass.py new file mode 100644 index 000000000..98c04869a --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/removerefctpass.py @@ -0,0 +1,120 @@ +""" +Implement a rewrite pass on a LLVM module to remove unnecessary +refcount operations. +""" + +from llvmlite.ir.transforms import CallVisitor + +from numba.core import types + + +class _MarkNrtCallVisitor(CallVisitor): + """ + A pass to mark all NRT_incref and NRT_decref. + """ + def __init__(self): + self.marked = set() + + def visit_Call(self, instr): + if getattr(instr.callee, 'name', '') in _accepted_nrtfns: + self.marked.add(instr) + + +def _rewrite_function(function): + # Mark NRT usage + markpass = _MarkNrtCallVisitor() + markpass.visit_Function(function) + # Remove NRT usage + for bb in function.basic_blocks: + for inst in list(bb.instructions): + if inst in markpass.marked: + bb.instructions.remove(inst) + + +_accepted_nrtfns = 'NRT_incref', 'NRT_decref' + + +def _legalize(module, dmm, fndesc): + """ + Legalize the code in the module. + Returns True if the module is legal for the rewrite pass that removes + unnecessary refcounts. + """ + + def valid_output(ty): + """ + Valid output are any type that does not need refcount + """ + model = dmm[ty] + return not model.contains_nrt_meminfo() + + def valid_input(ty): + """ + Valid input are any type that does not need refcount except Array. 
+ """ + return valid_output(ty) or isinstance(ty, types.Array) + + + # Ensure no reference to function marked as + # "numba_args_may_always_need_nrt" + try: + nmd = module.get_named_metadata("numba_args_may_always_need_nrt") + except KeyError: + # Nothing marked + pass + else: + # Has functions marked as "numba_args_may_always_need_nrt" + if len(nmd.operands) > 0: + # The pass is illegal for this compilation unit. + return False + + # More legalization base on function type + argtypes = fndesc.argtypes + restype = fndesc.restype + calltypes = fndesc.calltypes + + # Legalize function arguments + for argty in argtypes: + if not valid_input(argty): + return False + + # Legalize function return + if not valid_output(restype): + return False + + # Legalize all called functions + for callty in calltypes.values(): + if callty is not None and not valid_output(callty.return_type): + return False + + # Ensure no allocation + for fn in module.functions: + if fn.name.startswith("NRT_"): + if fn.name not in _accepted_nrtfns: + return False + + return True + + +def remove_unnecessary_nrt_usage(function, context, fndesc): + """ + Remove unnecessary NRT incref/decref in the given LLVM function. + It uses highlevel type info to determine if the function does not need NRT. + Such a function does not: + + - return array object(s); + - take arguments that need refcounting except array; + - call function(s) that return refcounted object. + + In effect, the function will not capture or create references that extend + the lifetime of any refcounted objects beyond the lifetime of the function. + + The rewrite is performed in place. + If rewrite has happened, this function returns True, otherwise, it returns False. + """ + dmm = context.data_model_manager + if _legalize(function.module, dmm, fndesc): + _rewrite_function(function) + return True + else: + return False diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/retarget.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/retarget.py new file mode 100644 index 000000000..8fa98c78b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/retarget.py @@ -0,0 +1,135 @@ +""" +Implement utils for supporting retargeting of dispatchers. + +WARNING: Features defined in this file are experimental. The API may change + without notice. +""" +import abc +import weakref + +from numba.core import errors + + +class RetargetCache: + """Cache for retargeted dispatchers. + + The cache uses the original dispatcher as the key. + """ + container_type = weakref.WeakKeyDictionary + + def __init__(self): + self._cache = self.container_type() + self._stat_hit = 0 + self._stat_miss = 0 + + def save_cache(self, orig_disp, new_disp): + """Save a dispatcher associated with the given key. + """ + self._cache[orig_disp] = new_disp + + def load_cache(self, orig_disp): + """Load a dispatcher associated with the given key. + """ + out = self._cache.get(orig_disp) + if out is None: + self._stat_miss += 1 + else: + self._stat_hit += 1 + return out + + def items(self): + """Returns the contents of the cache. + """ + return self._cache.items() + + def stats(self): + """Returns stats regarding cache hit/miss. + """ + return {'hit': self._stat_hit, 'miss': self._stat_miss} + + +class BaseRetarget(abc.ABC): + """Abstract base class for retargeting logic. + """ + @abc.abstractmethod + def check_compatible(self, orig_disp): + """Check that the retarget is compatible. + + This method does not return anything meaningful (e.g. 
None) + Incompatibility is signalled via raising an exception. + """ + pass + + @abc.abstractmethod + def retarget(self, orig_disp): + """Retargets the given dispatcher and returns a new dispatcher-like + callable. Or, returns the original dispatcher if the the target_backend + will not change. + """ + pass + + +class BasicRetarget(BaseRetarget): + """A basic retargeting implementation for a single output target. + + This class has two abstract methods/properties that subclasses must define. + + - `output_target` must return output target name. + - `compile_retarget` must define the logic to retarget the given dispatcher. + + By default, this class uses `RetargetCache` as the internal cache. This + can be modified by overriding the `.cache_type` class attribute. + + """ + cache_type = RetargetCache + + def __init__(self): + self.cache = self.cache_type() + + @abc.abstractproperty + def output_target(self) -> str: + """Returns the output target name. + + See numba/tests/test_retargeting.py for example usage. + """ + pass + + @abc.abstractmethod + def compile_retarget(self, orig_disp): + """Returns the retargeted dispatcher. + + See numba/tests/test_retargeting.py for example usage. + """ + pass + + def check_compatible(self, orig_disp): + """ + This implementation checks that + `self.output_target == orig_disp._required_target_backend` + """ + required_target = orig_disp._required_target_backend + output_target = self.output_target + if required_target is not None: + if output_target != required_target: + m = ("The output target does match the required target: " + f"{output_target} != {required_target}.") + raise errors.CompilerError(m) + + def retarget(self, orig_disp): + """Apply retargeting to orig_disp. + + The retargeted dispatchers are cached for future use. + """ + cache = self.cache + opts = orig_disp.targetoptions + # Skip if the original dispatcher is targeting the same output target + if opts.get('target_backend') == self.output_target: + return orig_disp + cached = cache.load_cache(orig_disp) + # No cache? + if cached is None: + out = self.compile_retarget(orig_disp) + cache.save_cache(orig_disp, out) + else: + out = cached + return out diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/__init__.py new file mode 100644 index 000000000..61f55a8ec --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/__init__.py @@ -0,0 +1,8 @@ +""" +A subpackage hosting Numba IR rewrite passes. +""" + +from .registry import register_rewrite, rewrite_registry, Rewrite +# Register various built-in rewrite passes +from numba.core.rewrites import (static_getitem, static_raise, static_binop, + ir_print) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/ir_print.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/ir_print.py new file mode 100644 index 000000000..6d678381b --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/ir_print.py @@ -0,0 +1,82 @@ +from numba.core import errors, ir +from numba.core.rewrites import register_rewrite, Rewrite + + +@register_rewrite('before-inference') +class RewritePrintCalls(Rewrite): + """ + Rewrite calls to the print() global function to dedicated IR print() nodes. 
+ """ + + def match(self, func_ir, block, typemap, calltypes): + self.prints = prints = {} + self.block = block + # Find all assignments with a right-hand print() call + for inst in block.find_insts(ir.Assign): + if isinstance(inst.value, ir.Expr) and inst.value.op == 'call': + expr = inst.value + try: + callee = func_ir.infer_constant(expr.func) + except errors.ConstantInferenceError: + continue + if callee is print: + if expr.kws: + # Only positional args are supported + msg = ("Numba's print() function implementation does not " + "support keyword arguments.") + raise errors.UnsupportedError(msg, inst.loc) + prints[inst] = expr + return len(prints) > 0 + + def apply(self): + """ + Rewrite `var = call (...)` as a sequence of + `print(...)` and `var = const(None)`. + """ + new_block = self.block.copy() + new_block.clear() + for inst in self.block.body: + if inst in self.prints: + expr = self.prints[inst] + print_node = ir.Print(args=expr.args, vararg=expr.vararg, + loc=expr.loc) + new_block.append(print_node) + assign_node = ir.Assign(value=ir.Const(None, loc=expr.loc), + target=inst.target, + loc=inst.loc) + new_block.append(assign_node) + else: + new_block.append(inst) + return new_block + + +@register_rewrite('before-inference') +class DetectConstPrintArguments(Rewrite): + """ + Detect and store constant arguments to print() nodes. + """ + + def match(self, func_ir, block, typemap, calltypes): + self.consts = consts = {} + self.block = block + for inst in block.find_insts(ir.Print): + if inst.consts: + # Already rewritten + continue + for idx, var in enumerate(inst.args): + try: + const = func_ir.infer_constant(var) + except errors.ConstantInferenceError: + continue + consts.setdefault(inst, {})[idx] = const + + return len(consts) > 0 + + def apply(self): + """ + Store detected constant arguments on their nodes. + """ + for inst in self.block.body: + if inst in self.consts: + inst.consts = self.consts[inst] + return self.block diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/registry.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/registry.py new file mode 100644 index 000000000..ea22fc8e3 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/registry.py @@ -0,0 +1,98 @@ +from collections import defaultdict + +from numba.core import config + + +class Rewrite(object): + '''Defines the abstract base class for Numba rewrites. + ''' + + def __init__(self, state=None): + '''Constructor for the Rewrite class. + ''' + pass + + def match(self, func_ir, block, typemap, calltypes): + '''Overload this method to check an IR block for matching terms in the + rewrite. + ''' + return False + + def apply(self): + '''Overload this method to return a rewritten IR basic block when a + match has been found. + ''' + raise NotImplementedError("Abstract Rewrite.apply() called!") + + +class RewriteRegistry(object): + '''Defines a registry for Numba rewrites. + ''' + _kinds = frozenset(['before-inference', 'after-inference']) + + def __init__(self): + '''Constructor for the rewrite registry. Initializes the rewrites + member to an empty list. + ''' + self.rewrites = defaultdict(list) + + def register(self, kind): + """ + Decorator adding a subclass of Rewrite to the registry for + the given *kind*. 
+ """ + if kind not in self._kinds: + raise KeyError("invalid kind %r" % (kind,)) + def do_register(rewrite_cls): + if not issubclass(rewrite_cls, Rewrite): + raise TypeError('{0} is not a subclass of Rewrite'.format( + rewrite_cls)) + self.rewrites[kind].append(rewrite_cls) + return rewrite_cls + return do_register + + def apply(self, kind, state): + '''Given a pipeline and a dictionary of basic blocks, exhaustively + attempt to apply all registered rewrites to all basic blocks. + ''' + assert kind in self._kinds + blocks = state.func_ir.blocks + old_blocks = blocks.copy() + for rewrite_cls in self.rewrites[kind]: + # Exhaustively apply a rewrite until it stops matching. + rewrite = rewrite_cls(state) + work_list = list(blocks.items()) + while work_list: + key, block = work_list.pop() + matches = rewrite.match(state.func_ir, block, state.typemap, + state.calltypes) + if matches: + if config.DEBUG or config.DUMP_IR: + print("_" * 70) + print("REWRITING (%s):" % rewrite_cls.__name__) + block.dump() + print("_" * 60) + new_block = rewrite.apply() + blocks[key] = new_block + work_list.append((key, new_block)) + if config.DEBUG or config.DUMP_IR: + new_block.dump() + print("_" * 70) + # If any blocks were changed, perform a sanity check. + for key, block in blocks.items(): + if block != old_blocks[key]: + block.verify() + + # Some passes, e.g. _inline_const_arraycall are known to occasionally + # do invalid things WRT ir.Del, others, e.g. RewriteArrayExprs do valid + # things with ir.Del, but the placement is not optimal. The lines below + # fix-up the IR so that ref counts are valid and optimally placed, + # see #4093 for context. This has to be run here opposed to in + # apply() as the CFG needs computing so full IR is needed. + from numba.core import postproc + post_proc = postproc.PostProcessor(state.func_ir) + post_proc.run() + + +rewrite_registry = RewriteRegistry() +register_rewrite = rewrite_registry.register diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_binop.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_binop.py new file mode 100644 index 000000000..33487a675 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_binop.py @@ -0,0 +1,35 @@ +from numba.core import errors, ir +from numba.core.rewrites import register_rewrite, Rewrite + + +@register_rewrite('before-inference') +class DetectStaticBinops(Rewrite): + """ + Detect constant arguments to select binops. + """ + + # Those operators can benefit from a constant-inferred argument + rhs_operators = {'**'} + + def match(self, func_ir, block, typemap, calltypes): + self.static_lhs = {} + self.static_rhs = {} + self.block = block + # Find binop expressions with a constant lhs or rhs + for expr in block.find_exprs(op='binop'): + try: + if (expr.fn in self.rhs_operators + and expr.static_rhs is ir.UNDEFINED): + self.static_rhs[expr] = func_ir.infer_constant(expr.rhs) + except errors.ConstantInferenceError: + continue + + return len(self.static_lhs) > 0 or len(self.static_rhs) > 0 + + def apply(self): + """ + Store constant arguments that were detected in match(). 
+ """ + for expr, rhs in self.static_rhs.items(): + expr.static_rhs = rhs + return self.block diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_getitem.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_getitem.py new file mode 100644 index 000000000..56343d0ea --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_getitem.py @@ -0,0 +1,175 @@ +from numba.core import errors, ir, types +from numba.core.rewrites import register_rewrite, Rewrite + + +@register_rewrite('before-inference') +class RewriteConstGetitems(Rewrite): + """ + Rewrite IR expressions of the kind `getitem(value=arr, index=$constXX)` + where `$constXX` is a known constant as + `static_getitem(value=arr, index=)`. + """ + + def match(self, func_ir, block, typemap, calltypes): + self.getitems = getitems = {} + self.block = block + # Detect all getitem expressions and find which ones can be + # rewritten + for expr in block.find_exprs(op='getitem'): + if expr.op == 'getitem': + try: + const = func_ir.infer_constant(expr.index) + except errors.ConstantInferenceError: + continue + getitems[expr] = const + + return len(getitems) > 0 + + def apply(self): + """ + Rewrite all matching getitems as static_getitems. + """ + new_block = self.block.copy() + new_block.clear() + for inst in self.block.body: + if isinstance(inst, ir.Assign): + expr = inst.value + if expr in self.getitems: + const = self.getitems[expr] + new_expr = ir.Expr.static_getitem(value=expr.value, + index=const, + index_var=expr.index, + loc=expr.loc) + inst = ir.Assign(value=new_expr, target=inst.target, + loc=inst.loc) + new_block.append(inst) + return new_block + + +@register_rewrite('after-inference') +class RewriteStringLiteralGetitems(Rewrite): + """ + Rewrite IR expressions of the kind `getitem(value=arr, index=$XX)` + where `$XX` is a StringLiteral value as + `static_getitem(value=arr, index=)`. + """ + + def match(self, func_ir, block, typemap, calltypes): + """ + Detect all getitem expressions and find which ones have + string literal indexes + """ + self.getitems = getitems = {} + self.block = block + self.calltypes = calltypes + for expr in block.find_exprs(op='getitem'): + if expr.op == 'getitem': + index_ty = typemap[expr.index.name] + if isinstance(index_ty, types.StringLiteral): + getitems[expr] = (expr.index, index_ty.literal_value) + + return len(getitems) > 0 + + def apply(self): + """ + Rewrite all matching getitems as static_getitems where the index + is the literal value of the string. + """ + new_block = ir.Block(self.block.scope, self.block.loc) + for inst in self.block.body: + if isinstance(inst, ir.Assign): + expr = inst.value + if expr in self.getitems: + const, lit_val = self.getitems[expr] + new_expr = ir.Expr.static_getitem(value=expr.value, + index=lit_val, + index_var=expr.index, + loc=expr.loc) + self.calltypes[new_expr] = self.calltypes[expr] + inst = ir.Assign(value=new_expr, target=inst.target, + loc=inst.loc) + new_block.append(inst) + return new_block + + +@register_rewrite('after-inference') +class RewriteStringLiteralSetitems(Rewrite): + """ + Rewrite IR expressions of the kind `setitem(value=arr, index=$XX, value=)` + where `$XX` is a StringLiteral value as + `static_setitem(value=arr, index=, value=)`. 
+ """ + + def match(self, func_ir, block, typemap, calltypes): + """ + Detect all setitem expressions and find which ones have + string literal indexes + """ + self.setitems = setitems = {} + self.block = block + self.calltypes = calltypes + for inst in block.find_insts(ir.SetItem): + index_ty = typemap[inst.index.name] + if isinstance(index_ty, types.StringLiteral): + setitems[inst] = (inst.index, index_ty.literal_value) + + return len(setitems) > 0 + + def apply(self): + """ + Rewrite all matching setitems as static_setitems where the index + is the literal value of the string. + """ + new_block = ir.Block(self.block.scope, self.block.loc) + for inst in self.block.body: + if isinstance(inst, ir.SetItem): + if inst in self.setitems: + const, lit_val = self.setitems[inst] + new_inst = ir.StaticSetItem(target=inst.target, + index=lit_val, + index_var=inst.index, + value=inst.value, + loc=inst.loc) + self.calltypes[new_inst] = self.calltypes[inst] + inst = new_inst + new_block.append(inst) + return new_block + + +@register_rewrite('before-inference') +class RewriteConstSetitems(Rewrite): + """ + Rewrite IR statements of the kind `setitem(target=arr, index=$constXX, ...)` + where `$constXX` is a known constant as + `static_setitem(target=arr, index=, ...)`. + """ + + def match(self, func_ir, block, typemap, calltypes): + self.setitems = setitems = {} + self.block = block + # Detect all setitem statements and find which ones can be + # rewritten + for inst in block.find_insts(ir.SetItem): + try: + const = func_ir.infer_constant(inst.index) + except errors.ConstantInferenceError: + continue + setitems[inst] = const + + return len(setitems) > 0 + + def apply(self): + """ + Rewrite all matching setitems as static_setitems. + """ + new_block = self.block.copy() + new_block.clear() + for inst in self.block.body: + if inst in self.setitems: + const = self.setitems[inst] + new_inst = ir.StaticSetItem(inst.target, const, + inst.index, inst.value, inst.loc) + new_block.append(new_inst) + else: + new_block.append(inst) + return new_block diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_raise.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_raise.py new file mode 100644 index 000000000..61f7b5742 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/rewrites/static_raise.py @@ -0,0 +1,79 @@ +from numba.core import errors, ir +from numba.core.rewrites import register_rewrite, Rewrite + + +@register_rewrite('before-inference') +class RewriteConstRaises(Rewrite): + """ + Rewrite IR statements of the kind `raise(value)` + where `value` is the result of instantiating an exception with + constant arguments + into `static_raise(exception_type, constant args)`. + + This allows lowering in nopython mode, where one can't instantiate + exception instances from runtime data. + """ + + def _is_exception_type(self, const): + return isinstance(const, type) and issubclass(const, Exception) + + def _break_constant(self, const, loc): + """ + Break down constant exception. 
+ """ + if isinstance(const, tuple): # it's a tuple(exception class, args) + if not self._is_exception_type(const[0]): + msg = "Encountered unsupported exception constant %r" + raise errors.UnsupportedError(msg % (const[0],), loc) + return const[0], tuple(const[1]) + elif self._is_exception_type(const): + return const, None + else: + if isinstance(const, str): + msg = ("Directly raising a string constant as an exception is " + "not supported.") + else: + msg = "Encountered unsupported constant type used for exception" + raise errors.UnsupportedError(msg, loc) + + def match(self, func_ir, block, typemap, calltypes): + self.raises = raises = {} + self.tryraises = tryraises = {} + self.block = block + # Detect all raise statements and find which ones can be + # rewritten + for inst in block.find_insts((ir.Raise, ir.TryRaise)): + if inst.exception is None: + # re-reraise + exc_type, exc_args = None, None + else: + # raise => find the definition site for + const = func_ir.infer_constant(inst.exception) + loc = inst.exception.loc + exc_type, exc_args = self._break_constant(const, loc) + if isinstance(inst, ir.Raise): + raises[inst] = exc_type, exc_args + elif isinstance(inst, ir.TryRaise): + tryraises[inst] = exc_type, exc_args + else: + raise ValueError('unexpected: {}'.format(type(inst))) + return (len(raises) + len(tryraises)) > 0 + + def apply(self): + """ + Rewrite all matching setitems as static_setitems. + """ + new_block = self.block.copy() + new_block.clear() + for inst in self.block.body: + if inst in self.raises: + exc_type, exc_args = self.raises[inst] + new_inst = ir.StaticRaise(exc_type, exc_args, inst.loc) + new_block.append(new_inst) + elif inst in self.tryraises: + exc_type, exc_args = self.tryraises[inst] + new_inst = ir.StaticTryRaise(exc_type, exc_args, inst.loc) + new_block.append(new_inst) + else: + new_block.append(inst) + return new_block diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/__init__.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/__init__.py new file mode 100644 index 000000000..590470031 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/__init__.py @@ -0,0 +1 @@ +from .nrt import rtsys diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_python.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_python.c new file mode 100644 index 000000000..b4c847e26 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_python.c @@ -0,0 +1,459 @@ +/* + * Definition of NRT functions for marshalling from / to Python objects. + * This module is included by _nrt_pythonmod.c and by pycc-compiled modules. + */ + +#include "../../_pymodule.h" + +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include +#include + +#include "../../_arraystruct.h" +#include "../../_numba_common.h" +#include "nrt.h" + + +/* + * Create a NRT MemInfo for data owned by a PyObject. + */ + +static void +pyobject_dtor(void *ptr, size_t size, void* info) { + PyGILState_STATE gstate; + PyObject *ownerobj = info; + + gstate = PyGILState_Ensure(); /* ensure the GIL */ + Py_DECREF(ownerobj); /* release the python object */ + PyGILState_Release(gstate); /* release the GIL */ +} + +NUMBA_EXPORT_FUNC(NRT_MemInfo *) +NRT_meminfo_new_from_pyobject(void *data, PyObject *ownerobj) { + size_t dummy_size = 0; + Py_INCREF(ownerobj); + return NRT_MemInfo_new(data, dummy_size, pyobject_dtor, ownerobj); +} + + +/* + * A Python object wrapping a NRT meminfo. 
+ */ + +typedef struct { + PyObject_HEAD + NRT_MemInfo *meminfo; +} MemInfoObject; + + +static +int MemInfo_init(MemInfoObject *self, PyObject *args, PyObject *kwds) { + static char *keywords[] = {"ptr", NULL}; + PyObject *raw_ptr_obj; + void *raw_ptr; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", keywords, &raw_ptr_obj)) { + return -1; + } + raw_ptr = PyLong_AsVoidPtr(raw_ptr_obj); + NRT_Debug(nrt_debug_print("MemInfo_init self=%p raw_ptr=%p\n", self, raw_ptr)); + + if(PyErr_Occurred()) return -1; + self->meminfo = (NRT_MemInfo *)raw_ptr; + assert (NRT_MemInfo_refcount(self->meminfo) > 0 && "0 refcount"); + return 0; +} + + +static int +MemInfo_getbuffer(PyObject *exporter, Py_buffer *view, int flags) { + Py_ssize_t len; + void *buf; + int readonly = 0; + + MemInfoObject *miobj = (MemInfoObject*)exporter; + NRT_MemInfo *mi = miobj->meminfo; + + buf = NRT_MemInfo_data(mi); + len = NRT_MemInfo_size(mi); + return PyBuffer_FillInfo(view, exporter, buf, len, readonly, flags); +} + +static PyBufferProcs MemInfo_bufferProcs = {MemInfo_getbuffer, NULL}; + +static +PyObject* +MemInfo_acquire(MemInfoObject *self) { + NRT_MemInfo_acquire(self->meminfo); + Py_RETURN_NONE; +} + +static +PyObject* +MemInfo_release(MemInfoObject *self) { + NRT_MemInfo_release(self->meminfo); + Py_RETURN_NONE; +} + +static +PyObject* +MemInfo_get_data(MemInfoObject *self, void *closure) { + return PyLong_FromVoidPtr(NRT_MemInfo_data(self->meminfo)); +} + +static +PyObject* +MemInfo_get_refcount(MemInfoObject *self, void *closure) { + size_t refct = NRT_MemInfo_refcount(self->meminfo); + if ( refct == (size_t)-1 ) { + PyErr_SetString(PyExc_ValueError, "invalid MemInfo"); + return NULL; + } + return PyLong_FromSize_t(refct); +} + +static +PyObject* +MemInfo_get_external_allocator(MemInfoObject *self, void *closure) { + void *p = NRT_MemInfo_external_allocator(self->meminfo); + return PyLong_FromVoidPtr(p); +} + +static +PyObject* +MemInfo_get_parent(MemInfoObject *self, void *closure) { + void *p = NRT_MemInfo_parent(self->meminfo); + if (p) { + Py_INCREF(p); + return (PyObject*)p; + } else { + Py_INCREF(Py_None); + return Py_None; + } +} + +static void +MemInfo_dealloc(MemInfoObject *self) +{ + NRT_MemInfo_release(self->meminfo); + Py_TYPE(self)->tp_free((PyObject*)self); +} + +static PyMethodDef MemInfo_methods[] = { + {"acquire", (PyCFunction)MemInfo_acquire, METH_NOARGS, + "Increment the reference count" + }, + {"release", (PyCFunction)MemInfo_release, METH_NOARGS, + "Decrement the reference count" + }, + {NULL} /* Sentinel */ +}; + + +static PyGetSetDef MemInfo_getsets[] = { + {"data", + (getter)MemInfo_get_data, NULL, + "Get the data pointer as an integer", + NULL}, + {"refcount", + (getter)MemInfo_get_refcount, NULL, + "Get the refcount", + NULL}, + {"external_allocator", + (getter)MemInfo_get_external_allocator, NULL, + "Get the external allocator", + NULL}, + {"parent", + (getter)MemInfo_get_parent, NULL, + NULL}, + {NULL} /* Sentinel */ +}; + + +static PyTypeObject MemInfoType = { + PyVarObject_HEAD_INIT(NULL, 0) + "_nrt_python._MemInfo", /* tp_name*/ + sizeof(MemInfoObject), /* tp_basicsize*/ + 0, /* tp_itemsize*/ + (destructor)MemInfo_dealloc, /* tp_dealloc*/ + 0, /* tp_print*/ + 0, /* tp_getattr*/ + 0, /* tp_setattr*/ + 0, /* tp_compare*/ + 0, /* tp_repr*/ + 0, /* tp_as_number*/ + 0, /* tp_as_sequence*/ + 0, /* tp_as_mapping*/ + 0, /* tp_hash */ + 0, /* tp_call*/ + 0, /* tp_str*/ + 0, /* tp_getattro*/ + 0, /* tp_setattro*/ + &MemInfo_bufferProcs, /* tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | 
Py_TPFLAGS_BASETYPE, /* tp_flags*/ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + MemInfo_methods, /* tp_methods */ + 0, /* tp_members */ + MemInfo_getsets, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)MemInfo_init, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + + +/* +Return a MemInfo* as a MemInfoObject* +The NRT reference to the MemInfo is borrowed. +*/ +NUMBA_EXPORT_FUNC(MemInfoObject*) +NRT_meminfo_as_pyobject(NRT_MemInfo *meminfo) { + MemInfoObject *mi; + PyObject *addr; + + addr = PyLong_FromVoidPtr(meminfo); + if (!addr) return NULL; + mi = (MemInfoObject*)PyObject_CallFunctionObjArgs((PyObject *)&MemInfoType, addr, NULL); + Py_DECREF(addr); + if (!mi) return NULL; + return mi; +} + + +/* +Return a MemInfo* from a MemInfoObject* +A new reference is returned. +*/ +NUMBA_EXPORT_FUNC(NRT_MemInfo*) +NRT_meminfo_from_pyobject(MemInfoObject *miobj) { + NRT_MemInfo_acquire(miobj->meminfo); + return miobj->meminfo; +} + + +/* + * Array adaptor code + */ + +NUMBA_EXPORT_FUNC(int) +NRT_adapt_ndarray_from_python(PyObject *obj, arystruct_t* arystruct) { + PyArrayObject *ndary; + int i, ndim; + npy_intp *p; + void *data; + + if (!PyArray_Check(obj)) { + return -1; + } + + ndary = (PyArrayObject*)obj; + ndim = PyArray_NDIM(ndary); + data = PyArray_DATA(ndary); + + arystruct->meminfo = NRT_meminfo_new_from_pyobject((void*)data, obj); + arystruct->data = data; + arystruct->nitems = PyArray_SIZE(ndary); + arystruct->itemsize = PyArray_ITEMSIZE(ndary); + arystruct->parent = obj; + p = arystruct->shape_and_strides; + for (i = 0; i < ndim; i++, p++) { + *p = PyArray_DIM(ndary, i); + } + for (i = 0; i < ndim; i++, p++) { + *p = PyArray_STRIDE(ndary, i); + } + + NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_from_python %p\n", + arystruct->meminfo)); + return 0; +} + +static +PyObject* try_to_return_parent(arystruct_t *arystruct, int ndim, + PyArray_Descr *descr) +{ + int i; + PyArrayObject *array = (PyArrayObject *)arystruct->parent; + + if (!PyArray_Check(arystruct->parent)) + /* Parent is a generic buffer-providing object */ + goto RETURN_ARRAY_COPY; + + if (PyArray_DATA(array) != arystruct->data) + goto RETURN_ARRAY_COPY; + + if (PyArray_NDIM(array) != ndim) + goto RETURN_ARRAY_COPY; + + if (PyObject_RichCompareBool((PyObject *) PyArray_DESCR(array), + (PyObject *) descr, Py_EQ) <= 0) + goto RETURN_ARRAY_COPY; + + for(i = 0; i < ndim; ++i) { + if (PyArray_DIMS(array)[i] != arystruct->shape_and_strides[i]) + goto RETURN_ARRAY_COPY; + if (PyArray_STRIDES(array)[i] != arystruct->shape_and_strides[ndim + i]) + goto RETURN_ARRAY_COPY; + } + + /* Yes, it is the same array + Return new reference */ + Py_INCREF((PyObject *)array); + return (PyObject *)array; + +RETURN_ARRAY_COPY: + return NULL; +} + +/** + * This function is used during the boxing of ndarray type. + * `arystruct` is a structure containing essential information from the + * unboxed array. + * `retty` is the subtype of the NumPy PyArray_Type this function should return. + * This is related to `numba.core.types.Array.box_type`. + * `ndim` is the number of dimension of the array. + * `writeable` corresponds to the "writable" flag in NumPy ndarray. + * `descr` is the NumPy data type description. + * + * This function was renamed in 0.52.0 to specify that it acquires references. + * It used to steal the reference of the arystruct. 
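+ * (Callers therefore retain ownership of `arystruct`; the function now
+ * acquires any references it needs itself.)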
+ * Refer to https://github.com/numba/numba/pull/6446 + */ +NUMBA_EXPORT_FUNC(PyObject *) +NRT_adapt_ndarray_to_python_acqref(arystruct_t* arystruct, PyTypeObject *retty, + int ndim, int writeable, PyArray_Descr *descr) +{ + PyArrayObject *array; + MemInfoObject *miobj = NULL; + PyObject *args; + npy_intp *shape, *strides; + int flags = 0; + + if (descr == NULL) { + PyErr_Format(PyExc_RuntimeError, + "In 'NRT_adapt_ndarray_to_python', 'descr' is NULL"); + return NULL; + } + + if (!NUMBA_PyArray_DescrCheck(descr)) { + PyErr_Format(PyExc_TypeError, + "expected dtype object, got '%.200s'", + Py_TYPE(descr)->tp_name); + return NULL; + } + + if (arystruct->parent) { + PyObject *obj = try_to_return_parent(arystruct, ndim, descr); + if (obj) { + return obj; + } + } + + if (arystruct->meminfo) { + /* wrap into MemInfoObject */ + miobj = PyObject_New(MemInfoObject, &MemInfoType); + args = PyTuple_New(1); + /* SETITEM steals reference */ + PyTuple_SET_ITEM(args, 0, PyLong_FromVoidPtr(arystruct->meminfo)); + NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_to_python arystruct->meminfo=%p\n", arystruct->meminfo)); + /* Note: MemInfo_init() does not incref. This function steals the + * NRT reference, which we need to acquire. + */ + NRT_Debug(nrt_debug_print("NRT_adapt_ndarray_to_python_acqref created MemInfo=%p\n", miobj)); + NRT_MemInfo_acquire(arystruct->meminfo); + if (MemInfo_init(miobj, args, NULL)) { + NRT_Debug(nrt_debug_print("MemInfo_init failed.\n")); + return NULL; + } + Py_DECREF(args); + } + + shape = arystruct->shape_and_strides; + strides = shape + ndim; + Py_INCREF((PyObject *) descr); + array = (PyArrayObject *) PyArray_NewFromDescr(retty, descr, ndim, + shape, strides, arystruct->data, + flags, (PyObject *) miobj); + + if (array == NULL) + return NULL; + + /* Set writable */ +#if NPY_API_VERSION >= 0x00000007 + if (writeable) { + PyArray_ENABLEFLAGS(array, NPY_ARRAY_WRITEABLE); + } + else { + PyArray_CLEARFLAGS(array, NPY_ARRAY_WRITEABLE); + } +#else + if (writeable) { + array->flags |= NPY_WRITEABLE; + } + else { + array->flags &= ~NPY_WRITEABLE; + } +#endif + + if (miobj) { + /* Set the MemInfoObject as the base object */ +#if NPY_API_VERSION >= 0x00000007 + if (-1 == PyArray_SetBaseObject(array, + (PyObject *) miobj)) + { + Py_DECREF(array); + Py_DECREF(miobj); + return NULL; + } +#else + PyArray_BASE(array) = (PyObject *) miobj; +#endif + + } + return (PyObject *) array; +} + +NUMBA_EXPORT_FUNC(void) +NRT_adapt_buffer_from_python(Py_buffer *buf, arystruct_t *arystruct) +{ + int i; + npy_intp *p; + + if (buf->obj) { + /* Allocate new MemInfo only if the buffer has a parent */ + arystruct->meminfo = NRT_meminfo_new_from_pyobject((void*)buf->buf, buf->obj); + } + arystruct->data = buf->buf; + arystruct->itemsize = buf->itemsize; + arystruct->parent = buf->obj; + arystruct->nitems = 1; + p = arystruct->shape_and_strides; + for (i = 0; i < buf->ndim; i++, p++) { + *p = buf->shape[i]; + arystruct->nitems *= buf->shape[i]; + } + for (i = 0; i < buf->ndim; i++, p++) { + *p = buf->strides[i]; + } +} + + +/* Initialization subroutines for modules including this source file */ + +static int +init_nrt_python_module(PyObject *module) +{ + MemInfoType.tp_new = PyType_GenericNew; + if (PyType_Ready(&MemInfoType)) + return -1; + return 0; +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_pythonmod.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_pythonmod.c new file mode 100644 index 000000000..3552c7095 --- /dev/null +++ 
b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/_nrt_pythonmod.c @@ -0,0 +1,207 @@ +#define NUMBA_EXPORT_FUNC(_rettype) static _rettype +#define NUMBA_EXPORT_DATA(_vartype) static _vartype + +#include "_nrt_python.c" + +static PyObject * +memsys_shutdown(PyObject *self, PyObject *args) { + NRT_MemSys_shutdown(); + Py_RETURN_NONE; +} + +static PyObject * +memsys_use_cpython_allocator(PyObject *self, PyObject *args) { + NRT_MemSys_set_allocator(PyMem_RawMalloc, + PyMem_RawRealloc, + PyMem_RawFree); + Py_RETURN_NONE; +} + +static PyObject * +memsys_set_atomic_inc_dec(PyObject *self, PyObject *args) { + PyObject *addr_inc_obj, *addr_dec_obj; + void *addr_inc, *addr_dec; + if (!PyArg_ParseTuple(args, "OO", &addr_inc_obj, &addr_dec_obj)) { + return NULL; + } + addr_inc = PyLong_AsVoidPtr(addr_inc_obj); + if(PyErr_Occurred()) return NULL; + addr_dec = PyLong_AsVoidPtr(addr_dec_obj); + if(PyErr_Occurred()) return NULL; + NRT_MemSys_set_atomic_inc_dec(addr_inc, addr_dec); + Py_RETURN_NONE; +} + +static PyObject * +memsys_set_atomic_cas(PyObject *self, PyObject *args) { + PyObject *addr_cas_obj; + void *addr_cas; + if (!PyArg_ParseTuple(args, "O", &addr_cas_obj)) { + return NULL; + } + addr_cas = PyLong_AsVoidPtr(addr_cas_obj); + if(PyErr_Occurred()) return NULL; + NRT_MemSys_set_atomic_cas(addr_cas); + Py_RETURN_NONE; +} + +static PyObject * +memsys_get_stats_alloc(PyObject *self, PyObject *args) { + return PyLong_FromSize_t(NRT_MemSys_get_stats_alloc()); +} + +static PyObject * +memsys_get_stats_free(PyObject *self, PyObject *args) { + return PyLong_FromSize_t(NRT_MemSys_get_stats_free()); +} + +static PyObject * +memsys_get_stats_mi_alloc(PyObject *self, PyObject *args) { + return PyLong_FromSize_t(NRT_MemSys_get_stats_mi_alloc()); +} + +static PyObject * +memsys_get_stats_mi_free(PyObject *self, PyObject *args) { + return PyLong_FromSize_t(NRT_MemSys_get_stats_mi_free()); +} + + +/* + * Create a new MemInfo with a owner PyObject + */ +static PyObject * +meminfo_new(PyObject *self, PyObject *args) { + PyObject *addr_data_obj; + void *addr_data; + PyObject *ownerobj; + NRT_MemInfo *mi; + if (!PyArg_ParseTuple(args, "OO", &addr_data_obj, &ownerobj)) { + return NULL; + } + addr_data = PyLong_AsVoidPtr(addr_data_obj); + if (PyErr_Occurred()) + return NULL; + mi = NRT_meminfo_new_from_pyobject(addr_data, ownerobj); + return PyLong_FromVoidPtr(mi); +} + +/* + * Create a new MemInfo with a new NRT allocation + */ +static PyObject * +meminfo_alloc(PyObject *self, PyObject *args) { + NRT_MemInfo *mi; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "n", &size)) { + return NULL; + } + mi = NRT_MemInfo_alloc(size); + return PyLong_FromVoidPtr(mi); +} + +/* + * Like meminfo_alloc but set memory to zero after allocation and before + * deallocation. 
+ */ +static PyObject * +meminfo_alloc_safe(PyObject *self, PyObject *args) { + NRT_MemInfo *mi; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "n", &size)) { + return NULL; + } + mi = NRT_MemInfo_alloc_safe(size); + return PyLong_FromVoidPtr(mi); +} + +static PyMethodDef ext_methods[] = { +#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL } +#define declmethod_noargs(func) { #func , ( PyCFunction )func , METH_NOARGS, NULL } + declmethod_noargs(memsys_use_cpython_allocator), + declmethod_noargs(memsys_shutdown), + declmethod(memsys_set_atomic_inc_dec), + declmethod(memsys_set_atomic_cas), + declmethod_noargs(memsys_get_stats_alloc), + declmethod_noargs(memsys_get_stats_free), + declmethod_noargs(memsys_get_stats_mi_alloc), + declmethod_noargs(memsys_get_stats_mi_free), + declmethod(meminfo_new), + declmethod(meminfo_alloc), + declmethod(meminfo_alloc_safe), + { NULL }, +#undef declmethod +}; + + + +static PyObject * +build_c_helpers_dict(void) +{ + PyObject *dct = PyDict_New(); + if (dct == NULL) + goto error; + +#define _declpointer(name, value) do { \ + PyObject *o = PyLong_FromVoidPtr(value); \ + if (o == NULL) goto error; \ + if (PyDict_SetItemString(dct, name, o)) { \ + Py_DECREF(o); \ + goto error; \ + } \ + Py_DECREF(o); \ +} while (0) + +#define declmethod(func) _declpointer(#func, &NRT_##func) +#define declmethod_internal(func) _declpointer(#func, &func) + +declmethod(adapt_ndarray_from_python); +declmethod(adapt_ndarray_to_python_acqref); +declmethod(adapt_buffer_from_python); +declmethod(meminfo_new_from_pyobject); +declmethod(meminfo_as_pyobject); +declmethod(meminfo_from_pyobject); +declmethod(MemInfo_alloc); +declmethod(MemInfo_alloc_safe); +declmethod(MemInfo_alloc_aligned); +declmethod(MemInfo_alloc_safe_aligned); +declmethod(MemInfo_alloc_safe_aligned_external); +declmethod_internal(_nrt_get_sample_external_allocator); +declmethod(MemInfo_alloc_dtor_safe); +declmethod(MemInfo_call_dtor); +declmethod(MemInfo_new_varsize); +declmethod(MemInfo_new_varsize_dtor); +declmethod(MemInfo_varsize_alloc); +declmethod(MemInfo_data); +declmethod(MemInfo_varsize_free); +declmethod(MemInfo_varsize_realloc); +declmethod(MemInfo_release); +declmethod(Allocate); +declmethod(Free); +declmethod(get_api); + + +#undef declmethod +#undef declmethod_internal + return dct; +error: + Py_XDECREF(dct); + return NULL; +} + +MOD_INIT(_nrt_python) { + PyObject *m; + MOD_DEF(m, "_nrt_python", "No docs", ext_methods) + if (m == NULL) + return MOD_ERROR_VAL; + import_array(); + NRT_MemSys_init(); + if (init_nrt_python_module(m)) + return MOD_ERROR_VAL; + + Py_INCREF(&MemInfoType); + PyModule_AddObject(m, "_MemInfo", (PyObject *) (&MemInfoType)); + + PyModule_AddObject(m, "c_helpers", build_c_helpers_dict()); + + return MOD_SUCCESS_VAL(m); +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/context.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/context.py new file mode 100644 index 000000000..9b73bbcf6 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/context.py @@ -0,0 +1,401 @@ +import functools + +from llvmlite import ir + +from numba.core import types, cgutils, errors + + +class NRTContext(object): + """ + An object providing access to NRT APIs in the lowering pass. 
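+
+ Typical use from lowering code (an illustrative sketch; `context` is
+ the target context exposing this object as `context.nrt`, `builder`
+ is the llvmlite IR builder and `size` an intp value):
+
+ mi = context.nrt.meminfo_alloc(builder, size)
+ data = context.nrt.meminfo_data(builder, mi)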
+ """ + + def __init__(self, context, enabled): + self._context = context + self._enabled = enabled + + def _require_nrt(self): + if not self._enabled: + raise errors.NumbaRuntimeError("NRT required but not enabled") + + def _check_null_result(func): + @functools.wraps(func) + def wrap(self, builder, *args, **kwargs): + memptr = func(self, builder, *args, **kwargs) + msg = "Allocation failed (probably too large)." + cgutils.guard_memory_error(self._context, builder, memptr, msg=msg) + return memptr + return wrap + + @_check_null_result + def allocate(self, builder, size): + """ + Low-level allocate a new memory area of `size` bytes. The result of the + call is checked and if it is NULL, i.e. allocation failed, then a + MemoryError is raised. + """ + return self.allocate_unchecked(builder, size) + + def allocate_unchecked(self, builder, size): + """ + Low-level allocate a new memory area of `size` bytes. Returns NULL to + indicate error/failure to allocate. + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) + fn = cgutils.get_or_insert_function(mod, fnty, "NRT_Allocate") + fn.return_value.add_attribute("noalias") + return builder.call(fn, [size]) + + def free(self, builder, ptr): + """ + Low-level free a memory area allocated with allocate(). + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(ir.VoidType(), [cgutils.voidptr_t]) + fn = cgutils.get_or_insert_function(mod, fnty, "NRT_Free") + return builder.call(fn, [ptr]) + + @_check_null_result + def meminfo_alloc(self, builder, size): + """ + Allocate a new MemInfo with a data payload of `size` bytes. + + A pointer to the MemInfo is returned. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_alloc_unchecked(builder, size) + + def meminfo_alloc_unchecked(self, builder, size): + """ + Allocate a new MemInfo with a data payload of `size` bytes. + + A pointer to the MemInfo is returned. + + Returns NULL to indicate error/failure to allocate. + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) + fn = cgutils.get_or_insert_function(mod, fnty, "NRT_MemInfo_alloc_safe") + fn.return_value.add_attribute("noalias") + return builder.call(fn, [size]) + + @_check_null_result + def meminfo_alloc_dtor(self, builder, size, dtor): + """ + Allocate a new MemInfo with a data payload of `size` bytes and a + destructor `dtor`. + + A pointer to the MemInfo is returned. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_alloc_dtor_unchecked(builder, size, dtor) + + def meminfo_alloc_dtor_unchecked(self, builder, size, dtor): + """ + Allocate a new MemInfo with a data payload of `size` bytes and a + destructor `dtor`. + + A pointer to the MemInfo is returned. + + Returns NULL to indicate error/failure to allocate. + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(cgutils.voidptr_t, + [cgutils.intp_t, cgutils.voidptr_t]) + fn = cgutils.get_or_insert_function(mod, fnty, + "NRT_MemInfo_alloc_dtor_safe") + fn.return_value.add_attribute("noalias") + return builder.call(fn, [size, + builder.bitcast(dtor, cgutils.voidptr_t)]) + + @_check_null_result + def meminfo_alloc_aligned(self, builder, size, align): + """ + Allocate a new MemInfo with an aligned data payload of `size` bytes. 
+ The data pointer is aligned to `align` bytes. `align` can be either + a Python int or a LLVM uint32 value. + + A pointer to the MemInfo is returned. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_alloc_aligned_unchecked(builder, size, align) + + def meminfo_alloc_aligned_unchecked(self, builder, size, align): + """ + Allocate a new MemInfo with an aligned data payload of `size` bytes. + The data pointer is aligned to `align` bytes. `align` can be either + a Python int or a LLVM uint32 value. + + A pointer to the MemInfo is returned. + + Returns NULL to indicate error/failure to allocate. + """ + self._require_nrt() + + mod = builder.module + u32 = ir.IntType(32) + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t, u32]) + fn = cgutils.get_or_insert_function(mod, fnty, + "NRT_MemInfo_alloc_safe_aligned") + fn.return_value.add_attribute("noalias") + if isinstance(align, int): + align = self._context.get_constant(types.uint32, align) + else: + assert align.type == u32, "align must be a uint32" + return builder.call(fn, [size, align]) + + @_check_null_result + def meminfo_new_varsize(self, builder, size): + """ + Allocate a MemInfo pointing to a variable-sized data area. The area + is separately allocated (i.e. two allocations are made) so that + re-allocating it doesn't change the MemInfo's address. + + A pointer to the MemInfo is returned. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_new_varsize_unchecked(builder, size) + + def meminfo_new_varsize_unchecked(self, builder, size): + """ + Allocate a MemInfo pointing to a variable-sized data area. The area + is separately allocated (i.e. two allocations are made) so that + re-allocating it doesn't change the MemInfo's address. + + A pointer to the MemInfo is returned. + + Returns NULL to indicate error/failure to allocate. + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(cgutils.voidptr_t, [cgutils.intp_t]) + fn = cgutils.get_or_insert_function(mod, fnty, + "NRT_MemInfo_new_varsize") + fn.return_value.add_attribute("noalias") + return builder.call(fn, [size]) + + @_check_null_result + def meminfo_new_varsize_dtor(self, builder, size, dtor): + """ + Like meminfo_new_varsize() but also set the destructor for + cleaning up references to objects inside the allocation. + + A pointer to the MemInfo is returned. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_new_varsize_dtor_unchecked(builder, size, dtor) + + def meminfo_new_varsize_dtor_unchecked(self, builder, size, dtor): + """ + Like meminfo_new_varsize() but also set the destructor for + cleaning up references to objects inside the allocation. + + A pointer to the MemInfo is returned. + + Returns NULL to indicate error/failure to allocate. + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(cgutils.voidptr_t, + [cgutils.intp_t, cgutils.voidptr_t]) + fn = cgutils.get_or_insert_function( + mod, fnty, "NRT_MemInfo_new_varsize_dtor") + return builder.call(fn, [size, dtor]) + + @_check_null_result + def meminfo_varsize_alloc(self, builder, meminfo, size): + """ + Allocate a new data area for a MemInfo created by meminfo_new_varsize(). + The new data pointer is returned, for convenience. 
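+
+ Sketch of a varsize life cycle (illustrative):
+
+ mi = context.nrt.meminfo_new_varsize(builder, size)
+ ptr = context.nrt.meminfo_varsize_alloc(builder, mi, new_size)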
+ + Contrary to realloc(), this always allocates a new area and doesn't + copy the old data. This is useful if resizing a container needs + more than simply copying the data area (e.g. for hash tables). + + The old pointer will have to be freed with meminfo_varsize_free(). + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_varsize_alloc_unchecked(builder, meminfo, size) + + def meminfo_varsize_alloc_unchecked(self, builder, meminfo, size): + """ + Allocate a new data area for a MemInfo created by meminfo_new_varsize(). + The new data pointer is returned, for convenience. + + Contrary to realloc(), this always allocates a new area and doesn't + copy the old data. This is useful if resizing a container needs + more than simply copying the data area (e.g. for hash tables). + + The old pointer will have to be freed with meminfo_varsize_free(). + + Returns NULL to indicate error/failure to allocate. + """ + return self._call_varsize_alloc(builder, meminfo, size, + "NRT_MemInfo_varsize_alloc") + + @_check_null_result + def meminfo_varsize_realloc(self, builder, meminfo, size): + """ + Reallocate a data area allocated by meminfo_new_varsize(). + The new data pointer is returned, for convenience. + + The result of the call is checked and if it is NULL, i.e. allocation + failed, then a MemoryError is raised. + """ + return self.meminfo_varsize_realloc_unchecked(builder, meminfo, size) + + def meminfo_varsize_realloc_unchecked(self, builder, meminfo, size): + """ + Reallocate a data area allocated by meminfo_new_varsize(). + The new data pointer is returned, for convenience. + + Returns NULL to indicate error/failure to allocate. + """ + return self._call_varsize_alloc(builder, meminfo, size, + "NRT_MemInfo_varsize_realloc") + + def meminfo_varsize_free(self, builder, meminfo, ptr): + """ + Free a memory area allocated for a NRT varsize object. + Note this does *not* free the NRT object itself! + """ + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(ir.VoidType(), + [cgutils.voidptr_t, cgutils.voidptr_t]) + fn = cgutils.get_or_insert_function(mod, fnty, + "NRT_MemInfo_varsize_free") + return builder.call(fn, (meminfo, ptr)) + + def _call_varsize_alloc(self, builder, meminfo, size, funcname): + self._require_nrt() + + mod = builder.module + fnty = ir.FunctionType(cgutils.voidptr_t, + [cgutils.voidptr_t, cgutils.intp_t]) + fn = cgutils.get_or_insert_function(mod, fnty, funcname) + fn.return_value.add_attribute("noalias") + return builder.call(fn, [meminfo, size]) + + def meminfo_data(self, builder, meminfo): + """ + Given a MemInfo pointer, return a pointer to the allocated data + managed by it. This works for MemInfos allocated with all the + above methods. + """ + self._require_nrt() + + from numba.core.runtime.nrtdynmod import meminfo_data_ty + + mod = builder.module + fn = cgutils.get_or_insert_function(mod, meminfo_data_ty, + "NRT_MemInfo_data_fast") + return builder.call(fn, [meminfo]) + + def get_meminfos(self, builder, ty, val): + """Return a list of *(type, meminfo)* inside the given value. 
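+
+ The value is traversed recursively through its data model, so a
+ composite value yields one (type, meminfo) pair per NRT-managed
+ member.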
+ """ + datamodel = self._context.data_model_manager[ty] + members = datamodel.traverse(builder) + + meminfos = [] + if datamodel.has_nrt_meminfo(): + mi = datamodel.get_nrt_meminfo(builder, val) + meminfos.append((ty, mi)) + + for mtyp, getter in members: + field = getter(val) + inner_meminfos = self.get_meminfos(builder, mtyp, field) + meminfos.extend(inner_meminfos) + return meminfos + + def _call_incref_decref(self, builder, typ, value, funcname): + """Call function of *funcname* on every meminfo found in *value*. + """ + self._require_nrt() + + from numba.core.runtime.nrtdynmod import incref_decref_ty + + meminfos = self.get_meminfos(builder, typ, value) + for _, mi in meminfos: + mod = builder.module + fn = cgutils.get_or_insert_function(mod, incref_decref_ty, + funcname) + # XXX "nonnull" causes a crash in test_dyn_array: can this + # function be called with a NULL pointer? + fn.args[0].add_attribute("noalias") + fn.args[0].add_attribute("nocapture") + builder.call(fn, [mi]) + + def incref(self, builder, typ, value): + """ + Recursively incref the given *value* and its members. + """ + self._call_incref_decref(builder, typ, value, "NRT_incref") + + def decref(self, builder, typ, value): + """ + Recursively decref the given *value* and its members. + """ + self._call_incref_decref(builder, typ, value, "NRT_decref") + + def get_nrt_api(self, builder): + """Calls NRT_get_api(), which returns the NRT API function table. + """ + self._require_nrt() + + fnty = ir.FunctionType(cgutils.voidptr_t, ()) + mod = builder.module + fn = cgutils.get_or_insert_function(mod, fnty, "NRT_get_api") + return builder.call(fn, ()) + + def eh_check(self, builder): + """Check if an exception is raised + """ + ctx = self._context + cc = ctx.call_conv + # Inspect the excinfo argument on the function + trystatus = cc.check_try_status(builder) + excinfo = trystatus.excinfo + has_raised = builder.not_(cgutils.is_null(builder, excinfo)) + with builder.if_then(has_raised): + self.eh_end_try(builder) + return has_raised + + def eh_try(self, builder): + """Begin a try-block. + """ + ctx = self._context + cc = ctx.call_conv + cc.set_try_status(builder) + + def eh_end_try(self, builder): + """End a try-block + """ + ctx = self._context + cc = ctx.call_conv + cc.unset_try_status(builder) diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.c b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.c new file mode 100644 index 000000000..3f67182e4 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.c @@ -0,0 +1,595 @@ +#include +#include /* for memset */ +#include "nrt.h" +#include "assert.h" + +#if !defined MIN +#define MIN(a, b) ((a) < (b)) ? (a) : (b) +#endif + + +typedef int (*atomic_meminfo_cas_func)(void **ptr, void *cmp, + void *repl, void **oldptr); + + +/* NOTE: if changing the layout, please update numba.core.runtime.atomicops */ +struct MemInfo { + size_t refct; + NRT_dtor_function dtor; + void *dtor_info; + void *data; + size_t size; /* only used for NRT allocated memory */ + NRT_ExternalAllocator *external_allocator; +}; + + +/* + * Misc helpers. + */ + +static void nrt_fatal_error(const char *msg) +{ + fprintf(stderr, "Fatal Numba error: %s\n", msg); + fflush(stderr); /* it helps in Windows debug build */ + +#if defined(MS_WINDOWS) && defined(_DEBUG) + DebugBreak(); +#endif + abort(); +} + +/* + * Global resources. 
+ */ + +struct MemSys { + /* Atomic increment and decrement function */ + NRT_atomic_inc_dec_func atomic_inc, atomic_dec; + /* Atomic CAS */ + atomic_meminfo_cas_func atomic_cas; + /* Shutdown flag */ + int shutting; + /* Stats */ + size_t stats_alloc, stats_free, stats_mi_alloc, stats_mi_free; + /* System allocation functions */ + struct { + NRT_malloc_func malloc; + NRT_realloc_func realloc; + NRT_free_func free; + } allocator; +}; + +/* The Memory System object */ +static NRT_MemSys TheMSys; + + +void NRT_MemSys_init(void) { + memset(&TheMSys, 0, sizeof(NRT_MemSys)); + /* Bind to libc allocator */ + TheMSys.allocator.malloc = malloc; + TheMSys.allocator.realloc = realloc; + TheMSys.allocator.free = free; +} + +void NRT_MemSys_shutdown(void) { + TheMSys.shutting = 1; + /* Revert to use our non-atomic stub for all atomic operations + because the JIT-ed version will be removed. + Since we are at interpreter shutdown, + it cannot be running multiple threads anymore. */ + NRT_MemSys_set_atomic_inc_dec_stub(); + NRT_MemSys_set_atomic_cas_stub(); +} + +void NRT_MemSys_set_allocator(NRT_malloc_func malloc_func, + NRT_realloc_func realloc_func, + NRT_free_func free_func) +{ + if ((malloc_func != TheMSys.allocator.malloc || + realloc_func != TheMSys.allocator.realloc || + free_func != TheMSys.allocator.free) && + (TheMSys.stats_alloc != TheMSys.stats_free || + TheMSys.stats_mi_alloc != TheMSys.stats_mi_free)) { + nrt_fatal_error("cannot change allocator while blocks are allocated"); + } + TheMSys.allocator.malloc = malloc_func; + TheMSys.allocator.realloc = realloc_func; + TheMSys.allocator.free = free_func; +} + +void NRT_MemSys_set_atomic_inc_dec(NRT_atomic_inc_dec_func inc, + NRT_atomic_inc_dec_func dec) +{ + TheMSys.atomic_inc = inc; + TheMSys.atomic_dec = dec; +} + +void NRT_MemSys_set_atomic_cas(NRT_atomic_cas_func cas) { + TheMSys.atomic_cas = (atomic_meminfo_cas_func) cas; +} + +size_t NRT_MemSys_get_stats_alloc() { + return TheMSys.stats_alloc; +} + +size_t NRT_MemSys_get_stats_free() { + return TheMSys.stats_free; +} + +size_t NRT_MemSys_get_stats_mi_alloc() { + return TheMSys.stats_mi_alloc; +} + +size_t NRT_MemSys_get_stats_mi_free() { + return TheMSys.stats_mi_free; +} + +static +size_t nrt_testing_atomic_inc(size_t *ptr){ + /* non atomic */ + size_t out = *ptr; + out += 1; + *ptr = out; + return out; +} + +static +size_t nrt_testing_atomic_dec(size_t *ptr){ + /* non atomic */ + size_t out = *ptr; + out -= 1; + *ptr = out; + return out; +} + +static +int nrt_testing_atomic_cas(void* volatile *ptr, void *cmp, void *val, + void * *oldptr){ + /* non atomic */ + void *old = *ptr; + *oldptr = old; + if (old == cmp) { + *ptr = val; + return 1; + } + return 0; + +} + +void NRT_MemSys_set_atomic_inc_dec_stub(void){ + NRT_MemSys_set_atomic_inc_dec(nrt_testing_atomic_inc, + nrt_testing_atomic_dec); +} + +void NRT_MemSys_set_atomic_cas_stub(void) { + NRT_MemSys_set_atomic_cas(nrt_testing_atomic_cas); +} + + +/* + * The MemInfo structure. 
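+ *
+ * Reference-count life cycle (sketch): a MemInfo starts with
+ * refct == 1; NRT_MemInfo_acquire increments it and
+ * NRT_MemInfo_release decrements it, running the destructor and
+ * freeing the MemInfo when the count reaches zero.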
+ */ + +void NRT_MemInfo_init(NRT_MemInfo *mi,void *data, size_t size, + NRT_dtor_function dtor, void *dtor_info, + NRT_ExternalAllocator *external_allocator) +{ + mi->refct = 1; /* starts with 1 refct */ + mi->dtor = dtor; + mi->dtor_info = dtor_info; + mi->data = data; + mi->size = size; + mi->external_allocator = external_allocator; + NRT_Debug(nrt_debug_print("NRT_MemInfo_init mi=%p external_allocator=%p\n", mi, external_allocator)); + /* Update stats */ + TheMSys.atomic_inc(&TheMSys.stats_mi_alloc); +} + +NRT_MemInfo *NRT_MemInfo_new(void *data, size_t size, + NRT_dtor_function dtor, void *dtor_info) +{ + NRT_MemInfo *mi = NRT_Allocate(sizeof(NRT_MemInfo)); + if (mi != NULL) { + NRT_Debug(nrt_debug_print("NRT_MemInfo_new mi=%p\n", mi)); + NRT_MemInfo_init(mi, data, size, dtor, dtor_info, NULL); + } + return mi; +} + +size_t NRT_MemInfo_refcount(NRT_MemInfo *mi) { + /* Should never returns 0 for a valid MemInfo */ + if (mi && mi->data) + return mi->refct; + else{ + return (size_t)-1; + } +} + +static +void nrt_internal_dtor_safe(void *ptr, size_t size, void *info) { + NRT_Debug(nrt_debug_print("nrt_internal_dtor_safe %p, %p\n", ptr, info)); + /* See NRT_MemInfo_alloc_safe() */ + memset(ptr, 0xDE, MIN(size, 256)); +} + +static +void *nrt_allocate_meminfo_and_data(size_t size, NRT_MemInfo **mi_out, NRT_ExternalAllocator *allocator) { + NRT_MemInfo *mi = NULL; + NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data %p\n", allocator)); + char *base = NRT_Allocate_External(sizeof(NRT_MemInfo) + size, allocator); + if (base == NULL) { + *mi_out = NULL; /* set meminfo to NULL as allocation failed */ + return NULL; /* return early as allocation failed */ + } + mi = (NRT_MemInfo *) base; + *mi_out = mi; + return base + sizeof(NRT_MemInfo); +} + + +static +void nrt_internal_custom_dtor_safe(void *ptr, size_t size, void *info) { + NRT_dtor_function dtor = info; + NRT_Debug(nrt_debug_print("nrt_internal_custom_dtor_safe %p, %p\n", + ptr, info)); + if (dtor) { + dtor(ptr, size, NULL); + } + + nrt_internal_dtor_safe(ptr, size, NULL); +} + + +NRT_MemInfo *NRT_MemInfo_alloc(size_t size) { + NRT_MemInfo *mi = NULL; + void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); + NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator) { + NRT_MemInfo *mi = NULL; + void *data = nrt_allocate_meminfo_and_data(size, &mi, allocator); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc %p\n", data)); + NRT_MemInfo_init(mi, data, size, NULL, NULL, allocator); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size) { + return NRT_MemInfo_alloc_dtor_safe(size, NULL); +} + +NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor) { + NRT_MemInfo *mi = NULL; + void *data = nrt_allocate_meminfo_and_data(size, &mi, NULL); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + /* Only fill up a couple cachelines with debug markers, to minimize + overhead. 
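+ (0xCB marks freshly allocated bytes; nrt_internal_dtor_safe uses
+ 0xDE for freed bytes.)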
*/ + memset(data, 0xCB, MIN(size, 256)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_dtor_safe %p %zu\n", data, size)); + NRT_MemInfo_init(mi, data, size, nrt_internal_custom_dtor_safe, dtor, NULL); + return mi; +} + + +static +void *nrt_allocate_meminfo_and_data_align(size_t size, unsigned align, + NRT_MemInfo **mi, NRT_ExternalAllocator *allocator) +{ + size_t offset = 0, intptr = 0, remainder = 0; + NRT_Debug(nrt_debug_print("nrt_allocate_meminfo_and_data_align %p\n", allocator)); + char *base = nrt_allocate_meminfo_and_data(size + 2 * align, mi, allocator); + if (base == NULL) { + return NULL; /* return early as allocation failed */ + } + intptr = (size_t) base; + /* See if we are aligned */ + remainder = intptr % align; + if (remainder == 0){ /* Yes */ + offset = 0; + } else { /* No, move forward `offset` bytes */ + offset = align - remainder; + } + return base + offset; +} + +NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align) { + NRT_MemInfo *mi = NULL; + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_aligned %p\n", data)); + NRT_MemInfo_init(mi, data, size, NULL, NULL, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align) { + NRT_MemInfo *mi = NULL; + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, NULL); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + /* Only fill up a couple cachelines with debug markers, to minimize + overhead. */ + memset(data, 0xCB, MIN(size, 256)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", + data, size)); + NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, NULL); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator) { + NRT_MemInfo *mi = NULL; + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned_external %p\n", allocator)); + void *data = nrt_allocate_meminfo_and_data_align(size, align, &mi, allocator); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + /* Only fill up a couple cachelines with debug markers, to minimize + overhead. 
*/ + memset(data, 0xCB, MIN(size, 256)); + NRT_Debug(nrt_debug_print("NRT_MemInfo_alloc_safe_aligned %p %zu\n", + data, size)); + NRT_MemInfo_init(mi, data, size, nrt_internal_dtor_safe, (void*)size, allocator); + return mi; +} + +void NRT_dealloc(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_dealloc meminfo: %p external_allocator: %p\n", mi, mi->external_allocator)); + if (mi->external_allocator) { + mi->external_allocator->free(mi, mi->external_allocator->opaque_data); + TheMSys.atomic_inc(&TheMSys.stats_free); + } else { + NRT_Free(mi); + } +} + +void NRT_MemInfo_destroy(NRT_MemInfo *mi) { + NRT_dealloc(mi); + TheMSys.atomic_inc(&TheMSys.stats_mi_free); +} + +void NRT_MemInfo_acquire(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_MemInfo_acquire %p refct=%zu\n", mi, + mi->refct)); + assert(mi->refct > 0 && "RefCt cannot be zero"); + TheMSys.atomic_inc(&mi->refct); +} + +void NRT_MemInfo_call_dtor(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_MemInfo_call_dtor %p\n", mi)); + if (mi->dtor && !TheMSys.shutting) + /* We have a destructor and the system is not shutting down */ + mi->dtor(mi->data, mi->size, mi->dtor_info); + /* Clear and release MemInfo */ + NRT_MemInfo_destroy(mi); +} + +void NRT_MemInfo_release(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_MemInfo_release %p refct=%zu\n", mi, + mi->refct)); + assert (mi->refct > 0 && "RefCt cannot be 0"); + /* RefCt drop to zero */ + if (TheMSys.atomic_dec(&mi->refct) == 0) { + NRT_MemInfo_call_dtor(mi); + } +} + +void* NRT_MemInfo_data(NRT_MemInfo* mi) { + return mi->data; +} + +size_t NRT_MemInfo_size(NRT_MemInfo* mi) { + return mi->size; +} + +void * NRT_MemInfo_external_allocator(NRT_MemInfo *mi) { + NRT_Debug(nrt_debug_print("NRT_MemInfo_external_allocator meminfo: %p external_allocator: %p\n", mi, mi->external_allocator)); + return mi->external_allocator; +} + +void *NRT_MemInfo_parent(NRT_MemInfo *mi) { + return mi->dtor_info; +} + +void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out) { + fprintf(out, "MemInfo %p refcount %zu\n", mi, mi->refct); +} + +/* + * Resizable buffer API. 
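+ *
+ * Illustrative C-side use (a sketch):
+ *
+ * NRT_MemInfo *mi = NRT_MemInfo_new_varsize(16);
+ * char *p = NRT_MemInfo_varsize_realloc(mi, 32);
+ * NRT_MemInfo_release(mi); /* frees the data and the MemInfo */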
+ */ + +static void +nrt_varsize_dtor(void *ptr, size_t size, void *info) { + NRT_Debug(nrt_debug_print("nrt_varsize_dtor %p\n", ptr)); + if (info) { + /* call element dtor */ + typedef void dtor_fn_t(void *ptr); + dtor_fn_t *dtor = info; + dtor(ptr); + } + NRT_Free(ptr); +} + +NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size) +{ + NRT_MemInfo *mi = NULL; + void *data = NRT_Allocate(size); + if (data == NULL) { + return NULL; /* return early as allocation failed */ + } + + mi = NRT_MemInfo_new(data, size, nrt_varsize_dtor, NULL); + NRT_Debug(nrt_debug_print("NRT_MemInfo_new_varsize size=%zu " + "-> meminfo=%p, data=%p\n", size, mi, data)); + return mi; +} + +NRT_MemInfo *NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor) { + NRT_MemInfo *mi = NRT_MemInfo_new_varsize(size); + if (mi) { + mi->dtor_info = dtor; + } + return mi; +} + +void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size) +{ + if (mi->dtor != nrt_varsize_dtor) { + nrt_fatal_error("ERROR: NRT_MemInfo_varsize_alloc called " + "with a non varsize-allocated meminfo"); + return NULL; /* unreachable */ + } + mi->data = NRT_Allocate(size); + if (mi->data == NULL) + return NULL; + mi->size = size; + NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_alloc %p size=%zu " + "-> data=%p\n", mi, size, mi->data)); + return mi->data; +} + +void *NRT_MemInfo_varsize_realloc(NRT_MemInfo *mi, size_t size) +{ + if (mi->dtor != nrt_varsize_dtor) { + nrt_fatal_error("ERROR: NRT_MemInfo_varsize_realloc called " + "with a non varsize-allocated meminfo"); + return NULL; /* unreachable */ + } + mi->data = NRT_Reallocate(mi->data, size); + if (mi->data == NULL) + return NULL; + mi->size = size; + NRT_Debug(nrt_debug_print("NRT_MemInfo_varsize_realloc %p size=%zu " + "-> data=%p\n", mi, size, mi->data)); + return mi->data; +} + +void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr) +{ + NRT_Free(ptr); + if (ptr == mi->data) + mi->data = NULL; +} + +/* + * Low-level allocation wrappers. + */ + +void* NRT_Allocate(size_t size) { + return NRT_Allocate_External(size, NULL); +} + +void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator) { + void *ptr = NULL; + if (allocator) { + ptr = allocator->malloc(size, allocator->opaque_data); + NRT_Debug(nrt_debug_print("NRT_Allocate_External custom bytes=%zu ptr=%p\n", size, ptr)); + } else { + ptr = TheMSys.allocator.malloc(size); + NRT_Debug(nrt_debug_print("NRT_Allocate_External bytes=%zu ptr=%p\n", size, ptr)); + } + TheMSys.atomic_inc(&TheMSys.stats_alloc); + return ptr; +} + +void *NRT_Reallocate(void *ptr, size_t size) { + void *new_ptr = TheMSys.allocator.realloc(ptr, size); + NRT_Debug(nrt_debug_print("NRT_Reallocate bytes=%zu ptr=%p -> %p\n", + size, ptr, new_ptr)); + return new_ptr; +} + +void NRT_Free(void *ptr) { + NRT_Debug(nrt_debug_print("NRT_Free %p\n", ptr)); + TheMSys.allocator.free(ptr); + TheMSys.atomic_inc(&TheMSys.stats_free); +} + +/* + * Sample external allocator implementation for internal testing. 
+ */ + +static int sample_external_opaque_data = 0xabacad; + +static +void* sample_external_malloc(size_t size, void* opaque_data) { + if (opaque_data != &sample_external_opaque_data) return NULL; + return TheMSys.allocator.malloc(size); +} + +static +void* sample_external_realloc(void *ptr, size_t new_size, void *opaque_data) { + if (opaque_data != &sample_external_opaque_data) return NULL; + return TheMSys.allocator.realloc(ptr, new_size); +} + +static +void sample_external_free(void *ptr, void* opaque_data) { + TheMSys.allocator.free(ptr); +} + +static NRT_ExternalAllocator sample_external_allocator = { + // malloc + sample_external_malloc, + // realloc + sample_external_realloc, + // free + sample_external_free, + // opaque_data + &sample_external_opaque_data +}; + +NRT_ExternalAllocator* _nrt_get_sample_external_allocator() { + return &sample_external_allocator; +} + +/* + * Debugging printf function used internally + */ +void nrt_debug_print(char *fmt, ...) { + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} + + +static +void nrt_manage_memory_dtor(void *data, size_t size, void *info) { + NRT_managed_dtor* dtor = (NRT_managed_dtor*)info; + dtor(data); +} + +static +NRT_MemInfo* nrt_manage_memory(void *data, NRT_managed_dtor dtor) { + return NRT_MemInfo_new(data, 0, nrt_manage_memory_dtor, dtor); +} + + +static const +NRT_api_functions nrt_functions_table = { + NRT_MemInfo_alloc, + NRT_MemInfo_alloc_external, + nrt_manage_memory, + NRT_MemInfo_acquire, + NRT_MemInfo_release, + NRT_MemInfo_data +}; + + +const NRT_api_functions* NRT_get_api(void) { + return &nrt_functions_table; +} diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.h new file mode 100644 index 000000000..2bfc9033e --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.h @@ -0,0 +1,272 @@ +/* +All functions described here are threadsafe. 
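+
+Thread safety relies on the atomic increment/decrement and compare-and-swap
+functions registered through the NRT_MemSys_set_atomic_* functions below;
+the non-atomic stubs are intended for single-threaded testing only.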
+*/
+
+#ifndef NUMBA_NRT_H_
+#define NUMBA_NRT_H_
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "../../_numba_common.h"
+
+#include "nrt_external.h"
+
+/* Debugging facilities - enabled at compile-time */
+/* #undef NDEBUG */
+#if 0
+#   define NRT_Debug(X) {X; fflush(stdout); }
+#else
+#   define NRT_Debug(X) if (0) { X; }
+#endif
+
+/* TypeDefs */
+typedef void (*NRT_dtor_function)(void *ptr, size_t size, void *info);
+typedef void (*NRT_dealloc_func)(void *ptr, void *dealloc_info);
+typedef size_t (*NRT_atomic_inc_dec_func)(size_t *ptr);
+typedef int (*NRT_atomic_cas_func)(void * volatile *ptr, void *cmp, void *repl,
+                                   void **oldptr);
+
+typedef struct MemSys NRT_MemSys;
+
+typedef void *(*NRT_malloc_func)(size_t size);
+typedef void *(*NRT_realloc_func)(void *ptr, size_t new_size);
+typedef void (*NRT_free_func)(void *ptr);
+
+/* Memory System API */
+
+/* Initialize the memory system */
+VISIBILITY_HIDDEN
+void NRT_MemSys_init(void);
+
+/* Shut down the memory system */
+VISIBILITY_HIDDEN
+void NRT_MemSys_shutdown(void);
+
+/*
+ * Register the system allocation functions
+ */
+VISIBILITY_HIDDEN
+void NRT_MemSys_set_allocator(NRT_malloc_func, NRT_realloc_func, NRT_free_func);
+
+/*
+ * Register the atomic increment and decrement functions
+ */
+VISIBILITY_HIDDEN
+void NRT_MemSys_set_atomic_inc_dec(NRT_atomic_inc_dec_func inc,
+                                   NRT_atomic_inc_dec_func dec);
+
+
+/*
+ * Register the atomic compare-and-swap function
+ */
+VISIBILITY_HIDDEN
+void NRT_MemSys_set_atomic_cas(NRT_atomic_cas_func cas);
+
+/*
+ * Register a non-atomic STUB for increment and decrement
+ */
+VISIBILITY_HIDDEN
+void NRT_MemSys_set_atomic_inc_dec_stub(void);
+
+/*
+ * Register a non-atomic STUB for compare-and-swap
+ */
+VISIBILITY_HIDDEN
+void NRT_MemSys_set_atomic_cas_stub(void);
+
+/*
+ * The following functions get internal statistics of the memory subsystem.
+ */
+VISIBILITY_HIDDEN
+size_t NRT_MemSys_get_stats_alloc(void);
+VISIBILITY_HIDDEN
+size_t NRT_MemSys_get_stats_free(void);
+VISIBILITY_HIDDEN
+size_t NRT_MemSys_get_stats_mi_alloc(void);
+VISIBILITY_HIDDEN
+size_t NRT_MemSys_get_stats_mi_free(void);
+
+/* Memory Info API */
+
+/* Create a new MemInfo for external memory
+ *
+ * data: data pointer being tracked
+ * dtor: destructor to execute
+ * dtor_info: additional information to pass to the destructor
+ */
+VISIBILITY_HIDDEN
+NRT_MemInfo* NRT_MemInfo_new(void *data, size_t size,
+                             NRT_dtor_function dtor, void *dtor_info);
+
+/*
+ * The `external_allocator` is an experimental hook to customize the
+ * allocator.  Set to NULL to use the default builtin allocator.
+ */
+VISIBILITY_HIDDEN
+void NRT_MemInfo_init(NRT_MemInfo *mi, void *data, size_t size,
+                      NRT_dtor_function dtor, void *dtor_info,
+                      NRT_ExternalAllocator *external_allocator);
+
+/*
+ * Returns the refcount of a MemInfo, or (size_t)-1 on error.
+ */
+VISIBILITY_HIDDEN
+size_t NRT_MemInfo_refcount(NRT_MemInfo *mi);
+
+/*
+ * Allocate memory of `size` bytes and return a pointer to a MemInfo
+ * structure that describes the allocation.
+ */
+VISIBILITY_HIDDEN
+NRT_MemInfo *NRT_MemInfo_alloc(size_t size);
+
+NRT_MemInfo *NRT_MemInfo_alloc_external(size_t size, NRT_ExternalAllocator *allocator);
+
+/*
+ * The "safe" NRT_MemInfo_alloc performs additional steps to help debug
+ * memory errors.
+ * It is guaranteed to:
+ *   - junk-fill the memory region after allocation and before deallocation.
+ * It may do more in the future.
+ */
+VISIBILITY_HIDDEN
+NRT_MemInfo *NRT_MemInfo_alloc_safe(size_t size);
+
+/*
+ * Similar to NRT_MemInfo_alloc_safe but with a custom dtor.
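+ * The caller-provided dtor runs when the refcount drops to zero, in
+ * addition to the junk-fill done by the safe scheme.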
+ */
+VISIBILITY_HIDDEN
+NRT_MemInfo* NRT_MemInfo_alloc_dtor_safe(size_t size, NRT_dtor_function dtor);
+
+/*
+ * Aligned versions of NRT_MemInfo_alloc and NRT_MemInfo_alloc_safe.
+ * These take an additional argument `align`, the number of bytes to
+ * align to.
+ */
+VISIBILITY_HIDDEN
+NRT_MemInfo *NRT_MemInfo_alloc_aligned(size_t size, unsigned align);
+VISIBILITY_HIDDEN
+NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned(size_t size, unsigned align);
+
+/*
+ * Experimental.
+ * A variant that uses an external allocator.
+ */
+NRT_MemInfo *NRT_MemInfo_alloc_safe_aligned_external(size_t size, unsigned align, NRT_ExternalAllocator *allocator);
+
+/*
+ * Internal API.
+ * Release a MemInfo.  Frees the MemInfo structure itself (see NRT_dealloc).
+ */
+VISIBILITY_HIDDEN
+void NRT_MemInfo_destroy(NRT_MemInfo *mi);
+
+/*
+ * Acquire a reference to a MemInfo
+ */
+VISIBILITY_HIDDEN
+void NRT_MemInfo_acquire(NRT_MemInfo* mi);
+
+/*
+ * Release a reference to a MemInfo
+ */
+VISIBILITY_HIDDEN
+void NRT_MemInfo_release(NRT_MemInfo* mi);
+
+/*
+ * Internal/Compiler API.
+ * Invoke the registered destructor of a MemInfo.
+ */
+VISIBILITY_HIDDEN
+void NRT_MemInfo_call_dtor(NRT_MemInfo *mi);
+
+/*
+ * Returns the data pointer
+ */
+VISIBILITY_HIDDEN
+void* NRT_MemInfo_data(NRT_MemInfo* mi);
+
+/*
+ * Returns the allocated size
+ */
+VISIBILITY_HIDDEN
+size_t NRT_MemInfo_size(NRT_MemInfo* mi);
+
+
+/*
+ * Experimental.
+ * Returns the external allocator
+ */
+VISIBILITY_HIDDEN
+void* NRT_MemInfo_external_allocator(NRT_MemInfo* mi);
+
+/*
+ * Returns the parent MemInfo
+ */
+VISIBILITY_HIDDEN
+void* NRT_MemInfo_parent(NRT_MemInfo* mi);
+
+
+/*
+ * NRT API for resizable buffers.
+ */
+VISIBILITY_HIDDEN
+NRT_MemInfo *NRT_MemInfo_new_varsize(size_t size);
+VISIBILITY_HIDDEN
+NRT_MemInfo *NRT_MemInfo_new_varsize_dtor(size_t size, NRT_dtor_function dtor);
+VISIBILITY_HIDDEN
+void *NRT_MemInfo_varsize_alloc(NRT_MemInfo *mi, size_t size);
+VISIBILITY_HIDDEN
+void *NRT_MemInfo_varsize_realloc(NRT_MemInfo *mi, size_t size);
+VISIBILITY_HIDDEN
+void NRT_MemInfo_varsize_free(NRT_MemInfo *mi, void *ptr);
+
+/*
+ * Print debug info to FILE
+ */
+VISIBILITY_HIDDEN
+void NRT_MemInfo_dump(NRT_MemInfo *mi, FILE *out);
+
+
+/* Low-level allocation wrappers. */
+
+/*
+ * Allocate memory of `size` bytes.
+ */
+VISIBILITY_HIDDEN void* NRT_Allocate(size_t size);
+
+/*
+ * Experimental.
+ *
+ * An alternative to NRT_Allocate that accepts an external allocator.
+ */
+VISIBILITY_HIDDEN void* NRT_Allocate_External(size_t size, NRT_ExternalAllocator *allocator);
+
+/*
+ * Deallocate the memory pointed to by `ptr`.
+ */
+VISIBILITY_HIDDEN void NRT_Free(void *ptr);
+
+/*
+ * Reallocate memory at `ptr`.
+ */
+VISIBILITY_HIDDEN void *NRT_Reallocate(void *ptr, size_t size);
+
+/*
+ * Debugging printf function used internally
+ */
+VISIBILITY_HIDDEN void nrt_debug_print(char *fmt, ...);
+
+/*
+ * Get the API function table.
+ */
+VISIBILITY_HIDDEN const NRT_api_functions* NRT_get_api(void);
+
+
+/*
+ * FOR INTERNAL USE ONLY.
+ * Get a sample external allocator for testing
+ */
+VISIBILITY_HIDDEN NRT_ExternalAllocator* _nrt_get_sample_external_allocator(void);
+
+#endif /* NUMBA_NRT_H_ */
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.py
new file mode 100644
index 000000000..fef29e3c8
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt.py
@@ -0,0 +1,136 @@
+from collections import namedtuple
+from weakref import finalize as _finalize
+
+from numba.core.runtime import nrtdynmod
+from llvmlite import binding as ll
+
+from numba.core.compiler_lock import global_compiler_lock
+from numba.core.typing.typeof import typeof_impl
+from numba.core import types
+from numba.core.runtime import _nrt_python as _nrt
+
+_nrt_mstats = namedtuple("nrt_mstats", ["alloc", "free", "mi_alloc", "mi_free"])
+
+
+class _Runtime(object):
+    def __init__(self):
+        self._init = False
+
+    @global_compiler_lock
+    def initialize(self, ctx):
+        """Initialize the NRT.
+
+        Must be called before any actual call to the NRT API.
+        Safe to be called multiple times.
+        """
+        if self._init:
+            # Already initialized
+            return
+
+        # Register globals into the system
+        for py_name in _nrt.c_helpers:
+            if py_name.startswith("_"):
+                # internal API
+                c_name = py_name
+            else:
+                c_name = "NRT_" + py_name
+            c_address = _nrt.c_helpers[py_name]
+            ll.add_symbol(c_name, c_address)
+
+        # Compile atomic operations
+        self._library = nrtdynmod.compile_nrt_functions(ctx)
+
+        self._ptr_inc = self._library.get_pointer_to_function("nrt_atomic_add")
+        self._ptr_dec = self._library.get_pointer_to_function("nrt_atomic_sub")
+        self._ptr_cas = self._library.get_pointer_to_function("nrt_atomic_cas")
+
+        # Install atomic ops to NRT
+        _nrt.memsys_set_atomic_inc_dec(self._ptr_inc, self._ptr_dec)
+        _nrt.memsys_set_atomic_cas(self._ptr_cas)
+
+        self._init = True
+
+    def _init_guard(self):
+        if not self._init:
+            msg = "Runtime must be initialized before use."
+            raise RuntimeError(msg)
+
+    @staticmethod
+    def shutdown():
+        """
+        Shut down the NRT.
+        Safe to call without calling Runtime.initialize first.
+        """
+        _nrt.memsys_shutdown()
+
+    @property
+    def library(self):
+        """
+        Return the Library object containing the various NRT functions.
+        """
+        self._init_guard()
+        return self._library
+
+    def meminfo_new(self, data, pyobj):
+        """
+        Return a MemInfo object that tracks memory at `data` owned by `pyobj`.
+        The MemInfo will acquire a reference on `pyobj`; releasing the MemInfo
+        releases that reference.
+        """
+        self._init_guard()
+        mi = _nrt.meminfo_new(data, pyobj)
+        return MemInfo(mi)
+
+    def meminfo_alloc(self, size, safe=False):
+        """
+        Allocate `size` bytes of memory and return a MemInfo object that
+        tracks the allocation.  When there are no more references to the
+        MemInfo object, the underlying memory is deallocated.
+
+        If the `safe` flag is True, the memory is allocated using the `safe`
+        scheme.  This is used for debugging and testing purposes.
+        See `NRT_MemInfo_alloc_safe()` in "nrt.h" for details.
+        """
+        self._init_guard()
+        if size < 0:
+            msg = f"Cannot allocate a negative number of bytes: {size}."
+            raise ValueError(msg)
+        if safe:
+            mi = _nrt.meminfo_alloc_safe(size)
+        else:
+            mi = _nrt.meminfo_alloc(size)
+        if mi == 0:  # alloc failed or size was 0 and alloc returned NULL.
+            msg = f"Requested allocation of {size} bytes failed."
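+            # Surface the NULL result from the C layer as an ordinary
+            # Python-level MemoryError.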
+            raise MemoryError(msg)
+        return MemInfo(mi)
+
+    def get_allocation_stats(self):
+        """
+        Return a namedtuple of (alloc, free, mi_alloc, mi_free) with counts
+        of each memory operation.
+        """
+        # No init guard needed to access stats members
+        return _nrt_mstats(alloc=_nrt.memsys_get_stats_alloc(),
+                           free=_nrt.memsys_get_stats_free(),
+                           mi_alloc=_nrt.memsys_get_stats_mi_alloc(),
+                           mi_free=_nrt.memsys_get_stats_mi_free())
+
+
+# Alias to _nrt_python._MemInfo
+MemInfo = _nrt._MemInfo
+
+
+@typeof_impl.register(MemInfo)
+def typeof_meminfo(val, c):
+    return types.MemInfoPointer(types.voidptr)
+
+
+# Create runtime
+_nrt.memsys_use_cpython_allocator()
+rtsys = _Runtime()
+
+# Install finalizer
+_finalize(rtsys, _Runtime.shutdown)
+
+# Avoid future use of the class
+del _Runtime
diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt_external.h b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt_external.h
new file mode 100644
index 000000000..868955015
--- /dev/null
+++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrt_external.h
@@ -0,0 +1,65 @@
+#ifndef NUMBA_NRT_EXTERNAL_H_
+#define NUMBA_NRT_EXTERNAL_H_
+
+#include <stddef.h>
+
+typedef struct MemInfo NRT_MemInfo;
+
+typedef void NRT_managed_dtor(void *data);
+
+typedef void *(*NRT_external_malloc_func)(size_t size, void *opaque_data);
+typedef void *(*NRT_external_realloc_func)(void *ptr, size_t new_size, void *opaque_data);
+typedef void (*NRT_external_free_func)(void *ptr, void *opaque_data);
+
+struct ExternalMemAllocator {
+    NRT_external_malloc_func malloc;
+    NRT_external_realloc_func realloc;
+    NRT_external_free_func free;
+    void *opaque_data;
+};
+
+typedef struct ExternalMemAllocator NRT_ExternalAllocator;
+
+typedef struct {
+    /* Methods to create MemInfos.
+
+    MemInfos are like smart pointers for objects that are managed by Numba.
+    */
+
+    /* Allocate memory
+
+    *nbytes* is the number of bytes to be allocated
+
+    Returns a new reference.
+    */
+    NRT_MemInfo* (*allocate)(size_t nbytes);
+    /* Allocates memory using an external allocator but still using Numba's
+     * MemInfo.
+     *
+     * NOTE: An externally provided allocator must behave the same way as C99
+     *       stdlib.h's "malloc" function with respect to return value
+     *       (including the behaviour that occurs when requesting an
+     *       allocation of size 0 bytes).
+     */
+    NRT_MemInfo* (*allocate_external)(size_t nbytes, NRT_ExternalAllocator *allocator);
+
+    /* Convert externally allocated memory into a MemInfo.
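+       The runtime takes ownership of *data* and calls *dtor* exactly once,
+       when the refcount reaches zero.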
+ + *data* is the memory pointer + *dtor* is the deallocator of the memory + */ + NRT_MemInfo* (*manage_memory)(void *data, NRT_managed_dtor dtor); + + /* Acquire a reference */ + void (*acquire)(NRT_MemInfo* mi); + + /* Release a reference */ + void (*release)(NRT_MemInfo* mi); + + /* Get MemInfo data pointer */ + void* (*get_data)(NRT_MemInfo* mi); + +} NRT_api_functions; + + + +#endif /* NUMBA_NRT_EXTERNAL_H_ */ diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtdynmod.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtdynmod.py new file mode 100644 index 000000000..c8cc1973d --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtdynmod.py @@ -0,0 +1,215 @@ +""" +Dynamically generate the NRT module +""" + + +from numba.core import config +from numba.core import types, cgutils +from llvmlite import ir, binding + + +_word_type = ir.IntType(config.MACHINE_BITS) +_pointer_type = ir.PointerType(ir.IntType(8)) + +_meminfo_struct_type = ir.LiteralStructType([ + _word_type, # size_t refct + _pointer_type, # dtor_function dtor + _pointer_type, # void *dtor_info + _pointer_type, # void *data + _word_type, # size_t size + ]) + + +incref_decref_ty = ir.FunctionType(ir.VoidType(), [_pointer_type]) +meminfo_data_ty = ir.FunctionType(_pointer_type, [_pointer_type]) + + +def _define_nrt_meminfo_data(module): + """ + Implement NRT_MemInfo_data_fast in the module. This allows LLVM + to inline lookup of the data pointer. + """ + fn = cgutils.get_or_insert_function(module, meminfo_data_ty, + "NRT_MemInfo_data_fast") + builder = ir.IRBuilder(fn.append_basic_block()) + [ptr] = fn.args + struct_ptr = builder.bitcast(ptr, _meminfo_struct_type.as_pointer()) + data_ptr = builder.load(cgutils.gep(builder, struct_ptr, 0, 3)) + builder.ret(data_ptr) + + +def _define_nrt_incref(module, atomic_incr): + """ + Implement NRT_incref in the module + """ + fn_incref = cgutils.get_or_insert_function(module, incref_decref_ty, + "NRT_incref") + # Cannot inline this for refcount pruning to work + fn_incref.attributes.add('noinline') + builder = ir.IRBuilder(fn_incref.append_basic_block()) + [ptr] = fn_incref.args + is_null = builder.icmp_unsigned("==", ptr, cgutils.get_null_value(ptr.type)) + with cgutils.if_unlikely(builder, is_null): + builder.ret_void() + + word_ptr = builder.bitcast(ptr, atomic_incr.args[0].type) + if config.DEBUG_NRT: + cgutils.printf(builder, "*** NRT_Incref %zu [%p]\n", builder.load(word_ptr), + ptr) + builder.call(atomic_incr, [word_ptr]) + builder.ret_void() + + +def _define_nrt_decref(module, atomic_decr): + """ + Implement NRT_decref in the module + """ + fn_decref = cgutils.get_or_insert_function(module, incref_decref_ty, + "NRT_decref") + # Cannot inline this for refcount pruning to work + fn_decref.attributes.add('noinline') + calldtor = ir.Function(module, + ir.FunctionType(ir.VoidType(), [_pointer_type]), + name="NRT_MemInfo_call_dtor") + + builder = ir.IRBuilder(fn_decref.append_basic_block()) + [ptr] = fn_decref.args + is_null = builder.icmp_unsigned("==", ptr, cgutils.get_null_value(ptr.type)) + with cgutils.if_unlikely(builder, is_null): + builder.ret_void() + + + # For memory fence usage, see https://llvm.org/docs/Atomics.html + + # A release fence is used before the relevant write operation. + # No-op on x86. On POWER, it lowers to lwsync. 
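+    # The release fence pairs with the acquire fence below: all writes made
+    # to the object before the final decref become visible to the thread
+    # that ends up running the destructor.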
+    builder.fence("release")
+
+    word_ptr = builder.bitcast(ptr, atomic_decr.args[0].type)
+
+    if config.DEBUG_NRT:
+        cgutils.printf(builder, "*** NRT_Decref %zu [%p]\n",
+                       builder.load(word_ptr), ptr)
+    newrefct = builder.call(atomic_decr, [word_ptr])
+
+    refct_eq_0 = builder.icmp_unsigned("==", newrefct,
+                                       ir.Constant(newrefct.type, 0))
+    with cgutils.if_unlikely(builder, refct_eq_0):
+        # An acquire fence is used after the relevant read operation.
+        # No-op on x86.  On POWER, it lowers to lwsync.
+        builder.fence("acquire")
+        builder.call(calldtor, [ptr])
+    builder.ret_void()
+
+
+# Set this to True to measure the overhead of atomic refcounts compared
+# to non-atomic.
+_disable_atomicity = 0
+
+
+def _define_atomic_inc_dec(module, op, ordering):
+    """Define an LLVM function for atomic increment/decrement in the given
+    module.  Argument ``op`` is the operation ("add"/"sub") and ``ordering``
+    is the memory ordering.  The generated function returns the new value.
+    """
+    ftype = ir.FunctionType(_word_type, [_word_type.as_pointer()])
+    fn_atomic = ir.Function(module, ftype, name="nrt_atomic_{0}".format(op))
+
+    [ptr] = fn_atomic.args
+    bb = fn_atomic.append_basic_block()
+    builder = ir.IRBuilder(bb)
+    ONE = ir.Constant(_word_type, 1)
+    if not _disable_atomicity:
+        oldval = builder.atomic_rmw(op, ptr, ONE, ordering=ordering)
+        # atomic_rmw returns the old value, so redo the operation on it to
+        # compute the "new" value to return.
+        res = getattr(builder, op)(oldval, ONE)
+        builder.ret(res)
+    else:
+        oldval = builder.load(ptr)
+        newval = getattr(builder, op)(oldval, ONE)
+        builder.store(newval, ptr)
+        # Return the new value, matching the atomic path above.
+        builder.ret(newval)
+
+    return fn_atomic
+
+
+def _define_atomic_cas(module, ordering):
+    """Define an LLVM function for atomic compare-and-swap.
+    The generated function is a direct wrapper of the LLVM cmpxchg
+    instruction, with the difference that an int indicating success (1) or
+    failure (0) is returned and the last argument is an output pointer for
+    storing the old value.
+
+    Note
+    ----
+    On failure, the generated function behaves like an atomic load.  The
+    loaded value is stored to the last argument.
+    """
+    ftype = ir.FunctionType(ir.IntType(32), [_word_type.as_pointer(),
+                                             _word_type, _word_type,
+                                             _word_type.as_pointer()])
+    fn_cas = ir.Function(module, ftype, name="nrt_atomic_cas")
+
+    [ptr, cmp, repl, oldptr] = fn_cas.args
+    bb = fn_cas.append_basic_block()
+    builder = ir.IRBuilder(bb)
+    outtup = builder.cmpxchg(ptr, cmp, repl, ordering=ordering)
+    old, ok = cgutils.unpack_tuple(builder, outtup, 2)
+    builder.store(old, oldptr)
+    builder.ret(builder.zext(ok, ftype.return_type))
+
+    return fn_cas
+
+
+def _define_nrt_unresolved_abort(ctx, module):
+    """
+    Define an abort function that is called in place of an unresolved symbol.
+
+    The function takes no args and will always raise an exception.
+    It should be safe to call this function with an incorrect number of
+    arguments.
+    """
+    fnty = ctx.call_conv.get_function_type(types.none, ())
+    fn = ir.Function(module, fnty, name="nrt_unresolved_abort")
+    bb = fn.append_basic_block()
+    builder = ir.IRBuilder(bb)
+    msg = "numba jitted function aborted due to unresolved symbol"
+    ctx.call_conv.return_user_exc(builder, RuntimeError, (msg,))
+    return fn
+
+
+def create_nrt_module(ctx):
+    """
+    Create an IR module defining the LLVM NRT functions.
+    An (IR module, library) tuple is returned.
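+
+    A sketch of the expected flow (this mirrors compile_nrt_functions
+    below):
+
+        ir_mod, library = create_nrt_module(ctx)
+        library.add_ir_module(ir_mod)
+        library.finalize()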
+ """ + codegen = ctx.codegen() + library = codegen.create_library("nrt") + + # Implement LLVM module with atomic ops + ir_mod = library.create_ir_module("nrt_module") + + atomic_inc = _define_atomic_inc_dec(ir_mod, "add", ordering='monotonic') + atomic_dec = _define_atomic_inc_dec(ir_mod, "sub", ordering='monotonic') + _define_atomic_cas(ir_mod, ordering='monotonic') + + _define_nrt_meminfo_data(ir_mod) + _define_nrt_incref(ir_mod, atomic_inc) + _define_nrt_decref(ir_mod, atomic_dec) + + _define_nrt_unresolved_abort(ctx, ir_mod) + + return ir_mod, library + + +def compile_nrt_functions(ctx): + """ + Compile all LLVM NRT functions and return a library containing them. + The library is created using the given target context. + """ + ir_mod, library = create_nrt_module(ctx) + + library.add_ir_module(ir_mod) + library.finalize() + + return library diff --git a/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtopt.py b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtopt.py new file mode 100644 index 000000000..2a6f56b09 --- /dev/null +++ b/cv/3d_detection/pointrcnn-iou/pytorch/numba/numba/core/runtime/nrtopt.py @@ -0,0 +1,182 @@ +""" +NRT specific optimizations +""" +import re +from collections import defaultdict, deque +from llvmlite import binding as ll +from numba.core import cgutils + +_regex_incref = re.compile(r'\s*(?:tail)?\s*call void @NRT_incref\((.*)\)') +_regex_decref = re.compile(r'\s*(?:tail)?\s*call void @NRT_decref\((.*)\)') +_regex_bb = re.compile( + r'|'.join([ + # unnamed BB is just a plain number + r'[0-9]+:', + # with a proper identifier (see llvm langref) + r'[\'"]?[-a-zA-Z$._0-9][-a-zA-Z$._0-9]*[\'"]?:', + # is a start of a function definition + r'^define', + # no name + r'^;\s*