diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge new file mode 100755 index 0000000000000000000000000000000000000000..98a8b8fff3687ab8bed3a59fbe1f12d3d859672e --- /dev/null +++ b/.ci/generate-buildkite-pipeline-premerge @@ -0,0 +1,312 @@ +#!/usr/bin/env bash +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## + +# +# This file generates a Buildkite pipeline that triggers the various CI jobs for +# the LLVM project during pre-commit CI. +# +# See https://buildkite.com/docs/agent/v3/cli-pipeline#pipeline-format. +# +# As this outputs a yaml file, it's possible to log messages to stderr or +# prefix with "#". + + +set -eu +set -o pipefail + +# Environment variables script works with: + +# Set by buildkite +: ${BUILDKITE_PULL_REQUEST_BASE_BRANCH:=} +: ${BUILDKITE_COMMIT:=} +: ${BUILDKITE_BRANCH:=} +# Fetch origin to have an up to date merge base for the diff. +git fetch origin +# List of files affected by this commit +: ${MODIFIED_FILES:=$(git diff --name-only origin/${BUILDKITE_PULL_REQUEST_BASE_BRANCH}...HEAD)} +# Filter rules for generic windows tests +: ${WINDOWS_AGENTS:='{"queue": "windows"}'} +# Filter rules for generic linux tests +: ${LINUX_AGENTS:='{"queue": "linux"}'} + +reviewID="$(git log --format=%B -n 1 | sed -nE 's/^Review-ID:[[:space:]]*(.+)$/\1/p')" +if [[ "${reviewID}" != "" ]]; then + buildMessage="https://llvm.org/${reviewID}" +else + buildMessage="Push to branch ${BUILDKITE_BRANCH}" +fi + +cat <&2 +echo "$MODIFIED_FILES" >&2 +modified_dirs=$(echo "$MODIFIED_FILES" | cut -d'/' -f1 | sort -u) +echo "Directories modified:" >&2 +echo "$modified_dirs" >&2 + +function compute-projects-to-test() { + isForWindows=$1 + shift + projects=${@} + for project in ${projects}; do + echo "${project}" + case ${project} in + lld) + for p in bolt cross-project-tests; do + echo $p + done + ;; + llvm) + for p in bolt clang clang-tools-extra lld lldb mlir polly; do + echo $p + done + # Flang is not stable in Windows CI at the moment + if [[ $isForWindows == 0 ]]; then + echo flang + fi + ;; + clang) + # lldb is temporarily removed to alleviate Linux pre-commit CI waiting times + for p in clang-tools-extra compiler-rt cross-project-tests; do + echo $p + done + ;; + clang-tools-extra) + echo libc + ;; + mlir) + # Flang is not stable in Windows CI at the moment + if [[ $isForWindows == 0 ]]; then + echo flang + fi + ;; + *) + # Nothing to do + ;; + esac + done +} + +function compute-runtimes-to-test() { + projects=${@} + for project in ${projects}; do + case ${project} in + clang) + for p in libcxx libcxxabi libunwind; do + echo $p + done + ;; + *) + # Nothing to do + ;; + esac + done +} + +function add-dependencies() { + projects=${@} + for project in ${projects}; do + echo "${project}" + case ${project} in + bolt) + for p in clang lld llvm; do + echo $p + done + ;; + cross-project-tests) + for p in lld clang; do + echo $p + done + ;; + clang-tools-extra) + for p in llvm clang; do + echo $p + done + ;; + compiler-rt|libc|openmp) + echo clang lld + ;; + flang|lldb|libclc) + for p in llvm clang; do + echo $p + done + ;; + lld|mlir|polly) + echo llvm + ;; + *) + # Nothing to do + ;; + esac + done +} + +function exclude-linux() { + 
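+  # Drop the projects that are currently excluded from the Linux premerge run
+  # (cross-project-tests and openmp, for the reasons noted on each case arm)
+  # and echo every other project back unchanged, one per line. Illustrative
+  # example:  exclude-linux clang openmp lld  ->  clang, lld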
projects=${@} + for project in ${projects}; do + case ${project} in + cross-project-tests) ;; # tests failing + openmp) ;; # https://github.com/google/llvm-premerge-checks/issues/410 + *) + echo "${project}" + ;; + esac + done +} + +function exclude-windows() { + projects=${@} + for project in ${projects}; do + case ${project} in + cross-project-tests) ;; # tests failing + compiler-rt) ;; # tests taking too long + openmp) ;; # TODO: having trouble with the Perl installation + libc) ;; # no Windows support + lldb) ;; # custom environment requirements (https://github.com/llvm/llvm-project/pull/94208#issuecomment-2146256857) + bolt) ;; # tests are not supported yet + *) + echo "${project}" + ;; + esac + done +} + +# Prints only projects that are both present in $modified_dirs and the passed +# list. +function keep-modified-projects() { + projects=${@} + for project in ${projects}; do + if echo "$modified_dirs" | grep -q -E "^${project}$"; then + echo "${project}" + fi + done +} + +function check-targets() { + projects=${@} + for project in ${projects}; do + case ${project} in + clang-tools-extra) + echo "check-clang-tools" + ;; + compiler-rt) + echo "check-all" + ;; + cross-project-tests) + echo "check-cross-project" + ;; + libcxx) + echo "check-cxx" + ;; + libcxxabi) + echo "check-cxxabi" + ;; + libunwind) + echo "check-unwind" + ;; + lldb) + echo "check-lldb" + ;; + pstl) + echo "check-all" + ;; + libclc) + echo "check-all" + ;; + *) + echo "check-${project}" + ;; + esac + done +} + +# Project specific pipelines. + +# If libc++ or one of the runtimes directories changed. +if echo "$modified_dirs" | grep -q -E "^(libcxx|libcxxabi|libunwind|runtimes|cmake)$"; then + cat < artifacts/ccache_stats.txt +} +trap show-stats EXIT + +projects="${1}" +targets="${2}" + +echo "--- cmake" +pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt +pip install -q -r "${MONOREPO_ROOT}"/lldb/test/requirements.txt +cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ + -D LLVM_ENABLE_PROJECTS="${projects}" \ + -G Ninja \ + -D CMAKE_BUILD_TYPE=Release \ + -D LLVM_ENABLE_ASSERTIONS=ON \ + -D LLVM_BUILD_EXAMPLES=ON \ + -D COMPILER_RT_BUILD_LIBFUZZER=OFF \ + -D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests" \ + -D LLVM_ENABLE_LLD=ON \ + -D CMAKE_CXX_FLAGS=-gmlt \ + -D LLVM_CCACHE_BUILD=ON \ + -D MLIR_ENABLE_BINDINGS_PYTHON=ON \ + -D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" + +echo "--- ninja" +# Targets are not escaped as they are passed as separate arguments. +ninja -C "${BUILD_DIR}" -k 0 ${targets} + +runtimes="${3}" +runtime_targets="${4}" + +# Compiling runtimes with just-built Clang and running their tests +# as an additional testing for Clang. +if [[ "${runtimes}" != "" ]]; then + if [[ "${runtime_targets}" == "" ]]; then + echo "Runtimes to build are specified, but targets are not." 
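+    # Runtime check targets must accompany the runtimes list, so treat this
+    # mismatch as a configuration error instead of continuing without them.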
+ exit 1 + fi + + echo "--- ninja install-clang" + + ninja -C ${BUILD_DIR} install-clang install-clang-resource-headers + + RUNTIMES_BUILD_DIR="${MONOREPO_ROOT}/build-runtimes" + INSTALL_DIR="${BUILD_DIR}/install" + mkdir -p ${RUNTIMES_BUILD_DIR} + + echo "--- cmake runtimes C++03" + + cmake -S "${MONOREPO_ROOT}/runtimes" -B "${RUNTIMES_BUILD_DIR}" -GNinja \ + -D CMAKE_C_COMPILER="${INSTALL_DIR}/bin/clang" \ + -D CMAKE_CXX_COMPILER="${INSTALL_DIR}/bin/clang++" \ + -D LLVM_ENABLE_RUNTIMES="${runtimes}" \ + -D LIBCXX_CXX_ABI=libcxxabi \ + -D CMAKE_BUILD_TYPE=RelWithDebInfo \ + -D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ + -D LIBCXX_TEST_PARAMS="std=c++03" \ + -D LIBCXXABI_TEST_PARAMS="std=c++03" + + echo "--- ninja runtimes C++03" + + ninja -vC "${RUNTIMES_BUILD_DIR}" ${runtime_targets} + + echo "--- cmake runtimes C++26" + + rm -rf "${RUNTIMES_BUILD_DIR}" + cmake -S "${MONOREPO_ROOT}/runtimes" -B "${RUNTIMES_BUILD_DIR}" -GNinja \ + -D CMAKE_C_COMPILER="${INSTALL_DIR}/bin/clang" \ + -D CMAKE_CXX_COMPILER="${INSTALL_DIR}/bin/clang++" \ + -D LLVM_ENABLE_RUNTIMES="${runtimes}" \ + -D LIBCXX_CXX_ABI=libcxxabi \ + -D CMAKE_BUILD_TYPE=RelWithDebInfo \ + -D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ + -D LIBCXX_TEST_PARAMS="std=c++26" \ + -D LIBCXXABI_TEST_PARAMS="std=c++26" + + echo "--- ninja runtimes C++26" + + ninja -vC "${RUNTIMES_BUILD_DIR}" ${runtime_targets} + + echo "--- cmake runtimes clang modules" + + rm -rf "${RUNTIMES_BUILD_DIR}" + cmake -S "${MONOREPO_ROOT}/runtimes" -B "${RUNTIMES_BUILD_DIR}" -GNinja \ + -D CMAKE_C_COMPILER="${INSTALL_DIR}/bin/clang" \ + -D CMAKE_CXX_COMPILER="${INSTALL_DIR}/bin/clang++" \ + -D LLVM_ENABLE_RUNTIMES="${runtimes}" \ + -D LIBCXX_CXX_ABI=libcxxabi \ + -D CMAKE_BUILD_TYPE=RelWithDebInfo \ + -D CMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ + -D LIBCXX_TEST_PARAMS="enable_modules=clang" \ + -D LIBCXXABI_TEST_PARAMS="enable_modules=clang" + + echo "--- ninja runtimes clang modules" + + ninja -vC "${RUNTIMES_BUILD_DIR}" ${runtime_targets} +fi diff --git a/.ci/monolithic-windows.sh b/.ci/monolithic-windows.sh new file mode 100755 index 0000000000000000000000000000000000000000..91e719c52d43632e73112e4e1a8b286af492c21b --- /dev/null +++ b/.ci/monolithic-windows.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +#===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===----------------------------------------------------------------------===## + +# +# This script performs a monolithic build of the monorepo and runs the tests of +# most projects on Windows. This should be replaced by per-project scripts that +# run only the relevant tests. 
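+# The list of projects to enable (passed to LLVM_ENABLE_PROJECTS) is taken as
+# the first positional argument; the check targets to run are the second.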
+# + +set -ex +set -o pipefail + +MONOREPO_ROOT="${MONOREPO_ROOT:="$(git rev-parse --show-toplevel)"}" +BUILD_DIR="${BUILD_DIR:=${MONOREPO_ROOT}/build}" + +rm -rf "${BUILD_DIR}" + +if [[ -n "${CLEAR_CACHE:-}" ]]; then + echo "clearing sccache" + rm -rf "$SCCACHE_DIR" +fi + +sccache --zero-stats +function show-stats { + mkdir -p artifacts + sccache --show-stats >> artifacts/sccache_stats.txt +} +trap show-stats EXIT + +projects="${1}" +targets="${2}" + +echo "--- cmake" +pip install -q -r "${MONOREPO_ROOT}"/mlir/python/requirements.txt + +# The CMAKE_*_LINKER_FLAGS to disable the manifest come from research +# on fixing a build reliability issue on the build server, please +# see https://github.com/llvm/llvm-project/pull/82393 and +# https://discourse.llvm.org/t/rfc-future-of-windows-pre-commit-ci/76840/40 +# for further information. +# We limit the number of parallel compile jobs to 24 control memory +# consumption and improve build reliability. +cmake -S "${MONOREPO_ROOT}"/llvm -B "${BUILD_DIR}" \ + -D LLVM_ENABLE_PROJECTS="${projects}" \ + -G Ninja \ + -D CMAKE_BUILD_TYPE=Release \ + -D LLVM_ENABLE_ASSERTIONS=ON \ + -D LLVM_BUILD_EXAMPLES=ON \ + -D COMPILER_RT_BUILD_LIBFUZZER=OFF \ + -D LLVM_LIT_ARGS="-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests" \ + -D COMPILER_RT_BUILD_ORC=OFF \ + -D CMAKE_C_COMPILER_LAUNCHER=sccache \ + -D CMAKE_CXX_COMPILER_LAUNCHER=sccache \ + -D MLIR_ENABLE_BINDINGS_PYTHON=ON \ + -D CMAKE_EXE_LINKER_FLAGS="/MANIFEST:NO" \ + -D CMAKE_MODULE_LINKER_FLAGS="/MANIFEST:NO" \ + -D CMAKE_SHARED_LINKER_FLAGS="/MANIFEST:NO" \ + -D LLVM_PARALLEL_COMPILE_JOBS=16 \ + -D LLVM_PARALLEL_LINK_JOBS=4 + +echo "--- ninja" +# Targets are not escaped as they are passed as separate arguments. +ninja -C "${BUILD_DIR}" -k 0 ${targets} diff --git a/.gitattributes b/.gitattributes index 6b281f33f737db9ae661e80cfa66c5c778b11cb6..3a15b714f0b269da7edeff1b439f2a890f2af97a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,8 @@ +llvm-19.1.7.tar.gz.aa filter=lfs diff=lfs merge=lfs -text +llvm-19.1.7.tar.gz.ab filter=lfs diff=lfs merge=lfs -text +llvm-19.1.7.tar.gz.ac filter=lfs diff=lfs merge=lfs -text +llvm-19.1.7.tar.gz.ad filter=lfs diff=lfs merge=lfs -text + libcxx/src/**/*.cpp merge=libcxx-reformat libcxx/include/**/*.h merge=libcxx-reformat diff --git a/OAT.xml b/OAT.xml deleted file mode 100644 index d9dd34a3fb76bf849e0ca8fa644ca18c9fa22b46..0000000000000000000000000000000000000000 --- a/OAT.xml +++ /dev/null @@ -1,229 +0,0 @@ - - - - - - llvm/LICENSE.TXT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/README.OpenSource b/README.OpenSource deleted file mode 100644 index 6df508c33b2e25ef0aff57e469d1d68782420d8b..0000000000000000000000000000000000000000 --- a/README.OpenSource +++ /dev/null @@ -1,11 +0,0 @@ -[ - { - "Name": "LLVM", - "License": "Apache License v2.0 with LLVM Exceptions", - "License File": "LICENSE.TXT", - "Version Number": "19.1.7", - "Owner": "sunqiang13@huawei.com", - "Upstream URL": "http://llvm.org/", - "Description": "The LLVM Project is a collection of modular and reusable compiler and toolchain technologies. Despite its name, LLVM has little to do with traditional virtual machines. 
The name \"LLVM\" itself is not an acronym; it is the full name of the project." - } -] diff --git a/bolt/test/X86/Inputs/dwarf4-df-basic.s b/bolt/test/X86/Inputs/dwarf4-df-basic.s old mode 100644 new mode 100755 diff --git a/bolt/test/X86/dwarf4-invalid-reference-die-offset-no-internal-dwarf-error.s b/bolt/test/X86/dwarf4-invalid-reference-die-offset-no-internal-dwarf-error.s old mode 100644 new mode 100755 diff --git a/bolt/test/X86/dwarf4-invalid-reference-die-offset-with-internal-dwarf-error-cant-parse-die.s b/bolt/test/X86/dwarf4-invalid-reference-die-offset-with-internal-dwarf-error-cant-parse-die.s old mode 100644 new mode 100755 diff --git a/bolt/test/X86/dwarf4-invalid-reference-die-offset-with-internal-dwarf-error-invalid-die.s b/bolt/test/X86/dwarf4-invalid-reference-die-offset-with-internal-dwarf-error-invalid-die.s old mode 100644 new mode 100755 diff --git a/bolt/test/X86/false-jump-table.s b/bolt/test/X86/false-jump-table.s old mode 100755 new mode 100644 diff --git a/bolt/test/X86/issue20.s b/bolt/test/X86/issue20.s old mode 100755 new mode 100644 diff --git a/bolt/test/X86/issue20.test b/bolt/test/X86/issue20.test old mode 100755 new mode 100644 diff --git a/bolt/test/X86/issue26.s b/bolt/test/X86/issue26.s old mode 100755 new mode 100644 diff --git a/bolt/test/X86/issue26.test b/bolt/test/X86/issue26.test old mode 100755 new mode 100644 diff --git a/bolt/test/X86/zero-sized-object.s b/bolt/test/X86/zero-sized-object.s old mode 100755 new mode 100644 diff --git a/bolt/utils/nfc-stat-parser.py b/bolt/utils/nfc-stat-parser.py old mode 100644 new mode 100755 diff --git a/clang-tools-extra/clang-tidy/add_new_check.py b/clang-tools-extra/clang-tidy/add_new_check.py old mode 100644 new mode 100755 diff --git a/clang-tools-extra/clangd/TidyFastChecks.py b/clang-tools-extra/clangd/TidyFastChecks.py old mode 100644 new mode 100755 diff --git a/clang/docs/ClangNvlinkWrapper.rst b/clang/docs/ClangNVLinkWrapper.rst similarity index 100% rename from clang/docs/ClangNvlinkWrapper.rst rename to clang/docs/ClangNVLinkWrapper.rst diff --git a/clang/include/clang/Basic/Target/MSP430/gen-msp430-def.py b/clang/include/clang/Basic/Target/MSP430/gen-msp430-def.py old mode 100644 new mode 100755 diff --git a/clang/test/C/C2y/n3262.c b/clang/test/C/C2y/n3262.c index 864ab351bdbc231b4a22beba6cd618b339535c58..3ff2062d88dde898cb16374c8ee23b3c4a18571f 100644 --- a/clang/test/C/C2y/n3262.c +++ b/clang/test/C/C2y/n3262.c @@ -1,20 +1,20 @@ -// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s -// expected-no-diagnostics - -/* WG14 N3262: Yes - * Usability of a byte-wise copy of va_list - * - * NB: Clang explicitly documents this as being undefined behavior. A - * diagnostic is produced for some targets but not for others for assignment or - * initialization, but no diagnostic is possible to produce for use with memcpy - * in the general case, nor with a manual bytewise copy via a for loop. - * - * Therefore, nothing is tested in this file; it serves as a reminder that we - * validated our documentation against the paper. See - * clang/docs/LanguageExtensions.rst for more details. - * - * FIXME: it would be nice to add ubsan support for recognizing when an invalid - * copy is made and diagnosing on copy (or on use of the copied va_list). - */ - -int main() {} +// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s +// expected-no-diagnostics + +/* WG14 N3262: Yes + * Usability of a byte-wise copy of va_list + * + * NB: Clang explicitly documents this as being undefined behavior. 
A + * diagnostic is produced for some targets but not for others for assignment or + * initialization, but no diagnostic is possible to produce for use with memcpy + * in the general case, nor with a manual bytewise copy via a for loop. + * + * Therefore, nothing is tested in this file; it serves as a reminder that we + * validated our documentation against the paper. See + * clang/docs/LanguageExtensions.rst for more details. + * + * FIXME: it would be nice to add ubsan support for recognizing when an invalid + * copy is made and diagnosing on copy (or on use of the copied va_list). + */ + +int main() {} diff --git a/clang/test/C/C2y/n3274.c b/clang/test/C/C2y/n3274.c index 6bf8d72d0f3319cda65ad16a67f625a4c7e066f5..ccdb89f4069ded80ba2ef5bb48a5eda9722b1110 100644 --- a/clang/test/C/C2y/n3274.c +++ b/clang/test/C/C2y/n3274.c @@ -1,18 +1,18 @@ -// RUN: %clang_cc1 -verify -std=c23 -Wall -pedantic %s -// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s - -/* WG14 N3274: Yes - * Remove imaginary types - */ - -// Clang has never supported _Imaginary. -#ifdef __STDC_IEC_559_COMPLEX__ -#error "When did this happen?" -#endif - -_Imaginary float i; // expected-error {{imaginary types are not supported}} - -// _Imaginary is a keyword in older language modes, but doesn't need to be one -// in C2y or later. However, to improve diagnostic behavior, we retain it as a -// keyword in all language modes -- it is not available as an identifier. -static_assert(!__is_identifier(_Imaginary)); +// RUN: %clang_cc1 -verify -std=c23 -Wall -pedantic %s +// RUN: %clang_cc1 -verify -std=c2y -Wall -pedantic %s + +/* WG14 N3274: Yes + * Remove imaginary types + */ + +// Clang has never supported _Imaginary. +#ifdef __STDC_IEC_559_COMPLEX__ +#error "When did this happen?" +#endif + +_Imaginary float i; // expected-error {{imaginary types are not supported}} + +// _Imaginary is a keyword in older language modes, but doesn't need to be one +// in C2y or later. However, to improve diagnostic behavior, we retain it as a +// keyword in all language modes -- it is not available as an identifier. 
+static_assert(!__is_identifier(_Imaginary)); diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe old mode 100644 new mode 100755 diff --git a/clang/test/CodeGen/Inputs/update_memprof_inputs.sh b/clang/test/CodeGen/Inputs/update_memprof_inputs.sh old mode 100644 new mode 100755 diff --git a/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/x86_64-gnu/bin/as b/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/x86_64-gnu/bin/as old mode 100644 new mode 100755 diff --git a/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/x86_64-gnu/bin/ld b/clang/test/Driver/Inputs/basic_cross_hurd_tree/usr/x86_64-gnu/bin/ld old mode 100644 new mode 100755 diff --git a/clang/test/Driver/Inputs/lld/ld.lld b/clang/test/Driver/Inputs/lld/ld.lld old mode 100644 new mode 100755 diff --git a/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_fail b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_fail old mode 100644 new mode 100755 diff --git a/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_70 b/clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_70 old mode 100644 new mode 100755 diff --git a/clang/test/Driver/Inputs/rhel_7_tree/opt/rh/devtoolset-7/root/usr/bin/ld b/clang/test/Driver/Inputs/rhel_7_tree/opt/rh/devtoolset-7/root/usr/bin/ld old mode 100644 new mode 100755 diff --git a/clang/test/Driver/flang/msvc-link.f90 b/clang/test/Driver/flang/msvc-link.f90 index 3f7e162a9a6116ae39c643d84ab5e5a9e8ea518f..463749510eb5f88a38bb4538e40b4775b2b7b0e7 100644 --- a/clang/test/Driver/flang/msvc-link.f90 +++ b/clang/test/Driver/flang/msvc-link.f90 @@ -1,5 +1,5 @@ -! RUN: %clang --driver-mode=flang --target=x86_64-pc-windows-msvc -### %s -Ltest 2>&1 | FileCheck %s -! -! Test that user provided paths come before the Flang runtimes -! CHECK: "-libpath:test" -! CHECK: "-libpath:{{.*(\\|/)}}lib" +! RUN: %clang --driver-mode=flang --target=x86_64-pc-windows-msvc -### %s -Ltest 2>&1 | FileCheck %s +! +! Test that user provided paths come before the Flang runtimes +! CHECK: "-libpath:test" +! CHECK: "-libpath:{{.*(\\|/)}}lib" diff --git a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c index 2faeaba322921844a642d7c4a0a1c0082b53d1d6..d6724444c0667667a03c7ab061453ed564237a1b 100644 --- a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c +++ b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.c @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -E -frewrite-includes %s | %clang_cc1 - -// expected-no-diagnostics -// Note: This source file has CRLF line endings. -// This test validates that -frewrite-includes translates the end of line (EOL) -// form used in header files to the EOL form used in the the primary source -// file when the files use different EOL forms. -#include "rewrite-includes-mixed-eol-crlf.h" -#include "rewrite-includes-mixed-eol-lf.h" +// RUN: %clang_cc1 -E -frewrite-includes %s | %clang_cc1 - +// expected-no-diagnostics +// Note: This source file has CRLF line endings. +// This test validates that -frewrite-includes translates the end of line (EOL) +// form used in header files to the EOL form used in the the primary source +// file when the files use different EOL forms. 
+#include "rewrite-includes-mixed-eol-crlf.h" +#include "rewrite-includes-mixed-eol-lf.h" diff --git a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h index baedc282296bd7878da8b48f38bd6d0a37e65b56..0439b88b75e2cfb99be8b2d456d9b022db2eabe5 100644 --- a/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h +++ b/clang/test/Frontend/rewrite-includes-mixed-eol-crlf.h @@ -1,11 +1,11 @@ -// Note: This header file has CRLF line endings. -// The indentation in some of the conditional inclusion directives below is -// intentional and is required for this test to function as a regression test -// for GH59736. -_Static_assert(__LINE__ == 5, ""); -#if 1 -_Static_assert(__LINE__ == 7, ""); - #if 1 - _Static_assert(__LINE__ == 9, ""); - #endif -#endif +// Note: This header file has CRLF line endings. +// The indentation in some of the conditional inclusion directives below is +// intentional and is required for this test to function as a regression test +// for GH59736. +_Static_assert(__LINE__ == 5, ""); +#if 1 +_Static_assert(__LINE__ == 7, ""); + #if 1 + _Static_assert(__LINE__ == 9, ""); + #endif +#endif diff --git a/clang/test/Interpreter/Inputs/libdynamic-library-test.so b/clang/test/Interpreter/Inputs/libdynamic-library-test.so old mode 100644 new mode 100755 diff --git a/clang/test/ParserHLSL/bitfields.hlsl b/clang/test/ParserHLSL/bitfields.hlsl index 57b6705babdc12db2cdaf1cdbd428fd308b7711d..307d1143a068e2358cba56c181ea8f48766f958a 100644 --- a/clang/test/ParserHLSL/bitfields.hlsl +++ b/clang/test/ParserHLSL/bitfields.hlsl @@ -1,31 +1,31 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -x hlsl -o - %s | FileCheck %s - - -struct MyBitFields { - // CHECK: FieldDecl 0x{{[0-9a-f]+}} col:16 referenced field1 'unsigned int' - // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} 'int' - // CHECK:-value: Int 3 - // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} 'int' 3 - unsigned int field1 : 3; // 3 bits for field1 - - // CHECK:FieldDecl 0x{{[0-9a-f]+}} col:16 referenced field2 'unsigned int' - // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} 'int' - // CHECK:-value: Int 4 - // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} 'int' 4 - unsigned int field2 : 4; // 4 bits for field2 - - // CHECK:FieldDecl 0x{{[0-9a-f]+}} col:7 field3 'int' - // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} 'int' - // CHECK:-value: Int 5 - // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} 'int' 5 - int field3 : 5; // 5 bits for field3 (signed) -}; - - - -[numthreads(1,1,1)] -void main() { - MyBitFields m; - m.field1 = 4; - m.field2 = m.field1*2; +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -ast-dump -x hlsl -o - %s | FileCheck %s + + +struct MyBitFields { + // CHECK: FieldDecl 0x{{[0-9a-f]+}} col:16 referenced field1 'unsigned int' + // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} 'int' + // CHECK:-value: Int 3 + // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} 'int' 3 + unsigned int field1 : 3; // 3 bits for field1 + + // CHECK:FieldDecl 0x{{[0-9a-f]+}} col:16 referenced field2 'unsigned int' + // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} 'int' + // CHECK:-value: Int 4 + // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} 'int' 4 + unsigned int field2 : 4; // 4 bits for field2 + + // CHECK:FieldDecl 0x{{[0-9a-f]+}} col:7 field3 'int' + // CHECK:-ConstantExpr 0x{{[0-9a-f]+}} 'int' + // CHECK:-value: Int 5 + // CHECK:-IntegerLiteral 0x{{[0-9a-f]+}} 'int' 5 + int field3 : 5; // 5 bits for field3 (signed) +}; + + + +[numthreads(1,1,1)] +void main() { + MyBitFields m; + m.field1 = 4; + m.field2 = m.field1*2; } \ No 
newline at end of file diff --git a/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl b/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl index 5b228d039345e1f37a31ecdfb5d31bcfb36ec236..2eebc920388b5b278a0273df6fabf37e6c86385b 100644 --- a/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl +++ b/clang/test/ParserHLSL/hlsl_annotations_on_struct_members.hlsl @@ -1,21 +1,21 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s - -// tests that hlsl annotations are properly parsed when applied on field decls, -// and that the annotation gets properly placed on the AST. - -struct Eg9{ - // CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} col:8 implicit struct Eg9 - // CHECK: FieldDecl 0x{{[0-9a-f]+}} col:16 referenced a 'unsigned int' - // CHECK: -HLSLSV_DispatchThreadIDAttr 0x{{[0-9a-f]+}} - unsigned int a : SV_DispatchThreadID; -}; -Eg9 e9; - - -RWBuffer In : register(u1); - - -[numthreads(1,1,1)] -void main() { - In[0] = e9.a; -} +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s + +// tests that hlsl annotations are properly parsed when applied on field decls, +// and that the annotation gets properly placed on the AST. + +struct Eg9{ + // CHECK: CXXRecordDecl 0x{{[0-9a-f]+}} col:8 implicit struct Eg9 + // CHECK: FieldDecl 0x{{[0-9a-f]+}} col:16 referenced a 'unsigned int' + // CHECK: -HLSLSV_DispatchThreadIDAttr 0x{{[0-9a-f]+}} + unsigned int a : SV_DispatchThreadID; +}; +Eg9 e9; + + +RWBuffer In : register(u1); + + +[numthreads(1,1,1)] +void main() { + In[0] = e9.a; +} diff --git a/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl b/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl index 0663a55ae0e6d7bc9dffd3bdbcb9280e7b8f8365..410b4524f1c3df566734b1be7e7161033a3125b7 100644 --- a/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl +++ b/clang/test/ParserHLSL/hlsl_resource_class_attr.hlsl @@ -1,32 +1,32 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s - - -// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} SRV -struct [[hlsl::resource_class(SRV)]] Eg1 { - int i; -}; - -Eg1 e1; - -// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} line:13:38 referenced struct Eg2 definition -// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} UAV -struct [[hlsl::resource_class(UAV)]] Eg2 { - int i; -}; -Eg2 e2; - -// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} line:20:42 referenced struct Eg3 definition -// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} CBuffer -struct [[hlsl::resource_class(CBuffer)]] Eg3 { - int i; -}; -Eg3 e3; - -// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} line:27:42 referenced struct Eg4 definition -// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} Sampler -struct [[hlsl::resource_class(Sampler)]] Eg4 { - int i; -}; -Eg4 e4; - -RWBuffer In : register(u1); +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s + + +// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} SRV +struct [[hlsl::resource_class(SRV)]] Eg1 { + int i; +}; + +Eg1 e1; + +// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} line:13:38 referenced struct Eg2 definition +// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} UAV +struct [[hlsl::resource_class(UAV)]] Eg2 { + int i; +}; +Eg2 e2; + +// CHECK: -CXXRecordDecl 0x{{[0-9a-f]+}} line:20:42 referenced struct Eg3 definition +// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} CBuffer +struct [[hlsl::resource_class(CBuffer)]] Eg3 { + int i; +}; +Eg3 e3; + +// CHECK: -CXXRecordDecl 
0x{{[0-9a-f]+}} line:27:42 referenced struct Eg4 definition +// CHECK: -HLSLResourceClassAttr 0x{{[0-9a-f]+}} Sampler +struct [[hlsl::resource_class(Sampler)]] Eg4 { + int i; +}; +Eg4 e4; + +RWBuffer In : register(u1); diff --git a/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl b/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl index 5ba4c884fb16e92af425ffc70845b014b849f242..00fcd769760bbaf16be78dcdfc62825c47ac86f4 100644 --- a/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl +++ b/clang/test/ParserHLSL/hlsl_resource_class_attr_error.hlsl @@ -1,15 +1,15 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s -verify - -// expected-error@+1{{'resource_class' attribute takes one argument}} -struct [[hlsl::resource_class()]] Eg1 { - int i; -}; - -Eg1 e1; - -// expected-warning@+1{{ResourceClass attribute argument not supported: gibberish}} -struct [[hlsl::resource_class(gibberish)]] Eg2 { - int i; -}; - -Eg2 e2; +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s -verify + +// expected-error@+1{{'resource_class' attribute takes one argument}} +struct [[hlsl::resource_class()]] Eg1 { + int i; +}; + +Eg1 e1; + +// expected-warning@+1{{ResourceClass attribute argument not supported: gibberish}} +struct [[hlsl::resource_class(gibberish)]] Eg2 { + int i; +}; + +Eg2 e2; diff --git a/clang/test/Sema/aarch64-sve-vector-trig-ops.c b/clang/test/Sema/aarch64-sve-vector-trig-ops.c index 3e6d70d1c6a5c6d8fdf14a7519327679aedb7ae9..dfa77d20e949f9ae1e7e1820d1a80e57da1609ec 100644 --- a/clang/test/Sema/aarch64-sve-vector-trig-ops.c +++ b/clang/test/Sema/aarch64-sve-vector-trig-ops.c @@ -1,59 +1,59 @@ -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve \ -// RUN: -disable-O0-optnone -o - -fsyntax-only %s -verify -// REQUIRES: aarch64-registered-target - -#include - -svfloat32_t test_asin_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_asin(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_acos_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_acos(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_atan_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_atan(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_sin_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_sin(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_cos(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_tan(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_sinh_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_sinh(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_cosh_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_cosh(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -svfloat32_t test_tanh_vv_i8mf8(svfloat32_t v) { - - return __builtin_elementwise_tanh(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} +// RUN: %clang_cc1 
-triple aarch64 -target-feature +sve \ +// RUN: -disable-O0-optnone -o - -fsyntax-only %s -verify +// REQUIRES: aarch64-registered-target + +#include + +svfloat32_t test_asin_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_asin(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_acos_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_acos(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_atan_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_atan(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_sin_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_sin(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_cos(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_tan(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_sinh_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_sinh(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_cosh_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_cosh(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +svfloat32_t test_tanh_vv_i8mf8(svfloat32_t v) { + + return __builtin_elementwise_tanh(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} diff --git a/clang/test/Sema/riscv-rvv-vector-trig-ops.c b/clang/test/Sema/riscv-rvv-vector-trig-ops.c index 1283f35ec3d93918973f8cdef33454feda640d46..f0cd5ca4a1de1f3c534951228d00c04ac1b51301 100644 --- a/clang/test/Sema/riscv-rvv-vector-trig-ops.c +++ b/clang/test/Sema/riscv-rvv-vector-trig-ops.c @@ -1,61 +1,61 @@ -// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ -// RUN: -target-feature +v -target-feature +zfh -target-feature +zvfh \ -// RUN: -disable-O0-optnone -o - -fsyntax-only %s -verify -// REQUIRES: riscv-registered-target - -#include - -vfloat32mf2_t test_asin_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_asin(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} - } - - vfloat32mf2_t test_acos_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_acos(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} - } - - vfloat32mf2_t test_atan_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_atan(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} - } - -vfloat32mf2_t test_sin_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_sin(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_cos(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_tan(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} -} - -vfloat32mf2_t 
test_sinh_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_sinh(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} - } - - vfloat32mf2_t test_cosh_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_cosh(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} - } - - vfloat32mf2_t test_tanh_vv_i8mf8(vfloat32mf2_t v) { - - return __builtin_elementwise_tanh(v); - // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} - } +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \ +// RUN: -target-feature +v -target-feature +zfh -target-feature +zvfh \ +// RUN: -disable-O0-optnone -o - -fsyntax-only %s -verify +// REQUIRES: riscv-registered-target + +#include + +vfloat32mf2_t test_asin_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_asin(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} + } + + vfloat32mf2_t test_acos_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_acos(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} + } + + vfloat32mf2_t test_atan_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_atan(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} + } + +vfloat32mf2_t test_sin_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_sin(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_cos(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_tan(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} +} + +vfloat32mf2_t test_sinh_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_sinh(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} + } + + vfloat32mf2_t test_cosh_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_cosh(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} + } + + vfloat32mf2_t test_tanh_vv_i8mf8(vfloat32mf2_t v) { + + return __builtin_elementwise_tanh(v); + // expected-error@-1 {{1st argument must be a vector, integer or floating point type}} + } \ No newline at end of file diff --git a/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl index b60fba62bdb000ab788fafdd29f667ab7d29bbee..764b9e843f7f1c0aaff5b1ef7507802926ac2022 100644 --- a/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-diag-default-compute.hlsl @@ -1,119 +1,119 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \ -// RUN: -fsyntax-only -verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 6.6))) -half fx(half); // #fx_half - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, 
introduced = 6.5, environment = mesh))) -float fz(float); // #fz - -float also_alive(float f) { - // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_alive_fx_call - // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_alive_fy_call - // expected-error@#also_alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_alive_fz_call - return 0; -} - -float alive(float f) { - // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #alive_fx_call - // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #alive_fy_call - // expected-error@#alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #alive_fz_call - - return also_alive(f); -} - -float also_dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - return 0; -} - -float dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - - return also_dead(f); -} - -template -T aliveTemp(T f) { - // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #aliveTemp_fx_call - // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #aliveTemp_fy_call - // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #aliveTemp_fz_call - return 0; -} - -template T aliveTemp2(T f) { - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} - // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 
6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - return fx(f); // #aliveTemp2_fx_call -} - -half test(half x) { - return aliveTemp2(x); -} - -float test(float x) { - return aliveTemp2(x); -} - -class MyClass -{ - float F; - float makeF() { - // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(F); // #MyClass_makeF_fx_call - // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(F); // #MyClass_makeF_fy_call - // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(F); // #MyClass_makeF_fz_call - return 0; - } -}; - -[numthreads(4,1,1)] -float main() { - float f = 3; - MyClass C = { 1.0f }; - float a = alive(f); - float b = aliveTemp(f); // #aliveTemp_inst - float c = C.makeF(); - float d = test((float)1.0); - float e = test((half)1.0); - return a * b * c; -} +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \ +// RUN: -fsyntax-only -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 6.6))) +half fx(half); // #fx_half + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) +float fz(float); // #fz + +float also_alive(float f) { + // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_alive_fx_call + // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_alive_fy_call + // expected-error@#also_alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_alive_fz_call + return 0; +} + +float alive(float f) { + // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #alive_fx_call + // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 
6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #alive_fy_call + // expected-error@#alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #alive_fz_call + + return also_alive(f); +} + +float also_dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + return 0; +} + +float dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + + return also_dead(f); +} + +template +T aliveTemp(T f) { + // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #aliveTemp_fx_call + // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #aliveTemp_fy_call + // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #aliveTemp_fz_call + return 0; +} + +template T aliveTemp2(T f) { + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} + // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + return fx(f); // #aliveTemp2_fx_call +} + +half test(half x) { + return aliveTemp2(x); +} + +float test(float x) { + return aliveTemp2(x); +} + +class MyClass +{ + float F; + float makeF() { + // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(F); // #MyClass_makeF_fx_call + // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(F); // #MyClass_makeF_fy_call + // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(F); // #MyClass_makeF_fz_call + return 0; + } +}; + +[numthreads(4,1,1)] +float main() { + float f = 3; + MyClass C = { 1.0f }; + float a = alive(f); + float b = 
aliveTemp(f); // #aliveTemp_inst + float c = C.makeF(); + float d = test((float)1.0); + float e = test((half)1.0); + return a * b * c; +} diff --git a/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl index 35b7c384f26cdd4a3a9e44f525fa1bd25bb116a6..6bfc8577670cc718a1158b46238fb810e0d0c969 100644 --- a/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-diag-default-lib.hlsl @@ -1,180 +1,180 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ -// RUN: -fsyntax-only -verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 6.6))) -half fx(half); // #fx_half - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) -float fz(float); // #fz - -float also_alive(float f) { - // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_alive_fx_call - - // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_alive_fy_call - - // expected-error@#also_alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_alive_fz_call - - return 0; -} - -float alive(float f) { - // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #alive_fx_call - - // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #alive_fy_call - - // expected-error@#alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #alive_fz_call - - return also_alive(f); -} - -float also_dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - return 0; -} - -float dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - return also_dead(f); -} - -template -T aliveTemp(T f) { - // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked 
as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #aliveTemp_fx_call - // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #aliveTemp_fy_call - // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #aliveTemp_fz_call - return 0; -} - -template T aliveTemp2(T f) { - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} - // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - return fx(f); // #aliveTemp2_fx_call -} - -half test(half x) { - return aliveTemp2(x); -} - -float test(float x) { - return aliveTemp2(x); -} - -class MyClass -{ - float F; - float makeF() { - // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(F); // #MyClass_makeF_fx_call - // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(F); // #MyClass_makeF_fy_call - // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(F); // #MyClass_makeF_fz_call - return 0; - } -}; - -// Exported function without body, not used -export void exportedFunctionUnused(float f); - -// Exported function with body, without export, not used -void exportedFunctionUnused(float f) { - // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #exportedFunctionUnused_fx_call - - // API with shader-stage-specific availability in unused exported library function - // - no errors expected because the actual shader stage this function - // will be used in not known at this time - float B = fy(f); - float C = fz(f); -} - -// Exported function with body - called from main() which is a compute shader entry point -export void exportedFunctionUsed(float f) { - // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float 
A = fx(f); // #exportedFunctionUsed_fx_call - - // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #exportedFunctionUsed_fy_call - - // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #exportedFunctionUsed_fz_call -} - -namespace A { - namespace B { - export { - void exportedFunctionInNS(float x) { - // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(x); // #exportedFunctionInNS_fx_call - - // API with shader-stage-specific availability in exported library function - // - no errors expected because the actual shader stage this function - // will be used in not known at this time - float B = fy(x); - float C = fz(x); - } - } - } -} - -// Shader entry point without body -[shader("compute")] -[numthreads(4,1,1)] -float main(); - -// Shader entry point with body -[shader("compute")] -[numthreads(4,1,1)] -float main() { - float f = 3; - MyClass C = { 1.0f }; - float a = alive(f); - float b = aliveTemp(f); // #aliveTemp_inst - float c = C.makeF(); - float d = test((float)1.0); - float e = test((half)1.0); - exportedFunctionUsed(1.0f); - return a * b * c; -} +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ +// RUN: -fsyntax-only -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 6.6))) +half fx(half); // #fx_half + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) +float fz(float); // #fz + +float also_alive(float f) { + // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_alive_fx_call + + // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_alive_fy_call + + // expected-error@#also_alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_alive_fz_call + + return 0; +} + +float alive(float f) { + // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has 
been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #alive_fx_call + + // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #alive_fy_call + + // expected-error@#alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #alive_fz_call + + return also_alive(f); +} + +float also_dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + return 0; +} + +float dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + return also_dead(f); +} + +template +T aliveTemp(T f) { + // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #aliveTemp_fx_call + // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #aliveTemp_fy_call + // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #aliveTemp_fz_call + return 0; +} + +template T aliveTemp2(T f) { + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} + // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + return fx(f); // #aliveTemp2_fx_call +} + +half test(half x) { + return aliveTemp2(x); +} + +float test(float x) { + return aliveTemp2(x); +} + +class MyClass +{ + float F; + float makeF() { + // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(F); // #MyClass_makeF_fx_call + // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(F); // #MyClass_makeF_fy_call + // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the 
deployment target is Shader Model 6.0 compute environment}} + float C = fz(F); // #MyClass_makeF_fz_call + return 0; + } +}; + +// Exported function without body, not used +export void exportedFunctionUnused(float f); + +// Exported function with body, without export, not used +void exportedFunctionUnused(float f) { + // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #exportedFunctionUnused_fx_call + + // API with shader-stage-specific availability in unused exported library function + // - no errors expected because the actual shader stage this function + // will be used in not known at this time + float B = fy(f); + float C = fz(f); +} + +// Exported function with body - called from main() which is a compute shader entry point +export void exportedFunctionUsed(float f) { + // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #exportedFunctionUsed_fx_call + + // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #exportedFunctionUsed_fy_call + + // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #exportedFunctionUsed_fz_call +} + +namespace A { + namespace B { + export { + void exportedFunctionInNS(float x) { + // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(x); // #exportedFunctionInNS_fx_call + + // API with shader-stage-specific availability in exported library function + // - no errors expected because the actual shader stage this function + // will be used in not known at this time + float B = fy(x); + float C = fz(x); + } + } + } +} + +// Shader entry point without body +[shader("compute")] +[numthreads(4,1,1)] +float main(); + +// Shader entry point with body +[shader("compute")] +[numthreads(4,1,1)] +float main() { + float f = 3; + MyClass C = { 1.0f }; + float a = alive(f); + float b = aliveTemp(f); // #aliveTemp_inst + float c = C.makeF(); + float d = test((float)1.0); + float e = test((half)1.0); + exportedFunctionUsed(1.0f); + return a * b * c; +} diff --git a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl index 40687983839303a4ba7a052bf08db9a8c532ca29..65836c55821d77c72fa783bd88a7bae79ca0b723 100644 --- a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-compute.hlsl @@ -1,119 +1,119 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \ -// RUN: -fsyntax-only -Wno-error=hlsl-availability 
-verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 6.6))) -half fx(half); // #fx_half - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) -float fz(float); // #fz - -float also_alive(float f) { - // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_alive_fx_call - // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_alive_fy_call - // expected-warning@#also_alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_alive_fz_call - return 0; -} - -float alive(float f) { - // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #alive_fx_call - // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #alive_fy_call - // expected-warning@#alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #alive_fz_call - - return also_alive(f); -} - -float also_dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - return 0; -} - -float dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - - return also_dead(f); -} - -template -T aliveTemp(T f) { - // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #aliveTemp_fx_call - // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #aliveTemp_fy_call - // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being 
introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #aliveTemp_fz_call - return 0; -} - -template T aliveTemp2(T f) { - // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} - // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} - // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - return fx(f); // #aliveTemp2_fx_call -} - -half test(half x) { - return aliveTemp2(x); -} - -float test(float x) { - return aliveTemp2(x); -} - -class MyClass -{ - float F; - float makeF() { - // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(F); // #MyClass_makeF_fx_call - // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(F); // #MyClass_makeF_fy_call - // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(F); // #MyClass_makeF_fz_call - return 0; - } -}; - -[numthreads(4,1,1)] -float main() { - float f = 3; - MyClass C = { 1.0f }; - float a = alive(f); - float b = aliveTemp(f); // #aliveTemp_inst - float c = C.makeF(); - float d = test((float)1.0); - float e = test((half)1.0); - return a * b * c; -} +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \ +// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 6.6))) +half fx(half); // #fx_half + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) +float fz(float); // #fz + +float also_alive(float f) { + // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_alive_fx_call + // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_alive_fy_call + // expected-warning@#also_alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as 
being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_alive_fz_call + return 0; +} + +float alive(float f) { + // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #alive_fx_call + // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #alive_fy_call + // expected-warning@#alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #alive_fz_call + + return also_alive(f); +} + +float also_dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + return 0; +} + +float dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + + return also_dead(f); +} + +template +T aliveTemp(T f) { + // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #aliveTemp_fx_call + // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #aliveTemp_fy_call + // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #aliveTemp_fz_call + return 0; +} + +template T aliveTemp2(T f) { + // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} + // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} + // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + return fx(f); // #aliveTemp2_fx_call +} + +half test(half x) { + return aliveTemp2(x); +} + +float test(float x) { + return aliveTemp2(x); +} + +class MyClass +{ + float F; + float makeF() { + // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(F); // #MyClass_makeF_fx_call + // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being 
introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(F); // #MyClass_makeF_fy_call + // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(F); // #MyClass_makeF_fz_call + return 0; + } +}; + +[numthreads(4,1,1)] +float main() { + float f = 3; + MyClass C = { 1.0f }; + float a = alive(f); + float b = aliveTemp(f); // #aliveTemp_inst + float c = C.makeF(); + float d = test((float)1.0); + float e = test((half)1.0); + return a * b * c; +} diff --git a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl index a23e91a546b167c3c33015e5c04635ea84505ad6..4c9783138f670197b0dbb30b0444194e440a547f 100644 --- a/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-diag-relaxed-lib.hlsl @@ -1,162 +1,162 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ -// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 6.6))) -half fx(half); // #fx_half - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) -float fz(float); // #fz - -float also_alive(float f) { - // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_alive_fx_call - - // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_alive_fy_call - - // expected-warning@#also_alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_alive_fz_call - - return 0; -} - -float alive(float f) { - // expected-warning@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #alive_fx_call - - // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #alive_fy_call - - // expected-warning@#alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader 
Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #alive_fz_call - - return also_alive(f); -} - -float also_dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - return 0; -} - -float dead(float f) { - // unreachable code - no errors expected - float A = fx(f); - float B = fy(f); - float C = fz(f); - return also_dead(f); -} - -template -T aliveTemp(T f) { - // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #aliveTemp_fx_call - // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #aliveTemp_fy_call - // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #aliveTemp_fz_call - return 0; -} - -template T aliveTemp2(T f) { - // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} - // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} - // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - return fx(f); // #aliveTemp2_fx_call -} - -half test(half x) { - return aliveTemp2(x); -} - -float test(float x) { - return aliveTemp2(x); -} - -class MyClass -{ - float F; - float makeF() { - // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(F); // #MyClass_makeF_fx_call - // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(F); // #MyClass_makeF_fy_call - // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(F); // #MyClass_makeF_fz_call - return 0; - } -}; - -// Exported function without body, not used -export void exportedFunctionUnused(float f); - -// Exported function with body, without export, not used -void exportedFunctionUnused(float f) { - // expected-warning@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // 
#exportedFunctionUnused_fx_call - - // API with shader-stage-specific availability in unused exported library function - // - no errors expected because the actual shader stage this function - // will be used in not known at this time - float B = fy(f); - float C = fz(f); -} - -// Exported function with body - called from main() which is a compute shader entry point -export void exportedFunctionUsed(float f) { - // expected-warning@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #exportedFunctionUsed_fx_call - - // expected-warning@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #exportedFunctionUsed_fy_call - - // expected-warning@#exportedFunctionUsed_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #exportedFunctionUsed_fz_call -} - -// Shader entry point without body -[shader("compute")] -[numthreads(4,1,1)] -float main(); - -// Shader entry point with body -[shader("compute")] -[numthreads(4,1,1)] -float main() { - float f = 3; - MyClass C = { 1.0f }; - float a = alive(f); - float b = aliveTemp(f); // #aliveTemp_inst - float c = C.makeF(); - float d = test((float)1.0); - float e = test((half)1.0); - exportedFunctionUsed(1.0f); - return a * b * c; -} +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ +// RUN: -fsyntax-only -Wno-error=hlsl-availability -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 6.6))) +half fx(half); // #fx_half + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) +float fz(float); // #fz + +float also_alive(float f) { + // expected-warning@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_alive_fx_call + + // expected-warning@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_alive_fy_call + + // expected-warning@#also_alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_alive_fz_call + + return 0; +} + +float alive(float f) { + // expected-warning@#alive_fx_call {{'fx' is 
only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #alive_fx_call + + // expected-warning@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #alive_fy_call + + // expected-warning@#alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #alive_fz_call + + return also_alive(f); +} + +float also_dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + return 0; +} + +float dead(float f) { + // unreachable code - no errors expected + float A = fx(f); + float B = fy(f); + float C = fz(f); + return also_dead(f); +} + +template +T aliveTemp(T f) { + // expected-warning@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #aliveTemp_fx_call + // expected-warning@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #aliveTemp_fy_call + // expected-warning@#aliveTemp_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #aliveTemp_fz_call + return 0; +} + +template T aliveTemp2(T f) { + // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} + // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} + // expected-warning@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + return fx(f); // #aliveTemp2_fx_call +} + +half test(half x) { + return aliveTemp2(x); +} + +float test(float x) { + return aliveTemp2(x); +} + +class MyClass +{ + float F; + float makeF() { + // expected-warning@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(F); // #MyClass_makeF_fx_call + // expected-warning@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(F); // #MyClass_makeF_fy_call + // expected-warning@#MyClass_makeF_fz_call {{'fz' is unavailable}} + // 
expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(F); // #MyClass_makeF_fz_call + return 0; + } +}; + +// Exported function without body, not used +export void exportedFunctionUnused(float f); + +// Exported function with body, without export, not used +void exportedFunctionUnused(float f) { + // expected-warning@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #exportedFunctionUnused_fx_call + + // API with shader-stage-specific availability in unused exported library function + // - no errors expected because the actual shader stage this function + // will be used in not known at this time + float B = fy(f); + float C = fz(f); +} + +// Exported function with body - called from main() which is a compute shader entry point +export void exportedFunctionUsed(float f) { + // expected-warning@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #exportedFunctionUsed_fx_call + + // expected-warning@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #exportedFunctionUsed_fy_call + + // expected-warning@#exportedFunctionUsed_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #exportedFunctionUsed_fz_call +} + +// Shader entry point without body +[shader("compute")] +[numthreads(4,1,1)] +float main(); + +// Shader entry point with body +[shader("compute")] +[numthreads(4,1,1)] +float main() { + float f = 3; + MyClass C = { 1.0f }; + float a = alive(f); + float b = aliveTemp(f); // #aliveTemp_inst + float c = C.makeF(); + float d = test((float)1.0); + float e = test((half)1.0); + exportedFunctionUsed(1.0f); + return a * b * c; +} diff --git a/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl index a8783c10cbabca68f5187a53a32a0f9df67cf24d..b67e10c9a9017ac3f7a96d7f9504a3c4b87850f1 100644 --- a/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-diag-strict-compute.hlsl @@ -1,129 +1,129 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \ -// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 6.6))) -half fx(half); // #fx_half - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, 
introduced = 6.5, environment = mesh))) -float fz(float); // #fz - -float also_alive(float f) { - // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_alive_fx_call - // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_alive_fy_call - // expected-error@#also_alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_alive_fz_call - return 0; -} - -float alive(float f) { - // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #alive_fx_call - // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #alive_fy_call - // expected-error@#alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #alive_fz_call - - return also_alive(f); -} - -float also_dead(float f) { - // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_dead_fx_call - // expected-error@#also_dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_dead_fy_call - // expected-error@#also_dead_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_dead_fz_call - return 0; -} - -float dead(float f) { - // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #dead_fx_call - // expected-error@#dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #dead_fy_call - // 
expected-error@#dead_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #dead_fz_call - - return also_dead(f); -} - -template -T aliveTemp(T f) { - // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp' requested here}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #aliveTemp_fx_call - // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #aliveTemp_fy_call - // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #aliveTemp_fz_call - return 0; -} - -template T aliveTemp2(T f) { - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} - // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - return fx(f); // #aliveTemp2_fx_call -} - -half test(half x) { - return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} -} - -float test(float x) { - return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} -} - -class MyClass -{ - float F; - float makeF() { - // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(F); // #MyClass_makeF_fx_call - // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(F); // #MyClass_makeF_fy_call - // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(F); // #MyClass_makeF_fz_call - } -}; - -[numthreads(4,1,1)] -float main() { - float f = 3; - MyClass C = { 1.0f }; - float a = alive(f); - float b = aliveTemp(f); // #aliveTemp_inst - float c = C.makeF(); - float d = test((float)1.0); - float e = test((half)1.0); - return a * b * c; +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute \ +// RUN: -fhlsl-strict-availability 
-fsyntax-only -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 6.6))) +half fx(half); // #fx_half + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) +float fz(float); // #fz + +float also_alive(float f) { + // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_alive_fx_call + // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_alive_fy_call + // expected-error@#also_alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_alive_fz_call + return 0; +} + +float alive(float f) { + // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #alive_fx_call + // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #alive_fy_call + // expected-error@#alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #alive_fz_call + + return also_alive(f); +} + +float also_dead(float f) { + // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_dead_fx_call + // expected-error@#also_dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_dead_fy_call + // expected-error@#also_dead_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_dead_fz_call + return 0; +} + +float dead(float f) { + // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // 
expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #dead_fx_call + // expected-error@#dead_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #dead_fy_call + // expected-error@#dead_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #dead_fz_call + + return also_dead(f); +} + +template +T aliveTemp(T f) { + // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp' requested here}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #aliveTemp_fx_call + // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #aliveTemp_fy_call + // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #aliveTemp_fz_call + return 0; +} + +template T aliveTemp2(T f) { + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} + // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + return fx(f); // #aliveTemp2_fx_call +} + +half test(half x) { + return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} +} + +float test(float x) { + return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} +} + +class MyClass +{ + float F; + float makeF() { + // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(F); // #MyClass_makeF_fx_call + // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(F); // #MyClass_makeF_fy_call + // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in 
Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(F); // #MyClass_makeF_fz_call + } +}; + +[numthreads(4,1,1)] +float main() { + float f = 3; + MyClass C = { 1.0f }; + float a = alive(f); + float b = aliveTemp(f); // #aliveTemp_inst + float c = C.makeF(); + float d = test((float)1.0); + float e = test((half)1.0); + return a * b * c; } \ No newline at end of file diff --git a/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl b/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl index 0fffbc96dac19440691f80030ba9532493e8e855..c7be5afbc2d22f8d603d39de6ab5a1a2b6da7cb9 100644 --- a/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-diag-strict-lib.hlsl @@ -1,192 +1,192 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ -// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 6.6))) -half fx(half); // #fx_half - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) -float fz(float); // #fz - -// FIXME: all diagnostics marked as FUTURE will come alive when HLSL default -// diagnostic mode is implemented in a future PR which will verify calls in -// all functions that are reachable from the shader library entry points - -float also_alive(float f) { - // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_alive_fx_call - - // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #also_alive_fy_call - - // expected-error@#also_alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #also_alive_fz_call - - return 0; -} - -float alive(float f) { - // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #alive_fx_call - - // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #alive_fy_call - - // expected-error@#alive_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 
compute environment}} - float C = fz(f); // #alive_fz_call - - return also_alive(f); -} - -float also_dead(float f) { - // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #also_dead_fx_call - - // Call to environment-specific function from an unreachable function - // in a shader library - no diagnostic expected. - float B = fy(f); // #also_dead_fy_call - - // Call to environment-specific function from an unreachable function - // in a shader library - no diagnostic expected. - float C = fz(f); // #also_dead_fz_call - return 0; -} - -float dead(float f) { - // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #dead_fx_call - - // Call to environment-specific function from an unreachable function - // in a shader library - no diagnostic expected. - float B = fy(f); // #dead_fy_call - - // Call to environment-specific function from an unreachable function - // in a shader library - no diagnostic expected. - float C = fz(f); // #dead_fz_call - - return also_dead(f); -} - -template -T aliveTemp(T f) { - // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp' requested here}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #aliveTemp_fx_call - // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #aliveTemp_fy_call - // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #aliveTemp_fz_call - return 0; -} - -template T aliveTemp2(T f) { - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} - // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} - // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - return fx(f); // #aliveTemp2_fx_call -} - -half test(half x) { - return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} -} - -float test(float x) { - return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} -} - -class MyClass -{ - float F; - float makeF() { - // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the 
deployment target is Shader Model 6.0}} - float A = fx(F); // #MyClass_makeF_fx_call - // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(F); // #MyClass_makeF_fy_call - // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(F); // #MyClass_makeF_fz_call - } -}; - -// Exported function without body, not used -export void exportedFunctionUnused(float f); - -// Exported function with body, without export, not used -void exportedFunctionUnused(float f) { - // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #exportedFunctionUnused_fx_call - - // API with shader-stage-specific availability in unused exported library function - // - no errors expected because the actual shader stage this function - // will be used in not known at this time - float B = fy(f); - float C = fz(f); -} - -// Exported function with body - called from main() which is a compute shader entry point -export void exportedFunctionUsed(float f) { - // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #exportedFunctionUsed_fx_call - - // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #exportedFunctionUsed_fy_call - - // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} - float C = fz(f); // #exportedFunctionUsed_fz_call -} - -namespace A { - namespace B { - export { - void exportedFunctionInNS(float x) { - // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(x); // #exportedFunctionInNS_fx_call - - // API with shader-stage-specific availability in exported library function - // - no errors expected because the actual shader stage this function - // will be used in not known at this time - float B = fy(x); - float C = fz(x); - } - } - } -} - -[shader("compute")] -[numthreads(4,1,1)] -float main() { - float f = 3; - MyClass C = { 1.0f }; - float a = alive(f);float b = aliveTemp(f); // #aliveTemp_inst - float c = C.makeF(); - float d = test((float)1.0); - float e = test((half)1.0); - exportedFunctionUsed(1.0f); - return a * b * c; -} +// RUN: %clang_cc1 -triple 
dxil-pc-shadermodel6.0-library \ +// RUN: -fhlsl-strict-availability -fsyntax-only -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 6.6))) +half fx(half); // #fx_half + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = mesh))) +float fz(float); // #fz + +// FIXME: all diagnostics marked as FUTURE will come alive when HLSL default +// diagnostic mode is implemented in a future PR which will verify calls in +// all functions that are reachable from the shader library entry points + +float also_alive(float f) { + // expected-error@#also_alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_alive_fx_call + + // expected-error@#also_alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #also_alive_fy_call + + // expected-error@#also_alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #also_alive_fz_call + + return 0; +} + +float alive(float f) { + // expected-error@#alive_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #alive_fx_call + + // expected-error@#alive_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #alive_fy_call + + // expected-error@#alive_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #alive_fz_call + + return also_alive(f); +} + +float also_dead(float f) { + // expected-error@#also_dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #also_dead_fx_call + + // Call to environment-specific function from an unreachable function + // in a shader library - no diagnostic expected. + float B = fy(f); // #also_dead_fy_call + + // Call to environment-specific function from an unreachable function + // in a shader library - no diagnostic expected. 
+ float C = fz(f); // #also_dead_fz_call + return 0; +} + +float dead(float f) { + // expected-error@#dead_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #dead_fx_call + + // Call to environment-specific function from an unreachable function + // in a shader library - no diagnostic expected. + float B = fy(f); // #dead_fy_call + + // Call to environment-specific function from an unreachable function + // in a shader library - no diagnostic expected. + float C = fz(f); // #dead_fz_call + + return also_dead(f); +} + +template +T aliveTemp(T f) { + // expected-error@#aliveTemp_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#aliveTemp_inst {{in instantiation of function template specialization 'aliveTemp' requested here}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #aliveTemp_fx_call + // expected-error@#aliveTemp_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #aliveTemp_fy_call + // expected-error@#aliveTemp_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #aliveTemp_fz_call + return 0; +} + +template T aliveTemp2(T f) { + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.6 or newer}} + // expected-note@#fx_half {{'fx' has been marked as being introduced in Shader Model 6.6 here, but the deployment target is Shader Model 6.0}} + // expected-error@#aliveTemp2_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + return fx(f); // #aliveTemp2_fx_call +} + +half test(half x) { + return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} +} + +float test(float x) { + return aliveTemp2(x); // expected-note {{in instantiation of function template specialization 'aliveTemp2' requested here}} +} + +class MyClass +{ + float F; + float makeF() { + // expected-error@#MyClass_makeF_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(F); // #MyClass_makeF_fx_call + // expected-error@#MyClass_makeF_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(F); // #MyClass_makeF_fy_call + // expected-error@#MyClass_makeF_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + 
float C = fz(F); // #MyClass_makeF_fz_call + } +}; + +// Exported function without body, not used +export void exportedFunctionUnused(float f); + +// Exported function with body, without export, not used +void exportedFunctionUnused(float f) { + // expected-error@#exportedFunctionUnused_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #exportedFunctionUnused_fx_call + + // API with shader-stage-specific availability in unused exported library function + // - no errors expected because the actual shader stage this function + // will be used in not known at this time + float B = fy(f); + float C = fz(f); +} + +// Exported function with body - called from main() which is a compute shader entry point +export void exportedFunctionUsed(float f) { + // expected-error@#exportedFunctionUsed_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #exportedFunctionUsed_fx_call + + // expected-error@#exportedFunctionUsed_fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #exportedFunctionUsed_fy_call + + // expected-error@#exportedFunctionUsed_fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 6.5 in mesh environment here, but the deployment target is Shader Model 6.0 compute environment}} + float C = fz(f); // #exportedFunctionUsed_fz_call +} + +namespace A { + namespace B { + export { + void exportedFunctionInNS(float x) { + // expected-error@#exportedFunctionInNS_fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{'fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(x); // #exportedFunctionInNS_fx_call + + // API with shader-stage-specific availability in exported library function + // - no errors expected because the actual shader stage this function + // will be used in not known at this time + float B = fy(x); + float C = fz(x); + } + } + } +} + +[shader("compute")] +[numthreads(4,1,1)] +float main() { + float f = 3; + MyClass C = { 1.0f }; + float a = alive(f);float b = aliveTemp(f); // #aliveTemp_inst + float c = C.makeF(); + float d = test((float)1.0); + float e = test((half)1.0); + exportedFunctionUsed(1.0f); + return a * b * c; +} diff --git a/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl b/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl index bfefc9b116a64f9e701f6e924b57b7bdb98744a9..b56ab8fe4526ba896c8a20d97b6bc52496d69798 100644 --- a/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl +++ b/clang/test/SemaHLSL/Availability/avail-lib-multiple-stages.hlsl @@ -1,57 +1,57 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ -// RUN: -fsyntax-only -verify %s - -__attribute__((availability(shadermodel, introduced = 6.5))) -float fx(float); // #fx - -__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) -__attribute__((availability(shadermodel, introduced = 
6.5, environment = compute))) -float fy(float); // #fy - -__attribute__((availability(shadermodel, introduced = 5.0, environment = compute))) -float fz(float); // #fz - - -void F(float f) { - // Make sure we only get this error once, even though this function is scanned twice - once - // in compute shader context and once in pixel shader context. - // expected-error@#fx_call {{'fx' is only available on Shader Model 6.5 or newer}} - // expected-note@#fx {{fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} - float A = fx(f); // #fx_call - - // expected-error@#fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} - // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} - float B = fy(f); // #fy_call - - // expected-error@#fz_call {{'fz' is unavailable}} - // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 5.0 in compute environment here, but the deployment target is Shader Model 6.0 pixel environment}} - float X = fz(f); // #fz_call -} - -void deadCode(float f) { - // no diagnostics expected under default diagnostic mode - float A = fx(f); - float B = fy(f); - float X = fz(f); -} - -// Pixel shader -[shader("pixel")] -void mainPixel() { - F(1.0); -} - -// First Compute shader -[shader("compute")] -[numthreads(4,1,1)] -void mainCompute1() { - F(2.0); -} - -// Second compute shader to make sure we do not get duplicate messages if F is called -// from multiple entry points. -[shader("compute")] -[numthreads(4,1,1)] -void mainCompute2() { - F(3.0); -} +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library \ +// RUN: -fsyntax-only -verify %s + +__attribute__((availability(shadermodel, introduced = 6.5))) +float fx(float); // #fx + +__attribute__((availability(shadermodel, introduced = 5.0, environment = pixel))) +__attribute__((availability(shadermodel, introduced = 6.5, environment = compute))) +float fy(float); // #fy + +__attribute__((availability(shadermodel, introduced = 5.0, environment = compute))) +float fz(float); // #fz + + +void F(float f) { + // Make sure we only get this error once, even though this function is scanned twice - once + // in compute shader context and once in pixel shader context. 
+ // expected-error@#fx_call {{'fx' is only available on Shader Model 6.5 or newer}} + // expected-note@#fx {{fx' has been marked as being introduced in Shader Model 6.5 here, but the deployment target is Shader Model 6.0}} + float A = fx(f); // #fx_call + + // expected-error@#fy_call {{'fy' is only available in compute environment on Shader Model 6.5 or newer}} + // expected-note@#fy {{'fy' has been marked as being introduced in Shader Model 6.5 in compute environment here, but the deployment target is Shader Model 6.0 compute environment}} + float B = fy(f); // #fy_call + + // expected-error@#fz_call {{'fz' is unavailable}} + // expected-note@#fz {{'fz' has been marked as being introduced in Shader Model 5.0 in compute environment here, but the deployment target is Shader Model 6.0 pixel environment}} + float X = fz(f); // #fz_call +} + +void deadCode(float f) { + // no diagnostics expected under default diagnostic mode + float A = fx(f); + float B = fy(f); + float X = fz(f); +} + +// Pixel shader +[shader("pixel")] +void mainPixel() { + F(1.0); +} + +// First Compute shader +[shader("compute")] +[numthreads(4,1,1)] +void mainCompute1() { + F(2.0); +} + +// Second compute shader to make sure we do not get duplicate messages if F is called +// from multiple entry points. +[shader("compute")] +[numthreads(4,1,1)] +void mainCompute2() { + F(3.0); +} diff --git a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp similarity index 100% rename from clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp rename to clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp old mode 100755 new mode 100644 diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 6cf20ab7c183cefb3fd3085fc308d7d64c4f7fa6..b5160f9a048cc4e31fc9f359cbf79ee5c34bd299 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -151,7 +151,14 @@ if ("${COMPILER_RT_DEFAULT_TARGET_TRIPLE}" MATCHES ".*android.*") string(REGEX MATCH "-target(=| +)[^ ]+android[a-z]*([0-9]+)" ANDROID_API_LEVEL "${CMAKE_C_FLAGS}") set(ANDROID_API_LEVEL ${CMAKE_MATCH_2}) endif() + +# We define OHOS for ohos targets for now +if (OHOS) + set(OHOS_FAMILY 1) +endif() + pythonize_bool(ANDROID) +pythonize_bool(OHOS_FAMILY) set(COMPILER_RT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(COMPILER_RT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -162,7 +169,7 @@ pythonize_bool(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR) # and target a UNIX-like system or Windows. # We can run tests on Android even when we are cross-compiling. 
if((NOT "${CMAKE_CROSSCOMPILING}" AND (UNIX OR WIN32)) - OR ANDROID OR COMPILER_RT_EMULATOR) + OR ANDROID OR COMPILER_RT_EMULATOR OR OHOS) option(COMPILER_RT_CAN_EXECUTE_TESTS "Can we execute instrumented tests" ON) else() option(COMPILER_RT_CAN_EXECUTE_TESTS "Can we execute instrumented tests" OFF) @@ -293,7 +300,7 @@ cmake_dependent_option(COMPILER_RT_STATIC_CXX_LIBRARY "COMPILER_RT_CXX_LIBRARY" OFF) set(DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY OFF) -if (FUCHSIA) +if (FUCHSIA OR OHOS) set(DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY ON) endif() @@ -573,24 +580,9 @@ if (SANITIZER_NO_UNDEFINED_SYMBOLS) list(APPEND SANITIZER_COMMON_LINK_FLAGS -Wl,-z,defs) endif() -# TODO: COMPILER_RT_COMMON_CFLAGS and COMPILER_RT_COMMON_LINK_FLAGS are -# intended for use in non-sanitizer runtimes such as libFuzzer, profile or XRay, -# move these higher to include common flags, then derive SANITIZER_COMMON_CFLAGS -# and SANITIZER_COMMON_LINK_FLAGS from those and append sanitizer-specific flags. -set(COMPILER_RT_COMMON_CFLAGS ${SANITIZER_COMMON_CFLAGS}) -set(COMPILER_RT_COMMON_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}) - -# We don't use the C++ standard library, so avoid including it by mistake. -append_list_if(COMPILER_RT_HAS_NOSTDINCXX_FLAG -nostdinc++ SANITIZER_COMMON_CFLAGS) -append_list_if(COMPILER_RT_HAS_NOSTDLIBXX_FLAG -nostdlib++ SANITIZER_COMMON_LINK_FLAGS) - -# Remove -stdlib= which is unused when passing -nostdinc++... -string(REGEX MATCHALL "-stdlib=[a-zA-Z+]*" stdlib_flag "${CMAKE_CXX_FLAGS}") -string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - -# ...we need it to build some runtimes and tests so readd it where appropriate. -list(APPEND COMPILER_RT_COMMON_CFLAGS ${stdlib_flag}) -list(APPEND COMPILER_RT_COMMON_LINK_FLAGS ${stdlib_flag}) +if (OHOS) + list(APPEND SANITIZER_COMMON_LINK_LIBS unwind) +endif() # TODO: There's a lot of duplication across lib/*/tests/CMakeLists.txt files, # move some of the common flags to COMPILER_RT_UNITTEST_CFLAGS. 
diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index 6962b733733a6ab8d07554409849a49f7706d84c..e6331898d94dedb5e8c72c3227dc9d0954fe51c4 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -272,9 +272,6 @@ function(add_compiler_rt_runtime name type) NOT name STREQUAL "clang_rt.builtins") get_compiler_rt_target(${arch} target) find_compiler_rt_library(builtins builtins_${libname} TARGET ${target}) - if(builtins_${libname} STREQUAL "NOTFOUND") - message(FATAL_ERROR "Cannot find builtins library for the target architecture") - endif() endif() set(sources_${libname} ${LIB_SOURCES}) format_object_libs(sources_${libname} ${arch} ${LIB_OBJECT_LIBS}) diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index 37ad48bef818a2e37e940b8f37b22e79706aa307..3bfd1e3ba838f04072bb723acb1c6a096936a616 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -49,7 +49,7 @@ if(OS_NAME MATCHES "Linux") ${RISCV64} ${LOONGARCH64}) elseif (OS_NAME MATCHES "Windows") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64}) -elseif(OS_NAME MATCHES "Android") +elseif(OS_NAME MATCHES "Android|OHOS") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}) elseif(OS_NAME MATCHES "Fuchsia") set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64}) diff --git a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake index 379e2c25949cb4150d30927a9e5204a2dd0f6a17..7538dad4fb9ca665b24444e838112f9bb4392838 100644 --- a/compiler-rt/cmake/Modules/CompilerRTUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTUtils.cmake @@ -512,7 +512,11 @@ endfunction() function(get_compiler_rt_install_dir arch install_dir) if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) get_compiler_rt_target(${arch} target) - set(${install_dir} ${COMPILER_RT_INSTALL_LIBRARY_DIR}/${target} PARENT_SCOPE) + if(OHOS) + set(${install_dir} ${COMPILER_RT_INSTALL_LIBRARY_DIR}/${target}/${LLVM_TARGET_MULTILIB_SUFFIX} PARENT_SCOPE) + else() + set(${install_dir} ${COMPILER_RT_INSTALL_LIBRARY_DIR}/${target} PARENT_SCOPE) + endif() else() set(${install_dir} ${COMPILER_RT_INSTALL_LIBRARY_DIR} PARENT_SCOPE) endif() @@ -521,7 +525,11 @@ endfunction() function(get_compiler_rt_output_dir arch output_dir) if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) get_compiler_rt_target(${arch} target) - set(${output_dir} ${COMPILER_RT_OUTPUT_LIBRARY_DIR}/${target} PARENT_SCOPE) + if(OHOS) + set(${output_dir} ${COMPILER_RT_OUTPUT_LIBRARY_DIR}/${target}/${LLVM_TARGET_MULTILIB_SUFFIX} PARENT_SCOPE) + else() + set(${output_dir} ${COMPILER_RT_OUTPUT_LIBRARY_DIR}/${target} PARENT_SCOPE) + endif() else() set(${output_dir} ${COMPILER_RT_OUTPUT_LIBRARY_DIR} PARENT_SCOPE) endif() diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake index 5a97992756a9c9ab6468256aad0eaf70f1905103..a91c259a1f6d1a1ef81a29e127be70aeaa5f66ad 100644 --- a/compiler-rt/cmake/base-config-ix.cmake +++ b/compiler-rt/cmake/base-config-ix.cmake @@ -200,6 +200,9 @@ macro(test_targets) # Examine compiler output to determine target architecture. 
detect_target_arch() set(COMPILER_RT_OS_SUFFIX "-android") + elseif(OHOS) + detect_target_arch() + set(COMPILER_RT_OS_SUFFIX "") elseif(NOT APPLE) # Supported archs for Apple platforms are generated later if(COMPILER_RT_DEFAULT_TARGET_ONLY) add_default_target_arch(${COMPILER_RT_DEFAULT_TARGET_ARCH}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index dad557af2ae8c8d8a02674925863f0d732d9e50c..c5350ac0b9d4299b607a9871ef7b46e279bed46c 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -62,6 +62,8 @@ if (C_SUPPORTS_NODEFAULTLIBS_FLAG) shell32 user32 kernel32 mingw32 ${MINGW_RUNTIME} moldname mingwex msvcrt) list(APPEND CMAKE_REQUIRED_LIBRARIES ${MINGW_LIBRARIES}) + elseif (OHOS) + list(APPEND CMAKE_REQUIRED_LIBRARIES unwind) endif() if (NOT TARGET unwind) # Don't check for a library named unwind, if there's a target with that name within @@ -178,9 +180,13 @@ check_include_files("sys/auxv.h" COMPILER_RT_HAS_AUXV) # Libraries. check_library_exists(atomic __atomic_load_8 "" COMPILER_RT_HAS_LIBATOMIC) check_library_exists(dl dlopen "" COMPILER_RT_HAS_LIBDL) -check_library_exists(rt shm_open "" COMPILER_RT_HAS_LIBRT) +if (NOT OHOS) + check_library_exists(rt shm_open "" COMPILER_RT_HAS_LIBRT) +endif() check_library_exists(m pow "" COMPILER_RT_HAS_LIBM) -check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD) +if (NOT OHOS) + check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD) +endif() check_library_exists(execinfo backtrace "" COMPILER_RT_HAS_LIBEXECINFO) if (ANDROID AND COMPILER_RT_HAS_LIBDL) @@ -740,7 +746,7 @@ set(COMPILER_RT_SANITIZERS_TO_BUILD all CACHE STRING list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}") if (SANITIZER_COMMON_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND - (OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD|NetBSD|Fuchsia|SunOS" OR + (OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD|NetBSD|Fuchsia|SunOS|OHOS" OR (OS_NAME MATCHES "Windows" AND NOT CYGWIN AND (NOT MINGW OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")))) set(COMPILER_RT_HAS_SANITIZER_COMMON TRUE) @@ -766,7 +772,7 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND HWASAN_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux|Android|Fuchsia") + OS_NAME MATCHES "Linux|Android|Fuchsia|OHOS") set(COMPILER_RT_HAS_HWASAN TRUE) else() set(COMPILER_RT_HAS_HWASAN FALSE) @@ -779,7 +785,7 @@ else() set(COMPILER_RT_HAS_RTSAN FALSE) endif() -if (OS_NAME MATCHES "Linux|FreeBSD|Windows|NetBSD|SunOS") +if (OS_NAME MATCHES "Linux|FreeBSD|Windows|NetBSD|SunOS|OHOS") set(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME TRUE) else() set(COMPILER_RT_ASAN_HAS_STATIC_RUNTIME FALSE) @@ -816,7 +822,7 @@ else() endif() if (PROFILE_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND - OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android|Fuchsia|SunOS|NetBSD|AIX") + OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android|Fuchsia|SunOS|NetBSD|AIX|OHOS") set(COMPILER_RT_HAS_PROFILE TRUE) else() set(COMPILER_RT_HAS_PROFILE FALSE) @@ -830,7 +836,7 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH) - if (OS_NAME MATCHES "Linux|Darwin|FreeBSD|NetBSD") + if (OS_NAME MATCHES "Linux|Darwin|FreeBSD|NetBSD|OHOS") set(COMPILER_RT_HAS_TSAN TRUE) elseif (OS_NAME MATCHES "Android" AND ANDROID_PLATFORM_LEVEL GREATER 23) set(COMPILER_RT_HAS_TSAN TRUE) @@ -848,14 +854,14 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND - OS_NAME MATCHES 
"Darwin|Linux|FreeBSD|NetBSD|Windows|Android|Fuchsia|SunOS") + OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|Windows|Android|Fuchsia|SunOS|OHOS") set(COMPILER_RT_HAS_UBSAN TRUE) else() set(COMPILER_RT_HAS_UBSAN FALSE) endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux|FreeBSD|NetBSD|Android|Darwin") + OS_NAME MATCHES "Linux|FreeBSD|NetBSD|Android|Darwin|OHOS") set(COMPILER_RT_HAS_UBSAN_MINIMAL TRUE) else() set(COMPILER_RT_HAS_UBSAN_MINIMAL FALSE) @@ -885,6 +891,13 @@ else() set(COMPILER_RT_HAS_SCUDO_STANDALONE FALSE) endif() +if (COMPILER_RT_HAS_SANITIZER_COMMON AND SCUDO_SUPPORTED_ARCH AND + OS_NAME MATCHES "Linux|Fuchsia|OHOS") + set(COMPILER_RT_HAS_SCUDO TRUE) +else() + set(COMPILER_RT_HAS_SCUDO FALSE) +endif() + if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|Fuchsia") set(COMPILER_RT_HAS_XRAY TRUE) @@ -899,14 +912,14 @@ else() endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND FUZZER_SUPPORTED_ARCH AND - OS_NAME MATCHES "Android|Darwin|Linux|NetBSD|FreeBSD|Fuchsia|Windows") + OS_NAME MATCHES "Android|Darwin|Linux|NetBSD|FreeBSD|Fuchsia|Windows|OHOS") set(COMPILER_RT_HAS_FUZZER TRUE) else() set(COMPILER_RT_HAS_FUZZER FALSE) endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND SHADOWCALLSTACK_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux|Android") + OS_NAME MATCHES "Linux|Android|OHOS") set(COMPILER_RT_HAS_SHADOWCALLSTACK TRUE) else() set(COMPILER_RT_HAS_SHADOWCALLSTACK FALSE) diff --git a/compiler-rt/cmake/crt-config-ix.cmake b/compiler-rt/cmake/crt-config-ix.cmake index ebc7d671e74ee6593cb7338f55a25cda6a8c1813..64112f1636b098a0a21674157e1a58417c553b1c 100644 --- a/compiler-rt/cmake/crt-config-ix.cmake +++ b/compiler-rt/cmake/crt-config-ix.cmake @@ -47,7 +47,7 @@ if(NOT APPLE) message(STATUS "Supported architectures for crt: ${CRT_SUPPORTED_ARCH}") endif() -if (CRT_SUPPORTED_ARCH AND OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER) +if (CRT_SUPPORTED_ARCH AND OS_NAME MATCHES "Linux|SerenityOS|OHOS" AND NOT LLVM_USE_SANITIZER) set(COMPILER_RT_HAS_CRT TRUE) else() set(COMPILER_RT_HAS_CRT FALSE) diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index c3c4fae85b129ceec7c2d04aff7fc7e2c53ede72..cb56fcc88527346736f191b65b1332489a7eab0b 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -192,7 +192,7 @@ typedef DefaultSizeClassMap SizeClassMap; const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x20000000000ULL; // 2T. typedef DefaultSizeClassMap SizeClassMap; -# elif defined(__aarch64__) && SANITIZER_ANDROID +#elif defined(__aarch64__) && (SANITIZER_ANDROID || SANITIZER_OHOS) // Android needs to support 39, 42 and 48 bit VMA. const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x2000000000ULL; // 128G. diff --git a/compiler-rt/lib/asan/asan_internal.h b/compiler-rt/lib/asan/asan_internal.h index 06dfc4b17733975d61876a2c05a16ac6366e80b4..9ad0dcdd57b36024eb6de16115e93178c962f7e2 100644 --- a/compiler-rt/lib/asan/asan_internal.h +++ b/compiler-rt/lib/asan/asan_internal.h @@ -35,7 +35,7 @@ // If set, values like allocator chunk size, as well as defaults for some flags // will be changed towards less memory overhead. 
#ifndef ASAN_LOW_MEMORY
-# if SANITIZER_IOS || SANITIZER_ANDROID
+# if SANITIZER_IOS || SANITIZER_ANDROID || SANITIZER_OHOS
# define ASAN_LOW_MEMORY 1
# else
# define ASAN_LOW_MEMORY 0
diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp
index 0b470db86748fb54f49b2b1bcff37c8a37512003..03372badea62250d9e38bc4e408b02be89367bc4 100644
--- a/compiler-rt/lib/asan/asan_linux.cpp
+++ b/compiler-rt/lib/asan/asan_linux.cpp
@@ -45,7 +45,8 @@
# include
# endif
-# if SANITIZER_ANDROID || SANITIZER_FREEBSD || SANITIZER_SOLARIS
+# if SANITIZER_ANDROID || SANITIZER_FREEBSD || SANITIZER_SOLARIS|| \
+ SANITIZER_OHOS
# include
# elif SANITIZER_NETBSD
# include
@@ -107,7 +108,7 @@ void FlushUnneededASanShadowMemory(uptr p, uptr size) {
ReleaseMemoryPagesToOS(MemToShadow(p), MemToShadow(p + size));
}
-# if SANITIZER_ANDROID
+# if SANITIZER_ANDROID || SANITIZER_OHOS
// FIXME: should we do anything for Android?
void AsanCheckDynamicRTPrereqs() {}
void AsanCheckIncompatibleRT() {}
@@ -216,6 +217,7 @@ void SignContextStack(void *context) {
ucp->uc_stack.ss_flags = HashContextStack(*ucp);
}
+#if !SANITIZER_ANDROID && !SANITIZER_OHOS
void ReadContextStack(void *context, uptr *stack, uptr *ssize) {
const ucontext_t *ucp = reinterpret_cast(context);
if (HashContextStack(*ucp) == ucp->uc_stack.ss_flags) {
diff --git a/compiler-rt/lib/asan/tests/asan_test.cpp b/compiler-rt/lib/asan/tests/asan_test.cpp
index 827c2ae3a9cdc87d12a1c23b2a1fba0715054157..e0f30cd0aad282014d0942a854cacdf77c22c842 100644
--- a/compiler-rt/lib/asan/tests/asan_test.cpp
+++ b/compiler-rt/lib/asan/tests/asan_test.cpp
@@ -1330,7 +1330,9 @@ TEST(AddressSanitizer, LongDoubleNegativeTest) {
memcpy(Ident(&c), Ident(&b), sizeof(long double));
}
-#if !defined(_WIN32)
+#if !defined(_WIN32) && !defined(__OHOS__)
+// On OHOS/Musl sched_param is not int.
+// See __interceptor_pthread_getschedparam
TEST(AddressSanitizer, pthread_getschedparam) {
int policy;
struct sched_param param;
diff --git a/compiler-rt/lib/builtins/divtf3.c b/compiler-rt/lib/builtins/divtf3.c
index bd76763b07d3a417946cb822273c55c133a5e165..4da9e899d0234fc4c9f6dd5115e9314b2c863acd 100644
--- a/compiler-rt/lib/builtins/divtf3.c
+++ b/compiler-rt/lib/builtins/divtf3.c
@@ -16,7 +16,7 @@
#if defined(CRT_HAS_TF_MODE)
-#define NUMBER_OF_HALF_ITERATIONS 4
+#define NUMBER_OF_HALF_ITERATIONS 5
#define NUMBER_OF_FULL_ITERATIONS 1
#include "fp_div_impl.inc"
diff --git a/compiler-rt/lib/builtins/fp_div_impl.inc b/compiler-rt/lib/builtins/fp_div_impl.inc
index 29bcd1920edfb4c5ec59f2e4201548b0339f217e..b49a6fd2bd73721a906bf8cc66addee777a45e70 100644
--- a/compiler-rt/lib/builtins/fp_div_impl.inc
+++ b/compiler-rt/lib/builtins/fp_div_impl.inc
@@ -325,7 +325,7 @@ static __inline fp_t __divXf3__(fp_t a, fp_t b) {
#define RECIPROCAL_PRECISION REP_C(10)
#elif defined(DOUBLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1
#define RECIPROCAL_PRECISION REP_C(220)
-#elif defined(QUAD_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1
+#elif defined(QUAD_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 5 && NUMBER_OF_FULL_ITERATIONS == 1
#define RECIPROCAL_PRECISION REP_C(13922)
#else
#error Invalid number of iterations
diff --git a/compiler-rt/lib/builtins/int_util.h b/compiler-rt/lib/builtins/int_util.h
index c372c2edc637137d28926ad9af948f31766907ba..32b66732f8f7514391b6b7ce0dc6f6f4e78045da 100644
--- a/compiler-rt/lib/builtins/int_util.h
+++ b/compiler-rt/lib/builtins/int_util.h
@@ -40,6 +40,9 @@ NORETURN void __compilerrt_abort_impl(const char *file, int line,
#define REPEAT_4_TIMES(code_to_repeat) \
REPEAT_3_TIMES(code_to_repeat) \
code_to_repeat
+#define REPEAT_5_TIMES(code_to_repeat) \
+ REPEAT_4_TIMES(code_to_repeat) \
+ code_to_repeat
#define REPEAT_N_TIMES_(N, code_to_repeat) REPEAT_##N##_TIMES(code_to_repeat)
#define REPEAT_N_TIMES(N, code_to_repeat) REPEAT_N_TIMES_(N, code_to_repeat)
diff --git a/compiler-rt/lib/cfi/CMakeLists.txt b/compiler-rt/lib/cfi/CMakeLists.txt
index 2197fa4a5c758d8da9c856ba7a6bbfd83556a6d3..cf8479d39c25e09a0fbdac54ea9b029d28d717ae 100644
--- a/compiler-rt/lib/cfi/CMakeLists.txt
+++ b/compiler-rt/lib/cfi/CMakeLists.txt
@@ -1,6 +1,6 @@
add_compiler_rt_component(cfi)
-if(OS_NAME MATCHES "Linux" OR OS_NAME MATCHES "FreeBSD" OR OS_NAME MATCHES "NetBSD")
+if(OS_NAME MATCHES "Linux" OR OS_NAME MATCHES "FreeBSD" OR OS_NAME MATCHES "NetBSD" OR OS_NAME MATCHES "OHOS")
set(CFI_SOURCES cfi.cpp )
diff --git a/compiler-rt/lib/fuzzer/CMakeLists.txt b/compiler-rt/lib/fuzzer/CMakeLists.txt
index fb5adf1e5c9e6984a8267f05eac44c5d70661b9d..4c84f0475ed76eaf138149a080840ef7a1d02ae1 100644
--- a/compiler-rt/lib/fuzzer/CMakeLists.txt
+++ b/compiler-rt/lib/fuzzer/CMakeLists.txt
@@ -147,7 +147,7 @@ if(OS_NAME MATCHES "Android|Linux|Fuchsia" AND
set(cxx_${arch}_merge_dir "${CMAKE_CURRENT_BINARY_DIR}/cxx_${arch}_merge.dir")
file(MAKE_DIRECTORY ${cxx_${arch}_merge_dir})
add_custom_command(TARGET clang_rt.${name}-${arch} POST_BUILD
- COMMAND ${CMAKE_CXX_COMPILER} ${target_cflags} -Wl,--whole-archive "$" -Wl,--no-whole-archive ${dir}/lib/libc++.a -r -o ${name}.o
+ COMMAND ${CMAKE_CXX_COMPILER} ${target_cflags} -Wl,--whole-archive "$" -Wl,--no-whole-archive ${dir}/lib/libc++.a -nodefaultlibs -r -o ${name}.o
COMMAND ${CMAKE_OBJCOPY} --localize-hidden ${name}.o
COMMAND ${CMAKE_COMMAND} -E remove "$"
COMMAND ${CMAKE_AR} qcs "$" ${name}.o
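The divtf3.c hunk above raises NUMBER_OF_HALF_ITERATIONS from 4 to 5, and int_util.h gains a matching REPEAT_5_TIMES helper, presumably because fp_div_impl.inc expands REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, ...), so each iteration count needs a concrete REPEAT_<N>_TIMES definition. A self-contained C sketch of that dispatch is shown below; the REPEAT_3/4/5 and REPEAT_N_TIMES bodies mirror the context quoted above, while the lower-numbered helpers and main() are illustrative assumptions.

#include <stdio.h>

/* Mirrors the int_util.h pattern quoted above; REPEAT_1/2_TIMES are assumed
   to follow the same shape as the higher-numbered helpers. */
#define REPEAT_1_TIMES(code_to_repeat) code_to_repeat
#define REPEAT_2_TIMES(code_to_repeat) REPEAT_1_TIMES(code_to_repeat) code_to_repeat
#define REPEAT_3_TIMES(code_to_repeat) REPEAT_2_TIMES(code_to_repeat) code_to_repeat
#define REPEAT_4_TIMES(code_to_repeat) REPEAT_3_TIMES(code_to_repeat) code_to_repeat
#define REPEAT_5_TIMES(code_to_repeat) REPEAT_4_TIMES(code_to_repeat) code_to_repeat
#define REPEAT_N_TIMES_(N, code_to_repeat) REPEAT_##N##_TIMES(code_to_repeat)
#define REPEAT_N_TIMES(N, code_to_repeat) REPEAT_N_TIMES_(N, code_to_repeat)

#define NUMBER_OF_HALF_ITERATIONS 5

int main(void) {
  int steps = 0;
  /* Expands to five copies of "++steps;". Without a REPEAT_5_TIMES helper the
     expansion would leave the unknown name REPEAT_5_TIMES behind and fail to
     compile, which is why the divtf3.c and int_util.h changes go together. */
  REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, ++steps;)
  printf("%d\n", steps); /* prints 5 */
  return 0;
}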
diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index ccdc0b4bc21bd36a131b77e13c58d682c2742433..4d0203eab51f9123a2318985af0db3727da47433 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -83,7 +83,7 @@ static void InitializeFlags() { cf.intercept_tls_get_addr = true; cf.exitcode = 99; // 8 shadow pages ~512kB, small enough to cover common stack sizes. - cf.clear_shadow_mmap_threshold = 4096 * (SANITIZER_ANDROID ? 2 : 8); + cf.clear_shadow_mmap_threshold = 4096 * ((SANITIZER_ANDROID) ? 2 : 8); // Sigtrap is used in error reporting. cf.handle_sigtrap = kHandleSignalExclusive; // For now only tested on Linux and Fuchsia. Other plantforms can be turned diff --git a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp index 48bc3b631ac076356c6abc280ba854c897f731d6..9bf9e7eeaa61c2c3f689b11783a10cd9972c00b1 100644 --- a/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp +++ b/compiler-rt/lib/hwasan/hwasan_dynamic_shadow.cpp @@ -26,7 +26,7 @@ // The code in this file needs to run in an unrelocated binary. It should not // access any external symbol, including its own non-hidden globals. -#if SANITIZER_ANDROID +#if SANITIZER_ANDROID || SANITIZER_OHOS extern "C" { INTERFACE_ATTRIBUTE void __hwasan_shadow(); diff --git a/compiler-rt/lib/interception/interception.h b/compiler-rt/lib/interception/interception.h index 38c152952e323269af485ee386d621288d93f145..5d9d4f89468cdc50da4c6544c7450c37badc5c05 100644 --- a/compiler-rt/lib/interception/interception.h +++ b/compiler-rt/lib/interception/interception.h @@ -19,7 +19,7 @@ #if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_APPLE && \ !SANITIZER_NETBSD && !SANITIZER_WINDOWS && !SANITIZER_FUCHSIA && \ - !SANITIZER_SOLARIS + !SANITIZER_SOLARIS && !SANITIZER_OHOS # error "Interception doesn't work on this operating system." #endif diff --git a/compiler-rt/lib/interception/interception_type_test.cpp b/compiler-rt/lib/interception/interception_type_test.cpp index 7c3de82a1e869c84477fd20d61a202bc5525b22e..0ce5ff09fd92bc19c69018659a73fc8dab71b5f3 100644 --- a/compiler-rt/lib/interception/interception_type_test.cpp +++ b/compiler-rt/lib/interception/interception_type_test.cpp @@ -31,8 +31,8 @@ COMPILER_CHECK(sizeof(::OFF64_T) == sizeof(off64_t)); // The following are the cases when pread (and friends) is used instead of // pread64. In those cases we need OFF_T to match off_t. We don't care about the // rest (they depend on _FILE_OFFSET_BITS setting when building an application). 
-# if SANITIZER_ANDROID || !defined _FILE_OFFSET_BITS || \ - _FILE_OFFSET_BITS != 64 +#if SANITIZER_ANDROID || !defined _FILE_OFFSET_BITS || \ + _FILE_OFFSET_BITS != 64 || SANITIZER_OHOS COMPILER_CHECK(sizeof(::OFF_T) == sizeof(off_t)); # endif diff --git a/compiler-rt/lib/orc/endianness.h b/compiler-rt/lib/orc/endianness.h index 4ee5505ce6ddf167329d5dfb1f4b2c6ac3232e66..a8e55629a6be32a8d332eac056348adf52b33a25 100644 --- a/compiler-rt/lib/orc/endianness.h +++ b/compiler-rt/lib/orc/endianness.h @@ -22,7 +22,8 @@ #endif #if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) || \ - defined(__Fuchsia__) || defined(__EMSCRIPTEN__) + defined(__Fuchsia__) || defined(__EMSCRIPTEN__) || \ + defined(__OHOS_FAMILY__) #include #elif defined(_AIX) #include diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 7a7af7936af3151f49489a20a0cc3642637dbdfc..9273c5a27adf0da6481a485f242d0ba5d3d4f3cd 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2190,11 +2190,13 @@ INTERCEPTOR(int, clock_gettime, u32 clk_id, void *tp) { #if SANITIZER_GLIBC namespace __sanitizer { extern "C" { +#if !SANITIZER_OHOS int real_clock_gettime(u32 clk_id, void *tp) { if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) return internal_clock_gettime(clk_id, tp); return REAL(clock_gettime)(clk_id, tp); } +#endif } // extern "C" } // namespace __sanitizer #endif @@ -2604,7 +2606,7 @@ INTERCEPTOR(int, wait3, int *status, int options, void *rusage) { } return res; } -#if SANITIZER_ANDROID +#if SANITIZER_ANDROID || SANITIZER_OHOS INTERCEPTOR(int, __wait4, int pid, int *status, int options, void *rusage) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, __wait4, pid, status, options, rusage); @@ -3931,7 +3933,7 @@ INTERCEPTOR(char *, strerror, int errnum) { // static storage. #if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \ SANITIZER_APPLE || SANITIZER_ANDROID || SANITIZER_NETBSD || \ - SANITIZER_FREEBSD + SANITIZER_FREEBSD || SANITIZER_OHOS // POSIX version. Spec is not clear on whether buf is NULL-terminated. // At least on OSX, buf contents are valid even when the call fails. 
INTERCEPTOR(int, strerror_r, int errnum, char *buf, SIZE_T buflen) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc index 49ec4097c900bd7fd23efaa7ac3bf32438d67ee8..17ac1af2431cb8a8999678b170835f32c21b1145 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc @@ -98,7 +98,7 @@ static void ioctl_table_fill() { _(SIOCSIFNETMASK, READ, struct_ifreq_sz); #endif -#if (SANITIZER_LINUX && !SANITIZER_ANDROID) +# if (SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS) _(SIOCGETSGCNT, WRITE, struct_sioc_sg_req_sz); _(SIOCGETVIFCNT, WRITE, struct_sioc_vif_req_sz); #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc index 14615f9668dea647937e1712dc0d5f19cdd03442..8d3854ff72af05d92c558a05fa0246c043deafd3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_syscalls.inc @@ -2527,7 +2527,7 @@ PRE_SYSCALL(ni_syscall)() {} POST_SYSCALL(ni_syscall)(long res) {} PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { -# if !SANITIZER_ANDROID && \ +# if !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ defined(__loongarch__) || SANITIZER_RISCV64) @@ -2549,7 +2549,7 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { } POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { -# if !SANITIZER_ANDROID && \ +# if !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ defined(__loongarch__) || SANITIZER_RISCV64) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_errno.h b/compiler-rt/lib/sanitizer_common/sanitizer_errno.h index 46c85364cef5603afab7e7613512b04290441d15..e85533c0fd8ad1167df869f076e17b9118a8a1c1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_errno.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_errno.h @@ -23,7 +23,7 @@ #if SANITIZER_FREEBSD || SANITIZER_APPLE # define __errno_location __error -#elif SANITIZER_ANDROID || SANITIZER_NETBSD +#elif SANITIZER_ANDROID || SANITIZER_NETBSD || SANITIZER_OHOS # define __errno_location __errno #elif SANITIZER_SOLARIS # define __errno_location ___errno diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index c1e3530618c20df393387fd5f4eafd7807b118b4..de13436d74f4217708f5ad3d3b1482a970c45da8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -62,7 +62,7 @@ COMMON_FLAG( COMMON_FLAG(const char *, log_suffix, nullptr, "String to append to log file name, e.g. 
\".txt\".") COMMON_FLAG( - bool, log_to_syslog, (bool)SANITIZER_ANDROID || (bool)SANITIZER_APPLE, + bool, log_to_syslog, (bool)SANITIZER_ANDROID || (bool)SANITIZER_OHOS || (bool)SANITIZER_APPLE, "Write all sanitizer output to syslog in addition to other means of " "logging.") COMMON_FLAG( @@ -242,13 +242,13 @@ COMMON_FLAG(bool, intercept_stat, true, COMMON_FLAG(bool, intercept_send, true, "If set, uses custom wrappers for send* functions " "to find more errors.") -COMMON_FLAG(bool, decorate_proc_maps, (bool)SANITIZER_ANDROID, +COMMON_FLAG(bool, decorate_proc_maps, (bool)SANITIZER_ANDROID || (bool)SANITIZER_OHOS, "If set, decorate sanitizer mappings in /proc/self/maps with " "user-readable names") COMMON_FLAG(int, exitcode, 1, "Override the program exit status if the tool " "found an error") COMMON_FLAG( - bool, abort_on_error, (bool)SANITIZER_ANDROID || (bool)SANITIZER_APPLE, + bool, abort_on_error, (bool)SANITIZER_ANDROID || (bool)SANITIZER_OHOS || (bool)SANITIZER_APPLE, "If set, the tool calls abort() instead of _exit() after printing the " "error report.") COMMON_FLAG(bool, suppress_equal_pcs, true, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h index eebfb00aad7acd328c1d8931314d9d382c2f35af..4f012483ec949853b39692df55a4d40a3cd26cb1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -181,7 +181,8 @@ typedef long pid_t; typedef int pid_t; #endif -#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE || \ +#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE || \ + SANITIZER_OHOS || \ (SANITIZER_SOLARIS && (defined(_LP64) || _FILE_OFFSET_BITS == 64)) || \ (SANITIZER_LINUX && !SANITIZER_GLIBC && !SANITIZER_ANDROID) || \ (SANITIZER_LINUX && (defined(__x86_64__) || defined(__hexagon__))) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index be3b3bd94e2a587ce0bf03de0277a933a19f319d..9ea879913326759afe5b634d737e87c77b29d651 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -78,7 +78,7 @@ # include # endif -# if SANITIZER_LINUX && !SANITIZER_ANDROID +# if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS # include # endif @@ -179,7 +179,7 @@ void BlockSignals(__sanitizer_sigset_t *oldset) { # if SANITIZER_LINUX __sanitizer_sigset_t currentset; -# if !SANITIZER_ANDROID +# if !SANITIZER_ANDROID && !SANITIZER_OHOS // FIXME: https://github.com/google/sanitizers/issues/1816 SetSigProcMask(NULL, ¤tset); @@ -954,7 +954,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { // rt_sigaction, so we need to do the same (we'll need to reimplement the // restorers; for x86_64 the restorer address can be obtained from // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact). 
-# if !SANITIZER_ANDROID || !SANITIZER_MIPS32 +#if (!SANITIZER_ANDROID && !SANITIZER_OHOS) || !SANITIZER_MIPS32 k_act.sa_restorer = u_act->sa_restorer; # endif } @@ -970,7 +970,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask, sizeof(__sanitizer_kernel_sigset_t)); u_oldact->sa_flags = k_oldact.sa_flags; -# if !SANITIZER_ANDROID || !SANITIZER_MIPS32 +#if (!SANITIZER_ANDROID && !SANITIZER_OHOS) || !SANITIZER_MIPS32 u_oldact->sa_restorer = k_oldact.sa_restorer; # endif } @@ -1141,7 +1141,7 @@ static uptr GetKernelAreaSize() { return 0; } -# if !SANITIZER_ANDROID +#if !SANITIZER_ANDROID && !SANITIZER_OHOS // Even if nothing is mapped, top Gb may still be accessible // if we are running on 64-bit kernel. // Uname may report misleading results if personality type diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index c30f0326793d5af18851a58374239a6415e868e3..fa8200fb6de107a8ea75c055cf0ec881800bec65 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -135,7 +135,7 @@ inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { ReleaseMemoryPagesToOS(beg, end); } -# if SANITIZER_ANDROID +#if SANITIZER_ANDROID || SANITIZER_OHOS # if defined(__aarch64__) # define __get_tls() \ diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index c3c717bbdbe4c963516dc72824f9e835bb57cc85..236ca65ec4a659a8d9567d5535f021d59628954a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -84,10 +84,10 @@ struct __sanitizer::linux_dirent { # endif # endif -# if !SANITIZER_ANDROID -# include -# include -# endif +#if !SANITIZER_ANDROID && !SANITIZER_OHOS +#include +#include +#endif namespace __sanitizer { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 57966403c92a9208dd1393129913b78a9ffe1fd0..e799fc95db8c131ebddfff67db0ba13654c3a49f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -14,7 +14,7 @@ #if !defined(__linux__) && !defined(__FreeBSD__) && !defined(__NetBSD__) && \ !defined(__APPLE__) && !defined(_WIN32) && !defined(__Fuchsia__) && \ - !(defined(__sun__) && defined(__svr4__)) + !defined(__OHOS_FAMILY__) && !(defined(__sun__) && defined(__svr4__)) # error "This operating system is not supported" #endif @@ -123,6 +123,12 @@ # define SANITIZER_ANDROID 0 #endif +#if defined(__OHOS__) +#define SANITIZER_OHOS 1 +#else +#define SANITIZER_OHOS 0 +#endif + #if defined(__Fuchsia__) # define SANITIZER_FUCHSIA 1 #else @@ -298,6 +304,12 @@ # if (SANITIZER_RISCV64 && !SANITIZER_FUCHSIA && !SANITIZER_LINUX) || \ SANITIZER_IOS || SANITIZER_DRIVERKIT # define SANITIZER_CAN_USE_ALLOCATOR64 0 +#endif +# if ((SANITIZER_ANDROID || SANITIZER_OHOS) && defined(__aarch64__)) || \ + SANITIZER_FUCHSIA +# define SANITIZER_CAN_USE_ALLOCATOR64 1 +# elif defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ + defined(__arm__) || SANITIZER_RISCV64 || defined(__hexagon__) # elif defined(__mips64) || defined(__hexagon__) # define SANITIZER_CAN_USE_ALLOCATOR64 0 # else diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 05cd2d71cbbae6e952da18d4045e5cd523aafe91..3737ee58dc401a1d40d47251c8940dd5b8130e78 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -58,6 +58,12 @@ #define SI_ANDROID 0 #endif +#if SANITIZER_OHOS +#define SI_OHOS 1 +#else +#define SI_OHOS 0 +#endif + #if SANITIZER_FREEBSD #define SI_FREEBSD 1 #else @@ -271,7 +277,7 @@ #define SANITIZER_INTERCEPT_SYSINFO SI_LINUX #define SANITIZER_INTERCEPT_READDIR SI_POSIX #define SANITIZER_INTERCEPT_READDIR64 SI_GLIBC || SI_SOLARIS32 -#if SI_LINUX_NOT_ANDROID && \ +#if SI_LINUX_NOT_ANDROID && !SI_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64) @@ -342,7 +348,7 @@ (SI_FREEBSD || SI_MAC || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_ETHER_R (SI_FREEBSD || SI_LINUX_NOT_ANDROID) #define SANITIZER_INTERCEPT_SHMCTL \ - (((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && SANITIZER_WORDSIZE == 64) || \ + (((SI_FREEBSD || SI_LINUX_NOT_ANDROID) && !SI_OHOS && SANITIZER_WORDSIZE == 64) || \ SI_NETBSD || SI_SOLARIS) #define SANITIZER_INTERCEPT_RANDOM_R SI_GLIBC #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GET SI_POSIX @@ -469,7 +475,7 @@ #define SI_STAT_LINUX (SI_LINUX && __GLIBC_PREREQ(2, 33)) #define SANITIZER_INTERCEPT_STAT \ (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS || \ - SI_STAT_LINUX) + SI_STAT_LINUX || SI_OHOS) #define SANITIZER_INTERCEPT_STAT64 SI_STAT_LINUX && SANITIZER_HAS_STAT64 #define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD || SI_STAT_LINUX) #define SANITIZER_INTERCEPT___XSTAT \ @@ -501,7 +507,7 @@ #define SANITIZER_INTERCEPT_WCSCAT SI_POSIX #define SANITIZER_INTERCEPT_WCSDUP SI_POSIX #define SANITIZER_INTERCEPT_SIGNAL_AND_SIGACTION (!SI_WINDOWS && SI_NOT_FUCHSIA) -#define SANITIZER_INTERCEPT_BSD_SIGNAL SI_ANDROID +#define SANITIZER_INTERCEPT_BSD_SIGNAL (SI_ANDROID || SI_OHOS) #define SANITIZER_INTERCEPT_ACCT (SI_NETBSD || SI_FREEBSD) #define SANITIZER_INTERCEPT_USER_FROM_UID SI_NETBSD @@ -513,7 +519,7 @@ #define SANITIZER_INTERCEPT_GETGROUPLIST \ (SI_NETBSD || SI_FREEBSD || SI_LINUX) #define SANITIZER_INTERCEPT_STRLCPY \ - (SI_NETBSD || SI_FREEBSD || SI_MAC || SI_ANDROID) + (SI_NETBSD || SI_FREEBSD || SI_MAC || SI_ANDROID || SI_OHOS) #define SANITIZER_INTERCEPT_NAME_TO_HANDLE_AT SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_OPEN_BY_HANDLE_AT SI_LINUX_NOT_ANDROID diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp index bf0f355847cb13332fac82fb756fe2afa068991a..6d608cbb63ed6ab9623e39f451f105e5d80f635e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_linux.cpp @@ -76,7 +76,7 @@ CHECK_SIZE_AND_OFFSET(io_event, obj); CHECK_SIZE_AND_OFFSET(io_event, res); CHECK_SIZE_AND_OFFSET(io_event, res2); -#if !SANITIZER_ANDROID +# if !SANITIZER_ANDROID && !SANITIZER_OHOS COMPILER_CHECK(sizeof(struct __sanitizer_perf_event_attr) <= sizeof(struct perf_event_attr)); CHECK_SIZE_AND_OFFSET(perf_event_attr, type); @@ -85,7 +85,7 @@ CHECK_SIZE_AND_OFFSET(perf_event_attr, size); COMPILER_CHECK(iocb_cmd_pread == IOCB_CMD_PREAD); COMPILER_CHECK(iocb_cmd_pwrite == IOCB_CMD_PWRITE); -#if !SANITIZER_ANDROID +# if 
!SANITIZER_ANDROID && !SANITIZER_OHOS COMPILER_CHECK(iocb_cmd_preadv == IOCB_CMD_PREADV); COMPILER_CHECK(iocb_cmd_pwritev == IOCB_CMD_PWRITEV); #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 6d61d276d77e35d20fb7bc0712aa659f4e89b7a1..f9d723b8f00d67d7344bbd450a354e0e007dfef0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -70,7 +70,9 @@ #include #include #include +#if !SANITIZER_OHOS #include +#endif #include #include #include @@ -94,6 +96,13 @@ #if SANITIZER_LINUX # include # include +# if SANITIZER_OHOS +// Do not include asm/sigcontext.h on behalf of asm/ptrace.h +// to avoid multiple definiton errors. +# define __ASM_SIGCONTEXT_H 1 +# include +# endif + # if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ defined(__hexagon__) || defined(__loongarch__) ||SANITIZER_RISCV64 # include @@ -134,6 +143,13 @@ typedef struct user_fpregs elf_fpregset_t; #if SANITIZER_ANDROID #include +#elif SANITIZER_OHOS +#include +#include +#include +#include +#include +#include #else #include #include @@ -312,7 +328,6 @@ namespace __sanitizer { int e_tabsz = (int)E_TABSZ; #endif - #if SANITIZER_LINUX && !SANITIZER_ANDROID unsigned struct_shminfo_sz = sizeof(struct shminfo); unsigned struct_shm_info_sz = sizeof(struct shm_info); @@ -533,7 +548,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats); #endif // SANITIZER_GLIBC -#if !SANITIZER_ANDROID && !SANITIZER_APPLE +#if !SANITIZER_ANDROID && !SANITIZER_APPLE && !SANITIZER_OHOS unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req); unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req); #endif @@ -586,7 +601,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned IOCTL_TIOCSPGRP = TIOCSPGRP; unsigned IOCTL_TIOCSTI = TIOCSTI; unsigned IOCTL_TIOCSWINSZ = TIOCSWINSZ; -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS unsigned IOCTL_SIOCGETSGCNT = SIOCGETSGCNT; unsigned IOCTL_SIOCGETVIFCNT = SIOCGETVIFCNT; #endif @@ -878,7 +893,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned IOCTL_VT_WAITACTIVE = VT_WAITACTIVE; #endif // SANITIZER_LINUX -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS unsigned IOCTL_EQL_EMANCIPATE = EQL_EMANCIPATE; unsigned IOCTL_EQL_ENSLAVE = EQL_ENSLAVE; unsigned IOCTL_EQL_GETMASTRCFG = EQL_GETMASTRCFG; @@ -962,7 +977,7 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned IOCTL_TIOCSSERIAL = TIOCSSERIAL; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS unsigned IOCTL_GIO_SCRNMAP = GIO_SCRNMAP; unsigned IOCTL_KDDISABIO = KDDISABIO; unsigned IOCTL_KDENABIO = KDENABIO; @@ -1130,7 +1145,8 @@ CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask); // didn't exist. 
CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags); #endif -#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) +#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && \ + !SANITIZER_OHOS CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_restorer); #endif @@ -1233,7 +1249,7 @@ CHECK_TYPE_SIZE(clock_t); CHECK_TYPE_SIZE(clockid_t); #endif -#if !SANITIZER_ANDROID +#if !SANITIZER_ANDROID && !SANITIZER_OHOS CHECK_TYPE_SIZE(ifaddrs); CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_next); CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_name); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 34bfef1f7ef45695602c8c9130f149b8728281c9..169f3d5922ca36167a7864417c56aa9ec54b1ea9 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -209,6 +209,12 @@ struct __sanitizer_sem_t { extern unsigned struct_ustat_sz; extern unsigned struct_rlimit64_sz; extern unsigned struct_statvfs64_sz; +#endif + +#if SANITIZER_LINUX && !SANITIZER_ANDROID +struct __sanitizer_struct_mallinfo { + int v[10]; +}; struct __sanitizer_ipc_perm { int __key; @@ -642,7 +648,7 @@ struct __sanitizer_sigaction { uptr sa_flags; void (*sa_restorer)(); }; -#else // !SANITIZER_ANDROID +#else // !SANITIZER_ANDROID struct __sanitizer_sigaction { #if defined(__mips__) && !SANITIZER_FREEBSD unsigned int sa_flags; @@ -831,7 +837,7 @@ struct __sanitizer_wordexp_t { uptr we_offs; }; -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS struct __sanitizer_FILE { int _flags; char *_IO_read_ptr; @@ -855,7 +861,7 @@ typedef void __sanitizer_FILE; # define SANITIZER_HAS_STRUCT_FILE 0 #endif -#if SANITIZER_LINUX && !SANITIZER_ANDROID && \ +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ defined(__s390__) || defined(__loongarch__) || SANITIZER_RISCV64) @@ -1033,7 +1039,7 @@ extern unsigned struct_synth_info_sz; extern unsigned struct_vt_mode_sz; #endif // SANITIZER_LINUX -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS extern unsigned struct_ax25_parms_struct_sz; extern unsigned struct_input_keymap_entry_sz; extern unsigned struct_ipx_config_data_sz; @@ -1059,7 +1065,7 @@ extern unsigned struct_audio_buf_info_sz; extern unsigned struct_ppp_stats_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID -#if !SANITIZER_ANDROID && !SANITIZER_APPLE +#if !SANITIZER_ANDROID && !SANITIZER_APPLE && !SANITIZER_OHOS extern unsigned struct_sioc_sg_req_sz; extern unsigned struct_sioc_vif_req_sz; #endif @@ -1114,7 +1120,7 @@ extern unsigned IOCTL_TIOCSETD; extern unsigned IOCTL_TIOCSPGRP; extern unsigned IOCTL_TIOCSTI; extern unsigned IOCTL_TIOCSWINSZ; -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS extern unsigned IOCTL_SIOCGETSGCNT; extern unsigned IOCTL_SIOCGETVIFCNT; #endif @@ -1386,7 +1392,16 @@ extern unsigned IOCTL_VT_SETMODE; extern unsigned IOCTL_VT_WAITACTIVE; #endif // SANITIZER_LINUX -#if SANITIZER_LINUX && !SANITIZER_ANDROID +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_OHOS +extern unsigned IOCTL_CYGETDEFTHRESH; +extern unsigned IOCTL_CYGETDEFTIMEOUT; +extern unsigned IOCTL_CYGETMON; +extern unsigned IOCTL_CYGETTHRESH; 
+extern unsigned IOCTL_CYGETTIMEOUT; +extern unsigned IOCTL_CYSETDEFTHRESH; +extern unsigned IOCTL_CYSETDEFTIMEOUT; +extern unsigned IOCTL_CYSETTHRESH; +extern unsigned IOCTL_CYSETTIMEOUT; extern unsigned IOCTL_EQL_EMANCIPATE; extern unsigned IOCTL_EQL_ENSLAVE; extern unsigned IOCTL_EQL_GETMASTRCFG; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp index 7d7d57543199473cbeafc610af861b21445300df..f0ec3825180e3f05c85c819fae201cc4dc01dbbc 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.cpp @@ -353,7 +353,7 @@ bool ShouldMockFailureToOpen(const char *path) { internal_strncmp(path, "/proc/", 6) == 0; } -#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_GO +#if SANITIZER_LINUX && !SANITIZER_ANDROID && !SANITIZER_GO && !SANITIZER_OHOS int GetNamedMappingFd(const char *name, uptr size, int *flags) { if (!common_flags()->decorate_proc_maps || !name) return -1; @@ -385,7 +385,7 @@ int GetNamedMappingFd(const char *name, uptr size, int *flags) { } #endif -#if SANITIZER_ANDROID +#if SANITIZER_ANDROID || SANITIZER_OHOS #define PR_SET_VMA 0x53564d41 #define PR_SET_VMA_ANON_NAME 0 void DecorateMapping(uptr addr, uptr size, const char *name) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp index 9ffb36f812c45d178202b3edd8d1ec12e382250e..a9a7217418a1ef0dbf4dd4851ac5b97f84e1e46c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp @@ -191,7 +191,8 @@ void SetAlternateSignalStack() { CHECK_EQ(0, sigaltstack(nullptr, &oldstack)); // If the alternate stack is already in place, do nothing. // Android always sets an alternate stack, but it's too small for us. - if (!SANITIZER_ANDROID && !(oldstack.ss_flags & SS_DISABLE)) return; + if (!SANITIZER_ANDROID && !(oldstack.ss_flags & SS_DISABLE)) + return; // TODO(glider): the mapped stack should have the MAP_STACK flag in the // future. It is not required by man 2 sigaltstack now (they're using // malloc()). diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp index 3776e8c97057ebdf4b7d168fbb96228f1e949707..6f6467ede96fb2e1ac4a0fdddf757032ceff39da 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp @@ -27,7 +27,7 @@ struct StackDepotNode { u32 link; StackStore::Id store_id; - static const u32 kTabSizeLog = SANITIZER_ANDROID ? 16 : 20; + static const u32 kTabSizeLog = (SANITIZER_ANDROID || SANITIZER_OHOS) ? 16 : 20; typedef StackTrace args_type; bool eq(hash_type hash, const args_type &args) const { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h index 82cf7578d0fb9bd05d460b988447959dbdb008ce..bd11f50647bc916d7bb4a27bb16a112bf98a289c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.h @@ -31,7 +31,8 @@ struct StackDepotHandle { void inc_use_count_unsafe(); }; -const int kStackDepotMaxUseCount = 1U << (SANITIZER_ANDROID ? 16 : 20); +const int kStackDepotMaxUseCount = + 1U << ((SANITIZER_ANDROID || SANITIZER_OHOS) ? 
16 : 20); StackDepotStats StackDepotGetStats(); u32 StackDepotPut(StackTrace stack); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 526a71c398260d647b30a0fb0188f01274145ae4..aa758d40398b8160fd791ea411d7df19deb49e5f 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -34,6 +34,11 @@ #if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h +#if SANITIZER_OHOS +// Do not include asm/sigcontext.h on behalf of asm/ptrace.h +// to avoid multiple definiton errors. +#define __ASM_SIGCONTEXT_H 1 +#endif # include #endif #include // for user_regs_struct diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp index ffbaf1468ec8ff1a896548bbf2ff381d7033aec1..49dda74b07abaea4c2276cb0ba2ae0c392a3fd12 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_report.cpp @@ -133,7 +133,7 @@ void ReportErrorSummary(const char *error_type, const StackTrace *stack, } void ReportMmapWriteExec(int prot, int flags) { -#if SANITIZER_POSIX && (!SANITIZER_GO && !SANITIZER_ANDROID) +#if SANITIZER_POSIX && (!SANITIZER_GO && !SANITIZER_ANDROID) && !SANITIZER_OHOS int pflags = (PROT_WRITE | PROT_EXEC); if ((prot & pflags) != pflags) return; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp index 6a8e82e2e213c0d732bf289b071a93f96e5604e5..ef1fc87cf0dcad92be68f9c3905ee6eaa475f1be 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_unwind_linux_libcdep.cpp @@ -16,7 +16,7 @@ #include "sanitizer_common.h" #include "sanitizer_stacktrace.h" -#if SANITIZER_ANDROID +#if SANITIZER_ANDROID || SANITIZER_OHOS #include // for dlopen() #endif diff --git a/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp b/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp index 68edd3a1b2062e3f9cff550ce974856183da0605..4f0d42773576b007f797b5ec13cc26934743569a 100644 --- a/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp +++ b/compiler-rt/lib/ubsan/ubsan_signals_standalone.cpp @@ -26,7 +26,7 @@ // debuggerd handler, but before the ART handler. // * Interceptors don't work at all when ubsan runtime is loaded late, ex. when // it is part of an APK that does not use wrap.sh method. 
-#if SANITIZER_FUCHSIA || SANITIZER_ANDROID +#if SANITIZER_FUCHSIA || SANITIZER_ANDROID || SANITIZER_OHOS namespace __ubsan { void InitializeDeadlySignals() {} diff --git a/compiler-rt/test/asan/TestCases/Linux/lit.local.cfg.py b/compiler-rt/test/asan/TestCases/Linux/lit.local.cfg.py index 603ca0365068f5e6917664eff43452bb1dc856de..bc16a80f74d078420bd268dc3395f86d4065134e 100644 --- a/compiler-rt/test/asan/TestCases/Linux/lit.local.cfg.py +++ b/compiler-rt/test/asan/TestCases/Linux/lit.local.cfg.py @@ -6,5 +6,5 @@ def getRoot(config): root = getRoot(config) -if root.host_os not in ["Linux"]: +if root.host_os not in ["Linux", 'OHOS']: config.unsupported = True diff --git a/compiler-rt/test/asan/TestCases/Linux/odr_c_test.c b/compiler-rt/test/asan/TestCases/Linux/odr_c_test.c index 9929b4a67af38fdfd71e44aea17c754691665c3d..f5f2340288675702333a19c6ebf62c852eb69ef8 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr_c_test.c +++ b/compiler-rt/test/asan/TestCases/Linux/odr_c_test.c @@ -17,7 +17,7 @@ __attribute__((aligned(8))) int x; __attribute__((aligned(1))) char y; // The gold linker puts ZZZ at the start of bss (where it is aligned) // unless we have a large alternative like Displace: -__attribute__((aligned(1))) char Displace[105]; +__attribute__((aligned(8))) char Displace[105]; __attribute__((aligned(1))) char ZZZ[100]; #elif defined(FILE2) int ZZZ = 1; diff --git a/compiler-rt/test/asan/TestCases/Linux/shmctl.cpp b/compiler-rt/test/asan/TestCases/Linux/shmctl.cpp index e1752bc894c063f3b998d81b90557fd5bd5ba4ed..8fed52092be8377e0f2c12d9414df41312e2ea55 100644 --- a/compiler-rt/test/asan/TestCases/Linux/shmctl.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/shmctl.cpp @@ -1,5 +1,5 @@ // FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316 -// XFAIL: android +// XFAIL: android && !ohos_family // // RUN: %clangxx_asan -O1 %s -o %t && %run %t 2>&1 // Regression test for @@ -22,6 +22,6 @@ int main() { struct shm_info shmInfo; res = shmctl(0, SHM_INFO, (struct shmid_ds *)&shmInfo); assert(res > -1); - + return 0; } diff --git a/compiler-rt/test/asan/lit.cfg.py b/compiler-rt/test/asan/lit.cfg.py index 83b3cbe789caccfab03ef84511cfa7a34cea6769..799975a2316c964aec838a614e6b382dc6deaeef 100644 --- a/compiler-rt/test/asan/lit.cfg.py +++ b/compiler-rt/test/asan/lit.cfg.py @@ -283,8 +283,8 @@ else: config.substitutions.append(("%pie", "-pie")) # Only run the tests on supported OSs. 
-if config.host_os not in ["Linux", "Darwin", "FreeBSD", "SunOS", "Windows", "NetBSD"]: - config.unsupported = True +if config.host_os not in ['OHOS', 'Linux', 'Darwin', 'FreeBSD', 'SunOS', 'Windows', 'NetBSD']: + config.unsupported = True if not config.parallelism_group: config.parallelism_group = "shadow-memory" diff --git a/compiler-rt/test/builtins/Unit/divtf3_test.c b/compiler-rt/test/builtins/Unit/divtf3_test.c index d46fcc0477693819bc6c1bbd16b05a70564344f3..59d983548fc67bf379eb849f79688d03679bf13b 100644 --- a/compiler-rt/test/builtins/Unit/divtf3_test.c +++ b/compiler-rt/test/builtins/Unit/divtf3_test.c @@ -156,6 +156,11 @@ int main() { UINT64_C(0x0001000000000000), UINT64_C(0))) return 1; + if (test__divtf3(-1L, + -0.999999999999999999999999999999999904L, + UINT64_C(0x1), + UINT64_C(0x3fff000000000000))) + return 1; #else printf("skipped\n"); diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index d4b1e1d71d3c54657c0fa6c4bd74785b1328e4ca..e0ef79a5a3167ec0cc02b0f6c594697a43ba5f26 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -111,6 +111,9 @@ def push_dynamic_library_lookup_path(config, new_path): config.environment[dynamic_library_lookup_var] = new_ld_library_path_64 +def is_ohos_family_mobile(): + return config.ohos_family and config.target_arch != 'x86_64' + # Choose between lit's internal shell pipeline runner and a real shell. If # LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override. use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index f7276627995520a97724a4fce7928b58b78d851b..5ca6bd0a0de3d1630919ffd7cbdcd29612f52f0f 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -43,6 +43,7 @@ set_default("use_lld", @COMPILER_RT_TEST_USE_LLD_PYBOOL@) set_default("use_thinlto", False) set_default("use_lto", config.use_thinlto) set_default("android", @ANDROID_PYBOOL@) +set_default("ohos_family", @OHOS_FAMILY_PYBOOL@) set_default("android_api_level", "@ANDROID_API_LEVEL@") set_default("android_serial", "@ANDROID_SERIAL_FOR_TESTING@") set_default("android_files_to_push", []) diff --git a/compiler-rt/test/sanitizer_common/ohos_family_commands/ohos_common.py b/compiler-rt/test/sanitizer_common/ohos_family_commands/ohos_common.py new file mode 100644 index 0000000000000000000000000000000000000000..da6e6b9d2fd43e020e8d3946a0a84f236090d0be --- /dev/null +++ b/compiler-rt/test/sanitizer_common/ohos_family_commands/ohos_common.py @@ -0,0 +1,45 @@ +import os, subprocess, tempfile + +HOS_TMPDIR = '/data/local/tmp/Output' +ADB = os.environ.get('ADB', 'adb') + +verbose = False +if os.environ.get('HOS_RUN_VERBOSE') == '1': + verbose = True + +def host_to_device_path(path): + rel = os.path.relpath(path, "/") + dev = os.path.join(HOS_TMPDIR, rel) + return dev + +def adb(args, attempts = 1): + if verbose: + print args + tmpname = tempfile.mktemp() + out = open(tmpname, 'w') + ret = 255 + while attempts > 0 and ret != 0: + attempts -= 1 + ret = subprocess.call([ADB] + args, stdout=out, stderr=subprocess.STDOUT) + if attempts != 0: + ret = 5 + if ret != 0: + print "adb command failed", args + print tmpname + out.close() + out = open(tmpname, 'r') + print out.read() + out.close() + os.unlink(tmpname) + return ret + +def pull_from_device(path): + tmp = tempfile.mktemp() + adb(['pull', path, tmp], 5) + text = open(tmp, 'r').read() + os.unlink(tmp) + 
return text + +def push_to_device(path): + dst_path = host_to_device_path(path) + adb(['push', path, dst_path], 5) diff --git a/flang/include/flang/Lower/EnvironmentDefault.h b/flang/include/flang/Lower/EnvironmentDefault.h old mode 100644 new mode 100755 diff --git a/flang/include/flang/Optimizer/Builder/Runtime/EnvironmentDefaults.h b/flang/include/flang/Optimizer/Builder/Runtime/EnvironmentDefaults.h old mode 100644 new mode 100755 diff --git a/flang/lib/Optimizer/Builder/Runtime/EnvironmentDefaults.cpp b/flang/lib/Optimizer/Builder/Runtime/EnvironmentDefaults.cpp old mode 100644 new mode 100755 diff --git a/flang/runtime/environment-default-list.h b/flang/runtime/environment-default-list.h old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as old mode 100644 new mode 100755 
diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold old mode 100644 new mode 100755 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld old mode 100644 new mode 100755 diff --git a/flang/test/Driver/convert.f90 b/flang/test/Driver/convert.f90 old mode 100644 new mode 100755 diff --git a/flang/test/Driver/msvc-dependent-lib-flags.f90 b/flang/test/Driver/msvc-dependent-lib-flags.f90 index 1b7ecb604ad67d368dbd88da6ac00b628b3fd8ab..765917f07d8e72355166073dd19c2d1d6aed6200 100644 --- a/flang/test/Driver/msvc-dependent-lib-flags.f90 +++ b/flang/test/Driver/msvc-dependent-lib-flags.f90 @@ -1,36 +1,36 @@ -! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC -! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG -! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL -! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG - -! MSVC: -fc1 -! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib -! MSVC-SAME: -D_MT -! MSVC-SAME: --dependent-lib=libcmt -! MSVC-SAME: --dependent-lib=FortranRuntime.static.lib -! MSVC-SAME: --dependent-lib=FortranDecimal.static.lib - -! MSVC-DEBUG: -fc1 -! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib -! MSVC-DEBUG-SAME: -D_MT -! MSVC-DEBUG-SAME: -D_DEBUG -! MSVC-DEBUG-SAME: --dependent-lib=libcmtd -! MSVC-DEBUG-SAME: --dependent-lib=FortranRuntime.static_dbg.lib -! MSVC-DEBUG-SAME: --dependent-lib=FortranDecimal.static_dbg.lib - -! MSVC-DLL: -fc1 -! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins.lib -! MSVC-DLL-SAME: -D_MT -! MSVC-DLL-SAME: -D_DLL -! MSVC-DLL-SAME: --dependent-lib=msvcrt -! MSVC-DLL-SAME: --dependent-lib=FortranRuntime.dynamic.lib -! MSVC-DLL-SAME: --dependent-lib=FortranDecimal.dynamic.lib - -! MSVC-DLL-DEBUG: -fc1 -! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib -! MSVC-DLL-DEBUG-SAME: -D_MT -! MSVC-DLL-DEBUG-SAME: -D_DEBUG -! MSVC-DLL-DEBUG-SAME: -D_DLL -! MSVC-DLL-DEBUG-SAME: --dependent-lib=msvcrtd -! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranRuntime.dynamic_dbg.lib -! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranDecimal.dynamic_dbg.lib +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC +! 
RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=static_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DEBUG +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL +! RUN: %flang -### --target=aarch64-windows-msvc -resource-dir=%S/Inputs/resource_dir -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG + +! MSVC: -fc1 +! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-SAME: -D_MT +! MSVC-SAME: --dependent-lib=libcmt +! MSVC-SAME: --dependent-lib=FortranRuntime.static.lib +! MSVC-SAME: --dependent-lib=FortranDecimal.static.lib + +! MSVC-DEBUG: -fc1 +! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-DEBUG-SAME: -D_MT +! MSVC-DEBUG-SAME: -D_DEBUG +! MSVC-DEBUG-SAME: --dependent-lib=libcmtd +! MSVC-DEBUG-SAME: --dependent-lib=FortranRuntime.static_dbg.lib +! MSVC-DEBUG-SAME: --dependent-lib=FortranDecimal.static_dbg.lib + +! MSVC-DLL: -fc1 +! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-DLL-SAME: -D_MT +! MSVC-DLL-SAME: -D_DLL +! MSVC-DLL-SAME: --dependent-lib=msvcrt +! MSVC-DLL-SAME: --dependent-lib=FortranRuntime.dynamic.lib +! MSVC-DLL-SAME: --dependent-lib=FortranDecimal.dynamic.lib + +! MSVC-DLL-DEBUG: -fc1 +! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib +! MSVC-DLL-DEBUG-SAME: -D_MT +! MSVC-DLL-DEBUG-SAME: -D_DEBUG +! MSVC-DLL-DEBUG-SAME: -D_DLL +! MSVC-DLL-DEBUG-SAME: --dependent-lib=msvcrtd +! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranRuntime.dynamic_dbg.lib +! MSVC-DLL-DEBUG-SAME: --dependent-lib=FortranDecimal.dynamic_dbg.lib diff --git a/flang/test/Lower/convert.f90 b/flang/test/Lower/convert.f90 old mode 100644 new mode 100755 diff --git a/flang/test/Lower/environment-defaults.f90 b/flang/test/Lower/environment-defaults.f90 old mode 100644 new mode 100755 diff --git a/flang/test/Lower/io-statement-open-options.f90 b/flang/test/Lower/io-statement-open-options.f90 old mode 100644 new mode 100755 diff --git a/flang/test/Lower/pointer-args-callee.f90 b/flang/test/Lower/pointer-args-callee.f90 old mode 100644 new mode 100755 diff --git a/flang/test/Lower/pre-fir-tree08.f b/flang/test/Lower/pre-fir-tree08.f old mode 100644 new mode 100755 diff --git a/libc/src/__support/CPP/Array.h b/libc/src/__support/CPP/array.h similarity index 100% rename from libc/src/__support/CPP/Array.h rename to libc/src/__support/CPP/array.h diff --git a/libc/src/__support/CPP/Bit.h b/libc/src/__support/CPP/bit.h similarity index 100% rename from libc/src/__support/CPP/Bit.h rename to libc/src/__support/CPP/bit.h diff --git a/libc/src/__support/CPP/Bitset.h b/libc/src/__support/CPP/bitset.h similarity index 100% rename from libc/src/__support/CPP/Bitset.h rename to libc/src/__support/CPP/bitset.h diff --git a/libc/src/__support/CPP/Functional.h b/libc/src/__support/CPP/functional.h similarity index 100% rename from libc/src/__support/CPP/Functional.h rename to libc/src/__support/CPP/functional.h diff --git a/libc/src/__support/CPP/Limits.h b/libc/src/__support/CPP/limits.h similarity index 100% rename from libc/src/__support/CPP/Limits.h rename to libc/src/__support/CPP/limits.h diff --git a/libc/src/__support/CPP/Utility.h b/libc/src/__support/CPP/utility.h similarity index 100% rename from libc/src/__support/CPP/Utility.h rename to libc/src/__support/CPP/utility.h diff --git a/libc/utils/docgen/docgen.py 
b/libc/utils/docgen/docgen.py old mode 100644 new mode 100755 diff --git a/libcxx/test/libcxx/transitive_includes_to_csv.py b/libcxx/test/libcxx/transitive_includes_to_csv.py old mode 100644 new mode 100755 diff --git a/libcxx/utils/adb_run.py b/libcxx/utils/adb_run.py old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/build-picolibc.sh b/libcxx/utils/ci/build-picolibc.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/build-emulator-images.sh b/libcxx/utils/ci/vendor/android/build-emulator-images.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/container-setup.sh b/libcxx/utils/ci/vendor/android/container-setup.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/emulator-entrypoint.sh b/libcxx/utils/ci/vendor/android/emulator-entrypoint.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/emulator-wait-for-ready.sh b/libcxx/utils/ci/vendor/android/emulator-wait-for-ready.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/run-buildbot-container b/libcxx/utils/ci/vendor/android/run-buildbot-container old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/start-emulator.sh b/libcxx/utils/ci/vendor/android/start-emulator.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/ci/vendor/android/stop-emulator.sh b/libcxx/utils/ci/vendor/android/stop-emulator.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/clang-format-merge-driver.sh b/libcxx/utils/clang-format-merge-driver.sh old mode 100644 new mode 100755 diff --git a/libcxx/utils/generate_escaped_output_table.py b/libcxx/utils/generate_escaped_output_table.py old mode 100644 new mode 100755 diff --git a/libcxx/utils/generate_indic_conjunct_break_table.py b/libcxx/utils/generate_indic_conjunct_break_table.py old mode 100644 new mode 100755 diff --git a/libcxx/utils/libcxx-lit b/libcxx/utils/libcxx-lit old mode 100644 new mode 100755 diff --git a/libcxx/utils/qemu_baremetal.py b/libcxx/utils/qemu_baremetal.py old mode 100644 new mode 100755 diff --git a/libcxx/utils/zos_rename_dll_side_deck.sh b/libcxx/utils/zos_rename_dll_side_deck.sh old mode 100644 new mode 100755 diff --git a/lld/test/COFF/Inputs/cl-gl.lib b/lld/test/COFF/Inputs/cl-gl.lib old mode 100644 new mode 100755 diff --git a/lldb/scripts/generate-sbapi-dwarf-enum.py b/lldb/scripts/generate-sbapi-dwarf-enum.py old mode 100644 new mode 100755 diff --git a/lldb/scripts/lldb-test-qemu/run-qemu.sh b/lldb/scripts/lldb-test-qemu/run-qemu.sh old mode 100644 new mode 100755 diff --git a/lldb/source/Plugins/ABI/RISCV/CMakeLists.txt b/lldb/source/Plugins/ABI/RISCV/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/lldb/test/API/commands/trace/inline-function/a.out b/lldb/test/API/commands/trace/inline-function/a.out old mode 100644 new mode 100755 diff --git a/lldb/test/API/functionalities/postmortem/elf-core/linux-riscv64.out b/lldb/test/API/functionalities/postmortem/elf-core/linux-riscv64.out old mode 100644 new mode 100755 diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/Makefile b/lldb/test/API/functionalities/target-new-solib-notifications/Makefile index 680f82b218134b9a9e31fbd390ccb4f7ff2ff393..6c61d210eeb2f3f55fbfa16e831e9e537182f023 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/Makefile +++ b/lldb/test/API/functionalities/target-new-solib-notifications/Makefile @@ -1,23 +1,23 @@ -CXX_SOURCES := main.cpp +CXX_SOURCES := main.cpp LD_EXTRAS 
:= -L. -l_d -l_c -l_a -l_b - -a.out: lib_b lib_a lib_c lib_d - -include Makefile.rules - -lib_a: lib_b - $(MAKE) -f $(MAKEFILE_RULES) \ + +a.out: lib_b lib_a lib_c lib_d + +include Makefile.rules + +lib_a: lib_b + $(MAKE) -f $(MAKEFILE_RULES) \ DYLIB_ONLY=YES DYLIB_CXX_SOURCES=a.cpp DYLIB_NAME=_a \ LD_EXTRAS="-L. -l_b" - -lib_b: - $(MAKE) -f $(MAKEFILE_RULES) \ + +lib_b: + $(MAKE) -f $(MAKEFILE_RULES) \ DYLIB_ONLY=YES DYLIB_CXX_SOURCES=b.cpp DYLIB_NAME=_b - -lib_c: - $(MAKE) -f $(MAKEFILE_RULES) \ + +lib_c: + $(MAKE) -f $(MAKEFILE_RULES) \ DYLIB_ONLY=YES DYLIB_CXX_SOURCES=c.cpp DYLIB_NAME=_c - -lib_d: - $(MAKE) -f $(MAKEFILE_RULES) \ + +lib_d: + $(MAKE) -f $(MAKEFILE_RULES) \ DYLIB_ONLY=YES DYLIB_CXX_SOURCES=d.cpp DYLIB_NAME=_d diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp index 66633b70ee1e50793333cc3f08b33208f3cbef08..778b46ed5cef1a0fa1751f2cd9e137ddaf1918fa 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp +++ b/lldb/test/API/functionalities/target-new-solib-notifications/a.cpp @@ -1,3 +1,3 @@ -extern "C" int b_function(); - -extern "C" int a_function() { return b_function(); } +extern "C" int b_function(); + +extern "C" int a_function() { return b_function(); } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp index 8b16fbdb5728cde6c931111791513e01ea20798b..4f1a4032ee0eedd9298bac3823d046c52a4ecf9e 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp +++ b/lldb/test/API/functionalities/target-new-solib-notifications/b.cpp @@ -1 +1 @@ -extern "C" int b_function() { return 500; } +extern "C" int b_function() { return 500; } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp index 120c88f2bb609a8498bd05b8411c2be211140870..8abd1b155a75904856db20904cd7551921142bfa 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp +++ b/lldb/test/API/functionalities/target-new-solib-notifications/c.cpp @@ -1 +1 @@ -extern "C" int c_function() { return 600; } +extern "C" int c_function() { return 600; } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp index d37ad2621ae4e99790c6d6e33cd357e9fef42d6f..58888a29ba323aad0908b6bf03cb90821528e8bd 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp +++ b/lldb/test/API/functionalities/target-new-solib-notifications/d.cpp @@ -1 +1 @@ -extern "C" int d_function() { return 700; } +extern "C" int d_function() { return 700; } diff --git a/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp b/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp index bd2c79cdab9daa6cc1d9de5103651e3aac20063d..77b38c5ccdc6985612733b1d72cee1bea9427314 100644 --- a/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp +++ b/lldb/test/API/functionalities/target-new-solib-notifications/main.cpp @@ -1,16 +1,16 @@ -#include - -extern "C" int a_function(); -extern "C" int c_function(); -extern "C" int b_function(); -extern "C" int d_function(); - -int main() { - a_function(); - b_function(); - c_function(); - d_function(); - - puts("running"); // breakpoint here - return 0; -} +#include + +extern "C" int a_function(); +extern "C" int 
c_function(); +extern "C" int b_function(); +extern "C" int d_function(); + +int main() { + a_function(); + b_function(); + c_function(); + d_function(); + + puts("running"); // breakpoint here + return 0; +} diff --git a/lldb/test/API/tools/lldb-dap/coreFile/linux-x86_64.out b/lldb/test/API/tools/lldb-dap/coreFile/linux-x86_64.out old mode 100644 new mode 100755 diff --git a/lldb/tools/lldb-vscode b/lldb/tools/lldb-vscode deleted file mode 100644 index 46b40044086c9b68be267b12ab9b20f0e4e64c52..0000000000000000000000000000000000000000 --- a/lldb/tools/lldb-vscode +++ /dev/null @@ -1 +0,0 @@ -lldb-dap \ No newline at end of file diff --git a/lldb/tools/lldb-vscode b/lldb/tools/lldb-vscode new file mode 120000 index 0000000000000000000000000000000000000000..46b40044086c9b68be267b12ab9b20f0e4e64c52 --- /dev/null +++ b/lldb/tools/lldb-vscode @@ -0,0 +1 @@ +lldb-dap \ No newline at end of file diff --git a/llvm-19.1.7.tar.gz.aa b/llvm-19.1.7.tar.gz.aa new file mode 100644 index 0000000000000000000000000000000000000000..6cd75546a74260db1586be10f7c2602133e315dc --- /dev/null +++ b/llvm-19.1.7.tar.gz.aa @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dae36a25f12f73c7bb3e69dd00ca4c67698d0866f327af033484cca1f902639 +size 838860800 diff --git a/llvm-19.1.7.tar.gz.ab b/llvm-19.1.7.tar.gz.ab new file mode 100644 index 0000000000000000000000000000000000000000..ce407803f121716ba52b1aac09257c9478ee192a --- /dev/null +++ b/llvm-19.1.7.tar.gz.ab @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e357f1e90193e823371947eb77ad0676a066edd3b21b575f6a64e3e6f6c33337 +size 838860800 diff --git a/llvm-19.1.7.tar.gz.ac b/llvm-19.1.7.tar.gz.ac new file mode 100644 index 0000000000000000000000000000000000000000..7fa27984a4a1936e66229855ee38d1c45c9f73ea --- /dev/null +++ b/llvm-19.1.7.tar.gz.ac @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ed8418b026b17c1694d34ecb812fe123b87b3aaf84a8909b834bac5b21e496 +size 838860800 diff --git a/llvm-19.1.7.tar.gz.ad b/llvm-19.1.7.tar.gz.ad new file mode 100644 index 0000000000000000000000000000000000000000..c60785185a7de3dd35540554d20c9de7b2324fe0 --- /dev/null +++ b/llvm-19.1.7.tar.gz.ad @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9a1d06b3470e373bed82da37509b717534c29bc36f3c66ee30479c589cf6cb +size 28966004 diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp similarity index 100% rename from llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp rename to llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp diff --git a/llvm/lib/Support/rpmalloc/CACHE.md b/llvm/lib/Support/rpmalloc/CACHE.md index 645093026debf17a312802ab367f4b12e167200f..052320baf532757e3856b08e0cd2bee29f3e86df 100644 --- a/llvm/lib/Support/rpmalloc/CACHE.md +++ b/llvm/lib/Support/rpmalloc/CACHE.md @@ -1,19 +1,19 @@ -# Thread caches -rpmalloc has a thread cache of free memory blocks which can be used in allocations without interfering with other threads or going to system to map more memory, as well as a global cache shared by all threads to let spans of memory pages flow between threads. Configuring the size of these caches can be crucial to obtaining good performance while minimizing memory overhead blowup. Below is a simple case study using the benchmark tool to compare different thread cache configurations for rpmalloc. 
- -The rpmalloc thread cache is configured to be unlimited, performance oriented as meaning default values, size oriented where both thread cache and global cache is reduced significantly, or disabled where both thread and global caches are disabled and completely free pages are directly unmapped. - -The benchmark is configured to run threads allocating 150000 blocks distributed in the `[16, 16000]` bytes range with a linear falloff probability. It runs 1000 loops, and every iteration 75000 blocks (50%) are freed and allocated in a scattered pattern. There are no cross thread allocations/deallocations. Parameters: `benchmark n 0 0 0 1000 150000 75000 16 16000`. The benchmarks are run on an Ubuntu 16.10 machine with 8 cores (4 physical, HT) and 12GiB RAM. - -The benchmark also includes results for the standard library malloc implementation as a reference for comparison with the nocache setting. - -![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=387883204&format=image) -![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=1644710241&format=image) - -For single threaded case the unlimited cache and performance oriented cache settings have identical performance and memory overhead, indicating that the memory pages fit in the combined thread and global cache. As number of threads increase to 2-4 threads, the performance settings have slightly higher performance which can seem odd at first, but can be explained by low contention on the global cache where some memory pages can flow between threads without stalling, reducing the overall number of calls to map new memory pages (also indicated by the slightly lower memory overhead). - -As threads increase even more to 5-10 threads, the increased contention and eventual limit of global cache cause the unlimited setting to gain a slight advantage in performance. As expected the memory overhead remains constant for unlimited caches, while going down for performance setting when number of threads increases. - -The size oriented setting maintain good performance compared to the standard library while reducing the memory overhead compared to the performance setting with a decent amount. - -The nocache setting still outperforms the reference standard library allocator for workloads up to 6 threads while maintaining a near zero memory overhead, which is even slightly lower than the standard library. For use case scenarios where number of allocation of each size class is lower the overhead in rpmalloc from the 64KiB span size will of course increase. +# Thread caches +rpmalloc has a thread cache of free memory blocks which can be used in allocations without interfering with other threads or going to system to map more memory, as well as a global cache shared by all threads to let spans of memory pages flow between threads. Configuring the size of these caches can be crucial to obtaining good performance while minimizing memory overhead blowup. Below is a simple case study using the benchmark tool to compare different thread cache configurations for rpmalloc. + +The rpmalloc thread cache is configured to be unlimited, performance oriented as meaning default values, size oriented where both thread cache and global cache is reduced significantly, or disabled where both thread and global caches are disabled and completely free pages are directly unmapped. 
+ +The benchmark is configured to run threads allocating 150000 blocks distributed in the `[16, 16000]` bytes range with a linear falloff probability. It runs 1000 loops, and every iteration 75000 blocks (50%) are freed and allocated in a scattered pattern. There are no cross thread allocations/deallocations. Parameters: `benchmark n 0 0 0 1000 150000 75000 16 16000`. The benchmarks are run on an Ubuntu 16.10 machine with 8 cores (4 physical, HT) and 12GiB RAM. + +The benchmark also includes results for the standard library malloc implementation as a reference for comparison with the nocache setting. + +![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=387883204&format=image) +![Ubuntu 16.10 random [16, 16000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=1644710241&format=image) + +For single threaded case the unlimited cache and performance oriented cache settings have identical performance and memory overhead, indicating that the memory pages fit in the combined thread and global cache. As number of threads increase to 2-4 threads, the performance settings have slightly higher performance which can seem odd at first, but can be explained by low contention on the global cache where some memory pages can flow between threads without stalling, reducing the overall number of calls to map new memory pages (also indicated by the slightly lower memory overhead). + +As threads increase even more to 5-10 threads, the increased contention and eventual limit of global cache cause the unlimited setting to gain a slight advantage in performance. As expected the memory overhead remains constant for unlimited caches, while going down for performance setting when number of threads increases. + +The size oriented setting maintain good performance compared to the standard library while reducing the memory overhead compared to the performance setting with a decent amount. + +The nocache setting still outperforms the reference standard library allocator for workloads up to 6 threads while maintaining a near zero memory overhead, which is even slightly lower than the standard library. For use case scenarios where number of allocation of each size class is lower the overhead in rpmalloc from the 64KiB span size will of course increase. diff --git a/llvm/lib/Support/rpmalloc/README.md b/llvm/lib/Support/rpmalloc/README.md index 2233df9da42d524c90728038b3961f6e7ebaeb26..916bca0118d868695c154de526944cb6353deee8 100644 --- a/llvm/lib/Support/rpmalloc/README.md +++ b/llvm/lib/Support/rpmalloc/README.md @@ -1,220 +1,220 @@ -# rpmalloc - General Purpose Memory Allocator -This library provides a cross platform lock free thread caching 16-byte aligned memory allocator implemented in C. -This is a fork of rpmalloc 1.4.5. - -Platforms currently supported: - -- Windows -- MacOS -- iOS -- Linux -- Android -- Haiku - -The code should be easily portable to any platform with atomic operations and an mmap-style virtual memory management API. The API used to map/unmap memory pages can be configured in runtime to a custom implementation and mapping granularity/size. - -This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. Or, if you choose, you can use it under the MIT license. 
- -# Performance -We believe rpmalloc is faster than most popular memory allocators like tcmalloc, hoard, ptmalloc3 and others without causing extra allocated memory overhead in the thread caches compared to these allocators. We also believe the implementation to be easier to read and modify compared to these allocators, as it is a single source file of ~3000 lines of C code. All allocations have a natural 16-byte alignment. - -Contained in a parallel repository is a benchmark utility that performs interleaved unaligned allocations and deallocations (both in-thread and cross-thread) in multiple threads. It measures number of memory operations performed per CPU second, as well as memory overhead by comparing the virtual memory mapped with the number of bytes requested in allocation calls. The setup of number of thread, cross-thread deallocation rate and allocation size limits is configured by command line arguments. - -https://github.com/mjansson/rpmalloc-benchmark - -Below is an example performance comparison chart of rpmalloc and other popular allocator implementations, with default configurations used. - -![Ubuntu 16.10, random [16, 8000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=301017877&format=image) - -The benchmark producing these numbers were run on an Ubuntu 16.10 machine with 8 logical cores (4 physical, HT). The actual numbers are not to be interpreted as absolute performance figures, but rather as relative comparisons between the different allocators. For additional benchmark results, see the [BENCHMARKS](BENCHMARKS.md) file. - -Configuration of the thread and global caches can be important depending on your use pattern. See [CACHE](CACHE.md) for a case study and some comments/guidelines. - -# Required functions - -Before calling any other function in the API, you __MUST__ call the initialization function, either __rpmalloc_initialize__ or __rpmalloc_initialize_config__, or you will get undefined behaviour when calling other rpmalloc entry point. - -Before terminating your use of the allocator, you __SHOULD__ call __rpmalloc_finalize__ in order to release caches and unmap virtual memory, as well as prepare the allocator for global scope cleanup at process exit or dynamic library unload depending on your use case. - -# Using -The easiest way to use the library is simply adding __rpmalloc.[h|c]__ to your project and compile them along with your sources. This contains only the rpmalloc specific entry points and does not provide internal hooks to process and/or thread creation at the moment. You are required to call these functions from your own code in order to initialize and finalize the allocator in your process and threads: - -__rpmalloc_initialize__ : Call at process start to initialize the allocator - -__rpmalloc_initialize_config__ : Optional entry point to call at process start to initialize the allocator with a custom memory mapping backend, memory page size and mapping granularity. - -__rpmalloc_finalize__: Call at process exit to finalize the allocator - -__rpmalloc_thread_initialize__: Call at each thread start to initialize the thread local data for the allocator - -__rpmalloc_thread_finalize__: Call at each thread exit to finalize and release thread cache back to global cache - -__rpmalloc_config__: Get the current runtime configuration of the allocator - -Then simply use the __rpmalloc__/__rpfree__ and the other malloc style replacement functions. 
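The "Required functions" and "Using" paragraphs quoted in this README hunk name the entry points an embedding project is expected to call (__rpmalloc_initialize__, __rpmalloc__/__rpfree__, __rpmalloc_finalize__, plus the per-thread init/finalize hooks). As a hedged illustration of that lifecycle only, not something this patch adds, a minimal sketch could look like the following; exact prototypes should be checked against the `rpmalloc.h` shipped in this tree:

```c
/* Minimal sketch of the rpmalloc lifecycle described in the text above.
 * Assumes rpmalloc.[h|c] from this fork are part of the build; exact
 * prototypes should be verified against rpmalloc.h. */
#include <stdio.h>
#include "rpmalloc.h"

int main(void) {
  /* Must be called before any other rpmalloc entry point. */
  if (rpmalloc_initialize() != 0)
    return 1;

  /* All blocks returned by rpmalloc are 16-byte aligned. */
  void *block = rpmalloc(100);
  if (block)
    printf("allocated %p\n", block);
  rpfree(block);

  /* Additional threads would bracket their work with
   * rpmalloc_thread_initialize() / rpmalloc_thread_finalize(). */

  /* Releases caches and unmaps virtual memory before process exit. */
  rpmalloc_finalize();
  return 0;
}
```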
Remember all allocations are 16-byte aligned, so no need to call the explicit rpmemalign/rpaligned_alloc/rpposix_memalign functions unless you need greater alignment, they are simply wrappers to make it easier to replace in existing code. - -If you wish to override the standard library malloc family of functions and have automatic initialization/finalization of process and threads, define __ENABLE_OVERRIDE__ to non-zero which will include the `malloc.c` file in compilation of __rpmalloc.c__, and then rebuild the library or your project where you added the rpmalloc source. If you compile rpmalloc as a separate library you must make the linker use the override symbols from the library by referencing at least one symbol. The easiest way is to simply include `rpmalloc.h` in at least one source file and call `rpmalloc_linker_reference` somewhere - it's a dummy empty function. On Windows platforms and C++ overrides you have to `#include ` in at least one source file and also manually handle the initialize/finalize of the process and all threads. The list of libc entry points replaced may not be complete, use libc/stdc++ replacement only as a convenience for testing the library on an existing code base, not a final solution. - -For explicit first class heaps, see the __rpmalloc_heap_*__ API under [first class heaps](#first-class-heaps) section, requiring __RPMALLOC_FIRST_CLASS_HEAPS__ tp be defined to 1. - -# Building -To compile as a static library run the configure python script which generates a Ninja build script, then build using ninja. The ninja build produces two static libraries, one named `rpmalloc` and one named `rpmallocwrap`, where the latter includes the libc entry point overrides. - -The configure + ninja build also produces two shared object/dynamic libraries. The `rpmallocwrap` shared library can be used with LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject in a preexisting binary, replacing any malloc/free family of function calls. This is only implemented for Linux and macOS targets. The list of libc entry points replaced may not be complete, use preloading as a convenience for testing the library on an existing binary, not a final solution. The dynamic library also provides automatic init/fini of process and threads for all platforms. - -The latest stable release is available in the master branch. For latest development code, use the develop branch. - -# Cache configuration options -Free memory pages are cached both per thread and in a global cache for all threads. The size of the thread caches is determined by an adaptive scheme where each cache is limited by a percentage of the maximum allocation count of the corresponding size class. The size of the global caches is determined by a multiple of the maximum of all thread caches. The factors controlling the cache sizes can be set by editing the individual defines in the `rpmalloc.c` source file for fine tuned control. - -__ENABLE_UNLIMITED_CACHE__: By default defined to 0, set to 1 to make all caches infinite, i.e never release spans to global cache unless thread finishes and never unmap memory pages back to the OS. Highest performance but largest memory overhead. - -__ENABLE_UNLIMITED_GLOBAL_CACHE__: By default defined to 0, set to 1 to make global caches infinite, i.e never unmap memory pages back to the OS. - -__ENABLE_UNLIMITED_THREAD_CACHE__: By default defined to 0, set to 1 to make thread caches infinite, i.e never release spans to global cache unless thread finishes. 
- -__ENABLE_GLOBAL_CACHE__: By default defined to 1, enables the global cache shared between all threads. Set to 0 to disable the global cache and directly unmap pages evicted from the thread cache. - -__ENABLE_THREAD_CACHE__: By default defined to 1, enables the per-thread cache. Set to 0 to disable the thread cache and directly unmap pages no longer in use (also disables the global cache). - -__ENABLE_ADAPTIVE_THREAD_CACHE__: Introduces a simple heuristics in the thread cache size, keeping 25% of the high water mark for each span count class. - -# Other configuration options -Detailed statistics are available if __ENABLE_STATISTICS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. This will cause a slight overhead in runtime to collect statistics for each memory operation, and will also add 4 bytes overhead per allocation to track sizes. - -Integer safety checks on all calls are enabled if __ENABLE_VALIDATE_ARGS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. If enabled, size arguments to the global entry points are verified not to cause integer overflows in calculations. - -Asserts are enabled if __ENABLE_ASSERTS__ is defined to 1 (default is 0, or disabled), either on compile command line or by setting the value in `rpmalloc.c`. - -To include __malloc.c__ in compilation and provide overrides of standard library malloc entry points define __ENABLE_OVERRIDE__ to 1. To enable automatic initialization of finalization of process and threads in order to preload the library into executables using standard library malloc, define __ENABLE_PRELOAD__ to 1. - -To enable the runtime configurable memory page and span sizes, define __RPMALLOC_CONFIGURABLE__ to 1. By default, memory page size is determined by system APIs and memory span size is set to 64KiB. - -To enable support for first class heaps, define __RPMALLOC_FIRST_CLASS_HEAPS__ to 1. By default, the first class heap API is disabled. - -# Huge pages -The allocator has support for huge/large pages on Windows, Linux and MacOS. To enable it, pass a non-zero value in the config value `enable_huge_pages` when initializing the allocator with `rpmalloc_initialize_config`. If the system does not support huge pages it will be automatically disabled. You can query the status by looking at `enable_huge_pages` in the config returned from a call to `rpmalloc_config` after initialization is done. - -# Quick overview -The allocator is similar in spirit to tcmalloc from the [Google Performance Toolkit](https://github.com/gperftools/gperftools). It uses separate heaps for each thread and partitions memory blocks according to a preconfigured set of size classes, up to 2MiB. Larger blocks are mapped and unmapped directly. Allocations for different size classes will be served from different set of memory pages, each "span" of pages is dedicated to one size class. Spans of pages can flow between threads when the thread cache overflows and are released to a global cache, or when the thread ends. Unlike tcmalloc, single blocks do not flow between threads, only entire spans of pages. - -# Implementation details -The allocator is based on a fixed but configurable page alignment (defaults to 64KiB) and 16 byte block alignment, where all runs of memory pages (spans) are mapped to this alignment boundary. 
On Windows this is automatically guaranteed up to 64KiB by the VirtualAlloc granularity, and on mmap systems it is achieved by oversizing the mapping and aligning the returned virtual memory address to the required boundaries. By aligning to a fixed size the free operation can locate the header of the memory span without having to do a table lookup (as tcmalloc does) by simply masking out the low bits of the address (for 64KiB this would be the low 16 bits). - -Memory blocks are divided into three categories. For 64KiB span size/alignment the small blocks are [16, 1024] bytes, medium blocks (1024, 32256] bytes, and large blocks (32256, 2097120] bytes. The three categories are further divided in size classes. If the span size is changed, the small block classes remain but medium blocks go from (1024, span size] bytes. - -Small blocks have a size class granularity of 16 bytes each in 64 buckets. Medium blocks have a granularity of 512 bytes, 61 buckets (default). Large blocks have the same granularity as the configured span size (default 64KiB). All allocations are fitted to these size class boundaries (an allocation of 36 bytes will allocate a block of 48 bytes). Each small and medium size class has an associated span (meaning a contiguous set of memory pages) configuration describing how many pages the size class will allocate each time the cache is empty and a new allocation is requested. - -Spans for small and medium blocks are cached in four levels to avoid calls to map/unmap memory pages. The first level is a per thread single active span for each size class. The second level is a per thread list of partially free spans for each size class. The third level is a per thread list of free spans. The fourth level is a global list of free spans. - -Each span for a small and medium size class keeps track of how many blocks are allocated/free, as well as a list of which blocks that are free for allocation. To avoid locks, each span is completely owned by the allocating thread, and all cross-thread deallocations will be deferred to the owner thread through a separate free list per span. - -Large blocks, or super spans, are cached in two levels. The first level is a per thread list of free super spans. The second level is a global list of free super spans. - -# Memory mapping -By default the allocator uses OS APIs to map virtual memory pages as needed, either `VirtualAlloc` on Windows or `mmap` on POSIX systems. If you want to use your own custom memory mapping provider you can use __rpmalloc_initialize_config__ and pass function pointers to map and unmap virtual memory. These function should reserve and free the requested number of bytes. - -The returned memory address from the memory map function MUST be aligned to the memory page size and the memory span size (which ever is larger), both of which is configurable. Either provide the page and span sizes during initialization using __rpmalloc_initialize_config__, or use __rpmalloc_config__ to find the required alignment which is equal to the maximum of page and span size. The span size MUST be a power of two in [4096, 262144] range, and be a multiple or divisor of the memory page size. - -Memory mapping requests are always done in multiples of the memory page size. You can specify a custom page size when initializing rpmalloc with __rpmalloc_initialize_config__, or pass 0 to let rpmalloc determine the system memory page size using OS APIs. The page size MUST be a power of two. 
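The "Memory mapping" and "Huge pages" paragraphs above describe initializing the allocator through __rpmalloc_initialize_config__ with custom page/span sizes and the `enable_huge_pages` value, and querying the live settings via __rpmalloc_config__. The sketch below only illustrates that description; the `rpmalloc_config_t` struct and its field names are assumptions taken from upstream rpmalloc and should be verified against `rpmalloc.h` in this fork:

```c
/* Hedged sketch of configuring rpmalloc at initialization, based on the
 * "Memory mapping" and "Huge pages" text above. The rpmalloc_config_t
 * type and field names are assumptions; verify against rpmalloc.h. */
#include <string.h>
#include "rpmalloc.h"

static int init_allocator(void) {
  rpmalloc_config_t config;
  memset(&config, 0, sizeof(config)); /* zeroed fields fall back to defaults */

  config.page_size = 0;         /* 0: let rpmalloc query the OS page size */
  config.span_size = 64 * 1024; /* power of two in [4096, 262144]; takes
                                   effect only with RPMALLOC_CONFIGURABLE=1
                                   per the text above */
  config.enable_huge_pages = 1; /* request huge pages if the system allows */

  if (rpmalloc_initialize_config(&config) != 0)
    return -1;

  /* Huge pages are auto-disabled when unsupported; query the live config. */
  return rpmalloc_config()->enable_huge_pages ? 0 : 1;
}
```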
- -To reduce system call overhead, memory spans are mapped in batches controlled by the `span_map_count` configuration variable (which defaults to the `DEFAULT_SPAN_MAP_COUNT` value if 0, which in turn is sized according to the cache configuration define, defaulting to 64). If the memory page size is larger than the span size, the number of spans to map in a single call will be adjusted to guarantee a multiple of the page size, and the spans will be kept mapped until the entire span range can be unmapped in one call (to avoid trying to unmap partial pages). - -On macOS and iOS mmap requests are tagged with tag 240 for easy identification with the vmmap tool. - -# Span breaking -Super spans (spans a multiple > 1 of the span size) can be subdivided into smaller spans to fulfill a need to map a new span of memory. By default the allocator will greedily grab and break any larger span from the available caches before mapping new virtual memory. However, spans can currently not be glued together to form larger super spans again. Subspans can traverse the cache and be used by different threads individually. - -A span that is a subspan of a larger super span can be individually decommitted to reduce physical memory pressure when the span is evicted from caches and scheduled to be unmapped. The entire original super span will keep track of the subspans it is broken up into, and when the entire range is decommitted the super span will be unmapped. This allows platforms like Windows that require the entire virtual memory range that was mapped in a call to VirtualAlloc to be unmapped in one call to VirtualFree, while still decommitting individual pages in subspans (if the page size is smaller than the span size). - -If you use a custom memory map/unmap function you need to take this into account by looking at the `release` parameter given to the `memory_unmap` function. It is set to 0 for decommitting individual pages and the total super span byte size for finally releasing the entire super span memory range. - -# Memory fragmentation -There is no memory fragmentation by the allocator in the sense that it will not leave unallocated and unusable "holes" in the memory pages by calls to allocate and free blocks of different sizes. This is due to the fact that the memory pages allocated for each size class is split up in perfectly aligned blocks which are not reused for a request of a different size. The block freed by a call to `rpfree` will always be immediately available for an allocation request within the same size class. - -However, there is memory fragmentation in the meaning that a request for x bytes followed by a request of y bytes where x and y are at least one size class different in size will return blocks that are at least one memory page apart in virtual address space. Only blocks of the same size will potentially be within the same memory page span. - -rpmalloc keeps an "active span" and free list for each size class. This leads to back-to-back allocations will most likely be served from within the same span of memory pages (unless the span runs out of free blocks). The rpmalloc implementation will also use any "holes" in memory pages in semi-filled spans before using a completely free span. - -# First class heaps -rpmalloc provides a first class heap type with explicit heap control API. Heaps are maintained with calls to __rpmalloc_heap_acquire__ and __rpmalloc_heap_release__ and allocations/frees are done with __rpmalloc_heap_alloc__ and __rpmalloc_heap_free__. 
See the `rpmalloc.h` documentation for the full list of functions in the heap API. The main use case of explicit heap control is to scope allocations in a heap and release everything with a single call to __rpmalloc_heap_free_all__ without having to maintain ownership of memory blocks. Note that the heap API is not thread-safe, the caller must make sure that each heap is only used in a single thread at any given time. - -# Producer-consumer scenario -Compared to the some other allocators, rpmalloc does not suffer as much from a producer-consumer thread scenario where one thread allocates memory blocks and another thread frees the blocks. In some allocators the free blocks need to traverse both the thread cache of the thread doing the free operations as well as the global cache before being reused in the allocating thread. In rpmalloc the freed blocks will be reused as soon as the allocating thread needs to get new spans from the thread cache. This enables faster release of completely freed memory pages as blocks in a memory page will not be aliased between different owning threads. - -# Best case scenarios -Threads that keep ownership of allocated memory blocks within the thread and free the blocks from the same thread will have optimal performance. - -Threads that have allocation patterns where the difference in memory usage high and low water marks fit within the thread cache thresholds in the allocator will never touch the global cache except during thread init/fini and have optimal performance. Tweaking the cache limits can be done on a per-size-class basis. - -# Worst case scenarios -Since each thread cache maps spans of memory pages per size class, a thread that allocates just a few blocks of each size class (16, 32, ...) for many size classes will never fill each bucket, and thus map a lot of memory pages while only using a small fraction of the mapped memory. However, the wasted memory will always be less than 4KiB (or the configured memory page size) per size class as each span is initialized one memory page at a time. The cache for free spans will be reused by all size classes. - -Threads that perform a lot of allocations and deallocations in a pattern that have a large difference in high and low water marks, and that difference is larger than the thread cache size, will put a lot of contention on the global cache. What will happen is the thread cache will overflow on each low water mark causing pages to be released to the global cache, then underflow on high water mark causing pages to be re-acquired from the global cache. This can be mitigated by changing the __MAX_SPAN_CACHE_DIVISOR__ define in the source code (at the cost of higher average memory overhead). - -# Caveats -VirtualAlloc has an internal granularity of 64KiB. However, mmap lacks this granularity control, and the implementation instead oversizes the memory mapping with configured span size to be able to always return a memory area with the required alignment. Since the extra memory pages are never touched this will not result in extra committed physical memory pages, but rather only increase virtual memory address space. - -All entry points assume the passed values are valid, for example passing an invalid pointer to free would most likely result in a segmentation fault. __The library does not try to guard against errors!__. 
- -To support global scope data doing dynamic allocation/deallocation such as C++ objects with custom constructors and destructors, the call to __rpmalloc_finalize__ will not completely terminate the allocator but rather empty all caches and put the allocator in finalization mode. Once this call has been made, the allocator is no longer thread safe and expects all remaining calls to originate from global data destruction on main thread. Any spans or heaps becoming free during this phase will be immediately unmapped to allow correct teardown of the process or dynamic library without any leaks. - -# Other languages - -[Johan Andersson](https://github.com/repi) at Embark has created a Rust wrapper available at [rpmalloc-rs](https://github.com/EmbarkStudios/rpmalloc-rs) - -[Stas Denisov](https://github.com/nxrighthere) has created a C# wrapper available at [Rpmalloc-CSharp](https://github.com/nxrighthere/Rpmalloc-CSharp) - -# License - -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to - - -You can also use this software under the MIT license if public domain is -not recognized in your country - - -The MIT License (MIT) - -Copyright (c) 2017 Mattias Jansson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +# rpmalloc - General Purpose Memory Allocator +This library provides a cross platform lock free thread caching 16-byte aligned memory allocator implemented in C. 
+This is a fork of rpmalloc 1.4.5.
+
+Platforms currently supported:
+
+- Windows
+- MacOS
+- iOS
+- Linux
+- Android
+- Haiku
+
+The code should be easily portable to any platform with atomic operations and an mmap-style virtual memory management API. The API used to map/unmap memory pages can be configured at runtime to a custom implementation and mapping granularity/size.
+
+This library is put in the public domain; you can redistribute it and/or modify it without any restrictions. Or, if you choose, you can use it under the MIT license.
+
+# Performance
+We believe rpmalloc is faster than most popular memory allocators like tcmalloc, hoard, ptmalloc3 and others without causing extra allocated memory overhead in the thread caches compared to these allocators. We also believe the implementation to be easier to read and modify compared to these allocators, as it is a single source file of ~3000 lines of C code. All allocations have a natural 16-byte alignment.
+
+Contained in a parallel repository is a benchmark utility that performs interleaved unaligned allocations and deallocations (both in-thread and cross-thread) in multiple threads. It measures the number of memory operations performed per CPU second, as well as memory overhead by comparing the virtual memory mapped with the number of bytes requested in allocation calls. The number of threads, cross-thread deallocation rate and allocation size limits are configured by command line arguments.
+
+https://github.com/mjansson/rpmalloc-benchmark
+
+Below is an example performance comparison chart of rpmalloc and other popular allocator implementations, with default configurations used.
+
+![Ubuntu 16.10, random [16, 8000] bytes, 8 cores](https://docs.google.com/spreadsheets/d/1NWNuar1z0uPCB5iVS_Cs6hSo2xPkTmZf0KsgWS_Fb_4/pubchart?oid=301017877&format=image)
+
+The benchmark producing these numbers was run on an Ubuntu 16.10 machine with 8 logical cores (4 physical, HT). The actual numbers are not to be interpreted as absolute performance figures, but rather as relative comparisons between the different allocators. For additional benchmark results, see the [BENCHMARKS](BENCHMARKS.md) file.
+
+Configuration of the thread and global caches can be important depending on your use pattern. See [CACHE](CACHE.md) for a case study and some comments/guidelines.
+
+# Required functions
+
+Before calling any other function in the API, you __MUST__ call the initialization function, either __rpmalloc_initialize__ or __rpmalloc_initialize_config__, or you will get undefined behaviour when calling other rpmalloc entry points.
+
+Before terminating your use of the allocator, you __SHOULD__ call __rpmalloc_finalize__ in order to release caches and unmap virtual memory, as well as prepare the allocator for global scope cleanup at process exit or dynamic library unload depending on your use case.
+
+# Using
+The easiest way to use the library is simply adding __rpmalloc.[h|c]__ to your project and compiling them along with your sources. This contains only the rpmalloc specific entry points and does not provide internal hooks into process and/or thread creation at the moment.
You are required to call these functions from your own code in order to initialize and finalize the allocator in your process and threads:
+
+__rpmalloc_initialize__: Call at process start to initialize the allocator
+
+__rpmalloc_initialize_config__: Optional entry point to call at process start to initialize the allocator with a custom memory mapping backend, memory page size and mapping granularity.
+
+__rpmalloc_finalize__: Call at process exit to finalize the allocator
+
+__rpmalloc_thread_initialize__: Call at each thread start to initialize the thread local data for the allocator
+
+__rpmalloc_thread_finalize__: Call at each thread exit to finalize and release the thread cache back to the global cache
+
+__rpmalloc_config__: Get the current runtime configuration of the allocator
+
+Then simply use __rpmalloc__/__rpfree__ and the other malloc-style replacement functions. Remember all allocations are 16-byte aligned, so there is no need to call the explicit rpmemalign/rpaligned_alloc/rpposix_memalign functions unless you need greater alignment; they are simply wrappers to make it easier to replace in existing code.
+
+If you wish to override the standard library malloc family of functions and have automatic initialization/finalization of process and threads, define __ENABLE_OVERRIDE__ to non-zero, which will include the `malloc.c` file in the compilation of __rpmalloc.c__, and then rebuild the library or your project where you added the rpmalloc source. If you compile rpmalloc as a separate library you must make the linker use the override symbols from the library by referencing at least one symbol. The easiest way is to simply include `rpmalloc.h` in at least one source file and call `rpmalloc_linker_reference` somewhere - it's a dummy empty function. On Windows platforms and C++ overrides you have to `#include ` in at least one source file and also manually handle the initialize/finalize of the process and all threads. The list of libc entry points replaced may not be complete, so use libc/stdc++ replacement only as a convenience for testing the library on an existing code base, not as a final solution.
+
+For explicit first class heaps, see the __rpmalloc_heap_*__ API under the [first class heaps](#first-class-heaps) section, requiring __RPMALLOC_FIRST_CLASS_HEAPS__ to be defined to 1.
+
+# Building
+To compile as a static library, run the configure Python script, which generates a Ninja build script, then build using ninja. The ninja build produces two static libraries, one named `rpmalloc` and one named `rpmallocwrap`, where the latter includes the libc entry point overrides.
+
+The configure + ninja build also produces two shared object/dynamic libraries. The `rpmallocwrap` shared library can be used with LD_PRELOAD/DYLD_INSERT_LIBRARIES to inject into a preexisting binary, replacing any malloc/free family of function calls. This is only implemented for Linux and macOS targets. The list of libc entry points replaced may not be complete, so use preloading as a convenience for testing the library on an existing binary, not as a final solution. The dynamic library also provides automatic init/fini of process and threads for all platforms.
+
+The latest stable release is available in the master branch. For the latest development code, use the develop branch.
+
+# Cache configuration options
+Free memory pages are cached both per thread and in a global cache for all threads.
The size of the thread caches is determined by an adaptive scheme where each cache is limited by a percentage of the maximum allocation count of the corresponding size class. The size of the global caches is determined by a multiple of the maximum of all thread caches. The factors controlling the cache sizes can be set by editing the individual defines in the `rpmalloc.c` source file for fine-tuned control.
+
+__ENABLE_UNLIMITED_CACHE__: By default defined to 0, set to 1 to make all caches infinite, i.e. never release spans to the global cache unless the thread finishes and never unmap memory pages back to the OS. Highest performance but largest memory overhead.
+
+__ENABLE_UNLIMITED_GLOBAL_CACHE__: By default defined to 0, set to 1 to make global caches infinite, i.e. never unmap memory pages back to the OS.
+
+__ENABLE_UNLIMITED_THREAD_CACHE__: By default defined to 0, set to 1 to make thread caches infinite, i.e. never release spans to the global cache unless the thread finishes.
+
+__ENABLE_GLOBAL_CACHE__: By default defined to 1, enables the global cache shared between all threads. Set to 0 to disable the global cache and directly unmap pages evicted from the thread cache.
+
+__ENABLE_THREAD_CACHE__: By default defined to 1, enables the per-thread cache. Set to 0 to disable the thread cache and directly unmap pages no longer in use (also disables the global cache).
+
+__ENABLE_ADAPTIVE_THREAD_CACHE__: Introduces a simple heuristic for the thread cache size, keeping 25% of the high water mark for each span count class.
+
+# Other configuration options
+Detailed statistics are available if __ENABLE_STATISTICS__ is defined to 1 (default is 0, or disabled), either on the compile command line or by setting the value in `rpmalloc.c`. This will cause a slight runtime overhead to collect statistics for each memory operation, and will also add 4 bytes of overhead per allocation to track sizes.
+
+Integer safety checks on all calls are enabled if __ENABLE_VALIDATE_ARGS__ is defined to 1 (default is 0, or disabled), either on the compile command line or by setting the value in `rpmalloc.c`. If enabled, size arguments to the global entry points are verified not to cause integer overflows in calculations.
+
+Asserts are enabled if __ENABLE_ASSERTS__ is defined to 1 (default is 0, or disabled), either on the compile command line or by setting the value in `rpmalloc.c`.
+
+To include __malloc.c__ in compilation and provide overrides of the standard library malloc entry points, define __ENABLE_OVERRIDE__ to 1. To enable automatic initialization and finalization of process and threads in order to preload the library into executables using standard library malloc, define __ENABLE_PRELOAD__ to 1.
+
+To enable the runtime configurable memory page and span sizes, define __RPMALLOC_CONFIGURABLE__ to 1. By default, the memory page size is determined by system APIs and the memory span size is set to 64KiB.
+
+To enable support for first class heaps, define __RPMALLOC_FIRST_CLASS_HEAPS__ to 1. By default, the first class heap API is disabled.
+
+# Huge pages
+The allocator has support for huge/large pages on Windows, Linux and MacOS. To enable it, pass a non-zero value in the config value `enable_huge_pages` when initializing the allocator with `rpmalloc_initialize_config`. If the system does not support huge pages, the setting will be automatically disabled. You can query the status by looking at `enable_huge_pages` in the config returned from a call to `rpmalloc_config` after initialization is done.
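+
+As a minimal sketch of this (assuming the configuration struct is named `rpmalloc_config_t` with an `enable_huge_pages` field and zero-initialized defaults, as declared in `rpmalloc.h`), enabling huge pages and checking whether the request took effect could look like:
+
+```c
+#include <stdio.h>
+#include <string.h>
+#include "rpmalloc.h"
+
+int main(void) {
+  // Zero-initialize so unset fields keep their defaults (e.g. page size
+  // and span size are then determined by the allocator as described above).
+  rpmalloc_config_t config;
+  memset(&config, 0, sizeof(config));
+  config.enable_huge_pages = 1; // request huge/large pages if available
+
+  rpmalloc_initialize_config(&config);
+
+  // Huge pages are silently disabled when unsupported; query the result.
+  if (!rpmalloc_config()->enable_huge_pages)
+    printf("huge pages unavailable, falling back to normal pages\n");
+
+  void *block = rpmalloc(128); // 16-byte aligned like all allocations
+  rpfree(block);
+
+  rpmalloc_finalize();
+  return 0;
+}
+```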
+
+# Quick overview
+The allocator is similar in spirit to tcmalloc from the [Google Performance Toolkit](https://github.com/gperftools/gperftools). It uses separate heaps for each thread and partitions memory blocks according to a preconfigured set of size classes, up to 2MiB. Larger blocks are mapped and unmapped directly. Allocations for different size classes will be served from different sets of memory pages; each "span" of pages is dedicated to one size class. Spans of pages can flow between threads when the thread cache overflows and are released to a global cache, or when the thread ends. Unlike tcmalloc, single blocks do not flow between threads, only entire spans of pages.
+
+# Implementation details
+The allocator is based on a fixed but configurable page alignment (defaults to 64KiB) and 16-byte block alignment, where all runs of memory pages (spans) are mapped to this alignment boundary. On Windows this is automatically guaranteed up to 64KiB by the VirtualAlloc granularity, and on mmap systems it is achieved by oversizing the mapping and aligning the returned virtual memory address to the required boundaries. By aligning to a fixed size the free operation can locate the header of the memory span without having to do a table lookup (as tcmalloc does) by simply masking out the low bits of the address (for 64KiB this would be the low 16 bits).
+
+Memory blocks are divided into three categories. For 64KiB span size/alignment the small blocks are [16, 1024] bytes, medium blocks (1024, 32256] bytes, and large blocks (32256, 2097120] bytes. The three categories are further divided into size classes. If the span size is changed, the small block classes remain but medium blocks go from (1024, span size] bytes.
+
+Small blocks have a size class granularity of 16 bytes in 64 buckets. Medium blocks have a granularity of 512 bytes in 61 buckets (default). Large blocks have the same granularity as the configured span size (default 64KiB). All allocations are fitted to these size class boundaries (an allocation of 36 bytes will allocate a block of 48 bytes). Each small and medium size class has an associated span (meaning a contiguous set of memory pages) configuration describing how many pages the size class will allocate each time the cache is empty and a new allocation is requested.
+
+Spans for small and medium blocks are cached in four levels to avoid calls to map/unmap memory pages. The first level is a per thread single active span for each size class. The second level is a per thread list of partially free spans for each size class. The third level is a per thread list of free spans. The fourth level is a global list of free spans.
+
+Each span for a small and medium size class keeps track of how many blocks are allocated/free, as well as a list of which blocks are free for allocation. To avoid locks, each span is completely owned by the allocating thread, and all cross-thread deallocations will be deferred to the owner thread through a separate free list per span.
+
+Large blocks, or super spans, are cached in two levels. The first level is a per thread list of free super spans. The second level is a global list of free super spans.
+
+# Memory mapping
+By default the allocator uses OS APIs to map virtual memory pages as needed, either `VirtualAlloc` on Windows or `mmap` on POSIX systems. If you want to use your own custom memory mapping provider you can use __rpmalloc_initialize_config__ and pass function pointers to map and unmap virtual memory.
These functions should reserve and free the requested number of bytes.
+
+The returned memory address from the memory map function MUST be aligned to the memory page size and the memory span size (whichever is larger), both of which are configurable. Either provide the page and span sizes during initialization using __rpmalloc_initialize_config__, or use __rpmalloc_config__ to find the required alignment, which is equal to the maximum of the page and span size. The span size MUST be a power of two in the [4096, 262144] range, and be a multiple or divisor of the memory page size.
+
+Memory mapping requests are always done in multiples of the memory page size. You can specify a custom page size when initializing rpmalloc with __rpmalloc_initialize_config__, or pass 0 to let rpmalloc determine the system memory page size using OS APIs. The page size MUST be a power of two.
+
+To reduce system call overhead, memory spans are mapped in batches controlled by the `span_map_count` configuration variable (which defaults to the `DEFAULT_SPAN_MAP_COUNT` value if 0, which in turn is sized according to the cache configuration define, defaulting to 64). If the memory page size is larger than the span size, the number of spans to map in a single call will be adjusted to guarantee a multiple of the page size, and the spans will be kept mapped until the entire span range can be unmapped in one call (to avoid trying to unmap partial pages).
+
+On macOS and iOS mmap requests are tagged with tag 240 for easy identification with the vmmap tool.
+
+# Span breaking
+Super spans (spans a multiple > 1 of the span size) can be subdivided into smaller spans to fulfill a need to map a new span of memory. By default the allocator will greedily grab and break any larger span from the available caches before mapping new virtual memory. However, spans can currently not be glued together to form larger super spans again. Subspans can traverse the cache and be used by different threads individually.
+
+A span that is a subspan of a larger super span can be individually decommitted to reduce physical memory pressure when the span is evicted from caches and scheduled to be unmapped. The entire original super span will keep track of the subspans it is broken up into, and when the entire range is decommitted the super span will be unmapped. This accommodates platforms like Windows, which require the entire virtual memory range that was mapped in a call to VirtualAlloc to be unmapped in one call to VirtualFree, while still allowing individual pages in subspans to be decommitted (if the page size is smaller than the span size).
+
+If you use a custom memory map/unmap function you need to take this into account by looking at the `release` parameter given to the `memory_unmap` function. It is set to 0 when decommitting individual pages, and to the total super span byte size when finally releasing the entire super span memory range.
+
+# Memory fragmentation
+There is no memory fragmentation by the allocator in the sense that it will not leave unallocated and unusable "holes" in the memory pages by calls to allocate and free blocks of different sizes. This is due to the fact that the memory pages allocated for each size class are split up into perfectly aligned blocks which are not reused for a request of a different size. The block freed by a call to `rpfree` will always be immediately available for an allocation request within the same size class.
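+
+A small sketch of the size class fitting described above (the output is illustrative; the exact value depends on the configuration, but with the default 16 byte small block granularity a 36 byte request is served from the 48 byte class):
+
+```c
+#include <stdio.h>
+#include "rpmalloc.h"
+
+int main(void) {
+  rpmalloc_initialize();
+
+  // A 36 byte request is rounded up to its size class boundary, so the
+  // usable size reported for the block is the full class size.
+  void *p = rpmalloc(36);
+  printf("requested 36, usable %zu\n", rpmalloc_usable_size(p));
+
+  // Freeing returns the block to the owning thread's span free list, so a
+  // subsequent request in the same size class can reuse it immediately.
+  rpfree(p);
+
+  rpmalloc_finalize();
+  return 0;
+}
+```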
+
+However, there is memory fragmentation in the sense that a request for x bytes followed by a request for y bytes, where x and y are at least one size class apart, will return blocks that are at least one memory page apart in virtual address space. Only blocks of the same size will potentially be within the same memory page span.
+
+rpmalloc keeps an "active span" and free list for each size class. This means that back-to-back allocations will most likely be served from within the same span of memory pages (unless the span runs out of free blocks). The rpmalloc implementation will also use any "holes" in memory pages in semi-filled spans before using a completely free span.
+
+# First class heaps
+rpmalloc provides a first class heap type with an explicit heap control API. Heaps are maintained with calls to __rpmalloc_heap_acquire__ and __rpmalloc_heap_release__ and allocations/frees are done with __rpmalloc_heap_alloc__ and __rpmalloc_heap_free__. See the `rpmalloc.h` documentation for the full list of functions in the heap API. The main use case of explicit heap control is to scope allocations in a heap and release everything with a single call to __rpmalloc_heap_free_all__ without having to maintain ownership of memory blocks. Note that the heap API is not thread-safe; the caller must make sure that each heap is only used in a single thread at any given time.
+
+# Producer-consumer scenario
+Compared to some other allocators, rpmalloc does not suffer as much from a producer-consumer thread scenario where one thread allocates memory blocks and another thread frees the blocks. In some allocators the free blocks need to traverse both the thread cache of the thread doing the free operations as well as the global cache before being reused in the allocating thread. In rpmalloc the freed blocks will be reused as soon as the allocating thread needs to get new spans from the thread cache. This enables faster release of completely freed memory pages as blocks in a memory page will not be aliased between different owning threads.
+
+# Best case scenarios
+Threads that keep ownership of allocated memory blocks within the thread and free the blocks from the same thread will have optimal performance.
+
+Threads with allocation patterns where the difference between the memory usage high and low water marks fits within the thread cache thresholds of the allocator will never touch the global cache except during thread init/fini, and have optimal performance. Tweaking the cache limits can be done on a per-size-class basis.
+
+# Worst case scenarios
+Since each thread cache maps spans of memory pages per size class, a thread that allocates just a few blocks of each size class (16, 32, ...) for many size classes will never fill each bucket, and thus map a lot of memory pages while only using a small fraction of the mapped memory. However, the wasted memory will always be less than 4KiB (or the configured memory page size) per size class as each span is initialized one memory page at a time. The cache for free spans will be reused by all size classes.
+
+Threads that perform a lot of allocations and deallocations in a pattern that has a large difference between its high and low water marks, where that difference is larger than the thread cache size, will put a lot of contention on the global cache. What will happen is that the thread cache will overflow at each low water mark, causing pages to be released to the global cache, and then underflow at each high water mark, causing pages to be re-acquired from the global cache.
This can be mitigated by changing the __MAX_SPAN_CACHE_DIVISOR__ define in the source code (at the cost of higher average memory overhead).
+
+# Caveats
+VirtualAlloc has an internal granularity of 64KiB. However, mmap lacks this granularity control, and the implementation instead oversizes the memory mapping with the configured span size to be able to always return a memory area with the required alignment. Since the extra memory pages are never touched this will not result in extra committed physical memory pages, but rather only increase virtual memory address space.
+
+All entry points assume the passed values are valid; for example, passing an invalid pointer to free would most likely result in a segmentation fault. __The library does not try to guard against errors!__
+
+To support global scope data that does dynamic allocation/deallocation, such as C++ objects with custom constructors and destructors, the call to __rpmalloc_finalize__ will not completely terminate the allocator but rather empty all caches and put the allocator in finalization mode. Once this call has been made, the allocator is no longer thread-safe and expects all remaining calls to originate from global data destruction on the main thread. Any spans or heaps becoming free during this phase will be immediately unmapped to allow correct teardown of the process or dynamic library without any leaks.
+
+# Other languages
+
+[Johan Andersson](https://github.com/repi) at Embark has created a Rust wrapper available at [rpmalloc-rs](https://github.com/EmbarkStudios/rpmalloc-rs)
+
+[Stas Denisov](https://github.com/nxrighthere) has created a C# wrapper available at [Rpmalloc-CSharp](https://github.com/nxrighthere/Rpmalloc-CSharp)
+
+# License
+
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+ +For more information, please refer to + + +You can also use this software under the MIT license if public domain is +not recognized in your country + + +The MIT License (MIT) + +Copyright (c) 2017 Mattias Jansson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/llvm/lib/Support/rpmalloc/malloc.c b/llvm/lib/Support/rpmalloc/malloc.c index 59e13aab3ef7ed82c5754d93e2c9cc5cf9c2913c..3fcfe848250c6bf36930e77ab493f2a50c600cce 100644 --- a/llvm/lib/Support/rpmalloc/malloc.c +++ b/llvm/lib/Support/rpmalloc/malloc.c @@ -1,724 +1,724 @@ -//===------------------------ malloc.c ------------------*- C -*-=============// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This library provides a cross-platform lock free thread caching malloc -// implementation in C11. 
-// -// -// This file provides overrides for the standard library malloc entry points for -// C and new/delete operators for C++ It also provides automatic -// initialization/finalization of process and threads -// -//===----------------------------------------------------------------------===// - -#if defined(__TINYC__) -#include -#endif - -#ifndef ARCH_64BIT -#if defined(__LLP64__) || defined(__LP64__) || defined(_WIN64) -#define ARCH_64BIT 1 -_Static_assert(sizeof(size_t) == 8, "Data type size mismatch"); -_Static_assert(sizeof(void *) == 8, "Data type size mismatch"); -#else -#define ARCH_64BIT 0 -_Static_assert(sizeof(size_t) == 4, "Data type size mismatch"); -_Static_assert(sizeof(void *) == 4, "Data type size mismatch"); -#endif -#endif - -#if (defined(__GNUC__) || defined(__clang__)) -#pragma GCC visibility push(default) -#endif - -#define USE_IMPLEMENT 1 -#define USE_INTERPOSE 0 -#define USE_ALIAS 0 - -#if defined(__APPLE__) -#undef USE_INTERPOSE -#define USE_INTERPOSE 1 - -typedef struct interpose_t { - void *new_func; - void *orig_func; -} interpose_t; - -#define MAC_INTERPOSE_PAIR(newf, oldf) {(void *)newf, (void *)oldf} -#define MAC_INTERPOSE_SINGLE(newf, oldf) \ - __attribute__((used)) static const interpose_t macinterpose##newf##oldf \ - __attribute__((section("__DATA, __interpose"))) = \ - MAC_INTERPOSE_PAIR(newf, oldf) - -#endif - -#if !defined(_WIN32) && !defined(__APPLE__) -#undef USE_IMPLEMENT -#undef USE_ALIAS -#define USE_IMPLEMENT 0 -#define USE_ALIAS 1 -#endif - -#ifdef _MSC_VER -#pragma warning(disable : 4100) -#undef malloc -#undef free -#undef calloc -#define RPMALLOC_RESTRICT __declspec(restrict) -#else -#define RPMALLOC_RESTRICT -#endif - -#if ENABLE_OVERRIDE - -typedef struct rp_nothrow_t { - int __dummy; -} rp_nothrow_t; - -#if USE_IMPLEMENT - -extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL malloc(size_t size) { - return rpmalloc(size); -} -extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL calloc(size_t count, - size_t size) { - return rpcalloc(count, size); -} -extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL realloc(void *ptr, - size_t size) { - return rprealloc(ptr, size); -} -extern inline void *RPMALLOC_CDECL reallocf(void *ptr, size_t size) { - return rprealloc(ptr, size); -} -extern inline void *RPMALLOC_CDECL aligned_alloc(size_t alignment, - size_t size) { - return rpaligned_alloc(alignment, size); -} -extern inline void *RPMALLOC_CDECL memalign(size_t alignment, size_t size) { - return rpmemalign(alignment, size); -} -extern inline int RPMALLOC_CDECL posix_memalign(void **memptr, size_t alignment, - size_t size) { - return rpposix_memalign(memptr, alignment, size); -} -extern inline void RPMALLOC_CDECL free(void *ptr) { rpfree(ptr); } -extern inline void RPMALLOC_CDECL cfree(void *ptr) { rpfree(ptr); } -extern inline size_t RPMALLOC_CDECL malloc_usable_size(void *ptr) { - return rpmalloc_usable_size(ptr); -} -extern inline size_t RPMALLOC_CDECL malloc_size(void *ptr) { - return rpmalloc_usable_size(ptr); -} - -#ifdef _WIN32 -extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _malloc_base(size_t size) { - return rpmalloc(size); -} -extern inline void RPMALLOC_CDECL _free_base(void *ptr) { rpfree(ptr); } -extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _calloc_base(size_t count, - size_t size) { - return rpcalloc(count, size); -} -extern inline size_t RPMALLOC_CDECL _msize(void *ptr) { - return rpmalloc_usable_size(ptr); -} -extern inline size_t RPMALLOC_CDECL _msize_base(void *ptr) { - return rpmalloc_usable_size(ptr); -} -extern 
inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL -_realloc_base(void *ptr, size_t size) { - return rprealloc(ptr, size); -} -#endif - -#ifdef _WIN32 -// For Windows, #include in one source file to get the C++ operator -// overrides implemented in your module -#else -// Overload the C++ operators using the mangled names -// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling) operators -// delete and delete[] -#define RPDEFVIS __attribute__((visibility("default"))) -extern void _ZdlPv(void *p); -void RPDEFVIS _ZdlPv(void *p) { rpfree(p); } -extern void _ZdaPv(void *p); -void RPDEFVIS _ZdaPv(void *p) { rpfree(p); } -#if ARCH_64BIT -// 64-bit operators new and new[], normal and aligned -extern void *_Znwm(uint64_t size); -void *RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); } -extern void *_Znam(uint64_t size); -void *RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); } -extern void *_Znwmm(uint64_t size, uint64_t align); -void *RPDEFVIS _Znwmm(uint64_t size, uint64_t align) { - return rpaligned_alloc(align, size); -} -extern void *_Znamm(uint64_t size, uint64_t align); -void *RPDEFVIS _Znamm(uint64_t size, uint64_t align) { - return rpaligned_alloc(align, size); -} -extern void *_ZnwmSt11align_val_t(uint64_t size, uint64_t align); -void *RPDEFVIS _ZnwmSt11align_val_t(uint64_t size, uint64_t align) { - return rpaligned_alloc(align, size); -} -extern void *_ZnamSt11align_val_t(uint64_t size, uint64_t align); -void *RPDEFVIS _ZnamSt11align_val_t(uint64_t size, uint64_t align) { - return rpaligned_alloc(align, size); -} -extern void *_ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); -void *RPDEFVIS _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { - (void)sizeof(t); - return rpmalloc(size); -} -extern void *_ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); -void *RPDEFVIS _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { - (void)sizeof(t); - return rpmalloc(size); -} -extern void *_ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, - rp_nothrow_t t); -void *RPDEFVIS _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, - rp_nothrow_t t) { - (void)sizeof(t); - return rpaligned_alloc(align, size); -} -extern void *_ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, - rp_nothrow_t t); -void *RPDEFVIS _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, - rp_nothrow_t t) { - (void)sizeof(t); - return rpaligned_alloc(align, size); -} -// 64-bit operators sized delete and delete[], normal and aligned -extern void _ZdlPvm(void *p, uint64_t size); -void RPDEFVIS _ZdlPvm(void *p, uint64_t size) { - rpfree(p); - (void)sizeof(size); -} -extern void _ZdaPvm(void *p, uint64_t size); -void RPDEFVIS _ZdaPvm(void *p, uint64_t size) { - rpfree(p); - (void)sizeof(size); -} -extern void _ZdlPvSt11align_val_t(void *p, uint64_t align); -void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t align) { - rpfree(p); - (void)sizeof(align); -} -extern void _ZdaPvSt11align_val_t(void *p, uint64_t align); -void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t align) { - rpfree(p); - (void)sizeof(align); -} -extern void _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align); -void RPDEFVIS _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) { - rpfree(p); - (void)sizeof(size); - (void)sizeof(align); -} -extern void _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align); -void RPDEFVIS _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) { - rpfree(p); - (void)sizeof(size); - (void)sizeof(align); -} -#else -// 
32-bit operators new and new[], normal and aligned -extern void *_Znwj(uint32_t size); -void *RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); } -extern void *_Znaj(uint32_t size); -void *RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); } -extern void *_Znwjj(uint32_t size, uint32_t align); -void *RPDEFVIS _Znwjj(uint32_t size, uint32_t align) { - return rpaligned_alloc(align, size); -} -extern void *_Znajj(uint32_t size, uint32_t align); -void *RPDEFVIS _Znajj(uint32_t size, uint32_t align) { - return rpaligned_alloc(align, size); -} -extern void *_ZnwjSt11align_val_t(size_t size, size_t align); -void *RPDEFVIS _ZnwjSt11align_val_t(size_t size, size_t align) { - return rpaligned_alloc(align, size); -} -extern void *_ZnajSt11align_val_t(size_t size, size_t align); -void *RPDEFVIS _ZnajSt11align_val_t(size_t size, size_t align) { - return rpaligned_alloc(align, size); -} -extern void *_ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t); -void *RPDEFVIS _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) { - (void)sizeof(t); - return rpmalloc(size); -} -extern void *_ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t); -void *RPDEFVIS _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) { - (void)sizeof(t); - return rpmalloc(size); -} -extern void *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, - rp_nothrow_t t); -void *RPDEFVIS _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, - rp_nothrow_t t) { - (void)sizeof(t); - return rpaligned_alloc(align, size); -} -extern void *_ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, - rp_nothrow_t t); -void *RPDEFVIS _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align, - rp_nothrow_t t) { - (void)sizeof(t); - return rpaligned_alloc(align, size); -} -// 32-bit operators sized delete and delete[], normal and aligned -extern void _ZdlPvj(void *p, uint64_t size); -void RPDEFVIS _ZdlPvj(void *p, uint64_t size) { - rpfree(p); - (void)sizeof(size); -} -extern void _ZdaPvj(void *p, uint64_t size); -void RPDEFVIS _ZdaPvj(void *p, uint64_t size) { - rpfree(p); - (void)sizeof(size); -} -extern void _ZdlPvSt11align_val_t(void *p, uint32_t align); -void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t a) { - rpfree(p); - (void)sizeof(align); -} -extern void _ZdaPvSt11align_val_t(void *p, uint32_t align); -void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t a) { - rpfree(p); - (void)sizeof(align); -} -extern void _ZdlPvjSt11align_val_t(void *p, uint32_t size, uint32_t align); -void RPDEFVIS _ZdlPvjSt11align_val_t(void *p, uint64_t size, uint64_t align) { - rpfree(p); - (void)sizeof(size); - (void)sizeof(a); -} -extern void _ZdaPvjSt11align_val_t(void *p, uint32_t size, uint32_t align); -void RPDEFVIS _ZdaPvjSt11align_val_t(void *p, uint64_t size, uint64_t align) { - rpfree(p); - (void)sizeof(size); - (void)sizeof(a); -} -#endif -#endif -#endif - -#if USE_INTERPOSE || USE_ALIAS - -static void *rpmalloc_nothrow(size_t size, rp_nothrow_t t) { - (void)sizeof(t); - return rpmalloc(size); -} -static void *rpaligned_alloc_reverse(size_t size, size_t align) { - return rpaligned_alloc(align, size); -} -static void *rpaligned_alloc_reverse_nothrow(size_t size, size_t align, - rp_nothrow_t t) { - (void)sizeof(t); - return rpaligned_alloc(align, size); -} -static void rpfree_size(void *p, size_t size) { - (void)sizeof(size); - rpfree(p); -} -static void rpfree_aligned(void *p, size_t align) { - (void)sizeof(align); - rpfree(p); -} -static void rpfree_size_aligned(void *p, size_t size, size_t align) { - (void)sizeof(size); - 
(void)sizeof(align); - rpfree(p); -} - -#endif - -#if USE_INTERPOSE - -__attribute__((used)) static const interpose_t macinterpose_malloc[] - __attribute__((section("__DATA, __interpose"))) = { - // new and new[] - MAC_INTERPOSE_PAIR(rpmalloc, _Znwm), - MAC_INTERPOSE_PAIR(rpmalloc, _Znam), - MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znwmm), - MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znamm), - MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t), - MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t), - MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnwmSt11align_val_t), - MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnamSt11align_val_t), - MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, - _ZnwmSt11align_val_tRKSt9nothrow_t), - MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, - _ZnamSt11align_val_tRKSt9nothrow_t), - // delete and delete[] - MAC_INTERPOSE_PAIR(rpfree, _ZdlPv), MAC_INTERPOSE_PAIR(rpfree, _ZdaPv), - MAC_INTERPOSE_PAIR(rpfree_size, _ZdlPvm), - MAC_INTERPOSE_PAIR(rpfree_size, _ZdaPvm), - MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdlPvSt11align_val_t), - MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdaPvSt11align_val_t), - MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdlPvmSt11align_val_t), - MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdaPvmSt11align_val_t), - // libc entry points - MAC_INTERPOSE_PAIR(rpmalloc, malloc), - MAC_INTERPOSE_PAIR(rpmalloc, calloc), - MAC_INTERPOSE_PAIR(rprealloc, realloc), - MAC_INTERPOSE_PAIR(rprealloc, reallocf), -#if defined(__MAC_10_15) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_15 - MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc), -#endif - MAC_INTERPOSE_PAIR(rpmemalign, memalign), - MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign), - MAC_INTERPOSE_PAIR(rpfree, free), MAC_INTERPOSE_PAIR(rpfree, cfree), - MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size), - MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)}; - -#endif - -#if USE_ALIAS - -#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default"))); - -// Alias the C++ operators using the mangled names -// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling) - -// operators delete and delete[] -void _ZdlPv(void *p) RPALIAS(rpfree) void _ZdaPv(void *p) RPALIAS(rpfree) - -#if ARCH_64BIT - // 64-bit operators new and new[], normal and aligned - void *_Znwm(uint64_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(1) - RPALIAS(rpmalloc) void *_Znam(uint64_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwmm(uint64_t size, - uint64_t align) - RPALIAS(rpaligned_alloc_reverse) void *_Znamm(uint64_t size, - uint64_t align) - RPALIAS(rpaligned_alloc_reverse) void *_ZnwmSt11align_val_t( - size_t size, size_t align) - RPALIAS(rpaligned_alloc_reverse) void *_ZnamSt11align_val_t( - size_t size, size_t align) - RPALIAS(rpaligned_alloc_reverse) void *_ZnwmRKSt9nothrow_t( - size_t size, rp_nothrow_t t) - RPALIAS(rpmalloc_nothrow) void *_ZnamRKSt9nothrow_t( - size_t size, - rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void - *_ZnwmSt11align_val_tRKSt9nothrow_t(size_t size, - size_t align, - rp_nothrow_t t) - RPALIAS(rpaligned_alloc_reverse_nothrow) void - *_ZnamSt11align_val_tRKSt9nothrow_t( - size_t size, size_t align, - rp_nothrow_t t) - RPALIAS(rpaligned_alloc_reverse_nothrow) - // 64-bit operators delete and delete[], sized and aligned - void _ZdlPvm(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvm(void *p, - size_t n) - RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a) - 
RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p, - size_t a) - RPALIAS(rpfree_aligned) void _ZdlPvmSt11align_val_t(void *p, - size_t n, - size_t a) - RPALIAS(rpfree_size_aligned) void _ZdaPvmSt11align_val_t( - void *p, size_t n, size_t a) - RPALIAS(rpfree_size_aligned) -#else - // 32-bit operators new and new[], normal and aligned - void *_Znwj(uint32_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(1) - RPALIAS(rpmalloc) void *_Znaj(uint32_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwjj(uint32_t size, - uint32_t align) - RPALIAS(rpaligned_alloc_reverse) void *_Znajj(uint32_t size, - uint32_t align) - RPALIAS(rpaligned_alloc_reverse) void *_ZnwjSt11align_val_t( - size_t size, size_t align) - RPALIAS(rpaligned_alloc_reverse) void *_ZnajSt11align_val_t( - size_t size, size_t align) - RPALIAS(rpaligned_alloc_reverse) void *_ZnwjRKSt9nothrow_t( - size_t size, rp_nothrow_t t) - RPALIAS(rpmalloc_nothrow) void *_ZnajRKSt9nothrow_t( - size_t size, - rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void - *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, - size_t align, - rp_nothrow_t t) - RPALIAS(rpaligned_alloc_reverse_nothrow) void - *_ZnajSt11align_val_tRKSt9nothrow_t( - size_t size, size_t align, - rp_nothrow_t t) - RPALIAS(rpaligned_alloc_reverse_nothrow) - // 32-bit operators delete and delete[], sized and aligned - void _ZdlPvj(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvj(void *p, - size_t n) - RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a) - RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p, - size_t a) - RPALIAS(rpfree_aligned) void _ZdlPvjSt11align_val_t(void *p, - size_t n, - size_t a) - RPALIAS(rpfree_size_aligned) void _ZdaPvjSt11align_val_t( - void *p, size_t n, size_t a) - RPALIAS(rpfree_size_aligned) -#endif - - void *malloc(size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(1) - RPALIAS(rpmalloc) void *calloc(size_t count, size_t size) - RPALIAS(rpcalloc) void *realloc(void *ptr, size_t size) - RPALIAS(rprealloc) void *reallocf(void *ptr, size_t size) - RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2) - RPALIAS(rprealloc) void *aligned_alloc(size_t alignment, size_t size) - RPALIAS(rpaligned_alloc) void *memalign( - size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2) - RPALIAS(rpmemalign) int posix_memalign(void **memptr, size_t alignment, - size_t size) - RPALIAS(rpposix_memalign) void free(void *ptr) - RPALIAS(rpfree) void cfree(void *ptr) RPALIAS(rpfree) -#if defined(__ANDROID__) || defined(__FreeBSD__) - size_t - malloc_usable_size(const void *ptr) RPALIAS(rpmalloc_usable_size) -#else - size_t - malloc_usable_size(void *ptr) RPALIAS(rpmalloc_usable_size) -#endif - size_t malloc_size(void *ptr) RPALIAS(rpmalloc_usable_size) - -#endif - - static inline size_t _rpmalloc_page_size(void) { - return _memory_page_size; -} - -extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size); - -extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#ifdef _MSC_VER - int err = SizeTMult(count, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(count, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = count * size; -#endif - return realloc(ptr, total); -} - -extern inline void *RPMALLOC_CDECL 
valloc(size_t size) { - get_thread_heap(); - return rpaligned_alloc(_rpmalloc_page_size(), size); -} - -extern inline void *RPMALLOC_CDECL pvalloc(size_t size) { - get_thread_heap(); - const size_t page_size = _rpmalloc_page_size(); - const size_t aligned_size = ((size + page_size - 1) / page_size) * page_size; -#if ENABLE_VALIDATE_ARGS - if (aligned_size < size) { - errno = EINVAL; - return 0; - } -#endif - return rpaligned_alloc(_rpmalloc_page_size(), aligned_size); -} - -#endif // ENABLE_OVERRIDE - -#if ENABLE_PRELOAD - -#ifdef _WIN32 - -#if defined(BUILD_DYNAMIC_LINK) && BUILD_DYNAMIC_LINK - -extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance, - DWORD reason, LPVOID reserved); - -extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance, - DWORD reason, - LPVOID reserved) { - (void)sizeof(reserved); - (void)sizeof(instance); - if (reason == DLL_PROCESS_ATTACH) - rpmalloc_initialize(); - else if (reason == DLL_PROCESS_DETACH) - rpmalloc_finalize(); - else if (reason == DLL_THREAD_ATTACH) - rpmalloc_thread_initialize(); - else if (reason == DLL_THREAD_DETACH) - rpmalloc_thread_finalize(1); - return TRUE; -} - -// end BUILD_DYNAMIC_LINK -#else - -extern void _global_rpmalloc_init(void) { - rpmalloc_set_main_thread(); - rpmalloc_initialize(); -} - -#if defined(__clang__) || defined(__GNUC__) - -static void __attribute__((constructor)) initializer(void) { - _global_rpmalloc_init(); -} - -#elif defined(_MSC_VER) - -static int _global_rpmalloc_xib(void) { - _global_rpmalloc_init(); - return 0; -} - -#pragma section(".CRT$XIB", read) -__declspec(allocate(".CRT$XIB")) void (*_rpmalloc_module_init)(void) = - _global_rpmalloc_xib; -#if defined(_M_IX86) || defined(__i386__) -#pragma comment(linker, "/include:" \ - "__rpmalloc_module_init") -#else -#pragma comment(linker, "/include:" \ - "_rpmalloc_module_init") -#endif - -#endif - -// end !BUILD_DYNAMIC_LINK -#endif - -#else - -#include -#include -#include -#include - -extern void rpmalloc_set_main_thread(void); - -static pthread_key_t destructor_key; - -static void thread_destructor(void *); - -static void __attribute__((constructor)) initializer(void) { - rpmalloc_set_main_thread(); - rpmalloc_initialize(); - pthread_key_create(&destructor_key, thread_destructor); -} - -static void __attribute__((destructor)) finalizer(void) { rpmalloc_finalize(); } - -typedef struct { - void *(*real_start)(void *); - void *real_arg; -} thread_starter_arg; - -static void *thread_starter(void *argptr) { - thread_starter_arg *arg = argptr; - void *(*real_start)(void *) = arg->real_start; - void *real_arg = arg->real_arg; - rpmalloc_thread_initialize(); - rpfree(argptr); - pthread_setspecific(destructor_key, (void *)1); - return (*real_start)(real_arg); -} - -static void thread_destructor(void *value) { - (void)sizeof(value); - rpmalloc_thread_finalize(1); -} - -#ifdef __APPLE__ - -static int pthread_create_proxy(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine)(void *), void *arg) { - rpmalloc_initialize(); - thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg)); - starter_arg->real_start = start_routine; - starter_arg->real_arg = arg; - return pthread_create(thread, attr, thread_starter, starter_arg); -} - -MAC_INTERPOSE_SINGLE(pthread_create_proxy, pthread_create); - -#else - -#include - -int pthread_create(pthread_t *thread, const pthread_attr_t *attr, - void *(*start_routine)(void *), void *arg) { -#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \ - defined(__NetBSD__) 
|| defined(__DragonFly__) || defined(__APPLE__) || \ - defined(__HAIKU__) - char fname[] = "pthread_create"; -#else - char fname[] = "_pthread_create"; -#endif - void *real_pthread_create = dlsym(RTLD_NEXT, fname); - rpmalloc_thread_initialize(); - thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg)); - starter_arg->real_start = start_routine; - starter_arg->real_arg = arg; - return (*(int (*)(pthread_t *, const pthread_attr_t *, void *(*)(void *), - void *))real_pthread_create)(thread, attr, thread_starter, - starter_arg); -} - -#endif - -#endif - -#endif - -#if ENABLE_OVERRIDE - -#if defined(__GLIBC__) && defined(__linux__) - -void *__libc_malloc(size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(1) - RPALIAS(rpmalloc) void *__libc_calloc(size_t count, size_t size) - RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2) - RPALIAS(rpcalloc) void *__libc_realloc(void *p, size_t size) - RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2) RPALIAS(rprealloc) void __libc_free(void *p) - RPALIAS(rpfree) void __libc_cfree(void *p) - RPALIAS(rpfree) void *__libc_memalign(size_t align, size_t size) - RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2) - RPALIAS(rpmemalign) int __posix_memalign(void **p, size_t align, - size_t size) - RPALIAS(rpposix_memalign) - - extern void *__libc_valloc(size_t size); -extern void *__libc_pvalloc(size_t size); - -void *__libc_valloc(size_t size) { return valloc(size); } - -void *__libc_pvalloc(size_t size) { return pvalloc(size); } - -#endif - -#endif - -#if (defined(__GNUC__) || defined(__clang__)) -#pragma GCC visibility pop -#endif +//===------------------------ malloc.c ------------------*- C -*-=============// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This library provides a cross-platform lock free thread caching malloc +// implementation in C11. 
+// +// +// This file provides overrides for the standard library malloc entry points for +// C and new/delete operators for C++ It also provides automatic +// initialization/finalization of process and threads +// +//===----------------------------------------------------------------------===// + +#if defined(__TINYC__) +#include +#endif + +#ifndef ARCH_64BIT +#if defined(__LLP64__) || defined(__LP64__) || defined(_WIN64) +#define ARCH_64BIT 1 +_Static_assert(sizeof(size_t) == 8, "Data type size mismatch"); +_Static_assert(sizeof(void *) == 8, "Data type size mismatch"); +#else +#define ARCH_64BIT 0 +_Static_assert(sizeof(size_t) == 4, "Data type size mismatch"); +_Static_assert(sizeof(void *) == 4, "Data type size mismatch"); +#endif +#endif + +#if (defined(__GNUC__) || defined(__clang__)) +#pragma GCC visibility push(default) +#endif + +#define USE_IMPLEMENT 1 +#define USE_INTERPOSE 0 +#define USE_ALIAS 0 + +#if defined(__APPLE__) +#undef USE_INTERPOSE +#define USE_INTERPOSE 1 + +typedef struct interpose_t { + void *new_func; + void *orig_func; +} interpose_t; + +#define MAC_INTERPOSE_PAIR(newf, oldf) {(void *)newf, (void *)oldf} +#define MAC_INTERPOSE_SINGLE(newf, oldf) \ + __attribute__((used)) static const interpose_t macinterpose##newf##oldf \ + __attribute__((section("__DATA, __interpose"))) = \ + MAC_INTERPOSE_PAIR(newf, oldf) + +#endif + +#if !defined(_WIN32) && !defined(__APPLE__) +#undef USE_IMPLEMENT +#undef USE_ALIAS +#define USE_IMPLEMENT 0 +#define USE_ALIAS 1 +#endif + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#undef malloc +#undef free +#undef calloc +#define RPMALLOC_RESTRICT __declspec(restrict) +#else +#define RPMALLOC_RESTRICT +#endif + +#if ENABLE_OVERRIDE + +typedef struct rp_nothrow_t { + int __dummy; +} rp_nothrow_t; + +#if USE_IMPLEMENT + +extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL malloc(size_t size) { + return rpmalloc(size); +} +extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL calloc(size_t count, + size_t size) { + return rpcalloc(count, size); +} +extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL realloc(void *ptr, + size_t size) { + return rprealloc(ptr, size); +} +extern inline void *RPMALLOC_CDECL reallocf(void *ptr, size_t size) { + return rprealloc(ptr, size); +} +extern inline void *RPMALLOC_CDECL aligned_alloc(size_t alignment, + size_t size) { + return rpaligned_alloc(alignment, size); +} +extern inline void *RPMALLOC_CDECL memalign(size_t alignment, size_t size) { + return rpmemalign(alignment, size); +} +extern inline int RPMALLOC_CDECL posix_memalign(void **memptr, size_t alignment, + size_t size) { + return rpposix_memalign(memptr, alignment, size); +} +extern inline void RPMALLOC_CDECL free(void *ptr) { rpfree(ptr); } +extern inline void RPMALLOC_CDECL cfree(void *ptr) { rpfree(ptr); } +extern inline size_t RPMALLOC_CDECL malloc_usable_size(void *ptr) { + return rpmalloc_usable_size(ptr); +} +extern inline size_t RPMALLOC_CDECL malloc_size(void *ptr) { + return rpmalloc_usable_size(ptr); +} + +#ifdef _WIN32 +extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _malloc_base(size_t size) { + return rpmalloc(size); +} +extern inline void RPMALLOC_CDECL _free_base(void *ptr) { rpfree(ptr); } +extern inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL _calloc_base(size_t count, + size_t size) { + return rpcalloc(count, size); +} +extern inline size_t RPMALLOC_CDECL _msize(void *ptr) { + return rpmalloc_usable_size(ptr); +} +extern inline size_t RPMALLOC_CDECL _msize_base(void *ptr) { + return rpmalloc_usable_size(ptr); +} +extern 
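// Illustrative sketch, not part of this patch: the three strategies selected
// above differ in how the standard symbols get bound to rpmalloc.
// USE_IMPLEMENT (Windows and Apple) defines malloc/free and friends directly
// in this translation unit so the linker resolves them here; USE_INTERPOSE
// (Apple, in addition) registers replacement/original pairs in the Mach-O
// "__DATA,__interpose" section, which dyld applies to the other loaded
// images; USE_ALIAS (other POSIX targets) emits the standard names as ELF
// aliases of the rp* entry points.  A minimal, self-contained example of the
// dyld interpose technique follows; counting_malloc and g_malloc_calls are
// hypothetical names used only for illustration:
//
//   #include <stdlib.h>
//
//   static unsigned long g_malloc_calls;
//   static void *counting_malloc(size_t size) {
//     ++g_malloc_calls;     // avoid printf here, it may call malloc again
//     return malloc(size);  // the interposing image itself is not rebound,
//   }                       // so this still reaches the system allocator
//
//   typedef struct { const void *repl; const void *orig; } interpose_pair;
//   __attribute__((used)) static const interpose_pair pair
//       __attribute__((section("__DATA,__interpose"))) = {
//           (const void *)counting_malloc, (const void *)malloc};
//
// Built as a dylib and loaded via DYLD_INSERT_LIBRARIES, every malloc() call
// made by the other images in the process is routed to counting_malloc().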
inline RPMALLOC_RESTRICT void *RPMALLOC_CDECL +_realloc_base(void *ptr, size_t size) { + return rprealloc(ptr, size); +} +#endif + +#ifdef _WIN32 +// For Windows, #include in one source file to get the C++ operator +// overrides implemented in your module +#else +// Overload the C++ operators using the mangled names +// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling) operators +// delete and delete[] +#define RPDEFVIS __attribute__((visibility("default"))) +extern void _ZdlPv(void *p); +void RPDEFVIS _ZdlPv(void *p) { rpfree(p); } +extern void _ZdaPv(void *p); +void RPDEFVIS _ZdaPv(void *p) { rpfree(p); } +#if ARCH_64BIT +// 64-bit operators new and new[], normal and aligned +extern void *_Znwm(uint64_t size); +void *RPDEFVIS _Znwm(uint64_t size) { return rpmalloc(size); } +extern void *_Znam(uint64_t size); +void *RPDEFVIS _Znam(uint64_t size) { return rpmalloc(size); } +extern void *_Znwmm(uint64_t size, uint64_t align); +void *RPDEFVIS _Znwmm(uint64_t size, uint64_t align) { + return rpaligned_alloc(align, size); +} +extern void *_Znamm(uint64_t size, uint64_t align); +void *RPDEFVIS _Znamm(uint64_t size, uint64_t align) { + return rpaligned_alloc(align, size); +} +extern void *_ZnwmSt11align_val_t(uint64_t size, uint64_t align); +void *RPDEFVIS _ZnwmSt11align_val_t(uint64_t size, uint64_t align) { + return rpaligned_alloc(align, size); +} +extern void *_ZnamSt11align_val_t(uint64_t size, uint64_t align); +void *RPDEFVIS _ZnamSt11align_val_t(uint64_t size, uint64_t align) { + return rpaligned_alloc(align, size); +} +extern void *_ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); +void *RPDEFVIS _ZnwmRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { + (void)sizeof(t); + return rpmalloc(size); +} +extern void *_ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t); +void *RPDEFVIS _ZnamRKSt9nothrow_t(uint64_t size, rp_nothrow_t t) { + (void)sizeof(t); + return rpmalloc(size); +} +extern void *_ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, + rp_nothrow_t t); +void *RPDEFVIS _ZnwmSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, + rp_nothrow_t t) { + (void)sizeof(t); + return rpaligned_alloc(align, size); +} +extern void *_ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, + rp_nothrow_t t); +void *RPDEFVIS _ZnamSt11align_val_tRKSt9nothrow_t(uint64_t size, uint64_t align, + rp_nothrow_t t) { + (void)sizeof(t); + return rpaligned_alloc(align, size); +} +// 64-bit operators sized delete and delete[], normal and aligned +extern void _ZdlPvm(void *p, uint64_t size); +void RPDEFVIS _ZdlPvm(void *p, uint64_t size) { + rpfree(p); + (void)sizeof(size); +} +extern void _ZdaPvm(void *p, uint64_t size); +void RPDEFVIS _ZdaPvm(void *p, uint64_t size) { + rpfree(p); + (void)sizeof(size); +} +extern void _ZdlPvSt11align_val_t(void *p, uint64_t align); +void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint64_t align) { + rpfree(p); + (void)sizeof(align); +} +extern void _ZdaPvSt11align_val_t(void *p, uint64_t align); +void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint64_t align) { + rpfree(p); + (void)sizeof(align); +} +extern void _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align); +void RPDEFVIS _ZdlPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) { + rpfree(p); + (void)sizeof(size); + (void)sizeof(align); +} +extern void _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align); +void RPDEFVIS _ZdaPvmSt11align_val_t(void *p, uint64_t size, uint64_t align) { + rpfree(p); + (void)sizeof(size); + (void)sizeof(align); +} +#else +// 
32-bit operators new and new[], normal and aligned
+extern void *_Znwj(uint32_t size);
+void *RPDEFVIS _Znwj(uint32_t size) { return rpmalloc(size); }
+extern void *_Znaj(uint32_t size);
+void *RPDEFVIS _Znaj(uint32_t size) { return rpmalloc(size); }
+extern void *_Znwjj(uint32_t size, uint32_t align);
+void *RPDEFVIS _Znwjj(uint32_t size, uint32_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_Znajj(uint32_t size, uint32_t align);
+void *RPDEFVIS _Znajj(uint32_t size, uint32_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnwjSt11align_val_t(size_t size, size_t align);
+void *RPDEFVIS _ZnwjSt11align_val_t(size_t size, size_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnajSt11align_val_t(size_t size, size_t align);
+void *RPDEFVIS _ZnajSt11align_val_t(size_t size, size_t align) {
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t);
+void *RPDEFVIS _ZnwjRKSt9nothrow_t(size_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+extern void *_ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t);
+void *RPDEFVIS _ZnajRKSt9nothrow_t(size_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+extern void *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                rp_nothrow_t t);
+void *RPDEFVIS _ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                  rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+extern void *_ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                rp_nothrow_t t);
+void *RPDEFVIS _ZnajSt11align_val_tRKSt9nothrow_t(size_t size, size_t align,
+                                                  rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+// 32-bit operators sized delete and delete[], normal and aligned
+extern void _ZdlPvj(void *p, uint64_t size);
+void RPDEFVIS _ZdlPvj(void *p, uint64_t size) {
+  rpfree(p);
+  (void)sizeof(size);
+}
+extern void _ZdaPvj(void *p, uint64_t size);
+void RPDEFVIS _ZdaPvj(void *p, uint64_t size) {
+  rpfree(p);
+  (void)sizeof(size);
+}
+extern void _ZdlPvSt11align_val_t(void *p, uint32_t align);
+void RPDEFVIS _ZdlPvSt11align_val_t(void *p, uint32_t align) {
+  rpfree(p);
+  (void)sizeof(align);
+}
+extern void _ZdaPvSt11align_val_t(void *p, uint32_t align);
+void RPDEFVIS _ZdaPvSt11align_val_t(void *p, uint32_t align) {
+  rpfree(p);
+  (void)sizeof(align);
+}
+extern void _ZdlPvjSt11align_val_t(void *p, uint32_t size, uint32_t align);
+void RPDEFVIS _ZdlPvjSt11align_val_t(void *p, uint32_t size, uint32_t align) {
+  rpfree(p);
+  (void)sizeof(size);
+  (void)sizeof(align);
+}
+extern void _ZdaPvjSt11align_val_t(void *p, uint32_t size, uint32_t align);
+void RPDEFVIS _ZdaPvjSt11align_val_t(void *p, uint32_t size, uint32_t align) {
+  rpfree(p);
+  (void)sizeof(size);
+  (void)sizeof(align);
+}
+#endif
+#endif
+#endif
+
+#if USE_INTERPOSE || USE_ALIAS
+
+static void *rpmalloc_nothrow(size_t size, rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpmalloc(size);
+}
+static void *rpaligned_alloc_reverse(size_t size, size_t align) {
+  return rpaligned_alloc(align, size);
+}
+static void *rpaligned_alloc_reverse_nothrow(size_t size, size_t align,
+                                             rp_nothrow_t t) {
+  (void)sizeof(t);
+  return rpaligned_alloc(align, size);
+}
+static void rpfree_size(void *p, size_t size) {
+  (void)sizeof(size);
+  rpfree(p);
+}
+static void rpfree_aligned(void *p, size_t align) {
+  (void)sizeof(align);
+  rpfree(p);
+}
+static void rpfree_size_aligned(void *p, size_t size, size_t align) {
+  (void)sizeof(size);
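// Illustrative note, not part of this patch: the small adapters defined here
// exist because the C++ ABI entry points they back take their arguments in a
// different order, or take extra arguments, compared to the rp* API.  Aligned
// operator new receives (size, alignment) while rpaligned_alloc() takes
// (alignment, size), and the nothrow overloads carry a std::nothrow_t
// reference that is simply ignored.  On a 64-bit Itanium-ABI target a nothrow
// aligned new expression therefore lowers roughly as:
//
//   new (std::align_val_t{64}, std::nothrow) T
//     -> _ZnwmSt11align_val_tRKSt9nothrow_t(sizeof(T), 64, nothrow)
//     -> rpaligned_alloc_reverse_nothrow(sizeof(T), 64, t)
//     -> rpaligned_alloc(64, sizeof(T))
//
// which lets the same rp* functions back both the interpose table and the
// alias declarations that follow.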
(void)sizeof(align); + rpfree(p); +} + +#endif + +#if USE_INTERPOSE + +__attribute__((used)) static const interpose_t macinterpose_malloc[] + __attribute__((section("__DATA, __interpose"))) = { + // new and new[] + MAC_INTERPOSE_PAIR(rpmalloc, _Znwm), + MAC_INTERPOSE_PAIR(rpmalloc, _Znam), + MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znwmm), + MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _Znamm), + MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnwmRKSt9nothrow_t), + MAC_INTERPOSE_PAIR(rpmalloc_nothrow, _ZnamRKSt9nothrow_t), + MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnwmSt11align_val_t), + MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse, _ZnamSt11align_val_t), + MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, + _ZnwmSt11align_val_tRKSt9nothrow_t), + MAC_INTERPOSE_PAIR(rpaligned_alloc_reverse_nothrow, + _ZnamSt11align_val_tRKSt9nothrow_t), + // delete and delete[] + MAC_INTERPOSE_PAIR(rpfree, _ZdlPv), MAC_INTERPOSE_PAIR(rpfree, _ZdaPv), + MAC_INTERPOSE_PAIR(rpfree_size, _ZdlPvm), + MAC_INTERPOSE_PAIR(rpfree_size, _ZdaPvm), + MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdlPvSt11align_val_t), + MAC_INTERPOSE_PAIR(rpfree_aligned, _ZdaPvSt11align_val_t), + MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdlPvmSt11align_val_t), + MAC_INTERPOSE_PAIR(rpfree_size_aligned, _ZdaPvmSt11align_val_t), + // libc entry points + MAC_INTERPOSE_PAIR(rpmalloc, malloc), + MAC_INTERPOSE_PAIR(rpmalloc, calloc), + MAC_INTERPOSE_PAIR(rprealloc, realloc), + MAC_INTERPOSE_PAIR(rprealloc, reallocf), +#if defined(__MAC_10_15) && __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_15 + MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc), +#endif + MAC_INTERPOSE_PAIR(rpmemalign, memalign), + MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign), + MAC_INTERPOSE_PAIR(rpfree, free), MAC_INTERPOSE_PAIR(rpfree, cfree), + MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size), + MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)}; + +#endif + +#if USE_ALIAS + +#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default"))); + +// Alias the C++ operators using the mangled names +// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling) + +// operators delete and delete[] +void _ZdlPv(void *p) RPALIAS(rpfree) void _ZdaPv(void *p) RPALIAS(rpfree) + +#if ARCH_64BIT + // 64-bit operators new and new[], normal and aligned + void *_Znwm(uint64_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(1) + RPALIAS(rpmalloc) void *_Znam(uint64_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwmm(uint64_t size, + uint64_t align) + RPALIAS(rpaligned_alloc_reverse) void *_Znamm(uint64_t size, + uint64_t align) + RPALIAS(rpaligned_alloc_reverse) void *_ZnwmSt11align_val_t( + size_t size, size_t align) + RPALIAS(rpaligned_alloc_reverse) void *_ZnamSt11align_val_t( + size_t size, size_t align) + RPALIAS(rpaligned_alloc_reverse) void *_ZnwmRKSt9nothrow_t( + size_t size, rp_nothrow_t t) + RPALIAS(rpmalloc_nothrow) void *_ZnamRKSt9nothrow_t( + size_t size, + rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void + *_ZnwmSt11align_val_tRKSt9nothrow_t(size_t size, + size_t align, + rp_nothrow_t t) + RPALIAS(rpaligned_alloc_reverse_nothrow) void + *_ZnamSt11align_val_tRKSt9nothrow_t( + size_t size, size_t align, + rp_nothrow_t t) + RPALIAS(rpaligned_alloc_reverse_nothrow) + // 64-bit operators delete and delete[], sized and aligned + void _ZdlPvm(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvm(void *p, + size_t n) + RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a) + 
RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p, + size_t a) + RPALIAS(rpfree_aligned) void _ZdlPvmSt11align_val_t(void *p, + size_t n, + size_t a) + RPALIAS(rpfree_size_aligned) void _ZdaPvmSt11align_val_t( + void *p, size_t n, size_t a) + RPALIAS(rpfree_size_aligned) +#else + // 32-bit operators new and new[], normal and aligned + void *_Znwj(uint32_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(1) + RPALIAS(rpmalloc) void *_Znaj(uint32_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(1) RPALIAS(rpmalloc) void *_Znwjj(uint32_t size, + uint32_t align) + RPALIAS(rpaligned_alloc_reverse) void *_Znajj(uint32_t size, + uint32_t align) + RPALIAS(rpaligned_alloc_reverse) void *_ZnwjSt11align_val_t( + size_t size, size_t align) + RPALIAS(rpaligned_alloc_reverse) void *_ZnajSt11align_val_t( + size_t size, size_t align) + RPALIAS(rpaligned_alloc_reverse) void *_ZnwjRKSt9nothrow_t( + size_t size, rp_nothrow_t t) + RPALIAS(rpmalloc_nothrow) void *_ZnajRKSt9nothrow_t( + size_t size, + rp_nothrow_t t) RPALIAS(rpmalloc_nothrow) void + *_ZnwjSt11align_val_tRKSt9nothrow_t(size_t size, + size_t align, + rp_nothrow_t t) + RPALIAS(rpaligned_alloc_reverse_nothrow) void + *_ZnajSt11align_val_tRKSt9nothrow_t( + size_t size, size_t align, + rp_nothrow_t t) + RPALIAS(rpaligned_alloc_reverse_nothrow) + // 32-bit operators delete and delete[], sized and aligned + void _ZdlPvj(void *p, size_t n) RPALIAS(rpfree_size) void _ZdaPvj(void *p, + size_t n) + RPALIAS(rpfree_size) void _ZdlPvSt11align_val_t(void *p, size_t a) + RPALIAS(rpfree_aligned) void _ZdaPvSt11align_val_t(void *p, + size_t a) + RPALIAS(rpfree_aligned) void _ZdlPvjSt11align_val_t(void *p, + size_t n, + size_t a) + RPALIAS(rpfree_size_aligned) void _ZdaPvjSt11align_val_t( + void *p, size_t n, size_t a) + RPALIAS(rpfree_size_aligned) +#endif + + void *malloc(size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(1) + RPALIAS(rpmalloc) void *calloc(size_t count, size_t size) + RPALIAS(rpcalloc) void *realloc(void *ptr, size_t size) + RPALIAS(rprealloc) void *reallocf(void *ptr, size_t size) + RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2) + RPALIAS(rprealloc) void *aligned_alloc(size_t alignment, size_t size) + RPALIAS(rpaligned_alloc) void *memalign( + size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2) + RPALIAS(rpmemalign) int posix_memalign(void **memptr, size_t alignment, + size_t size) + RPALIAS(rpposix_memalign) void free(void *ptr) + RPALIAS(rpfree) void cfree(void *ptr) RPALIAS(rpfree) +#if defined(__ANDROID__) || defined(__FreeBSD__) + size_t + malloc_usable_size(const void *ptr) RPALIAS(rpmalloc_usable_size) +#else + size_t + malloc_usable_size(void *ptr) RPALIAS(rpmalloc_usable_size) +#endif + size_t malloc_size(void *ptr) RPALIAS(rpmalloc_usable_size) + +#endif + + static inline size_t _rpmalloc_page_size(void) { + return _memory_page_size; +} + +extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size); + +extern void *RPMALLOC_CDECL reallocarray(void *ptr, size_t count, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#ifdef _MSC_VER + int err = SizeTMult(count, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(count, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = count * size; +#endif + return realloc(ptr, total); +} + +extern inline void *RPMALLOC_CDECL 
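// Illustrative sketch, not part of this patch: on ELF targets the RPALIAS
// macro used above makes each standard entry point an alias of the matching
// rp* symbol, so for example "malloc" and "rpmalloc" resolve to the same
// address and no wrapper call is emitted.  A stand-alone example of the same
// technique with hypothetical names (my_alloc is not from this file):
//
//   #include <stddef.h>
//
//   void *my_alloc(size_t size) { (void)size; return 0; }
//
//   // "malloc" becomes another name for my_alloc in this object file.
//   void *malloc(size_t size)
//       __attribute__((alias("my_alloc"), used, visibility("default")));
//
// GCC and Clang require the alias target to be defined in the same
// translation unit as the alias declaration.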
valloc(size_t size) { + get_thread_heap(); + return rpaligned_alloc(_rpmalloc_page_size(), size); +} + +extern inline void *RPMALLOC_CDECL pvalloc(size_t size) { + get_thread_heap(); + const size_t page_size = _rpmalloc_page_size(); + const size_t aligned_size = ((size + page_size - 1) / page_size) * page_size; +#if ENABLE_VALIDATE_ARGS + if (aligned_size < size) { + errno = EINVAL; + return 0; + } +#endif + return rpaligned_alloc(_rpmalloc_page_size(), aligned_size); +} + +#endif // ENABLE_OVERRIDE + +#if ENABLE_PRELOAD + +#ifdef _WIN32 + +#if defined(BUILD_DYNAMIC_LINK) && BUILD_DYNAMIC_LINK + +extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance, + DWORD reason, LPVOID reserved); + +extern __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE instance, + DWORD reason, + LPVOID reserved) { + (void)sizeof(reserved); + (void)sizeof(instance); + if (reason == DLL_PROCESS_ATTACH) + rpmalloc_initialize(); + else if (reason == DLL_PROCESS_DETACH) + rpmalloc_finalize(); + else if (reason == DLL_THREAD_ATTACH) + rpmalloc_thread_initialize(); + else if (reason == DLL_THREAD_DETACH) + rpmalloc_thread_finalize(1); + return TRUE; +} + +// end BUILD_DYNAMIC_LINK +#else + +extern void _global_rpmalloc_init(void) { + rpmalloc_set_main_thread(); + rpmalloc_initialize(); +} + +#if defined(__clang__) || defined(__GNUC__) + +static void __attribute__((constructor)) initializer(void) { + _global_rpmalloc_init(); +} + +#elif defined(_MSC_VER) + +static int _global_rpmalloc_xib(void) { + _global_rpmalloc_init(); + return 0; +} + +#pragma section(".CRT$XIB", read) +__declspec(allocate(".CRT$XIB")) void (*_rpmalloc_module_init)(void) = + _global_rpmalloc_xib; +#if defined(_M_IX86) || defined(__i386__) +#pragma comment(linker, "/include:" \ + "__rpmalloc_module_init") +#else +#pragma comment(linker, "/include:" \ + "_rpmalloc_module_init") +#endif + +#endif + +// end !BUILD_DYNAMIC_LINK +#endif + +#else + +#include +#include +#include +#include + +extern void rpmalloc_set_main_thread(void); + +static pthread_key_t destructor_key; + +static void thread_destructor(void *); + +static void __attribute__((constructor)) initializer(void) { + rpmalloc_set_main_thread(); + rpmalloc_initialize(); + pthread_key_create(&destructor_key, thread_destructor); +} + +static void __attribute__((destructor)) finalizer(void) { rpmalloc_finalize(); } + +typedef struct { + void *(*real_start)(void *); + void *real_arg; +} thread_starter_arg; + +static void *thread_starter(void *argptr) { + thread_starter_arg *arg = argptr; + void *(*real_start)(void *) = arg->real_start; + void *real_arg = arg->real_arg; + rpmalloc_thread_initialize(); + rpfree(argptr); + pthread_setspecific(destructor_key, (void *)1); + return (*real_start)(real_arg); +} + +static void thread_destructor(void *value) { + (void)sizeof(value); + rpmalloc_thread_finalize(1); +} + +#ifdef __APPLE__ + +static int pthread_create_proxy(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { + rpmalloc_initialize(); + thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg)); + starter_arg->real_start = start_routine; + starter_arg->real_arg = arg; + return pthread_create(thread, attr, thread_starter, starter_arg); +} + +MAC_INTERPOSE_SINGLE(pthread_create_proxy, pthread_create); + +#else + +#include + +int pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg) { +#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \ + defined(__NetBSD__) 
|| defined(__DragonFly__) || defined(__APPLE__) || \ + defined(__HAIKU__) + char fname[] = "pthread_create"; +#else + char fname[] = "_pthread_create"; +#endif + void *real_pthread_create = dlsym(RTLD_NEXT, fname); + rpmalloc_thread_initialize(); + thread_starter_arg *starter_arg = rpmalloc(sizeof(thread_starter_arg)); + starter_arg->real_start = start_routine; + starter_arg->real_arg = arg; + return (*(int (*)(pthread_t *, const pthread_attr_t *, void *(*)(void *), + void *))real_pthread_create)(thread, attr, thread_starter, + starter_arg); +} + +#endif + +#endif + +#endif + +#if ENABLE_OVERRIDE + +#if defined(__GLIBC__) && defined(__linux__) + +void *__libc_malloc(size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(1) + RPALIAS(rpmalloc) void *__libc_calloc(size_t count, size_t size) + RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2) + RPALIAS(rpcalloc) void *__libc_realloc(void *p, size_t size) + RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2) RPALIAS(rprealloc) void __libc_free(void *p) + RPALIAS(rpfree) void __libc_cfree(void *p) + RPALIAS(rpfree) void *__libc_memalign(size_t align, size_t size) + RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2) + RPALIAS(rpmemalign) int __posix_memalign(void **p, size_t align, + size_t size) + RPALIAS(rpposix_memalign) + + extern void *__libc_valloc(size_t size); +extern void *__libc_pvalloc(size_t size); + +void *__libc_valloc(size_t size) { return valloc(size); } + +void *__libc_pvalloc(size_t size) { return pvalloc(size); } + +#endif + +#endif + +#if (defined(__GNUC__) || defined(__clang__)) +#pragma GCC visibility pop +#endif diff --git a/llvm/lib/Support/rpmalloc/rpmalloc.c b/llvm/lib/Support/rpmalloc/rpmalloc.c index 0976ec8ae6af4e94af86cd1ae6118266c22f5678..a06d3cdb5b52ef25e54bf6f595d4d5ab300a26d0 100644 --- a/llvm/lib/Support/rpmalloc/rpmalloc.c +++ b/llvm/lib/Support/rpmalloc/rpmalloc.c @@ -1,3992 +1,3992 @@ -//===---------------------- rpmalloc.c ------------------*- C -*-=============// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This library provides a cross-platform lock free thread caching malloc -// implementation in C11. 
-// -//===----------------------------------------------------------------------===// - -#include "rpmalloc.h" - -//////////// -/// -/// Build time configurable limits -/// -////// - -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wunused-macros" -#pragma clang diagnostic ignored "-Wunused-function" -#if __has_warning("-Wreserved-identifier") -#pragma clang diagnostic ignored "-Wreserved-identifier" -#endif -#if __has_warning("-Wstatic-in-inline") -#pragma clang diagnostic ignored "-Wstatic-in-inline" -#endif -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wunused-macros" -#pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#if !defined(__has_builtin) -#define __has_builtin(b) 0 -#endif - -#if defined(__GNUC__) || defined(__clang__) - -#if __has_builtin(__builtin_memcpy_inline) -#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s) -#else -#define _rpmalloc_memcpy_const(x, y, s) \ - do { \ - _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \ - "len must be a constant integer"); \ - memcpy(x, y, s); \ - } while (0) -#endif - -#if __has_builtin(__builtin_memset_inline) -#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s) -#else -#define _rpmalloc_memset_const(x, y, s) \ - do { \ - _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \ - "len must be a constant integer"); \ - memset(x, y, s); \ - } while (0) -#endif -#else -#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s) -#define _rpmalloc_memset_const(x, y, s) memset(x, y, s) -#endif - -#if __has_builtin(__builtin_assume) -#define rpmalloc_assume(cond) __builtin_assume(cond) -#elif defined(__GNUC__) -#define rpmalloc_assume(cond) \ - do { \ - if (!__builtin_expect(cond, 0)) \ - __builtin_unreachable(); \ - } while (0) -#elif defined(_MSC_VER) -#define rpmalloc_assume(cond) __assume(cond) -#else -#define rpmalloc_assume(cond) 0 -#endif - -#ifndef HEAP_ARRAY_SIZE -//! Size of heap hashmap -#define HEAP_ARRAY_SIZE 47 -#endif -#ifndef ENABLE_THREAD_CACHE -//! Enable per-thread cache -#define ENABLE_THREAD_CACHE 1 -#endif -#ifndef ENABLE_GLOBAL_CACHE -//! Enable global cache shared between all threads, requires thread cache -#define ENABLE_GLOBAL_CACHE 1 -#endif -#ifndef ENABLE_VALIDATE_ARGS -//! Enable validation of args to public entry points -#define ENABLE_VALIDATE_ARGS 0 -#endif -#ifndef ENABLE_STATISTICS -//! Enable statistics collection -#define ENABLE_STATISTICS 0 -#endif -#ifndef ENABLE_ASSERTS -//! Enable asserts -#define ENABLE_ASSERTS 0 -#endif -#ifndef ENABLE_OVERRIDE -//! Override standard library malloc/free and new/delete entry points -#define ENABLE_OVERRIDE 0 -#endif -#ifndef ENABLE_PRELOAD -//! Support preloading -#define ENABLE_PRELOAD 0 -#endif -#ifndef DISABLE_UNMAP -//! Disable unmapping memory pages (also enables unlimited cache) -#define DISABLE_UNMAP 0 -#endif -#ifndef ENABLE_UNLIMITED_CACHE -//! Enable unlimited global cache (no unmapping until finalization) -#define ENABLE_UNLIMITED_CACHE 0 -#endif -#ifndef ENABLE_ADAPTIVE_THREAD_CACHE -//! Enable adaptive thread cache size based on use heuristics -#define ENABLE_ADAPTIVE_THREAD_CACHE 0 -#endif -#ifndef DEFAULT_SPAN_MAP_COUNT -//! Default number of spans to map in call to map more virtual memory (default -//! values yield 4MiB here) -#define DEFAULT_SPAN_MAP_COUNT 64 -#endif -#ifndef GLOBAL_CACHE_MULTIPLIER -//! 
Multiplier for global cache -#define GLOBAL_CACHE_MULTIPLIER 8 -#endif - -#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE -#error Must use global cache if unmap is disabled -#endif - -#if DISABLE_UNMAP -#undef ENABLE_UNLIMITED_CACHE -#define ENABLE_UNLIMITED_CACHE 1 -#endif - -#if !ENABLE_GLOBAL_CACHE -#undef ENABLE_UNLIMITED_CACHE -#define ENABLE_UNLIMITED_CACHE 0 -#endif - -#if !ENABLE_THREAD_CACHE -#undef ENABLE_ADAPTIVE_THREAD_CACHE -#define ENABLE_ADAPTIVE_THREAD_CACHE 0 -#endif - -#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) -#define PLATFORM_WINDOWS 1 -#define PLATFORM_POSIX 0 -#else -#define PLATFORM_WINDOWS 0 -#define PLATFORM_POSIX 1 -#endif - -/// Platform and arch specifics -#if defined(_MSC_VER) && !defined(__clang__) -#pragma warning(disable : 5105) -#ifndef FORCEINLINE -#define FORCEINLINE inline __forceinline -#endif -#define _Static_assert static_assert -#else -#ifndef FORCEINLINE -#define FORCEINLINE inline __attribute__((__always_inline__)) -#endif -#endif -#if PLATFORM_WINDOWS -#ifndef WIN32_LEAN_AND_MEAN -#define WIN32_LEAN_AND_MEAN -#endif -#include -#if ENABLE_VALIDATE_ARGS -#include -#endif -#else -#include -#include -#include -#include -#if defined(__linux__) || defined(__ANDROID__) -#include -#if !defined(PR_SET_VMA) -#define PR_SET_VMA 0x53564d41 -#define PR_SET_VMA_ANON_NAME 0 -#endif -#endif -#if defined(__APPLE__) -#include -#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR -#include -#include -#endif -#include -#endif -#if defined(__HAIKU__) || defined(__TINYC__) -#include -#endif -#endif - -#include -#include -#include - -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) -#include -static DWORD fls_key; -#endif - -#if PLATFORM_POSIX -#include -#include -#ifdef __FreeBSD__ -#include -#define MAP_HUGETLB MAP_ALIGNED_SUPER -#ifndef PROT_MAX -#define PROT_MAX(f) 0 -#endif -#else -#define PROT_MAX(f) 0 -#endif -#ifdef __sun -extern int madvise(caddr_t, size_t, int); -#endif -#ifndef MAP_UNINITIALIZED -#define MAP_UNINITIALIZED 0 -#endif -#endif -#include - -#if ENABLE_ASSERTS -#undef NDEBUG -#if defined(_MSC_VER) && !defined(_DEBUG) -#define _DEBUG -#endif -#include -#define RPMALLOC_TOSTRING_M(x) #x -#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) -#define rpmalloc_assert(truth, message) \ - do { \ - if (!(truth)) { \ - if (_memory_config.error_callback) { \ - _memory_config.error_callback(message " (" RPMALLOC_TOSTRING( \ - truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ - } else { \ - assert((truth) && message); \ - } \ - } \ - } while (0) -#else -#define rpmalloc_assert(truth, message) \ - do { \ - } while (0) -#endif -#if ENABLE_STATISTICS -#include -#endif - -////// -/// -/// Atomic access abstraction (since MSVC does not do C11 yet) -/// -////// - -#if defined(_MSC_VER) && !defined(__clang__) - -typedef volatile long atomic32_t; -typedef volatile long long atomic64_t; -typedef volatile void *atomicptr_t; - -static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; } -static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { - *dst = val; -} -static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { - return (int32_t)InterlockedIncrement(val); -} -static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { - return (int32_t)InterlockedDecrement(val); -} -static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { - return (int32_t)InterlockedExchangeAdd(val, add) + add; -} -static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val, - int32_t ref) { - 
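// Illustrative note, not part of this patch: the acquire/release variants of
// these helpers let a 32-bit word act as a tiny spinlock around shared state
// such as the global span cache (see the lock field of global_cache_t below).
// The usual pattern, sketched with the helpers defined here (0 = free,
// 1 = held):
//
//   static atomic32_t lock;
//
//   while (!atomic_cas32_acquire(&lock, 1, 0))  // acquire ordering on success
//     _rpmalloc_spin();                         // defined later in this file
//   /* ... touch the protected data ... */
//   atomic_store32_release(&lock, 0);           // release ordering on unlock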
return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; -} -static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) { - *dst = val; -} -static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; } -static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { - return (int64_t)InterlockedExchangeAdd64(val, add) + add; -} -static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { - return (void *)*src; -} -static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { - *dst = val; -} -static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { - *dst = val; -} -static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst, - void *val) { - return (void *)InterlockedExchangePointer((void *volatile *)dst, val); -} -static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { - return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) == - ref) - ? 1 - : 0; -} - -#define EXPECTED(x) (x) -#define UNEXPECTED(x) (x) - -#else - -#include - -typedef volatile _Atomic(int32_t) atomic32_t; -typedef volatile _Atomic(int64_t) atomic64_t; -typedef volatile _Atomic(void *) atomicptr_t; - -static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { - return atomic_load_explicit(src, memory_order_relaxed); -} -static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { - atomic_store_explicit(dst, val, memory_order_relaxed); -} -static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { - return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; -} -static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { - return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; -} -static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { - return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; -} -static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val, - int32_t ref) { - return atomic_compare_exchange_weak_explicit( - dst, &ref, val, memory_order_acquire, memory_order_relaxed); -} -static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) { - atomic_store_explicit(dst, val, memory_order_release); -} -static FORCEINLINE int64_t atomic_load64(atomic64_t *val) { - return atomic_load_explicit(val, memory_order_relaxed); -} -static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { - return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; -} -static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { - return atomic_load_explicit(src, memory_order_relaxed); -} -static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { - atomic_store_explicit(dst, val, memory_order_relaxed); -} -static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { - atomic_store_explicit(dst, val, memory_order_release); -} -static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst, - void *val) { - return atomic_exchange_explicit(dst, val, memory_order_acquire); -} -static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { - return atomic_compare_exchange_weak_explicit( - dst, &ref, val, memory_order_relaxed, memory_order_relaxed); -} - -#define EXPECTED(x) __builtin_expect((x), 1) -#define UNEXPECTED(x) __builtin_expect((x), 0) - -#endif - -//////////// -/// -/// Statistics related functions (evaluate to nothing when statistics not -/// enabled) -/// -////// - -#if ENABLE_STATISTICS -#define _rpmalloc_stat_inc(counter) 
atomic_incr32(counter) -#define _rpmalloc_stat_dec(counter) atomic_decr32(counter) -#define _rpmalloc_stat_add(counter, value) \ - atomic_add32(counter, (int32_t)(value)) -#define _rpmalloc_stat_add64(counter, value) \ - atomic_add64(counter, (int64_t)(value)) -#define _rpmalloc_stat_add_peak(counter, value, peak) \ - do { \ - int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); \ - if (_cur_count > (peak)) \ - peak = _cur_count; \ - } while (0) -#define _rpmalloc_stat_sub(counter, value) \ - atomic_add32(counter, -(int32_t)(value)) -#define _rpmalloc_stat_inc_alloc(heap, class_idx) \ - do { \ - int32_t alloc_current = \ - atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ - if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ - heap->size_class_use[class_idx].alloc_peak = alloc_current; \ - atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ - } while (0) -#define _rpmalloc_stat_inc_free(heap, class_idx) \ - do { \ - atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ - atomic_incr32(&heap->size_class_use[class_idx].free_total); \ - } while (0) -#else -#define _rpmalloc_stat_inc(counter) \ - do { \ - } while (0) -#define _rpmalloc_stat_dec(counter) \ - do { \ - } while (0) -#define _rpmalloc_stat_add(counter, value) \ - do { \ - } while (0) -#define _rpmalloc_stat_add64(counter, value) \ - do { \ - } while (0) -#define _rpmalloc_stat_add_peak(counter, value, peak) \ - do { \ - } while (0) -#define _rpmalloc_stat_sub(counter, value) \ - do { \ - } while (0) -#define _rpmalloc_stat_inc_alloc(heap, class_idx) \ - do { \ - } while (0) -#define _rpmalloc_stat_inc_free(heap, class_idx) \ - do { \ - } while (0) -#endif - -/// -/// Preconfigured limits and sizes -/// - -//! Granularity of a small allocation block (must be power of two) -#define SMALL_GRANULARITY 16 -//! Small granularity shift count -#define SMALL_GRANULARITY_SHIFT 4 -//! Number of small block size classes -#define SMALL_CLASS_COUNT 65 -//! Maximum size of a small block -#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) -//! Granularity of a medium allocation block -#define MEDIUM_GRANULARITY 512 -//! Medium granularity shift count -#define MEDIUM_GRANULARITY_SHIFT 9 -//! Number of medium block size classes -#define MEDIUM_CLASS_COUNT 61 -//! Total number of small + medium size classes -#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) -//! Number of large block size classes -#define LARGE_CLASS_COUNT 63 -//! Maximum size of a medium block -#define MEDIUM_SIZE_LIMIT \ - (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) -//! Maximum size of a large block -#define LARGE_SIZE_LIMIT \ - ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) -//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power -//! of two) -#define SPAN_HEADER_SIZE 128 -//! Number of spans in thread cache -#define MAX_THREAD_SPAN_CACHE 400 -//! Number of spans to transfer between thread and global cache -#define THREAD_SPAN_CACHE_TRANSFER 64 -//! Number of spans in thread cache for large spans (must be greater than -//! LARGE_CLASS_COUNT / 2) -#define MAX_THREAD_SPAN_LARGE_CACHE 100 -//! 
Number of spans to transfer between thread and global cache for large spans -#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6 - -_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, - "Small granularity must be power of two"); -_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, - "Span header size must be power of two"); - -#if ENABLE_VALIDATE_ARGS -//! Maximum allocation size to avoid integer overflow -#undef MAX_ALLOC_SIZE -#define MAX_ALLOC_SIZE (((size_t) - 1) - _memory_span_size) -#endif - -#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs)) -#define pointer_diff(first, second) \ - (ptrdiff_t)((const char *)(first) - (const char *)(second)) - -#define INVALID_POINTER ((void *)((uintptr_t) - 1)) - -#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT -#define SIZE_CLASS_HUGE ((uint32_t) - 1) - -//////////// -/// -/// Data types -/// -////// - -//! A memory heap, per thread -typedef struct heap_t heap_t; -//! Span of memory pages -typedef struct span_t span_t; -//! Span list -typedef struct span_list_t span_list_t; -//! Span active data -typedef struct span_active_t span_active_t; -//! Size class definition -typedef struct size_class_t size_class_t; -//! Global cache -typedef struct global_cache_t global_cache_t; - -//! Flag indicating span is the first (master) span of a split superspan -#define SPAN_FLAG_MASTER 1U -//! Flag indicating span is a secondary (sub) span of a split superspan -#define SPAN_FLAG_SUBSPAN 2U -//! Flag indicating span has blocks with increased alignment -#define SPAN_FLAG_ALIGNED_BLOCKS 4U -//! Flag indicating an unmapped master span -#define SPAN_FLAG_UNMAPPED_MASTER 8U - -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS -struct span_use_t { - //! Current number of spans used (actually used, not in cache) - atomic32_t current; - //! High water mark of spans used - atomic32_t high; -#if ENABLE_STATISTICS - //! Number of spans in deferred list - atomic32_t spans_deferred; - //! Number of spans transitioned to global cache - atomic32_t spans_to_global; - //! Number of spans transitioned from global cache - atomic32_t spans_from_global; - //! Number of spans transitioned to thread cache - atomic32_t spans_to_cache; - //! Number of spans transitioned from thread cache - atomic32_t spans_from_cache; - //! Number of spans transitioned to reserved state - atomic32_t spans_to_reserved; - //! Number of spans transitioned from reserved state - atomic32_t spans_from_reserved; - //! Number of raw memory map calls - atomic32_t spans_map_calls; -#endif -}; -typedef struct span_use_t span_use_t; -#endif - -#if ENABLE_STATISTICS -struct size_class_use_t { - //! Current number of allocations - atomic32_t alloc_current; - //! Peak number of allocations - int32_t alloc_peak; - //! Total number of allocations - atomic32_t alloc_total; - //! Total number of frees - atomic32_t free_total; - //! Number of spans in use - atomic32_t spans_current; - //! Number of spans transitioned to cache - int32_t spans_peak; - //! Number of spans transitioned to cache - atomic32_t spans_to_cache; - //! Number of spans transitioned from cache - atomic32_t spans_from_cache; - //! Number of spans transitioned from reserved state - atomic32_t spans_from_reserved; - //! 
Number of spans mapped - atomic32_t spans_map_calls; - int32_t unused; -}; -typedef struct size_class_use_t size_class_use_t; -#endif - -// A span can either represent a single span of memory pages with size declared -// by span_map_count configuration variable, or a set of spans in a continuous -// region, a super span. Any reference to the term "span" usually refers to both -// a single span or a super span. A super span can further be divided into -// multiple spans (or this, super spans), where the first (super)span is the -// master and subsequent (super)spans are subspans. The master span keeps track -// of how many subspans that are still alive and mapped in virtual memory, and -// once all subspans and master have been unmapped the entire superspan region -// is released and unmapped (on Windows for example, the entire superspan range -// has to be released in the same call to release the virtual memory range, but -// individual subranges can be decommitted individually to reduce physical -// memory use). -struct span_t { - //! Free list - void *free_list; - //! Total block count of size class - uint32_t block_count; - //! Size class - uint32_t size_class; - //! Index of last block initialized in free list - uint32_t free_list_limit; - //! Number of used blocks remaining when in partial state - uint32_t used_count; - //! Deferred free list - atomicptr_t free_list_deferred; - //! Size of deferred free list, or list of spans when part of a cache list - uint32_t list_size; - //! Size of a block - uint32_t block_size; - //! Flags and counters - uint32_t flags; - //! Number of spans - uint32_t span_count; - //! Total span counter for master spans - uint32_t total_spans; - //! Offset from master span for subspans - uint32_t offset_from_master; - //! Remaining span counter, for master spans - atomic32_t remaining_spans; - //! Alignment offset - uint32_t align_offset; - //! Owning heap - heap_t *heap; - //! Next span - span_t *next; - //! Previous span - span_t *prev; -}; -_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch"); - -struct span_cache_t { - size_t count; - span_t *span[MAX_THREAD_SPAN_CACHE]; -}; -typedef struct span_cache_t span_cache_t; - -struct span_large_cache_t { - size_t count; - span_t *span[MAX_THREAD_SPAN_LARGE_CACHE]; -}; -typedef struct span_large_cache_t span_large_cache_t; - -struct heap_size_class_t { - //! Free list of active span - void *free_list; - //! Double linked list of partially used spans with free blocks. - // Previous span pointer in head points to tail span of list. - span_t *partial_span; - //! Early level cache of fully free spans - span_t *cache; -}; -typedef struct heap_size_class_t heap_size_class_t; - -// Control structure for a heap, either a thread heap or a first class heap if -// enabled -struct heap_t { - //! Owning thread ID - uintptr_t owner_thread; - //! Free lists for each size class - heap_size_class_t size_class[SIZE_CLASS_COUNT]; -#if ENABLE_THREAD_CACHE - //! Arrays of fully freed spans, single span - span_cache_t span_cache; -#endif - //! List of deferred free spans (single linked list) - atomicptr_t span_free_deferred; - //! Number of full spans - size_t full_span_count; - //! Mapped but unused spans - span_t *span_reserve; - //! Master span for mapped but unused spans - span_t *span_reserve_master; - //! Number of mapped but unused spans - uint32_t spans_reserved; - //! Child count - atomic32_t child_count; - //! Next heap in id list - heap_t *next_heap; - //! 
Next heap in orphan list - heap_t *next_orphan; - //! Heap ID - int32_t id; - //! Finalization state flag - int finalize; - //! Master heap owning the memory pages - heap_t *master_heap; -#if ENABLE_THREAD_CACHE - //! Arrays of fully freed spans, large spans with > 1 span count - span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; -#endif -#if RPMALLOC_FIRST_CLASS_HEAPS - //! Double linked list of fully utilized spans with free blocks for each size - //! class. - // Previous span pointer in head points to tail span of list. - span_t *full_span[SIZE_CLASS_COUNT]; - //! Double linked list of large and huge spans allocated by this heap - span_t *large_huge_span; -#endif -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - //! Current and high water mark of spans used per span count - span_use_t span_use[LARGE_CLASS_COUNT]; -#endif -#if ENABLE_STATISTICS - //! Allocation stats per size class - size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; - //! Number of bytes transitioned thread -> global - atomic64_t thread_to_global; - //! Number of bytes transitioned global -> thread - atomic64_t global_to_thread; -#endif -}; - -// Size class for defining a block size bucket -struct size_class_t { - //! Size of blocks in this class - uint32_t block_size; - //! Number of blocks in each chunk - uint16_t block_count; - //! Class index this class is merged with - uint16_t class_idx; -}; -_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); - -struct global_cache_t { - //! Cache lock - atomic32_t lock; - //! Cache count - uint32_t count; -#if ENABLE_STATISTICS - //! Insert count - size_t insert_count; - //! Extract count - size_t extract_count; -#endif - //! Cached spans - span_t *span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; - //! Unlimited cache overflow - span_t *overflow; -}; - -//////////// -/// -/// Global data -/// -////// - -//! Default span size (64KiB) -#define _memory_default_span_size (64 * 1024) -#define _memory_default_span_size_shift 16 -#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) - -//! Initialized flag -static int _rpmalloc_initialized; -//! Main thread ID -static uintptr_t _rpmalloc_main_thread_id; -//! Configuration -static rpmalloc_config_t _memory_config; -//! Memory page size -static size_t _memory_page_size; -//! Shift to divide by page size -static size_t _memory_page_size_shift; -//! Granularity at which memory pages are mapped by OS -static size_t _memory_map_granularity; -#if RPMALLOC_CONFIGURABLE -//! Size of a span of memory pages -static size_t _memory_span_size; -//! Shift to divide by span size -static size_t _memory_span_size_shift; -//! Mask to get to start of a memory span -static uintptr_t _memory_span_mask; -#else -//! Hardwired span size -#define _memory_span_size _memory_default_span_size -#define _memory_span_size_shift _memory_default_span_size_shift -#define _memory_span_mask _memory_default_span_mask -#endif -//! Number of spans to map in each map call -static size_t _memory_span_map_count; -//! Number of spans to keep reserved in each heap -static size_t _memory_heap_reserve_count; -//! Global size classes -static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; -//! Run-time size limit of medium blocks -static size_t _memory_medium_size_limit; -//! Heap ID counter -static atomic32_t _memory_heap_id; -//! Huge page support -static int _memory_huge_pages; -#if ENABLE_GLOBAL_CACHE -//! Global span cache -static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; -#endif -//! 
Global reserved spans -static span_t *_memory_global_reserve; -//! Global reserved count -static size_t _memory_global_reserve_count; -//! Global reserved master -static span_t *_memory_global_reserve_master; -//! All heaps -static heap_t *_memory_heaps[HEAP_ARRAY_SIZE]; -//! Used to restrict access to mapping memory for huge pages -static atomic32_t _memory_global_lock; -//! Orphaned heaps -static heap_t *_memory_orphan_heaps; -#if RPMALLOC_FIRST_CLASS_HEAPS -//! Orphaned heaps (first class heaps) -static heap_t *_memory_first_class_orphan_heaps; -#endif -#if ENABLE_STATISTICS -//! Allocations counter -static atomic64_t _allocation_counter; -//! Deallocations counter -static atomic64_t _deallocation_counter; -//! Active heap count -static atomic32_t _memory_active_heaps; -//! Number of currently mapped memory pages -static atomic32_t _mapped_pages; -//! Peak number of concurrently mapped memory pages -static int32_t _mapped_pages_peak; -//! Number of mapped master spans -static atomic32_t _master_spans; -//! Number of unmapped dangling master spans -static atomic32_t _unmapped_master_spans; -//! Running counter of total number of mapped memory pages since start -static atomic32_t _mapped_total; -//! Running counter of total number of unmapped memory pages since start -static atomic32_t _unmapped_total; -//! Number of currently mapped memory pages in OS calls -static atomic32_t _mapped_pages_os; -//! Number of currently allocated pages in huge allocations -static atomic32_t _huge_pages_current; -//! Peak number of currently allocated pages in huge allocations -static int32_t _huge_pages_peak; -#endif - -//////////// -/// -/// Thread local heap and ID -/// -////// - -//! Current thread heap -#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \ - defined(__TINYC__) -static pthread_key_t _memory_thread_heap; -#else -#ifdef _MSC_VER -#define _Thread_local __declspec(thread) -#define TLS_MODEL -#else -#ifndef __HAIKU__ -#define TLS_MODEL __attribute__((tls_model("initial-exec"))) -#else -#define TLS_MODEL -#endif -#if !defined(__clang__) && defined(__GNUC__) -#define _Thread_local __thread -#endif -#endif -static _Thread_local heap_t *_memory_thread_heap TLS_MODEL; -#endif - -static inline heap_t *get_thread_heap_raw(void) { -#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD - return pthread_getspecific(_memory_thread_heap); -#else - return _memory_thread_heap; -#endif -} - -//! Get the current thread heap -static inline heap_t *get_thread_heap(void) { - heap_t *heap = get_thread_heap_raw(); -#if ENABLE_PRELOAD - if (EXPECTED(heap != 0)) - return heap; - rpmalloc_initialize(); - return get_thread_heap_raw(); -#else - return heap; -#endif -} - -//! 
Fast thread ID -static inline uintptr_t get_thread_id(void) { -#if defined(_WIN32) - return (uintptr_t)((void *)NtCurrentTeb()); -#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) - uintptr_t tid; -#if defined(__i386__) - __asm__("movl %%gs:0, %0" : "=r"(tid) : :); -#elif defined(__x86_64__) -#if defined(__MACH__) - __asm__("movq %%gs:0, %0" : "=r"(tid) : :); -#else - __asm__("movq %%fs:0, %0" : "=r"(tid) : :); -#endif -#elif defined(__arm__) - __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid)); -#elif defined(__aarch64__) -#if defined(__MACH__) - // tpidr_el0 likely unused, always return 0 on iOS - __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid)); -#else - __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid)); -#endif -#else -#error This platform needs implementation of get_thread_id() -#endif - return tid; -#else -#error This platform needs implementation of get_thread_id() -#endif -} - -//! Set the current thread heap -static void set_thread_heap(heap_t *heap) { -#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \ - defined(__TINYC__) - pthread_setspecific(_memory_thread_heap, heap); -#else - _memory_thread_heap = heap; -#endif - if (heap) - heap->owner_thread = get_thread_id(); -} - -//! Set main thread ID -extern void rpmalloc_set_main_thread(void); - -void rpmalloc_set_main_thread(void) { - _rpmalloc_main_thread_id = get_thread_id(); -} - -static void _rpmalloc_spin(void) { -#if defined(_MSC_VER) -#if defined(_M_ARM64) - __yield(); -#else - _mm_pause(); -#endif -#elif defined(__x86_64__) || defined(__i386__) - __asm__ volatile("pause" ::: "memory"); -#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) - __asm__ volatile("yield" ::: "memory"); -#elif defined(__powerpc__) || defined(__powerpc64__) - // No idea if ever been compiled in such archs but ... as precaution - __asm__ volatile("or 27,27,27"); -#elif defined(__sparc__) - __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); -#else - struct timespec ts = {0}; - nanosleep(&ts, 0); -#endif -} - -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) -static void NTAPI _rpmalloc_thread_destructor(void *value) { -#if ENABLE_OVERRIDE - // If this is called on main thread it means rpmalloc_finalize - // has not been called and shutdown is forced (through _exit) or unclean - if (get_thread_id() == _rpmalloc_main_thread_id) - return; -#endif - if (value) - rpmalloc_thread_finalize(1); -} -#endif - -//////////// -/// -/// Low level memory map/unmap -/// -////// - -static void _rpmalloc_set_name(void *address, size_t size) { -#if defined(__linux__) || defined(__ANDROID__) - const char *name = _memory_huge_pages ? _memory_config.huge_page_name - : _memory_config.page_name; - if (address == MAP_FAILED || !name) - return; - // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails - // (e.g. invalid name) it is a no-op basically. - (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, - (uintptr_t)name); -#else - (void)sizeof(size); - (void)sizeof(address); -#endif -} - -//! 
Map more virtual memory -// size is number of bytes to map -// offset receives the offset in bytes from start of mapped region -// returns address to start of mapped region to use -static void *_rpmalloc_mmap(size_t size, size_t *offset) { - rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); - rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); - void *address = _memory_config.memory_map(size, offset); - if (EXPECTED(address != 0)) { - _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), - _mapped_pages_peak); - _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); - } - return address; -} - -//! Unmap virtual memory -// address is the memory address to unmap, as returned from _memory_map -// size is the number of bytes to unmap, which might be less than full region -// for a partial unmap offset is the offset in bytes to the actual mapped -// region, as set by _memory_map release is set to 0 for partial unmap, or size -// of entire range for a full unmap -static void _rpmalloc_unmap(void *address, size_t size, size_t offset, - size_t release) { - rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); - rpmalloc_assert(!release || (release >= _memory_page_size), - "Invalid unmap size"); - if (release) { - rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); - _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); - _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); - } - _memory_config.memory_unmap(address, size, offset, release); -} - -//! Default implementation to map new pages to virtual memory -static void *_rpmalloc_mmap_os(size_t size, size_t *offset) { - // Either size is a heap (a single page) or a (multiple) span - we only need - // to align spans, and only if larger than map granularity - size_t padding = ((size >= _memory_span_size) && - (_memory_span_size > _memory_map_granularity)) - ? _memory_span_size - : 0; - rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); -#if PLATFORM_WINDOWS - // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not - // allocated unless/until the virtual addresses are actually accessed" - void *ptr = VirtualAlloc(0, size + padding, - (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | - MEM_RESERVE | MEM_COMMIT, - PAGE_READWRITE); - if (!ptr) { - if (_memory_config.map_fail_callback) { - if (_memory_config.map_fail_callback(size + padding)) - return _rpmalloc_mmap_os(size, offset); - } else { - rpmalloc_assert(ptr, "Failed to map virtual memory block"); - } - return 0; - } -#else - int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; -#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR - int fd = (int)VM_MAKE_TAG(240U); - if (_memory_huge_pages) - fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); -#elif defined(MAP_HUGETLB) - void *ptr = mmap(0, size + padding, - PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), - (_memory_huge_pages ? 
MAP_HUGETLB : 0) | flags, -1, 0); -#if defined(MADV_HUGEPAGE) - // In some configurations, huge pages allocations might fail thus - // we fallback to normal allocations and promote the region as transparent - // huge page - if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) { - ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); - if (ptr && ptr != MAP_FAILED) { - int prm = madvise(ptr, size + padding, MADV_HUGEPAGE); - (void)prm; - rpmalloc_assert((prm == 0), "Failed to promote the page to THP"); - } - } -#endif - _rpmalloc_set_name(ptr, size + padding); -#elif defined(MAP_ALIGNED) - const size_t align = - (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); - void *ptr = - mmap(0, size + padding, PROT_READ | PROT_WRITE, - (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); -#elif defined(MAP_ALIGN) - caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0); - void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, - (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); -#else - void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); -#endif - if ((ptr == MAP_FAILED) || !ptr) { - if (_memory_config.map_fail_callback) { - if (_memory_config.map_fail_callback(size + padding)) - return _rpmalloc_mmap_os(size, offset); - } else if (errno != ENOMEM) { - rpmalloc_assert((ptr != MAP_FAILED) && ptr, - "Failed to map virtual memory block"); - } - return 0; - } -#endif - _rpmalloc_stat_add(&_mapped_pages_os, - (int32_t)((size + padding) >> _memory_page_size_shift)); - if (padding) { - size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); - rpmalloc_assert(final_padding <= _memory_span_size, - "Internal failure in padding"); - rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); - rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); - ptr = pointer_offset(ptr, final_padding); - *offset = final_padding >> 3; - } - rpmalloc_assert((size < _memory_span_size) || - !((uintptr_t)ptr & ~_memory_span_mask), - "Internal failure in padding"); - return ptr; -} - -//! Default implementation to unmap pages from virtual memory -static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset, - size_t release) { - rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); - rpmalloc_assert(!release || (release >= _memory_page_size), - "Invalid unmap size"); - rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); - if (release && offset) { - offset <<= 3; - address = pointer_offset(address, -(int32_t)offset); - if ((release >= _memory_span_size) && - (_memory_span_size > _memory_map_granularity)) { - // Padding is always one span size - release += _memory_span_size; - } - } -#if !DISABLE_UNMAP -#if PLATFORM_WINDOWS - if (!VirtualFree(address, release ? 0 : size, - release ? 
MEM_RELEASE : MEM_DECOMMIT)) { - rpmalloc_assert(0, "Failed to unmap virtual memory block"); - } -#else - if (release) { - if (munmap(address, release)) { - rpmalloc_assert(0, "Failed to unmap virtual memory block"); - } - } else { -#if defined(MADV_FREE_REUSABLE) - int ret; - while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && - (errno == EAGAIN)) - errno = 0; - if ((ret == -1) && (errno != 0)) { -#elif defined(MADV_DONTNEED) - if (madvise(address, size, MADV_DONTNEED)) { -#elif defined(MADV_PAGEOUT) - if (madvise(address, size, MADV_PAGEOUT)) { -#elif defined(MADV_FREE) - if (madvise(address, size, MADV_FREE)) { -#else - if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { -#endif - rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); - } - } -#endif -#endif - if (release) - _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); -} - -static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, - span_t *subspan, - size_t span_count); - -//! Use global reserved spans to fulfill a memory map request (reserve size must -//! be checked by caller) -static span_t *_rpmalloc_global_get_reserved_spans(size_t span_count) { - span_t *span = _memory_global_reserve; - _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, - span, span_count); - _memory_global_reserve_count -= span_count; - if (_memory_global_reserve_count) - _memory_global_reserve = - (span_t *)pointer_offset(span, span_count << _memory_span_size_shift); - else - _memory_global_reserve = 0; - return span; -} - -//! Store the given spans as global reserve (must only be called from within new -//! heap allocation, not thread safe) -static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve, - size_t reserve_span_count) { - _memory_global_reserve_master = master; - _memory_global_reserve_count = reserve_span_count; - _memory_global_reserve = reserve; -} - -//////////// -/// -/// Span linked list management -/// -////// - -//! Add a span to double linked list at the head -static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) { - if (*head) - (*head)->prev = span; - span->next = *head; - *head = span; -} - -//! Pop head span from double linked list -static void _rpmalloc_span_double_link_list_pop_head(span_t **head, - span_t *span) { - rpmalloc_assert(*head == span, "Linked list corrupted"); - span = *head; - *head = span->next; -} - -//! Remove a span from double linked list -static void _rpmalloc_span_double_link_list_remove(span_t **head, - span_t *span) { - rpmalloc_assert(*head, "Linked list corrupted"); - if (*head == span) { - *head = span->next; - } else { - span_t *next_span = span->next; - span_t *prev_span = span->prev; - prev_span->next = next_span; - if (EXPECTED(next_span != 0)) - next_span->prev = prev_span; - } -} - -//////////// -/// -/// Span control -/// -////// - -static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span); - -static void _rpmalloc_heap_finalize(heap_t *heap); - -static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, - span_t *reserve, - size_t reserve_span_count); - -//! Declare the span to be a subspan and store distance from master span and -//! 
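// Illustrative sketch (not part of rpmalloc): the intrusive doubly linked
// list pattern used by the _rpmalloc_span_double_link_list_* helpers above.
// The links live inside the elements themselves, so adding and removing a
// node never allocates.
#include <stddef.h>

typedef struct elem {
  struct elem *next;
  struct elem *prev;
} elem_t;

static void list_push_head(elem_t **head, elem_t *e) {
  if (*head)
    (*head)->prev = e;
  e->next = *head;
  e->prev = NULL;
  *head = e;
}

static void list_remove(elem_t **head, elem_t *e) {
  if (*head == e) {
    *head = e->next;         // removing the head: just advance it
  } else {
    e->prev->next = e->next; // unlink from the middle or tail
    if (e->next)
      e->next->prev = e->prev;
  }
}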
span count -static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, - span_t *subspan, - size_t span_count) { - rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), - "Span master pointer and/or flag mismatch"); - if (subspan != master) { - subspan->flags = SPAN_FLAG_SUBSPAN; - subspan->offset_from_master = - (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> - _memory_span_size_shift); - subspan->align_offset = 0; - } - subspan->span_count = (uint32_t)span_count; -} - -//! Use reserved spans to fulfill a memory map request (reserve size must be -//! checked by caller) -static span_t *_rpmalloc_span_map_from_reserve(heap_t *heap, - size_t span_count) { - // Update the heap span reserve - span_t *span = heap->span_reserve; - heap->span_reserve = - (span_t *)pointer_offset(span, span_count * _memory_span_size); - heap->spans_reserved -= (uint32_t)span_count; - - _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, - span_count); - if (span_count <= LARGE_CLASS_COUNT) - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved); - - return span; -} - -//! Get the aligned number of spans to map in based on wanted count, configured -//! mapping granularity and the page size -static size_t _rpmalloc_span_align_count(size_t span_count) { - size_t request_count = (span_count > _memory_span_map_count) - ? span_count - : _memory_span_map_count; - if ((_memory_page_size > _memory_span_size) && - ((request_count * _memory_span_size) % _memory_page_size)) - request_count += - _memory_span_map_count - (request_count % _memory_span_map_count); - return request_count; -} - -//! Setup a newly mapped span -static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count, - size_t span_count, size_t align_offset) { - span->total_spans = (uint32_t)total_span_count; - span->span_count = (uint32_t)span_count; - span->align_offset = (uint32_t)align_offset; - span->flags = SPAN_FLAG_MASTER; - atomic_store32(&span->remaining_spans, (int32_t)total_span_count); -} - -static void _rpmalloc_span_unmap(span_t *span); - -//! Map an aligned set of spans, taking configured mapping granularity and the -//! page size into account -static span_t *_rpmalloc_span_map_aligned_count(heap_t *heap, - size_t span_count) { - // If we already have some, but not enough, reserved spans, release those to - // heap cache and map a new full set of spans. 
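// Illustrative sketch (not part of rpmalloc): the kind of round-up performed
// when sizing a span mapping. The real logic lives in
// _rpmalloc_span_align_count() above; the configuration values here (64 KiB
// spans, 2 MiB huge pages, 32-span map granularity) are made-up numbers
// chosen only to make the arithmetic visible.
#include <assert.h>
#include <stddef.h>

static size_t round_up_to_multiple(size_t value, size_t multiple) {
  size_t remainder = value % multiple;
  return remainder ? value + (multiple - remainder) : value;
}

static size_t example_span_request(size_t span_count) {
  const size_t span_size = 64 * 1024;       // hypothetical span size
  const size_t page_size = 2 * 1024 * 1024; // hypothetical huge page size
  const size_t span_map_count = 32;         // hypothetical map granularity
  size_t request = span_count > span_map_count ? span_count : span_map_count;
  // When pages are larger than spans, a request that is not page aligned is
  // padded to the next map-granularity boundary so no partial page is mapped.
  if (page_size > span_size && (request * span_size) % page_size)
    request = round_up_to_multiple(request, span_map_count);
  return request;
}

static void span_request_examples(void) {
  assert(example_span_request(8) == 32);  // small requests map a full batch
  assert(example_span_request(40) == 64); // 40 spans round up to 64 (4 MiB)
}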
Otherwise we would waste memory - // if page size > span size (huge pages) - size_t aligned_span_count = _rpmalloc_span_align_count(span_count); - size_t align_offset = 0; - span_t *span = (span_t *)_rpmalloc_mmap( - aligned_span_count * _memory_span_size, &align_offset); - if (!span) - return 0; - _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset); - _rpmalloc_stat_inc(&_master_spans); - if (span_count <= LARGE_CLASS_COUNT) - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls); - if (aligned_span_count > span_count) { - span_t *reserved_spans = - (span_t *)pointer_offset(span, span_count * _memory_span_size); - size_t reserved_count = aligned_span_count - span_count; - if (heap->spans_reserved) { - _rpmalloc_span_mark_as_subspan_unless_master( - heap->span_reserve_master, heap->span_reserve, heap->spans_reserved); - _rpmalloc_heap_cache_insert(heap, heap->span_reserve); - } - if (reserved_count > _memory_heap_reserve_count) { - // If huge pages or eager spam map count, the global reserve spin lock is - // held by caller, _rpmalloc_span_map - rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, - "Global spin lock not held as expected"); - size_t remain_count = reserved_count - _memory_heap_reserve_count; - reserved_count = _memory_heap_reserve_count; - span_t *remain_span = (span_t *)pointer_offset( - reserved_spans, reserved_count * _memory_span_size); - if (_memory_global_reserve) { - _rpmalloc_span_mark_as_subspan_unless_master( - _memory_global_reserve_master, _memory_global_reserve, - _memory_global_reserve_count); - _rpmalloc_span_unmap(_memory_global_reserve); - } - _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count); - } - _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, - reserved_count); - } - return span; -} - -//! Map in memory pages for the given number of spans (or use previously -//! reserved pages) -static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) { - if (span_count <= heap->spans_reserved) - return _rpmalloc_span_map_from_reserve(heap, span_count); - span_t *span = 0; - int use_global_reserve = - (_memory_page_size > _memory_span_size) || - (_memory_span_map_count > _memory_heap_reserve_count); - if (use_global_reserve) { - // If huge pages, make sure only one thread maps more memory to avoid bloat - while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) - _rpmalloc_spin(); - if (_memory_global_reserve_count >= span_count) { - size_t reserve_count = - (!heap->spans_reserved ? _memory_heap_reserve_count : span_count); - if (_memory_global_reserve_count < reserve_count) - reserve_count = _memory_global_reserve_count; - span = _rpmalloc_global_get_reserved_spans(reserve_count); - if (span) { - if (reserve_count > span_count) { - span_t *reserved_span = (span_t *)pointer_offset( - span, span_count << _memory_span_size_shift); - _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, - reserved_span, - reserve_count - span_count); - } - // Already marked as subspan in _rpmalloc_global_get_reserved_spans - span->span_count = (uint32_t)span_count; - } - } - } - if (!span) - span = _rpmalloc_span_map_aligned_count(heap, span_count); - if (use_global_reserve) - atomic_store32_release(&_memory_global_lock, 0); - return span; -} - -//! Unmap memory pages for the given number of spans (or mark as unused if no -//! 
partial unmappings) -static void _rpmalloc_span_unmap(span_t *span) { - rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || - (span->flags & SPAN_FLAG_SUBSPAN), - "Span flag corrupted"); - rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || - !(span->flags & SPAN_FLAG_SUBSPAN), - "Span flag corrupted"); - - int is_master = !!(span->flags & SPAN_FLAG_MASTER); - span_t *master = - is_master ? span - : ((span_t *)pointer_offset( - span, -(intptr_t)((uintptr_t)span->offset_from_master * - _memory_span_size))); - rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), - "Span flag corrupted"); - rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); - - size_t span_count = span->span_count; - if (!is_master) { - // Directly unmap subspans (unless huge pages, in which case we defer and - // unmap entire page range with master) - rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); - if (_memory_span_size >= _memory_page_size) - _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); - } else { - // Special double flag to denote an unmapped master - // It must be kept in memory since span header must be used - span->flags |= - SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; - _rpmalloc_stat_add(&_unmapped_master_spans, 1); - } - - if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { - // Everything unmapped, unmap the master span with release flag to unmap the - // entire range of the super span - rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && - !!(master->flags & SPAN_FLAG_SUBSPAN), - "Span flag corrupted"); - size_t unmap_count = master->span_count; - if (_memory_span_size < _memory_page_size) - unmap_count = master->total_spans; - _rpmalloc_stat_sub(&_master_spans, 1); - _rpmalloc_stat_sub(&_unmapped_master_spans, 1); - _rpmalloc_unmap(master, unmap_count * _memory_span_size, - master->align_offset, - (size_t)master->total_spans * _memory_span_size); - } -} - -//! Move the span (used for small or medium allocations) to the heap thread -//! cache -static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) { - rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); - rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, - "Invalid span size class"); - rpmalloc_assert(span->span_count == 1, "Invalid span count"); -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - atomic_decr32(&heap->span_use[0].current); -#endif - _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); - if (!heap->finalize) { - _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); - _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); - if (heap->size_class[span->size_class].cache) - _rpmalloc_heap_cache_insert(heap, - heap->size_class[span->size_class].cache); - heap->size_class[span->size_class].cache = span; - } else { - _rpmalloc_span_unmap(span); - } -} - -//! Initialize a (partial) free list up to next system memory page, while -//! 
reserving the first block as allocated, returning number of blocks in list -static uint32_t free_list_partial_init(void **list, void **first_block, - void *page_start, void *block_start, - uint32_t block_count, - uint32_t block_size) { - rpmalloc_assert(block_count, "Internal failure"); - *first_block = block_start; - if (block_count > 1) { - void *free_block = pointer_offset(block_start, block_size); - void *block_end = - pointer_offset(block_start, (size_t)block_size * block_count); - // If block size is less than half a memory page, bound init to next memory - // page boundary - if (block_size < (_memory_page_size >> 1)) { - void *page_end = pointer_offset(page_start, _memory_page_size); - if (page_end < block_end) - block_end = page_end; - } - *list = free_block; - block_count = 2; - void *next_block = pointer_offset(free_block, block_size); - while (next_block < block_end) { - *((void **)free_block) = next_block; - free_block = next_block; - ++block_count; - next_block = pointer_offset(next_block, block_size); - } - *((void **)free_block) = 0; - } else { - *list = 0; - } - return block_count; -} - -//! Initialize an unused span (from cache or mapped) to be new active span, -//! putting the initial free list in heap class free list -static void *_rpmalloc_span_initialize_new(heap_t *heap, - heap_size_class_t *heap_size_class, - span_t *span, uint32_t class_idx) { - rpmalloc_assert(span->span_count == 1, "Internal failure"); - size_class_t *size_class = _memory_size_class + class_idx; - span->size_class = class_idx; - span->heap = heap; - span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; - span->block_size = size_class->block_size; - span->block_count = size_class->block_count; - span->free_list = 0; - span->list_size = 0; - atomic_store_ptr_release(&span->free_list_deferred, 0); - - // Setup free list. 
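// Illustrative sketch (not part of rpmalloc): threading a free list through a
// raw block of memory, which is the core idea behind free_list_partial_init()
// above. Each free block stores the address of the next free block in its
// first bytes, so no side table is needed; block_size must be at least
// sizeof(void *). Block size and count here are arbitrary.
#include <stddef.h>
#include <stdint.h>

static void *build_inplace_free_list(void *memory, uint32_t block_count,
                                     uint32_t block_size) {
  uint8_t *block = (uint8_t *)memory;
  for (uint32_t i = 0; i + 1 < block_count; ++i) {
    // First bytes of every free block point at the next free block.
    *(void **)(block + (size_t)i * block_size) =
        block + (size_t)(i + 1) * block_size;
  }
  *(void **)(block + (size_t)(block_count - 1) * block_size) = NULL;
  return memory; // head of the free list
}

// Popping then becomes a two-line operation, matching the free_list_pop()
// helper defined later in this file.
static void *pop_block(void **list_head) {
  void *block = *list_head;
  *list_head = *(void **)block;
  return block;
}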
Only initialize one system page worth of free blocks in - // list - void *block; - span->free_list_limit = - free_list_partial_init(&heap_size_class->free_list, &block, span, - pointer_offset(span, SPAN_HEADER_SIZE), - size_class->block_count, size_class->block_size); - // Link span as partial if there remains blocks to be initialized as free - // list, or full if fully initialized - if (span->free_list_limit < span->block_count) { - _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span); - span->used_count = span->free_list_limit; - } else { -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); -#endif - ++heap->full_span_count; - span->used_count = span->block_count; - } - return block; -} - -static void _rpmalloc_span_extract_free_list_deferred(span_t *span) { - // We need acquire semantics on the CAS operation since we are interested in - // the list size Refer to _rpmalloc_deallocate_defer_small_or_medium for - // further comments on this dependency - do { - span->free_list = - atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); - } while (span->free_list == INVALID_POINTER); - span->used_count -= span->list_size; - span->list_size = 0; - atomic_store_ptr_release(&span->free_list_deferred, 0); -} - -static int _rpmalloc_span_is_fully_utilized(span_t *span) { - rpmalloc_assert(span->free_list_limit <= span->block_count, - "Span free list corrupted"); - return !span->free_list && (span->free_list_limit >= span->block_count); -} - -static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, - span_t **list_head) { - void *free_list = heap->size_class[iclass].free_list; - span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask); - if (span == class_span) { - // Adopt the heap class free list back into the span free list - void *block = span->free_list; - void *last_block = 0; - while (block) { - last_block = block; - block = *((void **)block); - } - uint32_t free_count = 0; - block = free_list; - while (block) { - ++free_count; - block = *((void **)block); - } - if (last_block) { - *((void **)last_block) = free_list; - } else { - span->free_list = free_list; - } - heap->size_class[iclass].free_list = 0; - span->used_count -= free_count; - } - // If this assert triggers you have memory leaks - rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected"); - if (span->list_size == span->used_count) { - _rpmalloc_stat_dec(&heap->span_use[0].current); - _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current); - // This function only used for spans in double linked lists - if (list_head) - _rpmalloc_span_double_link_list_remove(list_head, span); - _rpmalloc_span_unmap(span); - return 1; - } - return 0; -} - -//////////// -/// -/// Global cache -/// -////// - -#if ENABLE_GLOBAL_CACHE - -//! Finalize a global cache -static void _rpmalloc_global_cache_finalize(global_cache_t *cache) { - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - - for (size_t ispan = 0; ispan < cache->count; ++ispan) - _rpmalloc_span_unmap(cache->span[ispan]); - cache->count = 0; - - while (cache->overflow) { - span_t *span = cache->overflow; - cache->overflow = span->next; - _rpmalloc_span_unmap(span); - } - - atomic_store32_release(&cache->lock, 0); -} - -static void _rpmalloc_global_cache_insert_spans(span_t **span, - size_t span_count, - size_t count) { - const size_t cache_limit = - (span_count == 1) ? 
GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE - : GLOBAL_CACHE_MULTIPLIER * - (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); - - global_cache_t *cache = &_memory_span_cache[span_count - 1]; - - size_t insert_count = count; - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - -#if ENABLE_STATISTICS - cache->insert_count += count; -#endif - if ((cache->count + insert_count) > cache_limit) - insert_count = cache_limit - cache->count; - - memcpy(cache->span + cache->count, span, sizeof(span_t *) * insert_count); - cache->count += (uint32_t)insert_count; - -#if ENABLE_UNLIMITED_CACHE - while (insert_count < count) { -#else - // Enable unlimited cache if huge pages, or we will leak since it is unlikely - // that an entire huge page will be unmapped, and we're unable to partially - // decommit a huge page - while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { -#endif - span_t *current_span = span[insert_count++]; - current_span->next = cache->overflow; - cache->overflow = current_span; - } - atomic_store32_release(&cache->lock, 0); - - span_t *keep = 0; - for (size_t ispan = insert_count; ispan < count; ++ispan) { - span_t *current_span = span[ispan]; - // Keep master spans that has remaining subspans to avoid dangling them - if ((current_span->flags & SPAN_FLAG_MASTER) && - (atomic_load32(¤t_span->remaining_spans) > - (int32_t)current_span->span_count)) { - current_span->next = keep; - keep = current_span; - } else { - _rpmalloc_span_unmap(current_span); - } - } - - if (keep) { - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - - size_t islot = 0; - while (keep) { - for (; islot < cache->count; ++islot) { - span_t *current_span = cache->span[islot]; - if (!(current_span->flags & SPAN_FLAG_MASTER) || - ((current_span->flags & SPAN_FLAG_MASTER) && - (atomic_load32(¤t_span->remaining_spans) <= - (int32_t)current_span->span_count))) { - _rpmalloc_span_unmap(current_span); - cache->span[islot] = keep; - break; - } - } - if (islot == cache->count) - break; - keep = keep->next; - } - - if (keep) { - span_t *tail = keep; - while (tail->next) - tail = tail->next; - tail->next = cache->overflow; - cache->overflow = keep; - } - - atomic_store32_release(&cache->lock, 0); - } -} - -static size_t _rpmalloc_global_cache_extract_spans(span_t **span, - size_t span_count, - size_t count) { - global_cache_t *cache = &_memory_span_cache[span_count - 1]; - - size_t extract_count = 0; - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - -#if ENABLE_STATISTICS - cache->extract_count += count; -#endif - size_t want = count - extract_count; - if (want > cache->count) - want = cache->count; - - memcpy(span + extract_count, cache->span + (cache->count - want), - sizeof(span_t *) * want); - cache->count -= (uint32_t)want; - extract_count += want; - - while ((extract_count < count) && cache->overflow) { - span_t *current_span = cache->overflow; - span[extract_count++] = current_span; - cache->overflow = current_span->next; - } - -#if ENABLE_ASSERTS - for (size_t ispan = 0; ispan < extract_count; ++ispan) { - rpmalloc_assert(span[ispan]->span_count == span_count, - "Global cache span count mismatch"); - } -#endif - - atomic_store32_release(&cache->lock, 0); - - return extract_count; -} - -#endif - -//////////// -/// -/// Heap control -/// -////// - -static void _rpmalloc_deallocate_huge(span_t *); - -//! 
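// Illustrative sketch (not part of rpmalloc): the acquire/release spin lock
// pattern used around the global span cache (cache->lock above). rpmalloc has
// its own atomic wrappers; this version uses plain C11 atomics instead.
#include <stdatomic.h>

typedef struct {
  atomic_int lock; // 0 = unlocked, 1 = locked
} spin_lock_t;

static void spin_lock_acquire(spin_lock_t *l) {
  int expected = 0;
  // Acquire ordering on success so the protected data is visible once the
  // lock is taken; on failure reset `expected` and retry.
  while (!atomic_compare_exchange_weak_explicit(
      &l->lock, &expected, 1, memory_order_acquire, memory_order_relaxed)) {
    expected = 0;
    // A real implementation would issue a pause/yield here, as _rpmalloc_spin
    // does, to be polite to the sibling hyperthread.
  }
}

static void spin_lock_release(spin_lock_t *l) {
  // Release ordering publishes all writes made while the lock was held.
  atomic_store_explicit(&l->lock, 0, memory_order_release);
}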
Store the given spans as reserve in the given heap -static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, - span_t *reserve, - size_t reserve_span_count) { - heap->span_reserve_master = master; - heap->span_reserve = reserve; - heap->spans_reserved = (uint32_t)reserve_span_count; -} - -//! Adopt the deferred span cache list, optionally extracting the first single -//! span for immediate re-use -static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap, - span_t **single_span) { - span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire( - &heap->span_free_deferred, 0)); - while (span) { - span_t *next_span = (span_t *)span->free_list; - rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); - if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { - rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); - --heap->full_span_count; - _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], - span); -#endif - _rpmalloc_stat_dec(&heap->span_use[0].current); - _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); - if (single_span && !*single_span) - *single_span = span; - else - _rpmalloc_heap_cache_insert(heap, span); - } else { - if (span->size_class == SIZE_CLASS_HUGE) { - _rpmalloc_deallocate_huge(span); - } else { - rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, - "Span size class invalid"); - rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); - --heap->full_span_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); -#endif - uint32_t idx = span->span_count - 1; - _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); - _rpmalloc_stat_dec(&heap->span_use[idx].current); - if (!idx && single_span && !*single_span) - *single_span = span; - else - _rpmalloc_heap_cache_insert(heap, span); - } - } - span = next_span; - } -} - -static void _rpmalloc_heap_unmap(heap_t *heap) { - if (!heap->master_heap) { - if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { - span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask); - _rpmalloc_span_unmap(span); - } - } else { - if (atomic_decr32(&heap->master_heap->child_count) == 0) { - _rpmalloc_heap_unmap(heap->master_heap); - } - } -} - -static void _rpmalloc_heap_global_finalize(heap_t *heap) { - if (heap->finalize++ > 1) { - --heap->finalize; - return; - } - - _rpmalloc_heap_finalize(heap); - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t *span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); - span_cache->count = 0; - } -#endif - - if (heap->full_span_count) { - --heap->finalize; - return; - } - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - if (heap->size_class[iclass].free_list || - heap->size_class[iclass].partial_span) { - --heap->finalize; - return; - } - } - // Heap is now completely free, unmap and remove from heap list - size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; - heap_t *list_heap = _memory_heaps[list_idx]; - if (list_heap == heap) { - _memory_heaps[list_idx] = heap->next_heap; - } else { - while (list_heap->next_heap != heap) - list_heap = list_heap->next_heap; - 
list_heap->next_heap = heap->next_heap; - } - - _rpmalloc_heap_unmap(heap); -} - -//! Insert a single span into thread heap cache, releasing to global cache if -//! overflow -static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) { - if (UNEXPECTED(heap->finalize != 0)) { - _rpmalloc_span_unmap(span); - _rpmalloc_heap_global_finalize(heap); - return; - } -#if ENABLE_THREAD_CACHE - size_t span_count = span->span_count; - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); - if (span_count == 1) { - span_cache_t *span_cache = &heap->span_cache; - span_cache->span[span_cache->count++] = span; - if (span_cache->count == MAX_THREAD_SPAN_CACHE) { - const size_t remain_count = - MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; -#if ENABLE_GLOBAL_CACHE - _rpmalloc_stat_add64(&heap->thread_to_global, - THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, - THREAD_SPAN_CACHE_TRANSFER); - _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, - span_count, - THREAD_SPAN_CACHE_TRANSFER); -#else - for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) - _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); -#endif - span_cache->count = remain_count; - } - } else { - size_t cache_idx = span_count - 2; - span_large_cache_t *span_cache = heap->span_large_cache + cache_idx; - span_cache->span[span_cache->count++] = span; - const size_t cache_limit = - (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); - if (span_cache->count == cache_limit) { - const size_t transfer_limit = 2 + (cache_limit >> 2); - const size_t transfer_count = - (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit - ? THREAD_SPAN_LARGE_CACHE_TRANSFER - : transfer_limit); - const size_t remain_count = cache_limit - transfer_count; -#if ENABLE_GLOBAL_CACHE - _rpmalloc_stat_add64(&heap->thread_to_global, - transfer_count * span_count * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, - transfer_count); - _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, - span_count, transfer_count); -#else - for (size_t ispan = 0; ispan < transfer_count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); -#endif - span_cache->count = remain_count; - } - } -#else - (void)sizeof(heap); - _rpmalloc_span_unmap(span); -#endif -} - -//! Extract the given number of spans from the different cache levels -static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap, - size_t span_count) { - span_t *span = 0; -#if ENABLE_THREAD_CACHE - span_cache_t *span_cache; - if (span_count == 1) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2)); - if (span_cache->count) { - _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); - return span_cache->span[--span_cache->count]; - } -#endif - return span; -} - -static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap, - size_t span_count) { - span_t *span = 0; - if (span_count == 1) { - _rpmalloc_heap_cache_adopt_deferred(heap, &span); - } else { - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - span = _rpmalloc_heap_thread_cache_extract(heap, span_count); - } - return span; -} - -static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap, - size_t span_count) { - if (heap->spans_reserved >= span_count) - return _rpmalloc_span_map(heap, span_count); - return 0; -} - -//! 
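// Illustrative sketch (not part of rpmalloc): the overflow policy of the
// thread span cache insert above. When the local array fills up, a fixed
// batch is handed to the global cache and the local count drops back, so
// steady-state frees stay lock free. The capacity and transfer constants here
// are invented, not the real MAX_THREAD_SPAN_CACHE values.
#include <stddef.h>

#define LOCAL_CAPACITY 64
#define TRANSFER_BATCH 16

typedef struct {
  void *slot[LOCAL_CAPACITY];
  size_t count;
} local_cache_t;

// Stub standing in for the real, lock-protected insert into the global cache.
static void global_cache_insert(void **batch, size_t count) {
  (void)batch;
  (void)count;
}

static void local_cache_put(local_cache_t *cache, void *span) {
  cache->slot[cache->count++] = span;
  if (cache->count == LOCAL_CAPACITY) {
    const size_t remain = LOCAL_CAPACITY - TRANSFER_BATCH;
    // Spill the most recently cached batch in one call, amortizing the cost
    // of taking the global lock.
    global_cache_insert(cache->slot + remain, TRANSFER_BATCH);
    cache->count = remain;
  }
}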
Extract a span from the global cache -static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap, - size_t span_count) { -#if ENABLE_GLOBAL_CACHE -#if ENABLE_THREAD_CACHE - span_cache_t *span_cache; - size_t wanted_count; - if (span_count == 1) { - span_cache = &heap->span_cache; - wanted_count = THREAD_SPAN_CACHE_TRANSFER; - } else { - span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2)); - wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; - } - span_cache->count = _rpmalloc_global_cache_extract_spans( - span_cache->span, span_count, wanted_count); - if (span_cache->count) { - _rpmalloc_stat_add64(&heap->global_to_thread, - span_count * span_cache->count * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, - span_cache->count); - return span_cache->span[--span_cache->count]; - } -#else - span_t *span = 0; - size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); - if (count) { - _rpmalloc_stat_add64(&heap->global_to_thread, - span_count * count * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, - count); - return span; - } -#endif -#endif - (void)sizeof(heap); - (void)sizeof(span_count); - return 0; -} - -static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count, - uint32_t class_idx) { - (void)sizeof(heap); - (void)sizeof(span_count); - (void)sizeof(class_idx); -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - uint32_t idx = (uint32_t)span_count - 1; - uint32_t current_count = - (uint32_t)atomic_incr32(&heap->span_use[idx].current); - if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) - atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); - _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, - heap->size_class_use[class_idx].spans_peak); -#endif -} - -//! Get a span from one of the cache levels (thread cache, reserved, global -//! cache) or fallback to mapping more memory -static span_t * -_rpmalloc_heap_extract_new_span(heap_t *heap, - heap_size_class_t *heap_size_class, - size_t span_count, uint32_t class_idx) { - span_t *span; -#if ENABLE_THREAD_CACHE - if (heap_size_class && heap_size_class->cache) { - span = heap_size_class->cache; - heap_size_class->cache = - (heap->span_cache.count - ? heap->span_cache.span[--heap->span_cache.count] - : 0); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } -#endif - (void)sizeof(class_idx); - // Allow 50% overhead to increase cache hits - size_t base_span_count = span_count; - size_t limit_span_count = - (span_count > 2) ? 
(span_count + (span_count >> 1)) : span_count; - if (limit_span_count > LARGE_CLASS_COUNT) - limit_span_count = LARGE_CLASS_COUNT; - do { - span = _rpmalloc_heap_thread_cache_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - span = _rpmalloc_heap_global_cache_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - span = _rpmalloc_heap_reserved_extract(heap, span_count); - if (EXPECTED(span != 0)) { - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); - _rpmalloc_inc_span_statistics(heap, span_count, class_idx); - return span; - } - ++span_count; - } while (span_count <= limit_span_count); - // Final fallback, map in more virtual memory - span = _rpmalloc_span_map(heap, base_span_count); - _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); - _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); - return span; -} - -static void _rpmalloc_heap_initialize(heap_t *heap) { - _rpmalloc_memset_const(heap, 0, sizeof(heap_t)); - // Get a new heap ID - heap->id = 1 + atomic_incr32(&_memory_heap_id); - - // Link in heap in heap ID map - size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; - heap->next_heap = _memory_heaps[list_idx]; - _memory_heaps[list_idx] = heap; -} - -static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) { - heap->owner_thread = (uintptr_t)-1; -#if RPMALLOC_FIRST_CLASS_HEAPS - heap_t **heap_list = - (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); -#else - (void)sizeof(first_class); - heap_t **heap_list = &_memory_orphan_heaps; -#endif - heap->next_orphan = *heap_list; - *heap_list = heap; -} - -//! Allocate a new heap from newly mapped memory pages -static heap_t *_rpmalloc_heap_allocate_new(void) { - // Map in pages for a 16 heaps. If page size is greater than required size for - // this, map a page and use first part for heaps and remaining part for spans - // for allocations. 
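// Illustrative sketch (not part of rpmalloc): the sizing arithmetic described
// in the comment above and implemented just below, which carves one mapped
// block into a span header followed by several heap records. The sizes here
// are invented; the real code derives them from sizeof(heap_t), sizeof(span_t)
// and the configured span and page sizes.
#include <stddef.h>
#include <stdio.h>

static void heap_carving_example(void) {
  const size_t heap_size = 1000;            // pretend sizeof(heap_t)
  const size_t span_header = 128;           // pretend sizeof(span_t)
  const size_t page_size = 2 * 1024 * 1024; // pretend huge page
  // Round each heap record up to a 16-byte boundary, as the allocator does.
  const size_t aligned_heap_size = 16 * ((heap_size + 15) / 16);
  // How many heaps fit in the page once the span header is carved off.
  const size_t heaps_per_page = (page_size - span_header) / aligned_heap_size;
  printf("aligned heap size: %zu bytes, heaps per page: %zu\n",
         aligned_heap_size, heaps_per_page);
}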
Adds a lot of complexity, but saves a lot of memory on - // systems where page size > 64 spans (4MiB) - size_t heap_size = sizeof(heap_t); - size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); - size_t request_heap_count = 16; - size_t heap_span_count = ((aligned_heap_size * request_heap_count) + - sizeof(span_t) + _memory_span_size - 1) / - _memory_span_size; - size_t block_size = _memory_span_size * heap_span_count; - size_t span_count = heap_span_count; - span_t *span = 0; - // If there are global reserved spans, use these first - if (_memory_global_reserve_count >= heap_span_count) { - span = _rpmalloc_global_get_reserved_spans(heap_span_count); - } - if (!span) { - if (_memory_page_size > block_size) { - span_count = _memory_page_size / _memory_span_size; - block_size = _memory_page_size; - // If using huge pages, make sure to grab enough heaps to avoid - // reallocating a huge page just to serve new heaps - size_t possible_heap_count = - (block_size - sizeof(span_t)) / aligned_heap_size; - if (possible_heap_count >= (request_heap_count * 16)) - request_heap_count *= 16; - else if (possible_heap_count < request_heap_count) - request_heap_count = possible_heap_count; - heap_span_count = ((aligned_heap_size * request_heap_count) + - sizeof(span_t) + _memory_span_size - 1) / - _memory_span_size; - } - - size_t align_offset = 0; - span = (span_t *)_rpmalloc_mmap(block_size, &align_offset); - if (!span) - return 0; - - // Master span will contain the heaps - _rpmalloc_stat_inc(&_master_spans); - _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); - } - - size_t remain_size = _memory_span_size - sizeof(span_t); - heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t)); - _rpmalloc_heap_initialize(heap); - - // Put extra heaps as orphans - size_t num_heaps = remain_size / aligned_heap_size; - if (num_heaps < request_heap_count) - num_heaps = request_heap_count; - atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); - heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size); - while (num_heaps > 1) { - _rpmalloc_heap_initialize(extra_heap); - extra_heap->master_heap = heap; - _rpmalloc_heap_orphan(extra_heap, 1); - extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size); - --num_heaps; - } - - if (span_count > heap_span_count) { - // Cap reserved spans - size_t remain_count = span_count - heap_span_count; - size_t reserve_count = - (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count - : remain_count); - span_t *remain_span = - (span_t *)pointer_offset(span, heap_span_count * _memory_span_size); - _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); - - if (remain_count > reserve_count) { - // Set to global reserved spans - remain_span = (span_t *)pointer_offset(remain_span, - reserve_count * _memory_span_size); - reserve_count = remain_count - reserve_count; - _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); - } - } - - return heap; -} - -static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) { - heap_t *heap = *heap_list; - *heap_list = (heap ? heap->next_orphan : 0); - return heap; -} - -//! 
Allocate a new heap, potentially reusing a previously orphaned heap -static heap_t *_rpmalloc_heap_allocate(int first_class) { - heap_t *heap = 0; - while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) - _rpmalloc_spin(); - if (first_class == 0) - heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); -#if RPMALLOC_FIRST_CLASS_HEAPS - if (!heap) - heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); -#endif - if (!heap) - heap = _rpmalloc_heap_allocate_new(); - atomic_store32_release(&_memory_global_lock, 0); - if (heap) - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - return heap; -} - -static void _rpmalloc_heap_release(void *heapptr, int first_class, - int release_cache) { - heap_t *heap = (heap_t *)heapptr; - if (!heap) - return; - // Release thread cache spans back to global cache - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - if (release_cache || heap->finalize) { -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t *span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); - if (!span_cache->count) - continue; -#if ENABLE_GLOBAL_CACHE - if (heap->finalize) { - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); - } else { - _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * - (iclass + 1) * - _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, - span_cache->count); - _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, - span_cache->count); - } -#else - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); -#endif - span_cache->count = 0; - } -#endif - } - - if (get_thread_heap_raw() == heap) - set_thread_heap(0); - -#if ENABLE_STATISTICS - atomic_decr32(&_memory_active_heaps); - rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, - "Still active heaps during finalization"); -#endif - - // If we are forcibly terminating with _exit the state of the - // lock atomic is unknown and it's best to just go ahead and exit - if (get_thread_id() != _rpmalloc_main_thread_id) { - while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) - _rpmalloc_spin(); - } - _rpmalloc_heap_orphan(heap, first_class); - atomic_store32_release(&_memory_global_lock, 0); -} - -static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) { - _rpmalloc_heap_release(heapptr, 0, release_cache); -} - -static void _rpmalloc_heap_release_raw_fc(void *heapptr) { - _rpmalloc_heap_release_raw(heapptr, 1); -} - -static void _rpmalloc_heap_finalize(heap_t *heap) { - if (heap->spans_reserved) { - span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved); - _rpmalloc_span_unmap(span); - heap->spans_reserved = 0; - } - - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - if (heap->size_class[iclass].cache) - _rpmalloc_span_unmap(heap->size_class[iclass].cache); - heap->size_class[iclass].cache = 0; - span_t *span = heap->size_class[iclass].partial_span; - while (span) { - span_t *next = span->next; - _rpmalloc_span_finalize(heap, iclass, span, - &heap->size_class[iclass].partial_span); - span = next; - } - // If class still has a free list it must be a full span - if (heap->size_class[iclass].free_list) { - span_t *class_span = - (span_t *)((uintptr_t)heap->size_class[iclass].free_list & - _memory_span_mask); 
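// Note on the mask just above: spans are mapped and carved on
// _memory_span_size boundaries, so clearing the low bits of any block pointer
// with _memory_span_mask recovers the owning span_t header without any lookup
// table. The same masking is what _rpmalloc_deallocate() later uses to find
// the span for an arbitrary freed pointer.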
- span_t **list = 0; -#if RPMALLOC_FIRST_CLASS_HEAPS - list = &heap->full_span[iclass]; -#endif - --heap->full_span_count; - if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { - if (list) - _rpmalloc_span_double_link_list_remove(list, class_span); - _rpmalloc_span_double_link_list_add( - &heap->size_class[iclass].partial_span, class_span); - } - } - } - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t *span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); - span_cache->count = 0; - } -#endif - rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), - "Heaps still active during finalization"); -} - -//////////// -/// -/// Allocation entry points -/// -////// - -//! Pop first block from a free list -static void *free_list_pop(void **list) { - void *block = *list; - *list = *((void **)block); - return block; -} - -//! Allocate a small/medium sized memory block from the given heap -static void *_rpmalloc_allocate_from_heap_fallback( - heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) { - span_t *span = heap_size_class->partial_span; - rpmalloc_assume(heap != 0); - if (EXPECTED(span != 0)) { - rpmalloc_assert(span->block_count == - _memory_size_class[span->size_class].block_count, - "Span block count corrupted"); - rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), - "Internal failure"); - void *block; - if (span->free_list) { - // Span local free list is not empty, swap to size class free list - block = free_list_pop(&span->free_list); - heap_size_class->free_list = span->free_list; - span->free_list = 0; - } else { - // If the span did not fully initialize free list, link up another page - // worth of blocks - void *block_start = pointer_offset( - span, SPAN_HEADER_SIZE + - ((size_t)span->free_list_limit * span->block_size)); - span->free_list_limit += free_list_partial_init( - &heap_size_class->free_list, &block, - (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)), - block_start, span->block_count - span->free_list_limit, - span->block_size); - } - rpmalloc_assert(span->free_list_limit <= span->block_count, - "Span block count corrupted"); - span->used_count = span->free_list_limit; - - // Swap in deferred free list if present - if (atomic_load_ptr(&span->free_list_deferred)) - _rpmalloc_span_extract_free_list_deferred(span); - - // If span is still not fully utilized keep it in partial list and early - // return block - if (!_rpmalloc_span_is_fully_utilized(span)) - return block; - - // The span is fully utilized, unlink from partial list and add to fully - // utilized list - _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, - span); -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); -#endif - ++heap->full_span_count; - return block; - } - - // Find a span in one of the cache levels - span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); - if (EXPECTED(span != 0)) { - // Mark span as owned by this heap and set base data, return first block - return _rpmalloc_span_initialize_new(heap, heap_size_class, span, - class_idx); - } - - return 0; -} - -//! 
Allocate a small sized memory block from the given heap -static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - // Small sizes have unique size classes - const uint32_t class_idx = - (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); - heap_size_class_t *heap_size_class = heap->size_class + class_idx; - _rpmalloc_stat_inc_alloc(heap, class_idx); - if (EXPECTED(heap_size_class->free_list != 0)) - return free_list_pop(&heap_size_class->free_list); - return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, - class_idx); -} - -//! Allocate a medium sized memory block from the given heap -static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - // Calculate the size class index and do a dependent lookup of the final class - // index (in case of merged classes) - const uint32_t base_idx = - (uint32_t)(SMALL_CLASS_COUNT + - ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); - const uint32_t class_idx = _memory_size_class[base_idx].class_idx; - heap_size_class_t *heap_size_class = heap->size_class + class_idx; - _rpmalloc_stat_inc_alloc(heap, class_idx); - if (EXPECTED(heap_size_class->free_list != 0)) - return free_list_pop(&heap_size_class->free_list); - return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, - class_idx); -} - -//! Allocate a large sized memory block from the given heap -static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - // Calculate number of needed max sized spans (including header) - // Since this function is never called if size > LARGE_SIZE_LIMIT - // the span_count is guaranteed to be <= LARGE_CLASS_COUNT - size += SPAN_HEADER_SIZE; - size_t span_count = size >> _memory_span_size_shift; - if (size & (_memory_span_size - 1)) - ++span_count; - - // Find a span in one of the cache levels - span_t *span = - _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); - if (!span) - return span; - - // Mark span as owned by this heap and set base data - rpmalloc_assert(span->span_count >= span_count, "Internal failure"); - span->size_class = SIZE_CLASS_LARGE; - span->heap = heap; - -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); -#endif - ++heap->full_span_count; - - return pointer_offset(span, SPAN_HEADER_SIZE); -} - -//! Allocate a huge block by mapping memory pages directly -static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) { - rpmalloc_assert(heap, "No thread heap"); - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - size += SPAN_HEADER_SIZE; - size_t num_pages = size >> _memory_page_size_shift; - if (size & (_memory_page_size - 1)) - ++num_pages; - size_t align_offset = 0; - span_t *span = - (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); - if (!span) - return span; - - // Store page count in span_count - span->size_class = SIZE_CLASS_HUGE; - span->span_count = (uint32_t)num_pages; - span->align_offset = (uint32_t)align_offset; - span->heap = heap; - _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); - -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); -#endif - ++heap->full_span_count; - - return pointer_offset(span, SPAN_HEADER_SIZE); -} - -//! 
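// Illustrative sketch (not part of rpmalloc): how the small size-class index
// used above falls out of the granularity arithmetic. With a hypothetical
// 16-byte granularity (shift 4), every request is rounded up to the next
// multiple of 16 and that multiple is the class index.
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define GRANULARITY 16u
#define GRANULARITY_SHIFT 4u

static uint32_t small_class_index(size_t size) {
  return (uint32_t)((size + (GRANULARITY - 1)) >> GRANULARITY_SHIFT);
}

static void small_class_index_examples(void) {
  assert(small_class_index(1) == 1);   // 1..16 bytes  -> class 1 (16 bytes)
  assert(small_class_index(16) == 1);
  assert(small_class_index(17) == 2);  // 17..32 bytes -> class 2 (32 bytes)
  assert(small_class_index(100) == 7); // rounded up to 112 bytes
}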
Allocate a block of the given size -static void *_rpmalloc_allocate(heap_t *heap, size_t size) { - _rpmalloc_stat_add64(&_allocation_counter, 1); - if (EXPECTED(size <= SMALL_SIZE_LIMIT)) - return _rpmalloc_allocate_small(heap, size); - else if (size <= _memory_medium_size_limit) - return _rpmalloc_allocate_medium(heap, size); - else if (size <= LARGE_SIZE_LIMIT) - return _rpmalloc_allocate_large(heap, size); - return _rpmalloc_allocate_huge(heap, size); -} - -static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, - size_t size) { - if (alignment <= SMALL_GRANULARITY) - return _rpmalloc_allocate(heap, size); - -#if ENABLE_VALIDATE_ARGS - if ((size + alignment) < size) { - errno = EINVAL; - return 0; - } - if (alignment & (alignment - 1)) { - errno = EINVAL; - return 0; - } -#endif - - if ((alignment <= SPAN_HEADER_SIZE) && - ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) { - // If alignment is less or equal to span header size (which is power of - // two), and size aligned to span header size multiples is less than size + - // alignment, then use natural alignment of blocks to provide alignment - size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & - ~(uintptr_t)(SPAN_HEADER_SIZE - 1) - : SPAN_HEADER_SIZE; - rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), - "Failed alignment calculation"); - if (multiple_size <= (size + alignment)) - return _rpmalloc_allocate(heap, multiple_size); - } - - void *ptr = 0; - size_t align_mask = alignment - 1; - if (alignment <= _memory_page_size) { - ptr = _rpmalloc_allocate(heap, size + alignment); - if ((uintptr_t)ptr & align_mask) { - ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); - // Mark as having aligned blocks - span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask); - span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; - } - return ptr; - } - - // Fallback to mapping new pages for this request. Since pointers passed - // to rpfree must be able to reach the start of the span by bitmasking of - // the address with the span size, the returned aligned pointer from this - // function must be with a span size of the start of the mapped area. - // In worst case this requires us to loop and map pages until we get a - // suitable memory address. 
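// Illustrative sketch (not part of rpmalloc): bumping a pointer up to the
// next power-of-two alignment boundary, as done above after over-allocating
// by `alignment` bytes. The helper assumes `alignment` is a power of two.
#include <assert.h>
#include <stdint.h>

static void *align_up(void *ptr, uintptr_t alignment) {
  uintptr_t mask = alignment - 1;
  uintptr_t value = (uintptr_t)ptr;
  if (value & mask) // only move the pointer if it is actually misaligned
    value = (value & ~mask) + alignment;
  return (void *)value;
}

static void align_up_examples(void) {
  assert(align_up((void *)0x1001, 0x100) == (void *)0x1100);
  assert(align_up((void *)0x1000, 0x100) == (void *)0x1000);
}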
It also means we can never align to span size - // or greater, since the span header will push alignment more than one - // span size away from span start (thus causing pointer mask to give us - // an invalid span start on free) - if (alignment & align_mask) { - errno = EINVAL; - return 0; - } - if (alignment >= _memory_span_size) { - errno = EINVAL; - return 0; - } - - size_t extra_pages = alignment / _memory_page_size; - - // Since each span has a header, we will at least need one extra memory page - size_t num_pages = 1 + (size / _memory_page_size); - if (size & (_memory_page_size - 1)) - ++num_pages; - - if (extra_pages > num_pages) - num_pages = 1 + extra_pages; - - size_t original_pages = num_pages; - size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; - if (limit_pages < (original_pages * 2)) - limit_pages = original_pages * 2; - - size_t mapped_size, align_offset; - span_t *span; - -retry: - align_offset = 0; - mapped_size = num_pages * _memory_page_size; - - span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset); - if (!span) { - errno = ENOMEM; - return 0; - } - ptr = pointer_offset(span, SPAN_HEADER_SIZE); - - if ((uintptr_t)ptr & align_mask) - ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); - - if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || - (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || - (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { - _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); - ++num_pages; - if (num_pages > limit_pages) { - errno = EINVAL; - return 0; - } - goto retry; - } - - // Store page count in span_count - span->size_class = SIZE_CLASS_HUGE; - span->span_count = (uint32_t)num_pages; - span->align_offset = (uint32_t)align_offset; - span->heap = heap; - _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); - -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); -#endif - ++heap->full_span_count; - - _rpmalloc_stat_add64(&_allocation_counter, 1); - - return ptr; -} - -//////////// -/// -/// Deallocation entry points -/// -////// - -//! Deallocate the given small/medium memory block in the current thread local -//! 
heap -static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span, - void *block) { - heap_t *heap = span->heap; - rpmalloc_assert(heap->owner_thread == get_thread_id() || - !heap->owner_thread || heap->finalize, - "Internal failure"); - // Add block to free list - if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { - span->used_count = span->block_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], - span); -#endif - _rpmalloc_span_double_link_list_add( - &heap->size_class[span->size_class].partial_span, span); - --heap->full_span_count; - } - *((void **)block) = span->free_list; - --span->used_count; - span->free_list = block; - if (UNEXPECTED(span->used_count == span->list_size)) { - // If there are no used blocks it is guaranteed that no other external - // thread is accessing the span - if (span->used_count) { - // Make sure we have synchronized the deferred list and list size by using - // acquire semantics and guarantee that no external thread is accessing - // span concurrently - void *free_list; - do { - free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, - INVALID_POINTER); - } while (free_list == INVALID_POINTER); - atomic_store_ptr_release(&span->free_list_deferred, free_list); - } - _rpmalloc_span_double_link_list_remove( - &heap->size_class[span->size_class].partial_span, span); - _rpmalloc_span_release_to_cache(heap, span); - } -} - -static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) { - if (span->size_class != SIZE_CLASS_HUGE) - _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); - // This list does not need ABA protection, no mutable side state - do { - span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred); - } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); -} - -//! Put the block in the deferred free list of the owning span -static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span, - void *block) { - // The memory ordering here is a bit tricky, to avoid having to ABA protect - // the deferred free list to avoid desynchronization of list and list size - // we need to have acquire semantics on successful CAS of the pointer to - // guarantee the list_size variable validity + release semantics on pointer - // store - void *free_list; - do { - free_list = - atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); - } while (free_list == INVALID_POINTER); - *((void **)block) = free_list; - uint32_t free_count = ++span->list_size; - int all_deferred_free = (free_count == span->block_count); - atomic_store_ptr_release(&span->free_list_deferred, block); - if (all_deferred_free) { - // Span was completely freed by this block. Due to the INVALID_POINTER spin - // lock no other thread can reach this state simultaneously on this span. 
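// Illustrative sketch (not part of rpmalloc): the compare-and-swap push used
// by _rpmalloc_deallocate_defer_free_span() above, written with plain C11
// atomics. Nodes are only pushed by other threads and the whole list is
// swapped out at once by the owner, so no ABA protection is needed.
#include <stdatomic.h>
#include <stddef.h>

typedef struct node {
  struct node *next;
} node_t;

typedef struct {
  _Atomic(node_t *) head;
} deferred_list_t;

static void deferred_push(deferred_list_t *list, node_t *node) {
  node_t *old_head = atomic_load_explicit(&list->head, memory_order_relaxed);
  do {
    node->next = old_head; // link to the current head, retry if it moved
  } while (!atomic_compare_exchange_weak_explicit(
      &list->head, &old_head, node, memory_order_release,
      memory_order_relaxed));
}

// The owning thread later takes the entire list in a single exchange.
static node_t *deferred_take_all(deferred_list_t *list) {
  return atomic_exchange_explicit(&list->head, NULL, memory_order_acquire);
}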
- // Safe to move to owner heap deferred cache - _rpmalloc_deallocate_defer_free_span(span->heap, span); - } -} - -static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) { - _rpmalloc_stat_inc_free(span->heap, span->size_class); - if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { - // Realign pointer to block start - void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); - uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); - p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); - } - // Check if block belongs to this heap or if deallocation should be deferred -#if RPMALLOC_FIRST_CLASS_HEAPS - int defer = - (span->heap->owner_thread && - (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#else - int defer = - ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#endif - if (!defer) - _rpmalloc_deallocate_direct_small_or_medium(span, p); - else - _rpmalloc_deallocate_defer_small_or_medium(span, p); -} - -//! Deallocate the given large memory block to the current heap -static void _rpmalloc_deallocate_large(span_t *span) { - rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); - rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || - !(span->flags & SPAN_FLAG_SUBSPAN), - "Span flag corrupted"); - rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || - (span->flags & SPAN_FLAG_SUBSPAN), - "Span flag corrupted"); - // We must always defer (unless finalizing) if from another heap since we - // cannot touch the list or counters of another heap -#if RPMALLOC_FIRST_CLASS_HEAPS - int defer = - (span->heap->owner_thread && - (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#else - int defer = - ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#endif - if (defer) { - _rpmalloc_deallocate_defer_free_span(span->heap, span); - return; - } - rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); - --span->heap->full_span_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); -#endif -#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS - // Decrease counter - size_t idx = span->span_count - 1; - atomic_decr32(&span->heap->span_use[idx].current); -#endif - heap_t *heap = span->heap; - rpmalloc_assert(heap, "No thread heap"); -#if ENABLE_THREAD_CACHE - const int set_as_reserved = - ((span->span_count > 1) && (heap->span_cache.count == 0) && - !heap->finalize && !heap->spans_reserved); -#else - const int set_as_reserved = - ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); -#endif - if (set_as_reserved) { - heap->span_reserve = span; - heap->spans_reserved = span->span_count; - if (span->flags & SPAN_FLAG_MASTER) { - heap->span_reserve_master = span; - } else { // SPAN_FLAG_SUBSPAN - span_t *master = (span_t *)pointer_offset( - span, - -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); - heap->span_reserve_master = master; - rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); - rpmalloc_assert(atomic_load32(&master->remaining_spans) >= - (int32_t)span->span_count, - "Master span count corrupted"); - } - _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); - } else { - // Insert into cache list - _rpmalloc_heap_cache_insert(heap, span); - } -} - -//! 
Deallocate the given huge span -static void _rpmalloc_deallocate_huge(span_t *span) { - rpmalloc_assert(span->heap, "No span heap"); -#if RPMALLOC_FIRST_CLASS_HEAPS - int defer = - (span->heap->owner_thread && - (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#else - int defer = - ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); -#endif - if (defer) { - _rpmalloc_deallocate_defer_free_span(span->heap, span); - return; - } - rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); - --span->heap->full_span_count; -#if RPMALLOC_FIRST_CLASS_HEAPS - _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); -#endif - - // Oversized allocation, page count is stored in span_count - size_t num_pages = span->span_count; - _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, - num_pages * _memory_page_size); - _rpmalloc_stat_sub(&_huge_pages_current, num_pages); -} - -//! Deallocate the given block -static void _rpmalloc_deallocate(void *p) { - _rpmalloc_stat_add64(&_deallocation_counter, 1); - // Grab the span (always at start of span, using span alignment) - span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask); - if (UNEXPECTED(!span)) - return; - if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) - _rpmalloc_deallocate_small_or_medium(span, p); - else if (span->size_class == SIZE_CLASS_LARGE) - _rpmalloc_deallocate_large(span); - else - _rpmalloc_deallocate_huge(span); -} - -//////////// -/// -/// Reallocation entry points -/// -////// - -static size_t _rpmalloc_usable_size(void *p); - -//! Reallocate the given block to the given size -static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size, - size_t oldsize, unsigned int flags) { - if (p) { - // Grab the span using guaranteed span alignment - span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask); - if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { - // Small/medium sized block - rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); - void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); - uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); - uint32_t block_idx = block_offset / span->block_size; - void *block = - pointer_offset(blocks_start, (size_t)block_idx * span->block_size); - if (!oldsize) - oldsize = - (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); - if ((size_t)span->block_size >= size) { - // Still fits in block, never mind trying to save memory, but preserve - // data if alignment changed - if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) - memmove(block, p, oldsize); - return block; - } - } else if (span->size_class == SIZE_CLASS_LARGE) { - // Large block - size_t total_size = size + SPAN_HEADER_SIZE; - size_t num_spans = total_size >> _memory_span_size_shift; - if (total_size & (_memory_span_mask - 1)) - ++num_spans; - size_t current_spans = span->span_count; - void *block = pointer_offset(span, SPAN_HEADER_SIZE); - if (!oldsize) - oldsize = (current_spans * _memory_span_size) - - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; - if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { - // Still fits in block, never mind trying to save memory, but preserve - // data if alignment changed - if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) - memmove(block, p, oldsize); - return block; - } - } else { - // Oversized block - size_t total_size = size + SPAN_HEADER_SIZE; - size_t num_pages = total_size >> _memory_page_size_shift; - if 
(total_size & (_memory_page_size - 1)) - ++num_pages; - // Page count is stored in span_count - size_t current_pages = span->span_count; - void *block = pointer_offset(span, SPAN_HEADER_SIZE); - if (!oldsize) - oldsize = (current_pages * _memory_page_size) - - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; - if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { - // Still fits in block, never mind trying to save memory, but preserve - // data if alignment changed - if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) - memmove(block, p, oldsize); - return block; - } - } - } else { - oldsize = 0; - } - - if (!!(flags & RPMALLOC_GROW_OR_FAIL)) - return 0; - - // Size is greater than block size, need to allocate a new block and - // deallocate the old Avoid hysteresis by overallocating if increase is small - // (below 37%) - size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); - size_t new_size = - (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); - void *block = _rpmalloc_allocate(heap, new_size); - if (p && block) { - if (!(flags & RPMALLOC_NO_PRESERVE)) - memcpy(block, p, oldsize < new_size ? oldsize : new_size); - _rpmalloc_deallocate(p); - } - - return block; -} - -static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, - size_t alignment, size_t size, - size_t oldsize, unsigned int flags) { - if (alignment <= SMALL_GRANULARITY) - return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); - - int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); - size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); - if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { - if (no_alloc || (size >= (usablesize / 2))) - return ptr; - } - // Aligned alloc marks span as having aligned blocks - void *block = - (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); - if (EXPECTED(block != 0)) { - if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { - if (!oldsize) - oldsize = usablesize; - memcpy(block, ptr, oldsize < size ? oldsize : size); - } - _rpmalloc_deallocate(ptr); - } - return block; -} - -//////////// -/// -/// Initialization, finalization and utility -/// -////// - -//! Get the usable size of the given block -static size_t _rpmalloc_usable_size(void *p) { - // Grab the span using guaranteed span alignment - span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask); - if (span->size_class < SIZE_CLASS_COUNT) { - // Small/medium block - void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); - return span->block_size - - ((size_t)pointer_diff(p, blocks_start) % span->block_size); - } - if (span->size_class == SIZE_CLASS_LARGE) { - // Large block - size_t current_spans = span->span_count; - return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); - } - // Oversized block, page count is stored in span_count - size_t current_pages = span->span_count; - return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); -} - -//! 
Adjust and optimize the size class properties for the given class -static void _rpmalloc_adjust_size_class(size_t iclass) { - size_t block_size = _memory_size_class[iclass].block_size; - size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; - - _memory_size_class[iclass].block_count = (uint16_t)block_count; - _memory_size_class[iclass].class_idx = (uint16_t)iclass; - - // Check if previous size classes can be merged - if (iclass >= SMALL_CLASS_COUNT) { - size_t prevclass = iclass; - while (prevclass > 0) { - --prevclass; - // A class can be merged if number of pages and number of blocks are equal - if (_memory_size_class[prevclass].block_count == - _memory_size_class[iclass].block_count) - _rpmalloc_memcpy_const(_memory_size_class + prevclass, - _memory_size_class + iclass, - sizeof(_memory_size_class[iclass])); - else - break; - } - } -} - -//! Initialize the allocator and setup global data -extern inline int rpmalloc_initialize(void) { - if (_rpmalloc_initialized) { - rpmalloc_thread_initialize(); - return 0; - } - return rpmalloc_initialize_config(0); -} - -int rpmalloc_initialize_config(const rpmalloc_config_t *config) { - if (_rpmalloc_initialized) { - rpmalloc_thread_initialize(); - return 0; - } - _rpmalloc_initialized = 1; - - if (config) - memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); - else - _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t)); - - if (!_memory_config.memory_map || !_memory_config.memory_unmap) { - _memory_config.memory_map = _rpmalloc_mmap_os; - _memory_config.memory_unmap = _rpmalloc_unmap_os; - } - -#if PLATFORM_WINDOWS - SYSTEM_INFO system_info; - memset(&system_info, 0, sizeof(system_info)); - GetSystemInfo(&system_info); - _memory_map_granularity = system_info.dwAllocationGranularity; -#else - _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); -#endif - -#if RPMALLOC_CONFIGURABLE - _memory_page_size = _memory_config.page_size; -#else - _memory_page_size = 0; -#endif - _memory_huge_pages = 0; - if (!_memory_page_size) { -#if PLATFORM_WINDOWS - _memory_page_size = system_info.dwPageSize; -#else - _memory_page_size = _memory_map_granularity; - if (_memory_config.enable_huge_pages) { -#if defined(__linux__) - size_t huge_page_size = 0; - FILE *meminfo = fopen("/proc/meminfo", "r"); - if (meminfo) { - char line[128]; - while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { - line[sizeof(line) - 1] = 0; - if (strstr(line, "Hugepagesize:")) - huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; - } - fclose(meminfo); - } - if (huge_page_size) { - _memory_huge_pages = 1; - _memory_page_size = huge_page_size; - _memory_map_granularity = huge_page_size; - } -#elif defined(__FreeBSD__) - int rc; - size_t sz = sizeof(rc); - - if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && - rc == 1) { - static size_t defsize = 2 * 1024 * 1024; - int nsize = 0; - size_t sizes[4] = {0}; - _memory_huge_pages = 1; - _memory_page_size = defsize; - if ((nsize = getpagesizes(sizes, 4)) >= 2) { - nsize--; - for (size_t csize = sizes[nsize]; nsize >= 0 && csize; - --nsize, csize = sizes[nsize]) { - //! Unlikely, but as a precaution.. 
- rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024), - "Invalid page size"); - if (defsize < csize) { - _memory_page_size = csize; - break; - } - } - } - _memory_map_granularity = _memory_page_size; - } -#elif defined(__APPLE__) || defined(__NetBSD__) - _memory_huge_pages = 1; - _memory_page_size = 2 * 1024 * 1024; - _memory_map_granularity = _memory_page_size; -#endif - } -#endif - } else { - if (_memory_config.enable_huge_pages) - _memory_huge_pages = 1; - } - -#if PLATFORM_WINDOWS - if (_memory_config.enable_huge_pages) { - HANDLE token = 0; - size_t large_page_minimum = GetLargePageMinimum(); - if (large_page_minimum) - OpenProcessToken(GetCurrentProcess(), - TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (token) { - LUID luid; - if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { - TOKEN_PRIVILEGES token_privileges; - memset(&token_privileges, 0, sizeof(token_privileges)); - token_privileges.PrivilegeCount = 1; - token_privileges.Privileges[0].Luid = luid; - token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { - if (GetLastError() == ERROR_SUCCESS) - _memory_huge_pages = 1; - } - } - CloseHandle(token); - } - if (_memory_huge_pages) { - if (large_page_minimum > _memory_page_size) - _memory_page_size = large_page_minimum; - if (large_page_minimum > _memory_map_granularity) - _memory_map_granularity = large_page_minimum; - } - } -#endif - - size_t min_span_size = 256; - size_t max_page_size; -#if UINTPTR_MAX > 0xFFFFFFFF - max_page_size = 4096ULL * 1024ULL * 1024ULL; -#else - max_page_size = 4 * 1024 * 1024; -#endif - if (_memory_page_size < min_span_size) - _memory_page_size = min_span_size; - if (_memory_page_size > max_page_size) - _memory_page_size = max_page_size; - _memory_page_size_shift = 0; - size_t page_size_bit = _memory_page_size; - while (page_size_bit != 1) { - ++_memory_page_size_shift; - page_size_bit >>= 1; - } - _memory_page_size = ((size_t)1 << _memory_page_size_shift); - -#if RPMALLOC_CONFIGURABLE - if (!_memory_config.span_size) { - _memory_span_size = _memory_default_span_size; - _memory_span_size_shift = _memory_default_span_size_shift; - _memory_span_mask = _memory_default_span_mask; - } else { - size_t span_size = _memory_config.span_size; - if (span_size > (256 * 1024)) - span_size = (256 * 1024); - _memory_span_size = 4096; - _memory_span_size_shift = 12; - while (_memory_span_size < span_size) { - _memory_span_size <<= 1; - ++_memory_span_size_shift; - } - _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); - } -#endif - - _memory_span_map_count = - (_memory_config.span_map_count ? _memory_config.span_map_count - : DEFAULT_SPAN_MAP_COUNT); - if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) - _memory_span_map_count = (_memory_page_size / _memory_span_size); - if ((_memory_page_size >= _memory_span_size) && - ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) - _memory_span_map_count = (_memory_page_size / _memory_span_size); - _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) - ? 
DEFAULT_SPAN_MAP_COUNT - : _memory_span_map_count; - - _memory_config.page_size = _memory_page_size; - _memory_config.span_size = _memory_span_size; - _memory_config.span_map_count = _memory_span_map_count; - _memory_config.enable_huge_pages = _memory_huge_pages; - -#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \ - defined(__TINYC__) - if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) - return -1; -#endif -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - fls_key = FlsAlloc(&_rpmalloc_thread_destructor); -#endif - - // Setup all small and medium size classes - size_t iclass = 0; - _memory_size_class[iclass].block_size = SMALL_GRANULARITY; - _rpmalloc_adjust_size_class(iclass); - for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { - size_t size = iclass * SMALL_GRANULARITY; - _memory_size_class[iclass].block_size = (uint32_t)size; - _rpmalloc_adjust_size_class(iclass); - } - // At least two blocks per span, then fall back to large allocations - _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; - if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) - _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; - for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { - size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); - if (size > _memory_medium_size_limit) { - _memory_medium_size_limit = - SMALL_SIZE_LIMIT + (iclass * MEDIUM_GRANULARITY); - break; - } - _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; - _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); - } - - _memory_orphan_heaps = 0; -#if RPMALLOC_FIRST_CLASS_HEAPS - _memory_first_class_orphan_heaps = 0; -#endif -#if ENABLE_STATISTICS - atomic_store32(&_memory_active_heaps, 0); - atomic_store32(&_mapped_pages, 0); - _mapped_pages_peak = 0; - atomic_store32(&_master_spans, 0); - atomic_store32(&_mapped_total, 0); - atomic_store32(&_unmapped_total, 0); - atomic_store32(&_mapped_pages_os, 0); - atomic_store32(&_huge_pages_current, 0); - _huge_pages_peak = 0; -#endif - memset(_memory_heaps, 0, sizeof(_memory_heaps)); - atomic_store32_release(&_memory_global_lock, 0); - - rpmalloc_linker_reference(); - - // Initialize this thread - rpmalloc_thread_initialize(); - return 0; -} - -//! 
Finalize the allocator -void rpmalloc_finalize(void) { - rpmalloc_thread_finalize(1); - // rpmalloc_dump_statistics(stdout); - - if (_memory_global_reserve) { - atomic_add32(&_memory_global_reserve_master->remaining_spans, - -(int32_t)_memory_global_reserve_count); - _memory_global_reserve_master = 0; - _memory_global_reserve_count = 0; - _memory_global_reserve = 0; - } - atomic_store32_release(&_memory_global_lock, 0); - - // Free all thread caches and fully free spans - for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { - heap_t *heap = _memory_heaps[list_idx]; - while (heap) { - heap_t *next_heap = heap->next_heap; - heap->finalize = 1; - _rpmalloc_heap_global_finalize(heap); - heap = next_heap; - } - } - -#if ENABLE_GLOBAL_CACHE - // Free global caches - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) - _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); -#endif - -#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD - pthread_key_delete(_memory_thread_heap); -#endif -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - FlsFree(fls_key); - fls_key = 0; -#endif -#if ENABLE_STATISTICS - // If you hit these asserts you probably have memory leaks (perhaps global - // scope data doing dynamic allocations) or double frees in your code - rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); - rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, - "Memory leak detected"); -#endif - - _rpmalloc_initialized = 0; -} - -//! Initialize thread, assign heap -extern inline void rpmalloc_thread_initialize(void) { - if (!get_thread_heap_raw()) { - heap_t *heap = _rpmalloc_heap_allocate(0); - if (heap) { - _rpmalloc_stat_inc(&_memory_active_heaps); - set_thread_heap(heap); -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - FlsSetValue(fls_key, heap); -#endif - } - } -} - -//! Finalize thread, orphan heap -void rpmalloc_thread_finalize(int release_caches) { - heap_t *heap = get_thread_heap_raw(); - if (heap) - _rpmalloc_heap_release_raw(heap, release_caches); - set_thread_heap(0); -#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) - FlsSetValue(fls_key, 0); -#endif -} - -int rpmalloc_is_thread_initialized(void) { - return (get_thread_heap_raw() != 0) ? 
1 : 0; -} - -const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; } - -// Extern interface - -extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return 0; - } -#endif - heap_t *heap = get_thread_heap(); - return _rpmalloc_allocate(heap, size); -} - -extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); } - -extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#if PLATFORM_WINDOWS - int err = SizeTMult(num, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(num, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = num * size; -#endif - heap_t *heap = get_thread_heap(); - void *block = _rpmalloc_allocate(heap, total); - if (block) - memset(block, 0, total); - return block; -} - -extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return ptr; - } -#endif - heap_t *heap = get_thread_heap(); - return _rpmalloc_reallocate(heap, ptr, size, 0, 0); -} - -extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment, - size_t size, size_t oldsize, - unsigned int flags) { -#if ENABLE_VALIDATE_ARGS - if ((size + alignment < size) || (alignment > _memory_page_size)) { - errno = EINVAL; - return 0; - } -#endif - heap_t *heap = get_thread_heap(); - return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, - flags); -} - -extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) { - heap_t *heap = get_thread_heap(); - return _rpmalloc_aligned_allocate(heap, alignment, size); -} - -extern inline RPMALLOC_ALLOCATOR void * -rpaligned_calloc(size_t alignment, size_t num, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#if PLATFORM_WINDOWS - int err = SizeTMult(num, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(num, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = num * size; -#endif - void *block = rpaligned_alloc(alignment, total); - if (block) - memset(block, 0, total); - return block; -} - -extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment, - size_t size) { - return rpaligned_alloc(alignment, size); -} - -extern inline int rpposix_memalign(void **memptr, size_t alignment, - size_t size) { - if (memptr) - *memptr = rpaligned_alloc(alignment, size); - else - return EINVAL; - return *memptr ? 0 : ENOMEM; -} - -extern inline size_t rpmalloc_usable_size(void *ptr) { - return (ptr ? 
_rpmalloc_usable_size(ptr) : 0); -} - -extern inline void rpmalloc_thread_collect(void) {} - -void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) { - memset(stats, 0, sizeof(rpmalloc_thread_statistics_t)); - heap_t *heap = get_thread_heap_raw(); - if (!heap) - return; - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - size_class_t *size_class = _memory_size_class + iclass; - span_t *span = heap->size_class[iclass].partial_span; - while (span) { - size_t free_count = span->list_size; - size_t block_count = size_class->block_count; - if (span->free_list_limit < block_count) - block_count = span->free_list_limit; - free_count += (block_count - span->used_count); - stats->sizecache += free_count * size_class->block_size; - span = span->next; - } - } - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t *span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); - stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size; - } -#endif - - span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred); - while (deferred) { - if (deferred->size_class != SIZE_CLASS_HUGE) - stats->spancache += (size_t)deferred->span_count * _memory_span_size; - deferred = (span_t *)deferred->free_list; - } - -#if ENABLE_STATISTICS - stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global); - stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread); - - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - stats->span_use[iclass].current = - (size_t)atomic_load32(&heap->span_use[iclass].current); - stats->span_use[iclass].peak = - (size_t)atomic_load32(&heap->span_use[iclass].high); - stats->span_use[iclass].to_global = - (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global); - stats->span_use[iclass].from_global = - (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global); - stats->span_use[iclass].to_cache = - (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache); - stats->span_use[iclass].from_cache = - (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache); - stats->span_use[iclass].to_reserved = - (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved); - stats->span_use[iclass].from_reserved = - (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved); - stats->span_use[iclass].map_calls = - (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls); - } - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - stats->size_use[iclass].alloc_current = - (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current); - stats->size_use[iclass].alloc_peak = - (size_t)heap->size_class_use[iclass].alloc_peak; - stats->size_use[iclass].alloc_total = - (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total); - stats->size_use[iclass].free_total = - (size_t)atomic_load32(&heap->size_class_use[iclass].free_total); - stats->size_use[iclass].spans_to_cache = - (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache); - stats->size_use[iclass].spans_from_cache = - (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache); - stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32( - &heap->size_class_use[iclass].spans_from_reserved); - stats->size_use[iclass].map_calls = - (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls); - } -#endif -} - -void 
rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) { - memset(stats, 0, sizeof(rpmalloc_global_statistics_t)); -#if ENABLE_STATISTICS - stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; - stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; - stats->mapped_total = - (size_t)atomic_load32(&_mapped_total) * _memory_page_size; - stats->unmapped_total = - (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; - stats->huge_alloc = - (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; - stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size; -#endif -#if ENABLE_GLOBAL_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - global_cache_t *cache = &_memory_span_cache[iclass]; - while (!atomic_cas32_acquire(&cache->lock, 1, 0)) - _rpmalloc_spin(); - uint32_t count = cache->count; -#if ENABLE_UNLIMITED_CACHE - span_t *current_span = cache->overflow; - while (current_span) { - ++count; - current_span = current_span->next; - } -#endif - atomic_store32_release(&cache->lock, 0); - stats->cached += count * (iclass + 1) * _memory_span_size; - } -#endif -} - -#if ENABLE_STATISTICS - -static void _memory_heap_dump_statistics(heap_t *heap, void *file) { - fprintf(file, "Heap %d stats:\n", heap->id); - fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize " - "BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB " - "FromCacheMiB FromReserveMiB MmapCalls\n"); - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) - continue; - fprintf( - file, - "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu " - "%9u\n", - (uint32_t)iclass, - atomic_load32(&heap->size_class_use[iclass].alloc_current), - heap->size_class_use[iclass].alloc_peak, - atomic_load32(&heap->size_class_use[iclass].alloc_total), - atomic_load32(&heap->size_class_use[iclass].free_total), - _memory_size_class[iclass].block_size, - _memory_size_class[iclass].block_count, - atomic_load32(&heap->size_class_use[iclass].spans_current), - heap->size_class_use[iclass].spans_peak, - ((size_t)heap->size_class_use[iclass].alloc_peak * - (size_t)_memory_size_class[iclass].block_size) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * - _memory_span_size) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * - _memory_span_size) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32( - &heap->size_class_use[iclass].spans_from_reserved) * - _memory_span_size) / - (size_t)(1024 * 1024), - atomic_load32(&heap->size_class_use[iclass].spans_map_calls)); - } - fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB " - "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB " - "FromGlobalMiB MmapCalls\n"); - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - if (!atomic_load32(&heap->span_use[iclass].high) && - !atomic_load32(&heap->span_use[iclass].spans_map_calls)) - continue; - fprintf( - file, - "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", - (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current), - atomic_load32(&heap->span_use[iclass].high), - atomic_load32(&heap->span_use[iclass].spans_deferred), - ((size_t)atomic_load32(&heap->span_use[iclass].high) * - (size_t)_memory_span_size * (iclass + 1)) / - (size_t)(1024 * 1024), -#if ENABLE_THREAD_CACHE - (unsigned int)(!iclass ? 
heap->span_cache.count - : heap->span_large_cache[iclass - 1].count), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * - (iclass + 1) * _memory_span_size) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * - (iclass + 1) * _memory_span_size) / - (size_t)(1024 * 1024), -#else - 0, (size_t)0, (size_t)0, -#endif - ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * - (iclass + 1) * _memory_span_size) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * - (iclass + 1) * _memory_span_size) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * - (size_t)_memory_span_size * (iclass + 1)) / - (size_t)(1024 * 1024), - ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * - (size_t)_memory_span_size * (iclass + 1)) / - (size_t)(1024 * 1024), - atomic_load32(&heap->span_use[iclass].spans_map_calls)); - } - fprintf(file, "Full spans: %zu\n", heap->full_span_count); - fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n"); - fprintf( - file, "%17zu %17zu\n", - (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), - (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024)); -} - -#endif - -void rpmalloc_dump_statistics(void *file) { -#if ENABLE_STATISTICS - for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { - heap_t *heap = _memory_heaps[list_idx]; - while (heap) { - int need_dump = 0; - for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); - ++iclass) { - if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) { - rpmalloc_assert( - !atomic_load32(&heap->size_class_use[iclass].free_total), - "Heap statistics counter mismatch"); - rpmalloc_assert( - !atomic_load32(&heap->size_class_use[iclass].spans_map_calls), - "Heap statistics counter mismatch"); - continue; - } - need_dump = 1; - } - for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); - ++iclass) { - if (!atomic_load32(&heap->span_use[iclass].high) && - !atomic_load32(&heap->span_use[iclass].spans_map_calls)) - continue; - need_dump = 1; - } - if (need_dump) - _memory_heap_dump_statistics(heap, file); - heap = heap->next_heap; - } - } - fprintf(file, "Global stats:\n"); - size_t huge_current = - (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; - size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size; - fprintf(file, "HugeCurrentMiB HugePeakMiB\n"); - fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), - huge_peak / (size_t)(1024 * 1024)); - -#if ENABLE_GLOBAL_CACHE - fprintf(file, "GlobalCacheMiB\n"); - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - global_cache_t *cache = _memory_span_cache + iclass; - size_t global_cache = (size_t)cache->count * iclass * _memory_span_size; - - size_t global_overflow_cache = 0; - span_t *span = cache->overflow; - while (span) { - global_overflow_cache += iclass * _memory_span_size; - span = span->next; - } - if (global_cache || global_overflow_cache || cache->insert_count || - cache->extract_count) - fprintf(file, - "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", - iclass + 1, global_cache / (size_t)(1024 * 1024), - global_overflow_cache / (size_t)(1024 * 1024), - cache->insert_count, cache->extract_count); - } -#endif - - size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; - size_t mapped_os = - (size_t)atomic_load32(&_mapped_pages_os) * 
_memory_page_size; - size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; - size_t mapped_total = - (size_t)atomic_load32(&_mapped_total) * _memory_page_size; - size_t unmapped_total = - (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; - fprintf( - file, - "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n"); - fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n", - mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024), - mapped_peak / (size_t)(1024 * 1024), - mapped_total / (size_t)(1024 * 1024), - unmapped_total / (size_t)(1024 * 1024)); - - fprintf(file, "\n"); -#if 0 - int64_t allocated = atomic_load64(&_allocation_counter); - int64_t deallocated = atomic_load64(&_deallocation_counter); - fprintf(file, "Allocation count: %lli\n", allocated); - fprintf(file, "Deallocation count: %lli\n", deallocated); - fprintf(file, "Current allocations: %lli\n", (allocated - deallocated)); - fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans)); - fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans)); -#endif -#endif - (void)sizeof(file); -} - -#if RPMALLOC_FIRST_CLASS_HEAPS - -extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) { - // Must be a pristine heap from newly mapped memory pages, or else memory - // blocks could already be allocated from the heap which would (wrongly) be - // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps - // guaranteed to be pristine from the dedicated orphan list can be used. - heap_t *heap = _rpmalloc_heap_allocate(1); - rpmalloc_assume(heap != NULL); - heap->owner_thread = 0; - _rpmalloc_stat_inc(&_memory_active_heaps); - return heap; -} - -extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) { - if (heap) - _rpmalloc_heap_release(heap, 1, 1); -} - -extern inline RPMALLOC_ALLOCATOR void * -rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return 0; - } -#endif - return _rpmalloc_allocate(heap, size); -} - -extern inline RPMALLOC_ALLOCATOR void * -rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, - size_t size) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return 0; - } -#endif - return _rpmalloc_aligned_allocate(heap, alignment, size); -} - -extern inline RPMALLOC_ALLOCATOR void * -rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) { - return rpmalloc_heap_aligned_calloc(heap, 0, num, size); -} - -extern inline RPMALLOC_ALLOCATOR void * -rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, - size_t num, size_t size) { - size_t total; -#if ENABLE_VALIDATE_ARGS -#if PLATFORM_WINDOWS - int err = SizeTMult(num, size, &total); - if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#else - int err = __builtin_umull_overflow(num, size, &total); - if (err || (total >= MAX_ALLOC_SIZE)) { - errno = EINVAL; - return 0; - } -#endif -#else - total = num * size; -#endif - void *block = _rpmalloc_aligned_allocate(heap, alignment, total); - if (block) - memset(block, 0, total); - return block; -} - -extern inline RPMALLOC_ALLOCATOR void * -rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, - unsigned int flags) { -#if ENABLE_VALIDATE_ARGS - if (size >= MAX_ALLOC_SIZE) { - errno = EINVAL; - return ptr; - } -#endif - return _rpmalloc_reallocate(heap, ptr, size, 0, flags); -} - -extern inline RPMALLOC_ALLOCATOR void * 
-rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, - size_t alignment, size_t size, - unsigned int flags) { -#if ENABLE_VALIDATE_ARGS - if ((size + alignment < size) || (alignment > _memory_page_size)) { - errno = EINVAL; - return 0; - } -#endif - return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags); -} - -extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) { - (void)sizeof(heap); - _rpmalloc_deallocate(ptr); -} - -extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) { - span_t *span; - span_t *next_span; - - _rpmalloc_heap_cache_adopt_deferred(heap, 0); - - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - span = heap->size_class[iclass].partial_span; - while (span) { - next_span = span->next; - _rpmalloc_heap_cache_insert(heap, span); - span = next_span; - } - heap->size_class[iclass].partial_span = 0; - span = heap->full_span[iclass]; - while (span) { - next_span = span->next; - _rpmalloc_heap_cache_insert(heap, span); - span = next_span; - } - - span = heap->size_class[iclass].cache; - if (span) - _rpmalloc_heap_cache_insert(heap, span); - heap->size_class[iclass].cache = 0; - } - memset(heap->size_class, 0, sizeof(heap->size_class)); - memset(heap->full_span, 0, sizeof(heap->full_span)); - - span = heap->large_huge_span; - while (span) { - next_span = span->next; - if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE)) - _rpmalloc_deallocate_huge(span); - else - _rpmalloc_heap_cache_insert(heap, span); - span = next_span; - } - heap->large_huge_span = 0; - heap->full_span_count = 0; - -#if ENABLE_THREAD_CACHE - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - span_cache_t *span_cache; - if (!iclass) - span_cache = &heap->span_cache; - else - span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); - if (!span_cache->count) - continue; -#if ENABLE_GLOBAL_CACHE - _rpmalloc_stat_add64(&heap->thread_to_global, - span_cache->count * (iclass + 1) * _memory_span_size); - _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, - span_cache->count); - _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, - span_cache->count); -#else - for (size_t ispan = 0; ispan < span_cache->count; ++ispan) - _rpmalloc_span_unmap(span_cache->span[ispan]); -#endif - span_cache->count = 0; - } -#endif - -#if ENABLE_STATISTICS - for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { - atomic_store32(&heap->size_class_use[iclass].alloc_current, 0); - atomic_store32(&heap->size_class_use[iclass].spans_current, 0); - } - for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { - atomic_store32(&heap->span_use[iclass].current, 0); - } -#endif -} - -extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) { - heap_t *prev_heap = get_thread_heap_raw(); - if (prev_heap != heap) { - set_thread_heap(heap); - if (prev_heap) - rpmalloc_heap_release(prev_heap); - } -} - -extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) { - // Grab the span, and then the heap from the span - span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask); - if (span) { - return span->heap; - } - return 0; -} - -#endif - -#if ENABLE_PRELOAD || ENABLE_OVERRIDE - -#include "malloc.c" - -#endif - -void rpmalloc_linker_reference(void) { (void)sizeof(_rpmalloc_initialized); } +//===---------------------- rpmalloc.c ------------------*- C -*-=============// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This library provides a cross-platform lock free thread caching malloc +// implementation in C11. +// +//===----------------------------------------------------------------------===// + +#include "rpmalloc.h" + +//////////// +/// +/// Build time configurable limits +/// +////// + +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wunused-macros" +#pragma clang diagnostic ignored "-Wunused-function" +#if __has_warning("-Wreserved-identifier") +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif +#if __has_warning("-Wstatic-in-inline") +#pragma clang diagnostic ignored "-Wstatic-in-inline" +#endif +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-macros" +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#if !defined(__has_builtin) +#define __has_builtin(b) 0 +#endif + +#if defined(__GNUC__) || defined(__clang__) + +#if __has_builtin(__builtin_memcpy_inline) +#define _rpmalloc_memcpy_const(x, y, s) __builtin_memcpy_inline(x, y, s) +#else +#define _rpmalloc_memcpy_const(x, y, s) \ + do { \ + _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \ + "len must be a constant integer"); \ + memcpy(x, y, s); \ + } while (0) +#endif + +#if __has_builtin(__builtin_memset_inline) +#define _rpmalloc_memset_const(x, y, s) __builtin_memset_inline(x, y, s) +#else +#define _rpmalloc_memset_const(x, y, s) \ + do { \ + _Static_assert(__builtin_choose_expr(__builtin_constant_p(s), 1, 0), \ + "len must be a constant integer"); \ + memset(x, y, s); \ + } while (0) +#endif +#else +#define _rpmalloc_memcpy_const(x, y, s) memcpy(x, y, s) +#define _rpmalloc_memset_const(x, y, s) memset(x, y, s) +#endif + +#if __has_builtin(__builtin_assume) +#define rpmalloc_assume(cond) __builtin_assume(cond) +#elif defined(__GNUC__) +#define rpmalloc_assume(cond) \ + do { \ + if (!__builtin_expect(cond, 0)) \ + __builtin_unreachable(); \ + } while (0) +#elif defined(_MSC_VER) +#define rpmalloc_assume(cond) __assume(cond) +#else +#define rpmalloc_assume(cond) 0 +#endif + +#ifndef HEAP_ARRAY_SIZE +//! Size of heap hashmap +#define HEAP_ARRAY_SIZE 47 +#endif +#ifndef ENABLE_THREAD_CACHE +//! Enable per-thread cache +#define ENABLE_THREAD_CACHE 1 +#endif +#ifndef ENABLE_GLOBAL_CACHE +//! Enable global cache shared between all threads, requires thread cache +#define ENABLE_GLOBAL_CACHE 1 +#endif +#ifndef ENABLE_VALIDATE_ARGS +//! Enable validation of args to public entry points +#define ENABLE_VALIDATE_ARGS 0 +#endif +#ifndef ENABLE_STATISTICS +//! Enable statistics collection +#define ENABLE_STATISTICS 0 +#endif +#ifndef ENABLE_ASSERTS +//! Enable asserts +#define ENABLE_ASSERTS 0 +#endif +#ifndef ENABLE_OVERRIDE +//! Override standard library malloc/free and new/delete entry points +#define ENABLE_OVERRIDE 0 +#endif +#ifndef ENABLE_PRELOAD +//! Support preloading +#define ENABLE_PRELOAD 0 +#endif +#ifndef DISABLE_UNMAP +//! Disable unmapping memory pages (also enables unlimited cache) +#define DISABLE_UNMAP 0 +#endif +#ifndef ENABLE_UNLIMITED_CACHE +//! Enable unlimited global cache (no unmapping until finalization) +#define ENABLE_UNLIMITED_CACHE 0 +#endif +#ifndef ENABLE_ADAPTIVE_THREAD_CACHE +//! Enable adaptive thread cache size based on use heuristics +#define ENABLE_ADAPTIVE_THREAD_CACHE 0 +#endif +#ifndef DEFAULT_SPAN_MAP_COUNT +//! 
Default number of spans to map in call to map more virtual memory (default +//! values yield 4MiB here) +#define DEFAULT_SPAN_MAP_COUNT 64 +#endif +#ifndef GLOBAL_CACHE_MULTIPLIER +//! Multiplier for global cache +#define GLOBAL_CACHE_MULTIPLIER 8 +#endif + +#if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE +#error Must use global cache if unmap is disabled +#endif + +#if DISABLE_UNMAP +#undef ENABLE_UNLIMITED_CACHE +#define ENABLE_UNLIMITED_CACHE 1 +#endif + +#if !ENABLE_GLOBAL_CACHE +#undef ENABLE_UNLIMITED_CACHE +#define ENABLE_UNLIMITED_CACHE 0 +#endif + +#if !ENABLE_THREAD_CACHE +#undef ENABLE_ADAPTIVE_THREAD_CACHE +#define ENABLE_ADAPTIVE_THREAD_CACHE 0 +#endif + +#if defined(_WIN32) || defined(__WIN32__) || defined(_WIN64) +#define PLATFORM_WINDOWS 1 +#define PLATFORM_POSIX 0 +#else +#define PLATFORM_WINDOWS 0 +#define PLATFORM_POSIX 1 +#endif + +/// Platform and arch specifics +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(disable : 5105) +#ifndef FORCEINLINE +#define FORCEINLINE inline __forceinline +#endif +#define _Static_assert static_assert +#else +#ifndef FORCEINLINE +#define FORCEINLINE inline __attribute__((__always_inline__)) +#endif +#endif +#if PLATFORM_WINDOWS +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include +#if ENABLE_VALIDATE_ARGS +#include +#endif +#else +#include +#include +#include +#include +#if defined(__linux__) || defined(__ANDROID__) +#include +#if !defined(PR_SET_VMA) +#define PR_SET_VMA 0x53564d41 +#define PR_SET_VMA_ANON_NAME 0 +#endif +#endif +#if defined(__APPLE__) +#include +#if !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR +#include +#include +#endif +#include +#endif +#if defined(__HAIKU__) || defined(__TINYC__) +#include +#endif +#endif + +#include +#include +#include + +#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) +#include +static DWORD fls_key; +#endif + +#if PLATFORM_POSIX +#include +#include +#ifdef __FreeBSD__ +#include +#define MAP_HUGETLB MAP_ALIGNED_SUPER +#ifndef PROT_MAX +#define PROT_MAX(f) 0 +#endif +#else +#define PROT_MAX(f) 0 +#endif +#ifdef __sun +extern int madvise(caddr_t, size_t, int); +#endif +#ifndef MAP_UNINITIALIZED +#define MAP_UNINITIALIZED 0 +#endif +#endif +#include + +#if ENABLE_ASSERTS +#undef NDEBUG +#if defined(_MSC_VER) && !defined(_DEBUG) +#define _DEBUG +#endif +#include +#define RPMALLOC_TOSTRING_M(x) #x +#define RPMALLOC_TOSTRING(x) RPMALLOC_TOSTRING_M(x) +#define rpmalloc_assert(truth, message) \ + do { \ + if (!(truth)) { \ + if (_memory_config.error_callback) { \ + _memory_config.error_callback(message " (" RPMALLOC_TOSTRING( \ + truth) ") at " __FILE__ ":" RPMALLOC_TOSTRING(__LINE__)); \ + } else { \ + assert((truth) && message); \ + } \ + } \ + } while (0) +#else +#define rpmalloc_assert(truth, message) \ + do { \ + } while (0) +#endif +#if ENABLE_STATISTICS +#include +#endif + +////// +/// +/// Atomic access abstraction (since MSVC does not do C11 yet) +/// +////// + +#if defined(_MSC_VER) && !defined(__clang__) + +typedef volatile long atomic32_t; +typedef volatile long long atomic64_t; +typedef volatile void *atomicptr_t; + +static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; } +static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { + *dst = val; +} +static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { + return (int32_t)InterlockedIncrement(val); +} +static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { + return (int32_t)InterlockedDecrement(val); +} +static FORCEINLINE int32_t 
atomic_add32(atomic32_t *val, int32_t add) { + return (int32_t)InterlockedExchangeAdd(val, add) + add; +} +static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val, + int32_t ref) { + return (InterlockedCompareExchange(dst, val, ref) == ref) ? 1 : 0; +} +static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) { + *dst = val; +} +static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; } +static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { + return (int64_t)InterlockedExchangeAdd64(val, add) + add; +} +static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { + return (void *)*src; +} +static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { + *dst = val; +} +static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { + *dst = val; +} +static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst, + void *val) { + return (void *)InterlockedExchangePointer((void *volatile *)dst, val); +} +static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { + return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) == + ref) + ? 1 + : 0; +} + +#define EXPECTED(x) (x) +#define UNEXPECTED(x) (x) + +#else + +#include + +typedef volatile _Atomic(int32_t) atomic32_t; +typedef volatile _Atomic(int64_t) atomic64_t; +typedef volatile _Atomic(void *) atomicptr_t; + +static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { + return atomic_load_explicit(src, memory_order_relaxed); +} +static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { + atomic_store_explicit(dst, val, memory_order_relaxed); +} +static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { + return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; +} +static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { + return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; +} +static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { + return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; +} +static FORCEINLINE int atomic_cas32_acquire(atomic32_t *dst, int32_t val, + int32_t ref) { + return atomic_compare_exchange_weak_explicit( + dst, &ref, val, memory_order_acquire, memory_order_relaxed); +} +static FORCEINLINE void atomic_store32_release(atomic32_t *dst, int32_t val) { + atomic_store_explicit(dst, val, memory_order_release); +} +static FORCEINLINE int64_t atomic_load64(atomic64_t *val) { + return atomic_load_explicit(val, memory_order_relaxed); +} +static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { + return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; +} +static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { + return atomic_load_explicit(src, memory_order_relaxed); +} +static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { + atomic_store_explicit(dst, val, memory_order_relaxed); +} +static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { + atomic_store_explicit(dst, val, memory_order_release); +} +static FORCEINLINE void *atomic_exchange_ptr_acquire(atomicptr_t *dst, + void *val) { + return atomic_exchange_explicit(dst, val, memory_order_acquire); +} +static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { + return atomic_compare_exchange_weak_explicit( + dst, &ref, val, memory_order_relaxed, memory_order_relaxed); +} + +#define EXPECTED(x) __builtin_expect((x), 1) +#define UNEXPECTED(x) __builtin_expect((x), 
0) + +#endif + +//////////// +/// +/// Statistics related functions (evaluate to nothing when statistics not +/// enabled) +/// +////// + +#if ENABLE_STATISTICS +#define _rpmalloc_stat_inc(counter) atomic_incr32(counter) +#define _rpmalloc_stat_dec(counter) atomic_decr32(counter) +#define _rpmalloc_stat_add(counter, value) \ + atomic_add32(counter, (int32_t)(value)) +#define _rpmalloc_stat_add64(counter, value) \ + atomic_add64(counter, (int64_t)(value)) +#define _rpmalloc_stat_add_peak(counter, value, peak) \ + do { \ + int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); \ + if (_cur_count > (peak)) \ + peak = _cur_count; \ + } while (0) +#define _rpmalloc_stat_sub(counter, value) \ + atomic_add32(counter, -(int32_t)(value)) +#define _rpmalloc_stat_inc_alloc(heap, class_idx) \ + do { \ + int32_t alloc_current = \ + atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \ + if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \ + heap->size_class_use[class_idx].alloc_peak = alloc_current; \ + atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \ + } while (0) +#define _rpmalloc_stat_inc_free(heap, class_idx) \ + do { \ + atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \ + atomic_incr32(&heap->size_class_use[class_idx].free_total); \ + } while (0) +#else +#define _rpmalloc_stat_inc(counter) \ + do { \ + } while (0) +#define _rpmalloc_stat_dec(counter) \ + do { \ + } while (0) +#define _rpmalloc_stat_add(counter, value) \ + do { \ + } while (0) +#define _rpmalloc_stat_add64(counter, value) \ + do { \ + } while (0) +#define _rpmalloc_stat_add_peak(counter, value, peak) \ + do { \ + } while (0) +#define _rpmalloc_stat_sub(counter, value) \ + do { \ + } while (0) +#define _rpmalloc_stat_inc_alloc(heap, class_idx) \ + do { \ + } while (0) +#define _rpmalloc_stat_inc_free(heap, class_idx) \ + do { \ + } while (0) +#endif + +/// +/// Preconfigured limits and sizes +/// + +//! Granularity of a small allocation block (must be power of two) +#define SMALL_GRANULARITY 16 +//! Small granularity shift count +#define SMALL_GRANULARITY_SHIFT 4 +//! Number of small block size classes +#define SMALL_CLASS_COUNT 65 +//! Maximum size of a small block +#define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)) +//! Granularity of a medium allocation block +#define MEDIUM_GRANULARITY 512 +//! Medium granularity shift count +#define MEDIUM_GRANULARITY_SHIFT 9 +//! Number of medium block size classes +#define MEDIUM_CLASS_COUNT 61 +//! Total number of small + medium size classes +#define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT) +//! Number of large block size classes +#define LARGE_CLASS_COUNT 63 +//! Maximum size of a medium block +#define MEDIUM_SIZE_LIMIT \ + (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)) +//! Maximum size of a large block +#define LARGE_SIZE_LIMIT \ + ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE) +//! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power +//! of two) +#define SPAN_HEADER_SIZE 128 +//! Number of spans in thread cache +#define MAX_THREAD_SPAN_CACHE 400 +//! Number of spans to transfer between thread and global cache +#define THREAD_SPAN_CACHE_TRANSFER 64 +//! Number of spans in thread cache for large spans (must be greater than +//! LARGE_CLASS_COUNT / 2) +#define MAX_THREAD_SPAN_LARGE_CACHE 100 +//! 
Number of spans to transfer between thread and global cache for large spans +#define THREAD_SPAN_LARGE_CACHE_TRANSFER 6 + +_Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, + "Small granularity must be power of two"); +_Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, + "Span header size must be power of two"); + +#if ENABLE_VALIDATE_ARGS +//! Maximum allocation size to avoid integer overflow +#undef MAX_ALLOC_SIZE +#define MAX_ALLOC_SIZE (((size_t) - 1) - _memory_span_size) +#endif + +#define pointer_offset(ptr, ofs) (void *)((char *)(ptr) + (ptrdiff_t)(ofs)) +#define pointer_diff(first, second) \ + (ptrdiff_t)((const char *)(first) - (const char *)(second)) + +#define INVALID_POINTER ((void *)((uintptr_t) - 1)) + +#define SIZE_CLASS_LARGE SIZE_CLASS_COUNT +#define SIZE_CLASS_HUGE ((uint32_t) - 1) + +//////////// +/// +/// Data types +/// +////// + +//! A memory heap, per thread +typedef struct heap_t heap_t; +//! Span of memory pages +typedef struct span_t span_t; +//! Span list +typedef struct span_list_t span_list_t; +//! Span active data +typedef struct span_active_t span_active_t; +//! Size class definition +typedef struct size_class_t size_class_t; +//! Global cache +typedef struct global_cache_t global_cache_t; + +//! Flag indicating span is the first (master) span of a split superspan +#define SPAN_FLAG_MASTER 1U +//! Flag indicating span is a secondary (sub) span of a split superspan +#define SPAN_FLAG_SUBSPAN 2U +//! Flag indicating span has blocks with increased alignment +#define SPAN_FLAG_ALIGNED_BLOCKS 4U +//! Flag indicating an unmapped master span +#define SPAN_FLAG_UNMAPPED_MASTER 8U + +#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS +struct span_use_t { + //! Current number of spans used (actually used, not in cache) + atomic32_t current; + //! High water mark of spans used + atomic32_t high; +#if ENABLE_STATISTICS + //! Number of spans in deferred list + atomic32_t spans_deferred; + //! Number of spans transitioned to global cache + atomic32_t spans_to_global; + //! Number of spans transitioned from global cache + atomic32_t spans_from_global; + //! Number of spans transitioned to thread cache + atomic32_t spans_to_cache; + //! Number of spans transitioned from thread cache + atomic32_t spans_from_cache; + //! Number of spans transitioned to reserved state + atomic32_t spans_to_reserved; + //! Number of spans transitioned from reserved state + atomic32_t spans_from_reserved; + //! Number of raw memory map calls + atomic32_t spans_map_calls; +#endif +}; +typedef struct span_use_t span_use_t; +#endif + +#if ENABLE_STATISTICS +struct size_class_use_t { + //! Current number of allocations + atomic32_t alloc_current; + //! Peak number of allocations + int32_t alloc_peak; + //! Total number of allocations + atomic32_t alloc_total; + //! Total number of frees + atomic32_t free_total; + //! Number of spans in use + atomic32_t spans_current; + //! Number of spans transitioned to cache + int32_t spans_peak; + //! Number of spans transitioned to cache + atomic32_t spans_to_cache; + //! Number of spans transitioned from cache + atomic32_t spans_from_cache; + //! Number of spans transitioned from reserved state + atomic32_t spans_from_reserved; + //! 
Number of spans mapped + atomic32_t spans_map_calls; + int32_t unused; +}; +typedef struct size_class_use_t size_class_use_t; +#endif + +// A span can either represent a single span of memory pages with size declared +// by span_map_count configuration variable, or a set of spans in a continuous +// region, a super span. Any reference to the term "span" usually refers to both +// a single span or a super span. A super span can further be divided into +// multiple spans (or this, super spans), where the first (super)span is the +// master and subsequent (super)spans are subspans. The master span keeps track +// of how many subspans that are still alive and mapped in virtual memory, and +// once all subspans and master have been unmapped the entire superspan region +// is released and unmapped (on Windows for example, the entire superspan range +// has to be released in the same call to release the virtual memory range, but +// individual subranges can be decommitted individually to reduce physical +// memory use). +struct span_t { + //! Free list + void *free_list; + //! Total block count of size class + uint32_t block_count; + //! Size class + uint32_t size_class; + //! Index of last block initialized in free list + uint32_t free_list_limit; + //! Number of used blocks remaining when in partial state + uint32_t used_count; + //! Deferred free list + atomicptr_t free_list_deferred; + //! Size of deferred free list, or list of spans when part of a cache list + uint32_t list_size; + //! Size of a block + uint32_t block_size; + //! Flags and counters + uint32_t flags; + //! Number of spans + uint32_t span_count; + //! Total span counter for master spans + uint32_t total_spans; + //! Offset from master span for subspans + uint32_t offset_from_master; + //! Remaining span counter, for master spans + atomic32_t remaining_spans; + //! Alignment offset + uint32_t align_offset; + //! Owning heap + heap_t *heap; + //! Next span + span_t *next; + //! Previous span + span_t *prev; +}; +_Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch"); + +struct span_cache_t { + size_t count; + span_t *span[MAX_THREAD_SPAN_CACHE]; +}; +typedef struct span_cache_t span_cache_t; + +struct span_large_cache_t { + size_t count; + span_t *span[MAX_THREAD_SPAN_LARGE_CACHE]; +}; +typedef struct span_large_cache_t span_large_cache_t; + +struct heap_size_class_t { + //! Free list of active span + void *free_list; + //! Double linked list of partially used spans with free blocks. + // Previous span pointer in head points to tail span of list. + span_t *partial_span; + //! Early level cache of fully free spans + span_t *cache; +}; +typedef struct heap_size_class_t heap_size_class_t; + +// Control structure for a heap, either a thread heap or a first class heap if +// enabled +struct heap_t { + //! Owning thread ID + uintptr_t owner_thread; + //! Free lists for each size class + heap_size_class_t size_class[SIZE_CLASS_COUNT]; +#if ENABLE_THREAD_CACHE + //! Arrays of fully freed spans, single span + span_cache_t span_cache; +#endif + //! List of deferred free spans (single linked list) + atomicptr_t span_free_deferred; + //! Number of full spans + size_t full_span_count; + //! Mapped but unused spans + span_t *span_reserve; + //! Master span for mapped but unused spans + span_t *span_reserve_master; + //! Number of mapped but unused spans + uint32_t spans_reserved; + //! Child count + atomic32_t child_count; + //! Next heap in id list + heap_t *next_heap; + //! 
Next heap in orphan list + heap_t *next_orphan; + //! Heap ID + int32_t id; + //! Finalization state flag + int finalize; + //! Master heap owning the memory pages + heap_t *master_heap; +#if ENABLE_THREAD_CACHE + //! Arrays of fully freed spans, large spans with > 1 span count + span_large_cache_t span_large_cache[LARGE_CLASS_COUNT - 1]; +#endif +#if RPMALLOC_FIRST_CLASS_HEAPS + //! Double linked list of fully utilized spans with free blocks for each size + //! class. + // Previous span pointer in head points to tail span of list. + span_t *full_span[SIZE_CLASS_COUNT]; + //! Double linked list of large and huge spans allocated by this heap + span_t *large_huge_span; +#endif +#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS + //! Current and high water mark of spans used per span count + span_use_t span_use[LARGE_CLASS_COUNT]; +#endif +#if ENABLE_STATISTICS + //! Allocation stats per size class + size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1]; + //! Number of bytes transitioned thread -> global + atomic64_t thread_to_global; + //! Number of bytes transitioned global -> thread + atomic64_t global_to_thread; +#endif +}; + +// Size class for defining a block size bucket +struct size_class_t { + //! Size of blocks in this class + uint32_t block_size; + //! Number of blocks in each chunk + uint16_t block_count; + //! Class index this class is merged with + uint16_t class_idx; +}; +_Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch"); + +struct global_cache_t { + //! Cache lock + atomic32_t lock; + //! Cache count + uint32_t count; +#if ENABLE_STATISTICS + //! Insert count + size_t insert_count; + //! Extract count + size_t extract_count; +#endif + //! Cached spans + span_t *span[GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE]; + //! Unlimited cache overflow + span_t *overflow; +}; + +//////////// +/// +/// Global data +/// +////// + +//! Default span size (64KiB) +#define _memory_default_span_size (64 * 1024) +#define _memory_default_span_size_shift 16 +#define _memory_default_span_mask (~((uintptr_t)(_memory_span_size - 1))) + +//! Initialized flag +static int _rpmalloc_initialized; +//! Main thread ID +static uintptr_t _rpmalloc_main_thread_id; +//! Configuration +static rpmalloc_config_t _memory_config; +//! Memory page size +static size_t _memory_page_size; +//! Shift to divide by page size +static size_t _memory_page_size_shift; +//! Granularity at which memory pages are mapped by OS +static size_t _memory_map_granularity; +#if RPMALLOC_CONFIGURABLE +//! Size of a span of memory pages +static size_t _memory_span_size; +//! Shift to divide by span size +static size_t _memory_span_size_shift; +//! Mask to get to start of a memory span +static uintptr_t _memory_span_mask; +#else +//! Hardwired span size +#define _memory_span_size _memory_default_span_size +#define _memory_span_size_shift _memory_default_span_size_shift +#define _memory_span_mask _memory_default_span_mask +#endif +//! Number of spans to map in each map call +static size_t _memory_span_map_count; +//! Number of spans to keep reserved in each heap +static size_t _memory_heap_reserve_count; +//! Global size classes +static size_class_t _memory_size_class[SIZE_CLASS_COUNT]; +//! Run-time size limit of medium blocks +static size_t _memory_medium_size_limit; +//! Heap ID counter +static atomic32_t _memory_heap_id; +//! Huge page support +static int _memory_huge_pages; +#if ENABLE_GLOBAL_CACHE +//! Global span cache +static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT]; +#endif +//! 
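Note: the span mask defined above is what makes pointer-to-span lookup constant
+// time. Spans are mapped at span-size aligned addresses, so clearing the low
+// bits of any block pointer yields the owning span header. A minimal sketch,
+// not compiled, mirroring expressions used later in this file (for example in
+// _rpmalloc_span_finalize); the example_* helper name is hypothetical:
+#if 0
+static span_t *example_span_of_block(void *block) {
+  // With the default 64KiB span size the mask clears the low 16 bits
+  return (span_t *)((uintptr_t)block & _memory_span_mask);
+}
+#endif
+//! 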
Global reserved spans +static span_t *_memory_global_reserve; +//! Global reserved count +static size_t _memory_global_reserve_count; +//! Global reserved master +static span_t *_memory_global_reserve_master; +//! All heaps +static heap_t *_memory_heaps[HEAP_ARRAY_SIZE]; +//! Used to restrict access to mapping memory for huge pages +static atomic32_t _memory_global_lock; +//! Orphaned heaps +static heap_t *_memory_orphan_heaps; +#if RPMALLOC_FIRST_CLASS_HEAPS +//! Orphaned heaps (first class heaps) +static heap_t *_memory_first_class_orphan_heaps; +#endif +#if ENABLE_STATISTICS +//! Allocations counter +static atomic64_t _allocation_counter; +//! Deallocations counter +static atomic64_t _deallocation_counter; +//! Active heap count +static atomic32_t _memory_active_heaps; +//! Number of currently mapped memory pages +static atomic32_t _mapped_pages; +//! Peak number of concurrently mapped memory pages +static int32_t _mapped_pages_peak; +//! Number of mapped master spans +static atomic32_t _master_spans; +//! Number of unmapped dangling master spans +static atomic32_t _unmapped_master_spans; +//! Running counter of total number of mapped memory pages since start +static atomic32_t _mapped_total; +//! Running counter of total number of unmapped memory pages since start +static atomic32_t _unmapped_total; +//! Number of currently mapped memory pages in OS calls +static atomic32_t _mapped_pages_os; +//! Number of currently allocated pages in huge allocations +static atomic32_t _huge_pages_current; +//! Peak number of currently allocated pages in huge allocations +static int32_t _huge_pages_peak; +#endif + +//////////// +/// +/// Thread local heap and ID +/// +////// + +//! Current thread heap +#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \ + defined(__TINYC__) +static pthread_key_t _memory_thread_heap; +#else +#ifdef _MSC_VER +#define _Thread_local __declspec(thread) +#define TLS_MODEL +#else +#ifndef __HAIKU__ +#define TLS_MODEL __attribute__((tls_model("initial-exec"))) +#else +#define TLS_MODEL +#endif +#if !defined(__clang__) && defined(__GNUC__) +#define _Thread_local __thread +#endif +#endif +static _Thread_local heap_t *_memory_thread_heap TLS_MODEL; +#endif + +static inline heap_t *get_thread_heap_raw(void) { +#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD + return pthread_getspecific(_memory_thread_heap); +#else + return _memory_thread_heap; +#endif +} + +//! Get the current thread heap +static inline heap_t *get_thread_heap(void) { + heap_t *heap = get_thread_heap_raw(); +#if ENABLE_PRELOAD + if (EXPECTED(heap != 0)) + return heap; + rpmalloc_initialize(); + return get_thread_heap_raw(); +#else + return heap; +#endif +} + +//! 
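Note: under ENABLE_PRELOAD the lookup above lazily bootstraps a heap, so a
+// thread that never called rpmalloc_thread_initialize() can still allocate.
+// A hypothetical caller-side sketch, not compiled; the example_* helper name
+// is illustrative only:
+#if 0
+static void example_first_allocation_on_new_thread(void) {
+  void *ptr = rpmalloc(32); // get_thread_heap() assigns a heap on first use
+  rpfree(ptr);
+}
+#endif
+//! 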
Fast thread ID +static inline uintptr_t get_thread_id(void) { +#if defined(_WIN32) + return (uintptr_t)((void *)NtCurrentTeb()); +#elif (defined(__GNUC__) || defined(__clang__)) && !defined(__CYGWIN__) + uintptr_t tid; +#if defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r"(tid) : :); +#elif defined(__x86_64__) +#if defined(__MACH__) + __asm__("movq %%gs:0, %0" : "=r"(tid) : :); +#else + __asm__("movq %%fs:0, %0" : "=r"(tid) : :); +#endif +#elif defined(__arm__) + __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid)); +#elif defined(__aarch64__) +#if defined(__MACH__) + // tpidr_el0 likely unused, always return 0 on iOS + __asm__ volatile("mrs %0, tpidrro_el0" : "=r"(tid)); +#else + __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid)); +#endif +#else +#error This platform needs implementation of get_thread_id() +#endif + return tid; +#else +#error This platform needs implementation of get_thread_id() +#endif +} + +//! Set the current thread heap +static void set_thread_heap(heap_t *heap) { +#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \ + defined(__TINYC__) + pthread_setspecific(_memory_thread_heap, heap); +#else + _memory_thread_heap = heap; +#endif + if (heap) + heap->owner_thread = get_thread_id(); +} + +//! Set main thread ID +extern void rpmalloc_set_main_thread(void); + +void rpmalloc_set_main_thread(void) { + _rpmalloc_main_thread_id = get_thread_id(); +} + +static void _rpmalloc_spin(void) { +#if defined(_MSC_VER) +#if defined(_M_ARM64) + __yield(); +#else + _mm_pause(); +#endif +#elif defined(__x86_64__) || defined(__i386__) + __asm__ volatile("pause" ::: "memory"); +#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH >= 7) + __asm__ volatile("yield" ::: "memory"); +#elif defined(__powerpc__) || defined(__powerpc64__) + // No idea if ever been compiled in such archs but ... as precaution + __asm__ volatile("or 27,27,27"); +#elif defined(__sparc__) + __asm__ volatile("rd %ccr, %g0 \n\trd %ccr, %g0 \n\trd %ccr, %g0"); +#else + struct timespec ts = {0}; + nanosleep(&ts, 0); +#endif +} + +#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) +static void NTAPI _rpmalloc_thread_destructor(void *value) { +#if ENABLE_OVERRIDE + // If this is called on main thread it means rpmalloc_finalize + // has not been called and shutdown is forced (through _exit) or unclean + if (get_thread_id() == _rpmalloc_main_thread_id) + return; +#endif + if (value) + rpmalloc_thread_finalize(1); +} +#endif + +//////////// +/// +/// Low level memory map/unmap +/// +////// + +static void _rpmalloc_set_name(void *address, size_t size) { +#if defined(__linux__) || defined(__ANDROID__) + const char *name = _memory_huge_pages ? _memory_config.huge_page_name + : _memory_config.page_name; + if (address == MAP_FAILED || !name) + return; + // If the kernel does not support CONFIG_ANON_VMA_NAME or if the call fails + // (e.g. invalid name) it is a no-op basically. + (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)address, size, + (uintptr_t)name); +#else + (void)sizeof(size); + (void)sizeof(address); +#endif +} + +//! 
Map more virtual memory +// size is number of bytes to map +// offset receives the offset in bytes from start of mapped region +// returns address to start of mapped region to use +static void *_rpmalloc_mmap(size_t size, size_t *offset) { + rpmalloc_assert(!(size % _memory_page_size), "Invalid mmap size"); + rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); + void *address = _memory_config.memory_map(size, offset); + if (EXPECTED(address != 0)) { + _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), + _mapped_pages_peak); + _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift)); + } + return address; +} + +//! Unmap virtual memory +// address is the memory address to unmap, as returned from _memory_map +// size is the number of bytes to unmap, which might be less than full region +// for a partial unmap offset is the offset in bytes to the actual mapped +// region, as set by _memory_map release is set to 0 for partial unmap, or size +// of entire range for a full unmap +static void _rpmalloc_unmap(void *address, size_t size, size_t offset, + size_t release) { + rpmalloc_assert(!release || (release >= size), "Invalid unmap size"); + rpmalloc_assert(!release || (release >= _memory_page_size), + "Invalid unmap size"); + if (release) { + rpmalloc_assert(!(release % _memory_page_size), "Invalid unmap size"); + _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift)); + _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift)); + } + _memory_config.memory_unmap(address, size, offset, release); +} + +//! Default implementation to map new pages to virtual memory +static void *_rpmalloc_mmap_os(size_t size, size_t *offset) { + // Either size is a heap (a single page) or a (multiple) span - we only need + // to align spans, and only if larger than map granularity + size_t padding = ((size >= _memory_span_size) && + (_memory_span_size > _memory_map_granularity)) + ? _memory_span_size + : 0; + rpmalloc_assert(size >= _memory_page_size, "Invalid mmap size"); +#if PLATFORM_WINDOWS + // Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not + // allocated unless/until the virtual addresses are actually accessed" + void *ptr = VirtualAlloc(0, size + padding, + (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | + MEM_RESERVE | MEM_COMMIT, + PAGE_READWRITE); + if (!ptr) { + if (_memory_config.map_fail_callback) { + if (_memory_config.map_fail_callback(size + padding)) + return _rpmalloc_mmap_os(size, offset); + } else { + rpmalloc_assert(ptr, "Failed to map virtual memory block"); + } + return 0; + } +#else + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED; +#if defined(__APPLE__) && !TARGET_OS_IPHONE && !TARGET_OS_SIMULATOR + int fd = (int)VM_MAKE_TAG(240U); + if (_memory_huge_pages) + fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0); +#elif defined(MAP_HUGETLB) + void *ptr = mmap(0, size + padding, + PROT_READ | PROT_WRITE | PROT_MAX(PROT_READ | PROT_WRITE), + (_memory_huge_pages ? 
MAP_HUGETLB : 0) | flags, -1, 0); +#if defined(MADV_HUGEPAGE) + // In some configurations, huge pages allocations might fail thus + // we fallback to normal allocations and promote the region as transparent + // huge page + if ((ptr == MAP_FAILED || !ptr) && _memory_huge_pages) { + ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); + if (ptr && ptr != MAP_FAILED) { + int prm = madvise(ptr, size + padding, MADV_HUGEPAGE); + (void)prm; + rpmalloc_assert((prm == 0), "Failed to promote the page to THP"); + } + } +#endif + _rpmalloc_set_name(ptr, size + padding); +#elif defined(MAP_ALIGNED) + const size_t align = + (sizeof(size_t) * 8) - (size_t)(__builtin_clzl(size - 1)); + void *ptr = + mmap(0, size + padding, PROT_READ | PROT_WRITE, + (_memory_huge_pages ? MAP_ALIGNED(align) : 0) | flags, -1, 0); +#elif defined(MAP_ALIGN) + caddr_t base = (_memory_huge_pages ? (caddr_t)(4 << 20) : 0); + void *ptr = mmap(base, size + padding, PROT_READ | PROT_WRITE, + (_memory_huge_pages ? MAP_ALIGN : 0) | flags, -1, 0); +#else + void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0); +#endif + if ((ptr == MAP_FAILED) || !ptr) { + if (_memory_config.map_fail_callback) { + if (_memory_config.map_fail_callback(size + padding)) + return _rpmalloc_mmap_os(size, offset); + } else if (errno != ENOMEM) { + rpmalloc_assert((ptr != MAP_FAILED) && ptr, + "Failed to map virtual memory block"); + } + return 0; + } +#endif + _rpmalloc_stat_add(&_mapped_pages_os, + (int32_t)((size + padding) >> _memory_page_size_shift)); + if (padding) { + size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask); + rpmalloc_assert(final_padding <= _memory_span_size, + "Internal failure in padding"); + rpmalloc_assert(final_padding <= padding, "Internal failure in padding"); + rpmalloc_assert(!(final_padding % 8), "Internal failure in padding"); + ptr = pointer_offset(ptr, final_padding); + *offset = final_padding >> 3; + } + rpmalloc_assert((size < _memory_span_size) || + !((uintptr_t)ptr & ~_memory_span_mask), + "Internal failure in padding"); + return ptr; +} + +//! Default implementation to unmap pages from virtual memory +static void _rpmalloc_unmap_os(void *address, size_t size, size_t offset, + size_t release) { + rpmalloc_assert(release || (offset == 0), "Invalid unmap size"); + rpmalloc_assert(!release || (release >= _memory_page_size), + "Invalid unmap size"); + rpmalloc_assert(size >= _memory_page_size, "Invalid unmap size"); + if (release && offset) { + offset <<= 3; + address = pointer_offset(address, -(int32_t)offset); + if ((release >= _memory_span_size) && + (_memory_span_size > _memory_map_granularity)) { + // Padding is always one span size + release += _memory_span_size; + } + } +#if !DISABLE_UNMAP +#if PLATFORM_WINDOWS + if (!VirtualFree(address, release ? 0 : size, + release ? 
MEM_RELEASE : MEM_DECOMMIT)) { + rpmalloc_assert(0, "Failed to unmap virtual memory block"); + } +#else + if (release) { + if (munmap(address, release)) { + rpmalloc_assert(0, "Failed to unmap virtual memory block"); + } + } else { +#if defined(MADV_FREE_REUSABLE) + int ret; + while ((ret = madvise(address, size, MADV_FREE_REUSABLE)) == -1 && + (errno == EAGAIN)) + errno = 0; + if ((ret == -1) && (errno != 0)) { +#elif defined(MADV_DONTNEED) + if (madvise(address, size, MADV_DONTNEED)) { +#elif defined(MADV_PAGEOUT) + if (madvise(address, size, MADV_PAGEOUT)) { +#elif defined(MADV_FREE) + if (madvise(address, size, MADV_FREE)) { +#else + if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) { +#endif + rpmalloc_assert(0, "Failed to madvise virtual memory block as free"); + } + } +#endif +#endif + if (release) + _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift); +} + +static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, + span_t *subspan, + size_t span_count); + +//! Use global reserved spans to fulfill a memory map request (reserve size must +//! be checked by caller) +static span_t *_rpmalloc_global_get_reserved_spans(size_t span_count) { + span_t *span = _memory_global_reserve; + _rpmalloc_span_mark_as_subspan_unless_master(_memory_global_reserve_master, + span, span_count); + _memory_global_reserve_count -= span_count; + if (_memory_global_reserve_count) + _memory_global_reserve = + (span_t *)pointer_offset(span, span_count << _memory_span_size_shift); + else + _memory_global_reserve = 0; + return span; +} + +//! Store the given spans as global reserve (must only be called from within new +//! heap allocation, not thread safe) +static void _rpmalloc_global_set_reserved_spans(span_t *master, span_t *reserve, + size_t reserve_span_count) { + _memory_global_reserve_master = master; + _memory_global_reserve_count = reserve_span_count; + _memory_global_reserve = reserve; +} + +//////////// +/// +/// Span linked list management +/// +////// + +//! Add a span to double linked list at the head +static void _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) { + if (*head) + (*head)->prev = span; + span->next = *head; + *head = span; +} + +//! Pop head span from double linked list +static void _rpmalloc_span_double_link_list_pop_head(span_t **head, + span_t *span) { + rpmalloc_assert(*head == span, "Linked list corrupted"); + span = *head; + *head = span->next; +} + +//! Remove a span from double linked list +static void _rpmalloc_span_double_link_list_remove(span_t **head, + span_t *span) { + rpmalloc_assert(*head, "Linked list corrupted"); + if (*head == span) { + *head = span->next; + } else { + span_t *next_span = span->next; + span_t *prev_span = span->prev; + prev_span->next = next_span; + if (EXPECTED(next_span != 0)) + next_span->prev = prev_span; + } +} + +//////////// +/// +/// Span control +/// +////// + +static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span); + +static void _rpmalloc_heap_finalize(heap_t *heap); + +static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, + span_t *reserve, + size_t reserve_span_count); + +//! Declare the span to be a subspan and store distance from master span and +//! 
span count +static void _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, + span_t *subspan, + size_t span_count) { + rpmalloc_assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER), + "Span master pointer and/or flag mismatch"); + if (subspan != master) { + subspan->flags = SPAN_FLAG_SUBSPAN; + subspan->offset_from_master = + (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> + _memory_span_size_shift); + subspan->align_offset = 0; + } + subspan->span_count = (uint32_t)span_count; +} + +//! Use reserved spans to fulfill a memory map request (reserve size must be +//! checked by caller) +static span_t *_rpmalloc_span_map_from_reserve(heap_t *heap, + size_t span_count) { + // Update the heap span reserve + span_t *span = heap->span_reserve; + heap->span_reserve = + (span_t *)pointer_offset(span, span_count * _memory_span_size); + heap->spans_reserved -= (uint32_t)span_count; + + _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, + span_count); + if (span_count <= LARGE_CLASS_COUNT) + _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved); + + return span; +} + +//! Get the aligned number of spans to map in based on wanted count, configured +//! mapping granularity and the page size +static size_t _rpmalloc_span_align_count(size_t span_count) { + size_t request_count = (span_count > _memory_span_map_count) + ? span_count + : _memory_span_map_count; + if ((_memory_page_size > _memory_span_size) && + ((request_count * _memory_span_size) % _memory_page_size)) + request_count += + _memory_span_map_count - (request_count % _memory_span_map_count); + return request_count; +} + +//! Setup a newly mapped span +static void _rpmalloc_span_initialize(span_t *span, size_t total_span_count, + size_t span_count, size_t align_offset) { + span->total_spans = (uint32_t)total_span_count; + span->span_count = (uint32_t)span_count; + span->align_offset = (uint32_t)align_offset; + span->flags = SPAN_FLAG_MASTER; + atomic_store32(&span->remaining_spans, (int32_t)total_span_count); +} + +static void _rpmalloc_span_unmap(span_t *span); + +//! Map an aligned set of spans, taking configured mapping granularity and the +//! page size into account +static span_t *_rpmalloc_span_map_aligned_count(heap_t *heap, + size_t span_count) { + // If we already have some, but not enough, reserved spans, release those to + // heap cache and map a new full set of spans. 
Otherwise we would waste memory + // if page size > span size (huge pages) + size_t aligned_span_count = _rpmalloc_span_align_count(span_count); + size_t align_offset = 0; + span_t *span = (span_t *)_rpmalloc_mmap( + aligned_span_count * _memory_span_size, &align_offset); + if (!span) + return 0; + _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset); + _rpmalloc_stat_inc(&_master_spans); + if (span_count <= LARGE_CLASS_COUNT) + _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls); + if (aligned_span_count > span_count) { + span_t *reserved_spans = + (span_t *)pointer_offset(span, span_count * _memory_span_size); + size_t reserved_count = aligned_span_count - span_count; + if (heap->spans_reserved) { + _rpmalloc_span_mark_as_subspan_unless_master( + heap->span_reserve_master, heap->span_reserve, heap->spans_reserved); + _rpmalloc_heap_cache_insert(heap, heap->span_reserve); + } + if (reserved_count > _memory_heap_reserve_count) { + // If huge pages or eager spam map count, the global reserve spin lock is + // held by caller, _rpmalloc_span_map + rpmalloc_assert(atomic_load32(&_memory_global_lock) == 1, + "Global spin lock not held as expected"); + size_t remain_count = reserved_count - _memory_heap_reserve_count; + reserved_count = _memory_heap_reserve_count; + span_t *remain_span = (span_t *)pointer_offset( + reserved_spans, reserved_count * _memory_span_size); + if (_memory_global_reserve) { + _rpmalloc_span_mark_as_subspan_unless_master( + _memory_global_reserve_master, _memory_global_reserve, + _memory_global_reserve_count); + _rpmalloc_span_unmap(_memory_global_reserve); + } + _rpmalloc_global_set_reserved_spans(span, remain_span, remain_count); + } + _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, + reserved_count); + } + return span; +} + +//! Map in memory pages for the given number of spans (or use previously +//! reserved pages) +static span_t *_rpmalloc_span_map(heap_t *heap, size_t span_count) { + if (span_count <= heap->spans_reserved) + return _rpmalloc_span_map_from_reserve(heap, span_count); + span_t *span = 0; + int use_global_reserve = + (_memory_page_size > _memory_span_size) || + (_memory_span_map_count > _memory_heap_reserve_count); + if (use_global_reserve) { + // If huge pages, make sure only one thread maps more memory to avoid bloat + while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) + _rpmalloc_spin(); + if (_memory_global_reserve_count >= span_count) { + size_t reserve_count = + (!heap->spans_reserved ? _memory_heap_reserve_count : span_count); + if (_memory_global_reserve_count < reserve_count) + reserve_count = _memory_global_reserve_count; + span = _rpmalloc_global_get_reserved_spans(reserve_count); + if (span) { + if (reserve_count > span_count) { + span_t *reserved_span = (span_t *)pointer_offset( + span, span_count << _memory_span_size_shift); + _rpmalloc_heap_set_reserved_spans(heap, _memory_global_reserve_master, + reserved_span, + reserve_count - span_count); + } + // Already marked as subspan in _rpmalloc_global_get_reserved_spans + span->span_count = (uint32_t)span_count; + } + } + } + if (!span) + span = _rpmalloc_span_map_aligned_count(heap, span_count); + if (use_global_reserve) + atomic_store32_release(&_memory_global_lock, 0); + return span; +} + +//! Unmap memory pages for the given number of spans (or mark as unused if no +//! 
partial unmappings) +static void _rpmalloc_span_unmap(span_t *span) { + rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || + (span->flags & SPAN_FLAG_SUBSPAN), + "Span flag corrupted"); + rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || + !(span->flags & SPAN_FLAG_SUBSPAN), + "Span flag corrupted"); + + int is_master = !!(span->flags & SPAN_FLAG_MASTER); + span_t *master = + is_master ? span + : ((span_t *)pointer_offset( + span, -(intptr_t)((uintptr_t)span->offset_from_master * + _memory_span_size))); + rpmalloc_assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN), + "Span flag corrupted"); + rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); + + size_t span_count = span->span_count; + if (!is_master) { + // Directly unmap subspans (unless huge pages, in which case we defer and + // unmap entire page range with master) + rpmalloc_assert(span->align_offset == 0, "Span align offset corrupted"); + if (_memory_span_size >= _memory_page_size) + _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0); + } else { + // Special double flag to denote an unmapped master + // It must be kept in memory since span header must be used + span->flags |= + SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN | SPAN_FLAG_UNMAPPED_MASTER; + _rpmalloc_stat_add(&_unmapped_master_spans, 1); + } + + if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) { + // Everything unmapped, unmap the master span with release flag to unmap the + // entire range of the super span + rpmalloc_assert(!!(master->flags & SPAN_FLAG_MASTER) && + !!(master->flags & SPAN_FLAG_SUBSPAN), + "Span flag corrupted"); + size_t unmap_count = master->span_count; + if (_memory_span_size < _memory_page_size) + unmap_count = master->total_spans; + _rpmalloc_stat_sub(&_master_spans, 1); + _rpmalloc_stat_sub(&_unmapped_master_spans, 1); + _rpmalloc_unmap(master, unmap_count * _memory_span_size, + master->align_offset, + (size_t)master->total_spans * _memory_span_size); + } +} + +//! Move the span (used for small or medium allocations) to the heap thread +//! cache +static void _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) { + rpmalloc_assert(heap == span->heap, "Span heap pointer corrupted"); + rpmalloc_assert(span->size_class < SIZE_CLASS_COUNT, + "Invalid span size class"); + rpmalloc_assert(span->span_count == 1, "Invalid span count"); +#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS + atomic_decr32(&heap->span_use[0].current); +#endif + _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); + if (!heap->finalize) { + _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache); + _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache); + if (heap->size_class[span->size_class].cache) + _rpmalloc_heap_cache_insert(heap, + heap->size_class[span->size_class].cache); + heap->size_class[span->size_class].cache = span; + } else { + _rpmalloc_span_unmap(span); + } +} + +//! Initialize a (partial) free list up to next system memory page, while +//! 
reserving the first block as allocated, returning number of blocks in list +static uint32_t free_list_partial_init(void **list, void **first_block, + void *page_start, void *block_start, + uint32_t block_count, + uint32_t block_size) { + rpmalloc_assert(block_count, "Internal failure"); + *first_block = block_start; + if (block_count > 1) { + void *free_block = pointer_offset(block_start, block_size); + void *block_end = + pointer_offset(block_start, (size_t)block_size * block_count); + // If block size is less than half a memory page, bound init to next memory + // page boundary + if (block_size < (_memory_page_size >> 1)) { + void *page_end = pointer_offset(page_start, _memory_page_size); + if (page_end < block_end) + block_end = page_end; + } + *list = free_block; + block_count = 2; + void *next_block = pointer_offset(free_block, block_size); + while (next_block < block_end) { + *((void **)free_block) = next_block; + free_block = next_block; + ++block_count; + next_block = pointer_offset(next_block, block_size); + } + *((void **)free_block) = 0; + } else { + *list = 0; + } + return block_count; +} + +//! Initialize an unused span (from cache or mapped) to be new active span, +//! putting the initial free list in heap class free list +static void *_rpmalloc_span_initialize_new(heap_t *heap, + heap_size_class_t *heap_size_class, + span_t *span, uint32_t class_idx) { + rpmalloc_assert(span->span_count == 1, "Internal failure"); + size_class_t *size_class = _memory_size_class + class_idx; + span->size_class = class_idx; + span->heap = heap; + span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; + span->block_size = size_class->block_size; + span->block_count = size_class->block_count; + span->free_list = 0; + span->list_size = 0; + atomic_store_ptr_release(&span->free_list_deferred, 0); + + // Setup free list. 
Only initialize one system page worth of free blocks in + // list + void *block; + span->free_list_limit = + free_list_partial_init(&heap_size_class->free_list, &block, span, + pointer_offset(span, SPAN_HEADER_SIZE), + size_class->block_count, size_class->block_size); + // Link span as partial if there remains blocks to be initialized as free + // list, or full if fully initialized + if (span->free_list_limit < span->block_count) { + _rpmalloc_span_double_link_list_add(&heap_size_class->partial_span, span); + span->used_count = span->free_list_limit; + } else { +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); +#endif + ++heap->full_span_count; + span->used_count = span->block_count; + } + return block; +} + +static void _rpmalloc_span_extract_free_list_deferred(span_t *span) { + // We need acquire semantics on the CAS operation since we are interested in + // the list size Refer to _rpmalloc_deallocate_defer_small_or_medium for + // further comments on this dependency + do { + span->free_list = + atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); + } while (span->free_list == INVALID_POINTER); + span->used_count -= span->list_size; + span->list_size = 0; + atomic_store_ptr_release(&span->free_list_deferred, 0); +} + +static int _rpmalloc_span_is_fully_utilized(span_t *span) { + rpmalloc_assert(span->free_list_limit <= span->block_count, + "Span free list corrupted"); + return !span->free_list && (span->free_list_limit >= span->block_count); +} + +static int _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, + span_t **list_head) { + void *free_list = heap->size_class[iclass].free_list; + span_t *class_span = (span_t *)((uintptr_t)free_list & _memory_span_mask); + if (span == class_span) { + // Adopt the heap class free list back into the span free list + void *block = span->free_list; + void *last_block = 0; + while (block) { + last_block = block; + block = *((void **)block); + } + uint32_t free_count = 0; + block = free_list; + while (block) { + ++free_count; + block = *((void **)block); + } + if (last_block) { + *((void **)last_block) = free_list; + } else { + span->free_list = free_list; + } + heap->size_class[iclass].free_list = 0; + span->used_count -= free_count; + } + // If this assert triggers you have memory leaks + rpmalloc_assert(span->list_size == span->used_count, "Memory leak detected"); + if (span->list_size == span->used_count) { + _rpmalloc_stat_dec(&heap->span_use[0].current); + _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current); + // This function only used for spans in double linked lists + if (list_head) + _rpmalloc_span_double_link_list_remove(list_head, span); + _rpmalloc_span_unmap(span); + return 1; + } + return 0; +} + +//////////// +/// +/// Global cache +/// +////// + +#if ENABLE_GLOBAL_CACHE + +//! Finalize a global cache +static void _rpmalloc_global_cache_finalize(global_cache_t *cache) { + while (!atomic_cas32_acquire(&cache->lock, 1, 0)) + _rpmalloc_spin(); + + for (size_t ispan = 0; ispan < cache->count; ++ispan) + _rpmalloc_span_unmap(cache->span[ispan]); + cache->count = 0; + + while (cache->overflow) { + span_t *span = cache->overflow; + cache->overflow = span->next; + _rpmalloc_span_unmap(span); + } + + atomic_store32_release(&cache->lock, 0); +} + +static void _rpmalloc_global_cache_insert_spans(span_t **span, + size_t span_count, + size_t count) { + const size_t cache_limit = + (span_count == 1) ? 
GLOBAL_CACHE_MULTIPLIER * MAX_THREAD_SPAN_CACHE + : GLOBAL_CACHE_MULTIPLIER * + (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); + + global_cache_t *cache = &_memory_span_cache[span_count - 1]; + + size_t insert_count = count; + while (!atomic_cas32_acquire(&cache->lock, 1, 0)) + _rpmalloc_spin(); + +#if ENABLE_STATISTICS + cache->insert_count += count; +#endif + if ((cache->count + insert_count) > cache_limit) + insert_count = cache_limit - cache->count; + + memcpy(cache->span + cache->count, span, sizeof(span_t *) * insert_count); + cache->count += (uint32_t)insert_count; + +#if ENABLE_UNLIMITED_CACHE + while (insert_count < count) { +#else + // Enable unlimited cache if huge pages, or we will leak since it is unlikely + // that an entire huge page will be unmapped, and we're unable to partially + // decommit a huge page + while ((_memory_page_size > _memory_span_size) && (insert_count < count)) { +#endif + span_t *current_span = span[insert_count++]; + current_span->next = cache->overflow; + cache->overflow = current_span; + } + atomic_store32_release(&cache->lock, 0); + + span_t *keep = 0; + for (size_t ispan = insert_count; ispan < count; ++ispan) { + span_t *current_span = span[ispan]; + // Keep master spans that has remaining subspans to avoid dangling them + if ((current_span->flags & SPAN_FLAG_MASTER) && + (atomic_load32(¤t_span->remaining_spans) > + (int32_t)current_span->span_count)) { + current_span->next = keep; + keep = current_span; + } else { + _rpmalloc_span_unmap(current_span); + } + } + + if (keep) { + while (!atomic_cas32_acquire(&cache->lock, 1, 0)) + _rpmalloc_spin(); + + size_t islot = 0; + while (keep) { + for (; islot < cache->count; ++islot) { + span_t *current_span = cache->span[islot]; + if (!(current_span->flags & SPAN_FLAG_MASTER) || + ((current_span->flags & SPAN_FLAG_MASTER) && + (atomic_load32(¤t_span->remaining_spans) <= + (int32_t)current_span->span_count))) { + _rpmalloc_span_unmap(current_span); + cache->span[islot] = keep; + break; + } + } + if (islot == cache->count) + break; + keep = keep->next; + } + + if (keep) { + span_t *tail = keep; + while (tail->next) + tail = tail->next; + tail->next = cache->overflow; + cache->overflow = keep; + } + + atomic_store32_release(&cache->lock, 0); + } +} + +static size_t _rpmalloc_global_cache_extract_spans(span_t **span, + size_t span_count, + size_t count) { + global_cache_t *cache = &_memory_span_cache[span_count - 1]; + + size_t extract_count = 0; + while (!atomic_cas32_acquire(&cache->lock, 1, 0)) + _rpmalloc_spin(); + +#if ENABLE_STATISTICS + cache->extract_count += count; +#endif + size_t want = count - extract_count; + if (want > cache->count) + want = cache->count; + + memcpy(span + extract_count, cache->span + (cache->count - want), + sizeof(span_t *) * want); + cache->count -= (uint32_t)want; + extract_count += want; + + while ((extract_count < count) && cache->overflow) { + span_t *current_span = cache->overflow; + span[extract_count++] = current_span; + cache->overflow = current_span->next; + } + +#if ENABLE_ASSERTS + for (size_t ispan = 0; ispan < extract_count; ++ispan) { + rpmalloc_assert(span[ispan]->span_count == span_count, + "Global cache span count mismatch"); + } +#endif + + atomic_store32_release(&cache->lock, 0); + + return extract_count; +} + +#endif + +//////////// +/// +/// Heap control +/// +////// + +static void _rpmalloc_deallocate_huge(span_t *); + +//! 
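Note: the global cache above and the global heap lock used by the heap control
+// code below rely on the same minimal test-and-set spin lock idiom built from
+// the atomic helpers. Illustrative sketch, not compiled; the example_* names
+// are hypothetical:
+#if 0
+static atomic32_t example_lock; // zero initialized, 0 means unlocked
+static void example_with_lock_held(void (*critical_section)(void)) {
+  while (!atomic_cas32_acquire(&example_lock, 1, 0))
+    _rpmalloc_spin(); // back off until the 0 -> 1 transition succeeds
+  critical_section();
+  atomic_store32_release(&example_lock, 0); // publish results and unlock
+}
+#endif
+//! 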
Store the given spans as reserve in the given heap +static void _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, + span_t *reserve, + size_t reserve_span_count) { + heap->span_reserve_master = master; + heap->span_reserve = reserve; + heap->spans_reserved = (uint32_t)reserve_span_count; +} + +//! Adopt the deferred span cache list, optionally extracting the first single +//! span for immediate re-use +static void _rpmalloc_heap_cache_adopt_deferred(heap_t *heap, + span_t **single_span) { + span_t *span = (span_t *)((void *)atomic_exchange_ptr_acquire( + &heap->span_free_deferred, 0)); + while (span) { + span_t *next_span = (span_t *)span->free_list; + rpmalloc_assert(span->heap == heap, "Span heap pointer corrupted"); + if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { + rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); + --heap->full_span_count; + _rpmalloc_stat_dec(&heap->span_use[0].spans_deferred); +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], + span); +#endif + _rpmalloc_stat_dec(&heap->span_use[0].current); + _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current); + if (single_span && !*single_span) + *single_span = span; + else + _rpmalloc_heap_cache_insert(heap, span); + } else { + if (span->size_class == SIZE_CLASS_HUGE) { + _rpmalloc_deallocate_huge(span); + } else { + rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, + "Span size class invalid"); + rpmalloc_assert(heap->full_span_count, "Heap span counter corrupted"); + --heap->full_span_count; +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span); +#endif + uint32_t idx = span->span_count - 1; + _rpmalloc_stat_dec(&heap->span_use[idx].spans_deferred); + _rpmalloc_stat_dec(&heap->span_use[idx].current); + if (!idx && single_span && !*single_span) + *single_span = span; + else + _rpmalloc_heap_cache_insert(heap, span); + } + } + span = next_span; + } +} + +static void _rpmalloc_heap_unmap(heap_t *heap) { + if (!heap->master_heap) { + if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) { + span_t *span = (span_t *)((uintptr_t)heap & _memory_span_mask); + _rpmalloc_span_unmap(span); + } + } else { + if (atomic_decr32(&heap->master_heap->child_count) == 0) { + _rpmalloc_heap_unmap(heap->master_heap); + } + } +} + +static void _rpmalloc_heap_global_finalize(heap_t *heap) { + if (heap->finalize++ > 1) { + --heap->finalize; + return; + } + + _rpmalloc_heap_finalize(heap); + +#if ENABLE_THREAD_CACHE + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + span_cache_t *span_cache; + if (!iclass) + span_cache = &heap->span_cache; + else + span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); + for (size_t ispan = 0; ispan < span_cache->count; ++ispan) + _rpmalloc_span_unmap(span_cache->span[ispan]); + span_cache->count = 0; + } +#endif + + if (heap->full_span_count) { + --heap->finalize; + return; + } + + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + if (heap->size_class[iclass].free_list || + heap->size_class[iclass].partial_span) { + --heap->finalize; + return; + } + } + // Heap is now completely free, unmap and remove from heap list + size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; + heap_t *list_heap = _memory_heaps[list_idx]; + if (list_heap == heap) { + _memory_heaps[list_idx] = heap->next_heap; + } else { + while (list_heap->next_heap != heap) + list_heap = list_heap->next_heap; + 
list_heap->next_heap = heap->next_heap; + } + + _rpmalloc_heap_unmap(heap); +} + +//! Insert a single span into thread heap cache, releasing to global cache if +//! overflow +static void _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) { + if (UNEXPECTED(heap->finalize != 0)) { + _rpmalloc_span_unmap(span); + _rpmalloc_heap_global_finalize(heap); + return; + } +#if ENABLE_THREAD_CACHE + size_t span_count = span->span_count; + _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_to_cache); + if (span_count == 1) { + span_cache_t *span_cache = &heap->span_cache; + span_cache->span[span_cache->count++] = span; + if (span_cache->count == MAX_THREAD_SPAN_CACHE) { + const size_t remain_count = + MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; +#if ENABLE_GLOBAL_CACHE + _rpmalloc_stat_add64(&heap->thread_to_global, + THREAD_SPAN_CACHE_TRANSFER * _memory_span_size); + _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, + THREAD_SPAN_CACHE_TRANSFER); + _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, + span_count, + THREAD_SPAN_CACHE_TRANSFER); +#else + for (size_t ispan = 0; ispan < THREAD_SPAN_CACHE_TRANSFER; ++ispan) + _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); +#endif + span_cache->count = remain_count; + } + } else { + size_t cache_idx = span_count - 2; + span_large_cache_t *span_cache = heap->span_large_cache + cache_idx; + span_cache->span[span_cache->count++] = span; + const size_t cache_limit = + (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); + if (span_cache->count == cache_limit) { + const size_t transfer_limit = 2 + (cache_limit >> 2); + const size_t transfer_count = + (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit + ? THREAD_SPAN_LARGE_CACHE_TRANSFER + : transfer_limit); + const size_t remain_count = cache_limit - transfer_count; +#if ENABLE_GLOBAL_CACHE + _rpmalloc_stat_add64(&heap->thread_to_global, + transfer_count * span_count * _memory_span_size); + _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_to_global, + transfer_count); + _rpmalloc_global_cache_insert_spans(span_cache->span + remain_count, + span_count, transfer_count); +#else + for (size_t ispan = 0; ispan < transfer_count; ++ispan) + _rpmalloc_span_unmap(span_cache->span[remain_count + ispan]); +#endif + span_cache->count = remain_count; + } + } +#else + (void)sizeof(heap); + _rpmalloc_span_unmap(span); +#endif +} + +//! Extract the given number of spans from the different cache levels +static span_t *_rpmalloc_heap_thread_cache_extract(heap_t *heap, + size_t span_count) { + span_t *span = 0; +#if ENABLE_THREAD_CACHE + span_cache_t *span_cache; + if (span_count == 1) + span_cache = &heap->span_cache; + else + span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2)); + if (span_cache->count) { + _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_cache); + return span_cache->span[--span_cache->count]; + } +#endif + return span; +} + +static span_t *_rpmalloc_heap_thread_cache_deferred_extract(heap_t *heap, + size_t span_count) { + span_t *span = 0; + if (span_count == 1) { + _rpmalloc_heap_cache_adopt_deferred(heap, &span); + } else { + _rpmalloc_heap_cache_adopt_deferred(heap, 0); + span = _rpmalloc_heap_thread_cache_extract(heap, span_count); + } + return span; +} + +static span_t *_rpmalloc_heap_reserved_extract(heap_t *heap, + size_t span_count) { + if (heap->spans_reserved >= span_count) + return _rpmalloc_span_map(heap, span_count); + return 0; +} + +//! 
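Note: concrete numbers for the thread cache bounds used by
+// _rpmalloc_heap_cache_insert above. For single spans the cache holds at most
+// MAX_THREAD_SPAN_CACHE = 400 spans; when it fills up, THREAD_SPAN_CACHE_TRANSFER
+// = 64 spans are pushed to the global cache (or unmapped if the global cache is
+// disabled) and 336 remain. For large spans of, say, 4 spans each:
+// cache_limit = 100 - (4 >> 1) = 98 and transfer_limit = 2 + (98 >> 2) = 26,
+// so THREAD_SPAN_LARGE_CACHE_TRANSFER = 6 spans are moved and 92 remain.
+//! 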
Extract a span from the global cache +static span_t *_rpmalloc_heap_global_cache_extract(heap_t *heap, + size_t span_count) { +#if ENABLE_GLOBAL_CACHE +#if ENABLE_THREAD_CACHE + span_cache_t *span_cache; + size_t wanted_count; + if (span_count == 1) { + span_cache = &heap->span_cache; + wanted_count = THREAD_SPAN_CACHE_TRANSFER; + } else { + span_cache = (span_cache_t *)(heap->span_large_cache + (span_count - 2)); + wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; + } + span_cache->count = _rpmalloc_global_cache_extract_spans( + span_cache->span, span_count, wanted_count); + if (span_cache->count) { + _rpmalloc_stat_add64(&heap->global_to_thread, + span_count * span_cache->count * _memory_span_size); + _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, + span_cache->count); + return span_cache->span[--span_cache->count]; + } +#else + span_t *span = 0; + size_t count = _rpmalloc_global_cache_extract_spans(&span, span_count, 1); + if (count) { + _rpmalloc_stat_add64(&heap->global_to_thread, + span_count * count * _memory_span_size); + _rpmalloc_stat_add(&heap->span_use[span_count - 1].spans_from_global, + count); + return span; + } +#endif +#endif + (void)sizeof(heap); + (void)sizeof(span_count); + return 0; +} + +static void _rpmalloc_inc_span_statistics(heap_t *heap, size_t span_count, + uint32_t class_idx) { + (void)sizeof(heap); + (void)sizeof(span_count); + (void)sizeof(class_idx); +#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS + uint32_t idx = (uint32_t)span_count - 1; + uint32_t current_count = + (uint32_t)atomic_incr32(&heap->span_use[idx].current); + if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high)) + atomic_store32(&heap->span_use[idx].high, (int32_t)current_count); + _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, + heap->size_class_use[class_idx].spans_peak); +#endif +} + +//! Get a span from one of the cache levels (thread cache, reserved, global +//! cache) or fallback to mapping more memory +static span_t * +_rpmalloc_heap_extract_new_span(heap_t *heap, + heap_size_class_t *heap_size_class, + size_t span_count, uint32_t class_idx) { + span_t *span; +#if ENABLE_THREAD_CACHE + if (heap_size_class && heap_size_class->cache) { + span = heap_size_class->cache; + heap_size_class->cache = + (heap->span_cache.count + ? heap->span_cache.span[--heap->span_cache.count] + : 0); + _rpmalloc_inc_span_statistics(heap, span_count, class_idx); + return span; + } +#endif + (void)sizeof(class_idx); + // Allow 50% overhead to increase cache hits + size_t base_span_count = span_count; + size_t limit_span_count = + (span_count > 2) ? 
(span_count + (span_count >> 1)) : span_count; + if (limit_span_count > LARGE_CLASS_COUNT) + limit_span_count = LARGE_CLASS_COUNT; + do { + span = _rpmalloc_heap_thread_cache_extract(heap, span_count); + if (EXPECTED(span != 0)) { + _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); + _rpmalloc_inc_span_statistics(heap, span_count, class_idx); + return span; + } + span = _rpmalloc_heap_thread_cache_deferred_extract(heap, span_count); + if (EXPECTED(span != 0)) { + _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); + _rpmalloc_inc_span_statistics(heap, span_count, class_idx); + return span; + } + span = _rpmalloc_heap_global_cache_extract(heap, span_count); + if (EXPECTED(span != 0)) { + _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache); + _rpmalloc_inc_span_statistics(heap, span_count, class_idx); + return span; + } + span = _rpmalloc_heap_reserved_extract(heap, span_count); + if (EXPECTED(span != 0)) { + _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved); + _rpmalloc_inc_span_statistics(heap, span_count, class_idx); + return span; + } + ++span_count; + } while (span_count <= limit_span_count); + // Final fallback, map in more virtual memory + span = _rpmalloc_span_map(heap, base_span_count); + _rpmalloc_inc_span_statistics(heap, base_span_count, class_idx); + _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls); + return span; +} + +static void _rpmalloc_heap_initialize(heap_t *heap) { + _rpmalloc_memset_const(heap, 0, sizeof(heap_t)); + // Get a new heap ID + heap->id = 1 + atomic_incr32(&_memory_heap_id); + + // Link in heap in heap ID map + size_t list_idx = (size_t)heap->id % HEAP_ARRAY_SIZE; + heap->next_heap = _memory_heaps[list_idx]; + _memory_heaps[list_idx] = heap; +} + +static void _rpmalloc_heap_orphan(heap_t *heap, int first_class) { + heap->owner_thread = (uintptr_t)-1; +#if RPMALLOC_FIRST_CLASS_HEAPS + heap_t **heap_list = + (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps); +#else + (void)sizeof(first_class); + heap_t **heap_list = &_memory_orphan_heaps; +#endif + heap->next_orphan = *heap_list; + *heap_list = heap; +} + +//! Allocate a new heap from newly mapped memory pages +static heap_t *_rpmalloc_heap_allocate_new(void) { + // Map in pages for a 16 heaps. If page size is greater than required size for + // this, map a page and use first part for heaps and remaining part for spans + // for allocations. 
Adds a lot of complexity, but saves a lot of memory on + // systems where page size > 64 spans (4MiB) + size_t heap_size = sizeof(heap_t); + size_t aligned_heap_size = 16 * ((heap_size + 15) / 16); + size_t request_heap_count = 16; + size_t heap_span_count = ((aligned_heap_size * request_heap_count) + + sizeof(span_t) + _memory_span_size - 1) / + _memory_span_size; + size_t block_size = _memory_span_size * heap_span_count; + size_t span_count = heap_span_count; + span_t *span = 0; + // If there are global reserved spans, use these first + if (_memory_global_reserve_count >= heap_span_count) { + span = _rpmalloc_global_get_reserved_spans(heap_span_count); + } + if (!span) { + if (_memory_page_size > block_size) { + span_count = _memory_page_size / _memory_span_size; + block_size = _memory_page_size; + // If using huge pages, make sure to grab enough heaps to avoid + // reallocating a huge page just to serve new heaps + size_t possible_heap_count = + (block_size - sizeof(span_t)) / aligned_heap_size; + if (possible_heap_count >= (request_heap_count * 16)) + request_heap_count *= 16; + else if (possible_heap_count < request_heap_count) + request_heap_count = possible_heap_count; + heap_span_count = ((aligned_heap_size * request_heap_count) + + sizeof(span_t) + _memory_span_size - 1) / + _memory_span_size; + } + + size_t align_offset = 0; + span = (span_t *)_rpmalloc_mmap(block_size, &align_offset); + if (!span) + return 0; + + // Master span will contain the heaps + _rpmalloc_stat_inc(&_master_spans); + _rpmalloc_span_initialize(span, span_count, heap_span_count, align_offset); + } + + size_t remain_size = _memory_span_size - sizeof(span_t); + heap_t *heap = (heap_t *)pointer_offset(span, sizeof(span_t)); + _rpmalloc_heap_initialize(heap); + + // Put extra heaps as orphans + size_t num_heaps = remain_size / aligned_heap_size; + if (num_heaps < request_heap_count) + num_heaps = request_heap_count; + atomic_store32(&heap->child_count, (int32_t)num_heaps - 1); + heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size); + while (num_heaps > 1) { + _rpmalloc_heap_initialize(extra_heap); + extra_heap->master_heap = heap; + _rpmalloc_heap_orphan(extra_heap, 1); + extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size); + --num_heaps; + } + + if (span_count > heap_span_count) { + // Cap reserved spans + size_t remain_count = span_count - heap_span_count; + size_t reserve_count = + (remain_count > _memory_heap_reserve_count ? _memory_heap_reserve_count + : remain_count); + span_t *remain_span = + (span_t *)pointer_offset(span, heap_span_count * _memory_span_size); + _rpmalloc_heap_set_reserved_spans(heap, span, remain_span, reserve_count); + + if (remain_count > reserve_count) { + // Set to global reserved spans + remain_span = (span_t *)pointer_offset(remain_span, + reserve_count * _memory_span_size); + reserve_count = remain_count - reserve_count; + _rpmalloc_global_set_reserved_spans(span, remain_span, reserve_count); + } + } + + return heap; +} + +static heap_t *_rpmalloc_heap_extract_orphan(heap_t **heap_list) { + heap_t *heap = *heap_list; + *heap_list = (heap ? heap->next_orphan : 0); + return heap; +} + +//! 
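Note: taken together, the span acquisition path in _rpmalloc_heap_extract_new_span
+// above tries the cache levels in a fixed order before mapping new memory:
+//   1. the per size class cache of a single fully free span
+//   2. the heap thread cache (span_cache / span_large_cache)
+//   3. adoption of the deferred free list (spans freed by other threads)
+//   4. the global cache (when ENABLE_GLOBAL_CACHE is enabled)
+//   5. the heap's reserved, already mapped spans
+//   6. _rpmalloc_span_map, i.e. mapping new virtual memory
+// For requests larger than two spans, steps 2 through 5 are also retried with
+// up to 50% more spans than requested to improve the cache hit rate.
+//! 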
Allocate a new heap, potentially reusing a previously orphaned heap +static heap_t *_rpmalloc_heap_allocate(int first_class) { + heap_t *heap = 0; + while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) + _rpmalloc_spin(); + if (first_class == 0) + heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps); +#if RPMALLOC_FIRST_CLASS_HEAPS + if (!heap) + heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps); +#endif + if (!heap) + heap = _rpmalloc_heap_allocate_new(); + atomic_store32_release(&_memory_global_lock, 0); + if (heap) + _rpmalloc_heap_cache_adopt_deferred(heap, 0); + return heap; +} + +static void _rpmalloc_heap_release(void *heapptr, int first_class, + int release_cache) { + heap_t *heap = (heap_t *)heapptr; + if (!heap) + return; + // Release thread cache spans back to global cache + _rpmalloc_heap_cache_adopt_deferred(heap, 0); + if (release_cache || heap->finalize) { +#if ENABLE_THREAD_CACHE + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + span_cache_t *span_cache; + if (!iclass) + span_cache = &heap->span_cache; + else + span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); + if (!span_cache->count) + continue; +#if ENABLE_GLOBAL_CACHE + if (heap->finalize) { + for (size_t ispan = 0; ispan < span_cache->count; ++ispan) + _rpmalloc_span_unmap(span_cache->span[ispan]); + } else { + _rpmalloc_stat_add64(&heap->thread_to_global, span_cache->count * + (iclass + 1) * + _memory_span_size); + _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, + span_cache->count); + _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, + span_cache->count); + } +#else + for (size_t ispan = 0; ispan < span_cache->count; ++ispan) + _rpmalloc_span_unmap(span_cache->span[ispan]); +#endif + span_cache->count = 0; + } +#endif + } + + if (get_thread_heap_raw() == heap) + set_thread_heap(0); + +#if ENABLE_STATISTICS + atomic_decr32(&_memory_active_heaps); + rpmalloc_assert(atomic_load32(&_memory_active_heaps) >= 0, + "Still active heaps during finalization"); +#endif + + // If we are forcibly terminating with _exit the state of the + // lock atomic is unknown and it's best to just go ahead and exit + if (get_thread_id() != _rpmalloc_main_thread_id) { + while (!atomic_cas32_acquire(&_memory_global_lock, 1, 0)) + _rpmalloc_spin(); + } + _rpmalloc_heap_orphan(heap, first_class); + atomic_store32_release(&_memory_global_lock, 0); +} + +static void _rpmalloc_heap_release_raw(void *heapptr, int release_cache) { + _rpmalloc_heap_release(heapptr, 0, release_cache); +} + +static void _rpmalloc_heap_release_raw_fc(void *heapptr) { + _rpmalloc_heap_release_raw(heapptr, 1); +} + +static void _rpmalloc_heap_finalize(heap_t *heap) { + if (heap->spans_reserved) { + span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved); + _rpmalloc_span_unmap(span); + heap->spans_reserved = 0; + } + + _rpmalloc_heap_cache_adopt_deferred(heap, 0); + + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + if (heap->size_class[iclass].cache) + _rpmalloc_span_unmap(heap->size_class[iclass].cache); + heap->size_class[iclass].cache = 0; + span_t *span = heap->size_class[iclass].partial_span; + while (span) { + span_t *next = span->next; + _rpmalloc_span_finalize(heap, iclass, span, + &heap->size_class[iclass].partial_span); + span = next; + } + // If class still has a free list it must be a full span + if (heap->size_class[iclass].free_list) { + span_t *class_span = + (span_t *)((uintptr_t)heap->size_class[iclass].free_list & + _memory_span_mask); 
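+      // Masking the free list pointer recovers the span that owns those
+      // blocks; it is currently accounted as a full span, so the counter is
+      // adjusted before handing the span to _rpmalloc_span_finalize below.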
+ span_t **list = 0; +#if RPMALLOC_FIRST_CLASS_HEAPS + list = &heap->full_span[iclass]; +#endif + --heap->full_span_count; + if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) { + if (list) + _rpmalloc_span_double_link_list_remove(list, class_span); + _rpmalloc_span_double_link_list_add( + &heap->size_class[iclass].partial_span, class_span); + } + } + } + +#if ENABLE_THREAD_CACHE + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + span_cache_t *span_cache; + if (!iclass) + span_cache = &heap->span_cache; + else + span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); + for (size_t ispan = 0; ispan < span_cache->count; ++ispan) + _rpmalloc_span_unmap(span_cache->span[ispan]); + span_cache->count = 0; + } +#endif + rpmalloc_assert(!atomic_load_ptr(&heap->span_free_deferred), + "Heaps still active during finalization"); +} + +//////////// +/// +/// Allocation entry points +/// +////// + +//! Pop first block from a free list +static void *free_list_pop(void **list) { + void *block = *list; + *list = *((void **)block); + return block; +} + +//! Allocate a small/medium sized memory block from the given heap +static void *_rpmalloc_allocate_from_heap_fallback( + heap_t *heap, heap_size_class_t *heap_size_class, uint32_t class_idx) { + span_t *span = heap_size_class->partial_span; + rpmalloc_assume(heap != 0); + if (EXPECTED(span != 0)) { + rpmalloc_assert(span->block_count == + _memory_size_class[span->size_class].block_count, + "Span block count corrupted"); + rpmalloc_assert(!_rpmalloc_span_is_fully_utilized(span), + "Internal failure"); + void *block; + if (span->free_list) { + // Span local free list is not empty, swap to size class free list + block = free_list_pop(&span->free_list); + heap_size_class->free_list = span->free_list; + span->free_list = 0; + } else { + // If the span did not fully initialize free list, link up another page + // worth of blocks + void *block_start = pointer_offset( + span, SPAN_HEADER_SIZE + + ((size_t)span->free_list_limit * span->block_size)); + span->free_list_limit += free_list_partial_init( + &heap_size_class->free_list, &block, + (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)), + block_start, span->block_count - span->free_list_limit, + span->block_size); + } + rpmalloc_assert(span->free_list_limit <= span->block_count, + "Span block count corrupted"); + span->used_count = span->free_list_limit; + + // Swap in deferred free list if present + if (atomic_load_ptr(&span->free_list_deferred)) + _rpmalloc_span_extract_free_list_deferred(span); + + // If span is still not fully utilized keep it in partial list and early + // return block + if (!_rpmalloc_span_is_fully_utilized(span)) + return block; + + // The span is fully utilized, unlink from partial list and add to fully + // utilized list + _rpmalloc_span_double_link_list_pop_head(&heap_size_class->partial_span, + span); +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span); +#endif + ++heap->full_span_count; + return block; + } + + // Find a span in one of the cache levels + span = _rpmalloc_heap_extract_new_span(heap, heap_size_class, 1, class_idx); + if (EXPECTED(span != 0)) { + // Mark span as owned by this heap and set base data, return first block + return _rpmalloc_span_initialize_new(heap, heap_size_class, span, + class_idx); + } + + return 0; +} + +//! 
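+// The free lists popped here are intrusive: each free block stores the
+// pointer to the next free block in its own first word, so a pop is two
+// loads and one store with no side metadata. A minimal sketch of what
+// free_list_pop() above amounts to:
+//
+//   void *head = heap_size_class->free_list;     // block handed to the caller
+//   heap_size_class->free_list = *(void **)head; // next free block, or 0
+//
+// Callers (see _rpmalloc_allocate_small / _rpmalloc_allocate_medium below)
+// only fall into the partial-span fallback above once this per-class list
+// is empty.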
Allocate a small sized memory block from the given heap +static void *_rpmalloc_allocate_small(heap_t *heap, size_t size) { + rpmalloc_assert(heap, "No thread heap"); + // Small sizes have unique size classes + const uint32_t class_idx = + (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT); + heap_size_class_t *heap_size_class = heap->size_class + class_idx; + _rpmalloc_stat_inc_alloc(heap, class_idx); + if (EXPECTED(heap_size_class->free_list != 0)) + return free_list_pop(&heap_size_class->free_list); + return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, + class_idx); +} + +//! Allocate a medium sized memory block from the given heap +static void *_rpmalloc_allocate_medium(heap_t *heap, size_t size) { + rpmalloc_assert(heap, "No thread heap"); + // Calculate the size class index and do a dependent lookup of the final class + // index (in case of merged classes) + const uint32_t base_idx = + (uint32_t)(SMALL_CLASS_COUNT + + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT)); + const uint32_t class_idx = _memory_size_class[base_idx].class_idx; + heap_size_class_t *heap_size_class = heap->size_class + class_idx; + _rpmalloc_stat_inc_alloc(heap, class_idx); + if (EXPECTED(heap_size_class->free_list != 0)) + return free_list_pop(&heap_size_class->free_list); + return _rpmalloc_allocate_from_heap_fallback(heap, heap_size_class, + class_idx); +} + +//! Allocate a large sized memory block from the given heap +static void *_rpmalloc_allocate_large(heap_t *heap, size_t size) { + rpmalloc_assert(heap, "No thread heap"); + // Calculate number of needed max sized spans (including header) + // Since this function is never called if size > LARGE_SIZE_LIMIT + // the span_count is guaranteed to be <= LARGE_CLASS_COUNT + size += SPAN_HEADER_SIZE; + size_t span_count = size >> _memory_span_size_shift; + if (size & (_memory_span_size - 1)) + ++span_count; + + // Find a span in one of the cache levels + span_t *span = + _rpmalloc_heap_extract_new_span(heap, 0, span_count, SIZE_CLASS_LARGE); + if (!span) + return span; + + // Mark span as owned by this heap and set base data + rpmalloc_assert(span->span_count >= span_count, "Internal failure"); + span->size_class = SIZE_CLASS_LARGE; + span->heap = heap; + +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); +#endif + ++heap->full_span_count; + + return pointer_offset(span, SPAN_HEADER_SIZE); +} + +//! Allocate a huge block by mapping memory pages directly +static void *_rpmalloc_allocate_huge(heap_t *heap, size_t size) { + rpmalloc_assert(heap, "No thread heap"); + _rpmalloc_heap_cache_adopt_deferred(heap, 0); + size += SPAN_HEADER_SIZE; + size_t num_pages = size >> _memory_page_size_shift; + if (size & (_memory_page_size - 1)) + ++num_pages; + size_t align_offset = 0; + span_t *span = + (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset); + if (!span) + return span; + + // Store page count in span_count + span->size_class = SIZE_CLASS_HUGE; + span->span_count = (uint32_t)num_pages; + span->align_offset = (uint32_t)align_offset; + span->heap = heap; + _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); + +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); +#endif + ++heap->full_span_count; + + return pointer_offset(span, SPAN_HEADER_SIZE); +} + +//! 
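+// A worked example for the size class math above, assuming
+// SMALL_GRANULARITY == 16 (and thus SMALL_GRANULARITY_SHIFT == 4; the actual
+// values are defined earlier in this file):
+//   size = 24 -> class_idx = (24 + 15) >> 4 = 2 -> served from 32-byte blocks
+//   size = 16 -> class_idx = (16 + 15) >> 4 = 1 -> served exactly
+// Medium sizes instead go through the merged-class lookup
+// _memory_size_class[base_idx].class_idx, and anything above
+// LARGE_SIZE_LIMIT ends up in the large/huge span allocators above.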
Allocate a block of the given size +static void *_rpmalloc_allocate(heap_t *heap, size_t size) { + _rpmalloc_stat_add64(&_allocation_counter, 1); + if (EXPECTED(size <= SMALL_SIZE_LIMIT)) + return _rpmalloc_allocate_small(heap, size); + else if (size <= _memory_medium_size_limit) + return _rpmalloc_allocate_medium(heap, size); + else if (size <= LARGE_SIZE_LIMIT) + return _rpmalloc_allocate_large(heap, size); + return _rpmalloc_allocate_huge(heap, size); +} + +static void *_rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, + size_t size) { + if (alignment <= SMALL_GRANULARITY) + return _rpmalloc_allocate(heap, size); + +#if ENABLE_VALIDATE_ARGS + if ((size + alignment) < size) { + errno = EINVAL; + return 0; + } + if (alignment & (alignment - 1)) { + errno = EINVAL; + return 0; + } +#endif + + if ((alignment <= SPAN_HEADER_SIZE) && + ((size + SPAN_HEADER_SIZE) < _memory_medium_size_limit)) { + // If alignment is less or equal to span header size (which is power of + // two), and size aligned to span header size multiples is less than size + + // alignment, then use natural alignment of blocks to provide alignment + size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & + ~(uintptr_t)(SPAN_HEADER_SIZE - 1) + : SPAN_HEADER_SIZE; + rpmalloc_assert(!(multiple_size % SPAN_HEADER_SIZE), + "Failed alignment calculation"); + if (multiple_size <= (size + alignment)) + return _rpmalloc_allocate(heap, multiple_size); + } + + void *ptr = 0; + size_t align_mask = alignment - 1; + if (alignment <= _memory_page_size) { + ptr = _rpmalloc_allocate(heap, size + alignment); + if ((uintptr_t)ptr & align_mask) { + ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); + // Mark as having aligned blocks + span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask); + span->flags |= SPAN_FLAG_ALIGNED_BLOCKS; + } + return ptr; + } + + // Fallback to mapping new pages for this request. Since pointers passed + // to rpfree must be able to reach the start of the span by bitmasking of + // the address with the span size, the returned aligned pointer from this + // function must be with a span size of the start of the mapped area. + // In worst case this requires us to loop and map pages until we get a + // suitable memory address. 
It also means we can never align to span size + // or greater, since the span header will push alignment more than one + // span size away from span start (thus causing pointer mask to give us + // an invalid span start on free) + if (alignment & align_mask) { + errno = EINVAL; + return 0; + } + if (alignment >= _memory_span_size) { + errno = EINVAL; + return 0; + } + + size_t extra_pages = alignment / _memory_page_size; + + // Since each span has a header, we will at least need one extra memory page + size_t num_pages = 1 + (size / _memory_page_size); + if (size & (_memory_page_size - 1)) + ++num_pages; + + if (extra_pages > num_pages) + num_pages = 1 + extra_pages; + + size_t original_pages = num_pages; + size_t limit_pages = (_memory_span_size / _memory_page_size) * 2; + if (limit_pages < (original_pages * 2)) + limit_pages = original_pages * 2; + + size_t mapped_size, align_offset; + span_t *span; + +retry: + align_offset = 0; + mapped_size = num_pages * _memory_page_size; + + span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset); + if (!span) { + errno = ENOMEM; + return 0; + } + ptr = pointer_offset(span, SPAN_HEADER_SIZE); + + if ((uintptr_t)ptr & align_mask) + ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment); + + if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) || + (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) || + (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) { + _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size); + ++num_pages; + if (num_pages > limit_pages) { + errno = EINVAL; + return 0; + } + goto retry; + } + + // Store page count in span_count + span->size_class = SIZE_CLASS_HUGE; + span->span_count = (uint32_t)num_pages; + span->align_offset = (uint32_t)align_offset; + span->heap = heap; + _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak); + +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span); +#endif + ++heap->full_span_count; + + _rpmalloc_stat_add64(&_allocation_counter, 1); + + return ptr; +} + +//////////// +/// +/// Deallocation entry points +/// +////// + +//! Deallocate the given small/medium memory block in the current thread local +//! 
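+// Summary of the aligned allocation strategy above, in the order the
+// branches are tried:
+//   1. alignment <= SMALL_GRANULARITY  -> plain _rpmalloc_allocate
+//   2. alignment <= SPAN_HEADER_SIZE   -> rely on the natural alignment of
+//                                         block-size multiples (if size fits)
+//   3. alignment <= _memory_page_size  -> over-allocate by `alignment` and
+//                                         round the returned pointer up
+//   4. otherwise                       -> map dedicated pages, retrying with
+//                                         more pages until the aligned pointer
+//                                         fits in the mapping and the span
+//                                         mask maps it back to the span start
+// In the last path an alignment that is not a power of two, or that is
+// >= _memory_span_size, fails with errno = EINVAL, since the span header
+// would push the block past the point where the span mask used on free can
+// recover the span start.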
heap +static void _rpmalloc_deallocate_direct_small_or_medium(span_t *span, + void *block) { + heap_t *heap = span->heap; + rpmalloc_assert(heap->owner_thread == get_thread_id() || + !heap->owner_thread || heap->finalize, + "Internal failure"); + // Add block to free list + if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) { + span->used_count = span->block_count; +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], + span); +#endif + _rpmalloc_span_double_link_list_add( + &heap->size_class[span->size_class].partial_span, span); + --heap->full_span_count; + } + *((void **)block) = span->free_list; + --span->used_count; + span->free_list = block; + if (UNEXPECTED(span->used_count == span->list_size)) { + // If there are no used blocks it is guaranteed that no other external + // thread is accessing the span + if (span->used_count) { + // Make sure we have synchronized the deferred list and list size by using + // acquire semantics and guarantee that no external thread is accessing + // span concurrently + void *free_list; + do { + free_list = atomic_exchange_ptr_acquire(&span->free_list_deferred, + INVALID_POINTER); + } while (free_list == INVALID_POINTER); + atomic_store_ptr_release(&span->free_list_deferred, free_list); + } + _rpmalloc_span_double_link_list_remove( + &heap->size_class[span->size_class].partial_span, span); + _rpmalloc_span_release_to_cache(heap, span); + } +} + +static void _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) { + if (span->size_class != SIZE_CLASS_HUGE) + _rpmalloc_stat_inc(&heap->span_use[span->span_count - 1].spans_deferred); + // This list does not need ABA protection, no mutable side state + do { + span->free_list = (void *)atomic_load_ptr(&heap->span_free_deferred); + } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list)); +} + +//! Put the block in the deferred free list of the owning span +static void _rpmalloc_deallocate_defer_small_or_medium(span_t *span, + void *block) { + // The memory ordering here is a bit tricky, to avoid having to ABA protect + // the deferred free list to avoid desynchronization of list and list size + // we need to have acquire semantics on successful CAS of the pointer to + // guarantee the list_size variable validity + release semantics on pointer + // store + void *free_list; + do { + free_list = + atomic_exchange_ptr_acquire(&span->free_list_deferred, INVALID_POINTER); + } while (free_list == INVALID_POINTER); + *((void **)block) = free_list; + uint32_t free_count = ++span->list_size; + int all_deferred_free = (free_count == span->block_count); + atomic_store_ptr_release(&span->free_list_deferred, block); + if (all_deferred_free) { + // Span was completely freed by this block. Due to the INVALID_POINTER spin + // lock no other thread can reach this state simultaneously on this span. 
+ // Safe to move to owner heap deferred cache + _rpmalloc_deallocate_defer_free_span(span->heap, span); + } +} + +static void _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) { + _rpmalloc_stat_inc_free(span->heap, span->size_class); + if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) { + // Realign pointer to block start + void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); + uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); + p = pointer_offset(p, -(int32_t)(block_offset % span->block_size)); + } + // Check if block belongs to this heap or if deallocation should be deferred +#if RPMALLOC_FIRST_CLASS_HEAPS + int defer = + (span->heap->owner_thread && + (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); +#else + int defer = + ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); +#endif + if (!defer) + _rpmalloc_deallocate_direct_small_or_medium(span, p); + else + _rpmalloc_deallocate_defer_small_or_medium(span, p); +} + +//! Deallocate the given large memory block to the current heap +static void _rpmalloc_deallocate_large(span_t *span) { + rpmalloc_assert(span->size_class == SIZE_CLASS_LARGE, "Bad span size class"); + rpmalloc_assert(!(span->flags & SPAN_FLAG_MASTER) || + !(span->flags & SPAN_FLAG_SUBSPAN), + "Span flag corrupted"); + rpmalloc_assert((span->flags & SPAN_FLAG_MASTER) || + (span->flags & SPAN_FLAG_SUBSPAN), + "Span flag corrupted"); + // We must always defer (unless finalizing) if from another heap since we + // cannot touch the list or counters of another heap +#if RPMALLOC_FIRST_CLASS_HEAPS + int defer = + (span->heap->owner_thread && + (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); +#else + int defer = + ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); +#endif + if (defer) { + _rpmalloc_deallocate_defer_free_span(span->heap, span); + return; + } + rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); + --span->heap->full_span_count; +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); +#endif +#if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS + // Decrease counter + size_t idx = span->span_count - 1; + atomic_decr32(&span->heap->span_use[idx].current); +#endif + heap_t *heap = span->heap; + rpmalloc_assert(heap, "No thread heap"); +#if ENABLE_THREAD_CACHE + const int set_as_reserved = + ((span->span_count > 1) && (heap->span_cache.count == 0) && + !heap->finalize && !heap->spans_reserved); +#else + const int set_as_reserved = + ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved); +#endif + if (set_as_reserved) { + heap->span_reserve = span; + heap->spans_reserved = span->span_count; + if (span->flags & SPAN_FLAG_MASTER) { + heap->span_reserve_master = span; + } else { // SPAN_FLAG_SUBSPAN + span_t *master = (span_t *)pointer_offset( + span, + -(intptr_t)((size_t)span->offset_from_master * _memory_span_size)); + heap->span_reserve_master = master; + rpmalloc_assert(master->flags & SPAN_FLAG_MASTER, "Span flag corrupted"); + rpmalloc_assert(atomic_load32(&master->remaining_spans) >= + (int32_t)span->span_count, + "Master span count corrupted"); + } + _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved); + } else { + // Insert into cache list + _rpmalloc_heap_cache_insert(heap, span); + } +} + +//! 
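+// Cross-thread frees rely on the `defer` checks above. For example, if
+// thread A allocates a block and thread B later passes it to rpfree(),
+// span->heap->owner_thread no longer matches get_thread_id() on B, so the
+// block is pushed onto the span's atomic free_list_deferred (or, for large
+// and huge blocks, the whole span onto the owning heap's span_free_deferred
+// list) instead of touching the owner's lists directly. The owning thread
+// later reclaims this memory through _rpmalloc_heap_cache_adopt_deferred().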
Deallocate the given huge span +static void _rpmalloc_deallocate_huge(span_t *span) { + rpmalloc_assert(span->heap, "No span heap"); +#if RPMALLOC_FIRST_CLASS_HEAPS + int defer = + (span->heap->owner_thread && + (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); +#else + int defer = + ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize); +#endif + if (defer) { + _rpmalloc_deallocate_defer_free_span(span->heap, span); + return; + } + rpmalloc_assert(span->heap->full_span_count, "Heap span counter corrupted"); + --span->heap->full_span_count; +#if RPMALLOC_FIRST_CLASS_HEAPS + _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span); +#endif + + // Oversized allocation, page count is stored in span_count + size_t num_pages = span->span_count; + _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, + num_pages * _memory_page_size); + _rpmalloc_stat_sub(&_huge_pages_current, num_pages); +} + +//! Deallocate the given block +static void _rpmalloc_deallocate(void *p) { + _rpmalloc_stat_add64(&_deallocation_counter, 1); + // Grab the span (always at start of span, using span alignment) + span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask); + if (UNEXPECTED(!span)) + return; + if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) + _rpmalloc_deallocate_small_or_medium(span, p); + else if (span->size_class == SIZE_CLASS_LARGE) + _rpmalloc_deallocate_large(span); + else + _rpmalloc_deallocate_huge(span); +} + +//////////// +/// +/// Reallocation entry points +/// +////// + +static size_t _rpmalloc_usable_size(void *p); + +//! Reallocate the given block to the given size +static void *_rpmalloc_reallocate(heap_t *heap, void *p, size_t size, + size_t oldsize, unsigned int flags) { + if (p) { + // Grab the span using guaranteed span alignment + span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask); + if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) { + // Small/medium sized block + rpmalloc_assert(span->span_count == 1, "Span counter corrupted"); + void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); + uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start); + uint32_t block_idx = block_offset / span->block_size; + void *block = + pointer_offset(blocks_start, (size_t)block_idx * span->block_size); + if (!oldsize) + oldsize = + (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block)); + if ((size_t)span->block_size >= size) { + // Still fits in block, never mind trying to save memory, but preserve + // data if alignment changed + if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) + memmove(block, p, oldsize); + return block; + } + } else if (span->size_class == SIZE_CLASS_LARGE) { + // Large block + size_t total_size = size + SPAN_HEADER_SIZE; + size_t num_spans = total_size >> _memory_span_size_shift; + if (total_size & (_memory_span_mask - 1)) + ++num_spans; + size_t current_spans = span->span_count; + void *block = pointer_offset(span, SPAN_HEADER_SIZE); + if (!oldsize) + oldsize = (current_spans * _memory_span_size) - + (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; + if ((current_spans >= num_spans) && (total_size >= (oldsize / 2))) { + // Still fits in block, never mind trying to save memory, but preserve + // data if alignment changed + if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) + memmove(block, p, oldsize); + return block; + } + } else { + // Oversized block + size_t total_size = size + SPAN_HEADER_SIZE; + size_t num_pages = total_size >> _memory_page_size_shift; + if 
(total_size & (_memory_page_size - 1)) + ++num_pages; + // Page count is stored in span_count + size_t current_pages = span->span_count; + void *block = pointer_offset(span, SPAN_HEADER_SIZE); + if (!oldsize) + oldsize = (current_pages * _memory_page_size) - + (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE; + if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) { + // Still fits in block, never mind trying to save memory, but preserve + // data if alignment changed + if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE)) + memmove(block, p, oldsize); + return block; + } + } + } else { + oldsize = 0; + } + + if (!!(flags & RPMALLOC_GROW_OR_FAIL)) + return 0; + + // Size is greater than block size, need to allocate a new block and + // deallocate the old Avoid hysteresis by overallocating if increase is small + // (below 37%) + size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3); + size_t new_size = + (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size); + void *block = _rpmalloc_allocate(heap, new_size); + if (p && block) { + if (!(flags & RPMALLOC_NO_PRESERVE)) + memcpy(block, p, oldsize < new_size ? oldsize : new_size); + _rpmalloc_deallocate(p); + } + + return block; +} + +static void *_rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, + size_t alignment, size_t size, + size_t oldsize, unsigned int flags) { + if (alignment <= SMALL_GRANULARITY) + return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags); + + int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL); + size_t usablesize = (ptr ? _rpmalloc_usable_size(ptr) : 0); + if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) { + if (no_alloc || (size >= (usablesize / 2))) + return ptr; + } + // Aligned alloc marks span as having aligned blocks + void *block = + (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0); + if (EXPECTED(block != 0)) { + if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) { + if (!oldsize) + oldsize = usablesize; + memcpy(block, ptr, oldsize < size ? oldsize : size); + } + _rpmalloc_deallocate(ptr); + } + return block; +} + +//////////// +/// +/// Initialization, finalization and utility +/// +////// + +//! Get the usable size of the given block +static size_t _rpmalloc_usable_size(void *p) { + // Grab the span using guaranteed span alignment + span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask); + if (span->size_class < SIZE_CLASS_COUNT) { + // Small/medium block + void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE); + return span->block_size - + ((size_t)pointer_diff(p, blocks_start) % span->block_size); + } + if (span->size_class == SIZE_CLASS_LARGE) { + // Large block + size_t current_spans = span->span_count; + return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span); + } + // Oversized block, page count is stored in span_count + size_t current_pages = span->span_count; + return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span); +} + +//! 
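+// A worked example for the growth policy above: lower_bound is
+// oldsize + oldsize/4 + oldsize/8, i.e. 37.5% above the old size. With
+// oldsize = 1024 that is 1024 + 256 + 128 = 1408 bytes, so growing a
+// 1024-byte allocation to 1100 bytes actually allocates 1408 bytes, while
+// growing it to 2000 bytes allocates the requested 2000. When
+// RPMALLOC_GROW_OR_FAIL is set, this path is skipped and 0 is returned
+// instead as soon as the block cannot be resized in place.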
Adjust and optimize the size class properties for the given class +static void _rpmalloc_adjust_size_class(size_t iclass) { + size_t block_size = _memory_size_class[iclass].block_size; + size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size; + + _memory_size_class[iclass].block_count = (uint16_t)block_count; + _memory_size_class[iclass].class_idx = (uint16_t)iclass; + + // Check if previous size classes can be merged + if (iclass >= SMALL_CLASS_COUNT) { + size_t prevclass = iclass; + while (prevclass > 0) { + --prevclass; + // A class can be merged if number of pages and number of blocks are equal + if (_memory_size_class[prevclass].block_count == + _memory_size_class[iclass].block_count) + _rpmalloc_memcpy_const(_memory_size_class + prevclass, + _memory_size_class + iclass, + sizeof(_memory_size_class[iclass])); + else + break; + } + } +} + +//! Initialize the allocator and setup global data +extern inline int rpmalloc_initialize(void) { + if (_rpmalloc_initialized) { + rpmalloc_thread_initialize(); + return 0; + } + return rpmalloc_initialize_config(0); +} + +int rpmalloc_initialize_config(const rpmalloc_config_t *config) { + if (_rpmalloc_initialized) { + rpmalloc_thread_initialize(); + return 0; + } + _rpmalloc_initialized = 1; + + if (config) + memcpy(&_memory_config, config, sizeof(rpmalloc_config_t)); + else + _rpmalloc_memset_const(&_memory_config, 0, sizeof(rpmalloc_config_t)); + + if (!_memory_config.memory_map || !_memory_config.memory_unmap) { + _memory_config.memory_map = _rpmalloc_mmap_os; + _memory_config.memory_unmap = _rpmalloc_unmap_os; + } + +#if PLATFORM_WINDOWS + SYSTEM_INFO system_info; + memset(&system_info, 0, sizeof(system_info)); + GetSystemInfo(&system_info); + _memory_map_granularity = system_info.dwAllocationGranularity; +#else + _memory_map_granularity = (size_t)sysconf(_SC_PAGESIZE); +#endif + +#if RPMALLOC_CONFIGURABLE + _memory_page_size = _memory_config.page_size; +#else + _memory_page_size = 0; +#endif + _memory_huge_pages = 0; + if (!_memory_page_size) { +#if PLATFORM_WINDOWS + _memory_page_size = system_info.dwPageSize; +#else + _memory_page_size = _memory_map_granularity; + if (_memory_config.enable_huge_pages) { +#if defined(__linux__) + size_t huge_page_size = 0; + FILE *meminfo = fopen("/proc/meminfo", "r"); + if (meminfo) { + char line[128]; + while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) { + line[sizeof(line) - 1] = 0; + if (strstr(line, "Hugepagesize:")) + huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024; + } + fclose(meminfo); + } + if (huge_page_size) { + _memory_huge_pages = 1; + _memory_page_size = huge_page_size; + _memory_map_granularity = huge_page_size; + } +#elif defined(__FreeBSD__) + int rc; + size_t sz = sizeof(rc); + + if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && + rc == 1) { + static size_t defsize = 2 * 1024 * 1024; + int nsize = 0; + size_t sizes[4] = {0}; + _memory_huge_pages = 1; + _memory_page_size = defsize; + if ((nsize = getpagesizes(sizes, 4)) >= 2) { + nsize--; + for (size_t csize = sizes[nsize]; nsize >= 0 && csize; + --nsize, csize = sizes[nsize]) { + //! Unlikely, but as a precaution.. 
+ rpmalloc_assert(!(csize & (csize - 1)) && !(csize % 1024), + "Invalid page size"); + if (defsize < csize) { + _memory_page_size = csize; + break; + } + } + } + _memory_map_granularity = _memory_page_size; + } +#elif defined(__APPLE__) || defined(__NetBSD__) + _memory_huge_pages = 1; + _memory_page_size = 2 * 1024 * 1024; + _memory_map_granularity = _memory_page_size; +#endif + } +#endif + } else { + if (_memory_config.enable_huge_pages) + _memory_huge_pages = 1; + } + +#if PLATFORM_WINDOWS + if (_memory_config.enable_huge_pages) { + HANDLE token = 0; + size_t large_page_minimum = GetLargePageMinimum(); + if (large_page_minimum) + OpenProcessToken(GetCurrentProcess(), + TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (token) { + LUID luid; + if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) { + TOKEN_PRIVILEGES token_privileges; + memset(&token_privileges, 0, sizeof(token_privileges)); + token_privileges.PrivilegeCount = 1; + token_privileges.Privileges[0].Luid = luid; + token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) { + if (GetLastError() == ERROR_SUCCESS) + _memory_huge_pages = 1; + } + } + CloseHandle(token); + } + if (_memory_huge_pages) { + if (large_page_minimum > _memory_page_size) + _memory_page_size = large_page_minimum; + if (large_page_minimum > _memory_map_granularity) + _memory_map_granularity = large_page_minimum; + } + } +#endif + + size_t min_span_size = 256; + size_t max_page_size; +#if UINTPTR_MAX > 0xFFFFFFFF + max_page_size = 4096ULL * 1024ULL * 1024ULL; +#else + max_page_size = 4 * 1024 * 1024; +#endif + if (_memory_page_size < min_span_size) + _memory_page_size = min_span_size; + if (_memory_page_size > max_page_size) + _memory_page_size = max_page_size; + _memory_page_size_shift = 0; + size_t page_size_bit = _memory_page_size; + while (page_size_bit != 1) { + ++_memory_page_size_shift; + page_size_bit >>= 1; + } + _memory_page_size = ((size_t)1 << _memory_page_size_shift); + +#if RPMALLOC_CONFIGURABLE + if (!_memory_config.span_size) { + _memory_span_size = _memory_default_span_size; + _memory_span_size_shift = _memory_default_span_size_shift; + _memory_span_mask = _memory_default_span_mask; + } else { + size_t span_size = _memory_config.span_size; + if (span_size > (256 * 1024)) + span_size = (256 * 1024); + _memory_span_size = 4096; + _memory_span_size_shift = 12; + while (_memory_span_size < span_size) { + _memory_span_size <<= 1; + ++_memory_span_size_shift; + } + _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1); + } +#endif + + _memory_span_map_count = + (_memory_config.span_map_count ? _memory_config.span_map_count + : DEFAULT_SPAN_MAP_COUNT); + if ((_memory_span_size * _memory_span_map_count) < _memory_page_size) + _memory_span_map_count = (_memory_page_size / _memory_span_size); + if ((_memory_page_size >= _memory_span_size) && + ((_memory_span_map_count * _memory_span_size) % _memory_page_size)) + _memory_span_map_count = (_memory_page_size / _memory_span_size); + _memory_heap_reserve_count = (_memory_span_map_count > DEFAULT_SPAN_MAP_COUNT) + ? 
DEFAULT_SPAN_MAP_COUNT + : _memory_span_map_count; + + _memory_config.page_size = _memory_page_size; + _memory_config.span_size = _memory_span_size; + _memory_config.span_map_count = _memory_span_map_count; + _memory_config.enable_huge_pages = _memory_huge_pages; + +#if ((defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD) || \ + defined(__TINYC__) + if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw_fc)) + return -1; +#endif +#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) + fls_key = FlsAlloc(&_rpmalloc_thread_destructor); +#endif + + // Setup all small and medium size classes + size_t iclass = 0; + _memory_size_class[iclass].block_size = SMALL_GRANULARITY; + _rpmalloc_adjust_size_class(iclass); + for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) { + size_t size = iclass * SMALL_GRANULARITY; + _memory_size_class[iclass].block_size = (uint32_t)size; + _rpmalloc_adjust_size_class(iclass); + } + // At least two blocks per span, then fall back to large allocations + _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1; + if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT) + _memory_medium_size_limit = MEDIUM_SIZE_LIMIT; + for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) { + size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY); + if (size > _memory_medium_size_limit) { + _memory_medium_size_limit = + SMALL_SIZE_LIMIT + (iclass * MEDIUM_GRANULARITY); + break; + } + _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size; + _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass); + } + + _memory_orphan_heaps = 0; +#if RPMALLOC_FIRST_CLASS_HEAPS + _memory_first_class_orphan_heaps = 0; +#endif +#if ENABLE_STATISTICS + atomic_store32(&_memory_active_heaps, 0); + atomic_store32(&_mapped_pages, 0); + _mapped_pages_peak = 0; + atomic_store32(&_master_spans, 0); + atomic_store32(&_mapped_total, 0); + atomic_store32(&_unmapped_total, 0); + atomic_store32(&_mapped_pages_os, 0); + atomic_store32(&_huge_pages_current, 0); + _huge_pages_peak = 0; +#endif + memset(_memory_heaps, 0, sizeof(_memory_heaps)); + atomic_store32_release(&_memory_global_lock, 0); + + rpmalloc_linker_reference(); + + // Initialize this thread + rpmalloc_thread_initialize(); + return 0; +} + +//! 
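+// A minimal usage sketch for the initialization path above (the calls are
+// the ones declared in rpmalloc.h; the error handling is an assumption):
+//
+//   rpmalloc_config_t cfg;
+//   memset(&cfg, 0, sizeof(cfg));
+//   cfg.enable_huge_pages = 1;             // auto-detect huge page support
+//   if (rpmalloc_initialize_config(&cfg))  // returns 0 on success
+//     abort();
+//   void *p = rpmalloc(128);
+//   rpfree(p);
+//   rpmalloc_finalize();
+//
+// Note that the page_size and span_size fields only take effect when the
+// library is built with RPMALLOC_CONFIGURABLE, per the #if blocks above.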
Finalize the allocator +void rpmalloc_finalize(void) { + rpmalloc_thread_finalize(1); + // rpmalloc_dump_statistics(stdout); + + if (_memory_global_reserve) { + atomic_add32(&_memory_global_reserve_master->remaining_spans, + -(int32_t)_memory_global_reserve_count); + _memory_global_reserve_master = 0; + _memory_global_reserve_count = 0; + _memory_global_reserve = 0; + } + atomic_store32_release(&_memory_global_lock, 0); + + // Free all thread caches and fully free spans + for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { + heap_t *heap = _memory_heaps[list_idx]; + while (heap) { + heap_t *next_heap = heap->next_heap; + heap->finalize = 1; + _rpmalloc_heap_global_finalize(heap); + heap = next_heap; + } + } + +#if ENABLE_GLOBAL_CACHE + // Free global caches + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) + _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]); +#endif + +#if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD + pthread_key_delete(_memory_thread_heap); +#endif +#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) + FlsFree(fls_key); + fls_key = 0; +#endif +#if ENABLE_STATISTICS + // If you hit these asserts you probably have memory leaks (perhaps global + // scope data doing dynamic allocations) or double frees in your code + rpmalloc_assert(atomic_load32(&_mapped_pages) == 0, "Memory leak detected"); + rpmalloc_assert(atomic_load32(&_mapped_pages_os) == 0, + "Memory leak detected"); +#endif + + _rpmalloc_initialized = 0; +} + +//! Initialize thread, assign heap +extern inline void rpmalloc_thread_initialize(void) { + if (!get_thread_heap_raw()) { + heap_t *heap = _rpmalloc_heap_allocate(0); + if (heap) { + _rpmalloc_stat_inc(&_memory_active_heaps); + set_thread_heap(heap); +#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) + FlsSetValue(fls_key, heap); +#endif + } + } +} + +//! Finalize thread, orphan heap +void rpmalloc_thread_finalize(int release_caches) { + heap_t *heap = get_thread_heap_raw(); + if (heap) + _rpmalloc_heap_release_raw(heap, release_caches); + set_thread_heap(0); +#if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK) + FlsSetValue(fls_key, 0); +#endif +} + +int rpmalloc_is_thread_initialized(void) { + return (get_thread_heap_raw() != 0) ? 
1 : 0; +} + +const rpmalloc_config_t *rpmalloc_config(void) { return &_memory_config; } + +// Extern interface + +extern inline RPMALLOC_ALLOCATOR void *rpmalloc(size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + heap_t *heap = get_thread_heap(); + return _rpmalloc_allocate(heap, size); +} + +extern inline void rpfree(void *ptr) { _rpmalloc_deallocate(ptr); } + +extern inline RPMALLOC_ALLOCATOR void *rpcalloc(size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + heap_t *heap = get_thread_heap(); + void *block = _rpmalloc_allocate(heap, total); + if (block) + memset(block, 0, total); + return block; +} + +extern inline RPMALLOC_ALLOCATOR void *rprealloc(void *ptr, size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return ptr; + } +#endif + heap_t *heap = get_thread_heap(); + return _rpmalloc_reallocate(heap, ptr, size, 0, 0); +} + +extern RPMALLOC_ALLOCATOR void *rpaligned_realloc(void *ptr, size_t alignment, + size_t size, size_t oldsize, + unsigned int flags) { +#if ENABLE_VALIDATE_ARGS + if ((size + alignment < size) || (alignment > _memory_page_size)) { + errno = EINVAL; + return 0; + } +#endif + heap_t *heap = get_thread_heap(); + return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, + flags); +} + +extern RPMALLOC_ALLOCATOR void *rpaligned_alloc(size_t alignment, size_t size) { + heap_t *heap = get_thread_heap(); + return _rpmalloc_aligned_allocate(heap, alignment, size); +} + +extern inline RPMALLOC_ALLOCATOR void * +rpaligned_calloc(size_t alignment, size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + void *block = rpaligned_alloc(alignment, total); + if (block) + memset(block, 0, total); + return block; +} + +extern inline RPMALLOC_ALLOCATOR void *rpmemalign(size_t alignment, + size_t size) { + return rpaligned_alloc(alignment, size); +} + +extern inline int rpposix_memalign(void **memptr, size_t alignment, + size_t size) { + if (memptr) + *memptr = rpaligned_alloc(alignment, size); + else + return EINVAL; + return *memptr ? 0 : ENOMEM; +} + +extern inline size_t rpmalloc_usable_size(void *ptr) { + return (ptr ? 
_rpmalloc_usable_size(ptr) : 0); +} + +extern inline void rpmalloc_thread_collect(void) {} + +void rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) { + memset(stats, 0, sizeof(rpmalloc_thread_statistics_t)); + heap_t *heap = get_thread_heap_raw(); + if (!heap) + return; + + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + size_class_t *size_class = _memory_size_class + iclass; + span_t *span = heap->size_class[iclass].partial_span; + while (span) { + size_t free_count = span->list_size; + size_t block_count = size_class->block_count; + if (span->free_list_limit < block_count) + block_count = span->free_list_limit; + free_count += (block_count - span->used_count); + stats->sizecache += free_count * size_class->block_size; + span = span->next; + } + } + +#if ENABLE_THREAD_CACHE + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + span_cache_t *span_cache; + if (!iclass) + span_cache = &heap->span_cache; + else + span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); + stats->spancache += span_cache->count * (iclass + 1) * _memory_span_size; + } +#endif + + span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred); + while (deferred) { + if (deferred->size_class != SIZE_CLASS_HUGE) + stats->spancache += (size_t)deferred->span_count * _memory_span_size; + deferred = (span_t *)deferred->free_list; + } + +#if ENABLE_STATISTICS + stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global); + stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread); + + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + stats->span_use[iclass].current = + (size_t)atomic_load32(&heap->span_use[iclass].current); + stats->span_use[iclass].peak = + (size_t)atomic_load32(&heap->span_use[iclass].high); + stats->span_use[iclass].to_global = + (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global); + stats->span_use[iclass].from_global = + (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global); + stats->span_use[iclass].to_cache = + (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache); + stats->span_use[iclass].from_cache = + (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache); + stats->span_use[iclass].to_reserved = + (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved); + stats->span_use[iclass].from_reserved = + (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved); + stats->span_use[iclass].map_calls = + (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls); + } + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + stats->size_use[iclass].alloc_current = + (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current); + stats->size_use[iclass].alloc_peak = + (size_t)heap->size_class_use[iclass].alloc_peak; + stats->size_use[iclass].alloc_total = + (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total); + stats->size_use[iclass].free_total = + (size_t)atomic_load32(&heap->size_class_use[iclass].free_total); + stats->size_use[iclass].spans_to_cache = + (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache); + stats->size_use[iclass].spans_from_cache = + (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache); + stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32( + &heap->size_class_use[iclass].spans_from_reserved); + stats->size_use[iclass].map_calls = + (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls); + } +#endif +} + +void 
rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) { + memset(stats, 0, sizeof(rpmalloc_global_statistics_t)); +#if ENABLE_STATISTICS + stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; + stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; + stats->mapped_total = + (size_t)atomic_load32(&_mapped_total) * _memory_page_size; + stats->unmapped_total = + (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; + stats->huge_alloc = + (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; + stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size; +#endif +#if ENABLE_GLOBAL_CACHE + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + global_cache_t *cache = &_memory_span_cache[iclass]; + while (!atomic_cas32_acquire(&cache->lock, 1, 0)) + _rpmalloc_spin(); + uint32_t count = cache->count; +#if ENABLE_UNLIMITED_CACHE + span_t *current_span = cache->overflow; + while (current_span) { + ++count; + current_span = current_span->next; + } +#endif + atomic_store32_release(&cache->lock, 0); + stats->cached += count * (iclass + 1) * _memory_span_size; + } +#endif +} + +#if ENABLE_STATISTICS + +static void _memory_heap_dump_statistics(heap_t *heap, void *file) { + fprintf(file, "Heap %d stats:\n", heap->id); + fprintf(file, "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize " + "BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB " + "FromCacheMiB FromReserveMiB MmapCalls\n"); + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) + continue; + fprintf( + file, + "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu " + "%9u\n", + (uint32_t)iclass, + atomic_load32(&heap->size_class_use[iclass].alloc_current), + heap->size_class_use[iclass].alloc_peak, + atomic_load32(&heap->size_class_use[iclass].alloc_total), + atomic_load32(&heap->size_class_use[iclass].free_total), + _memory_size_class[iclass].block_size, + _memory_size_class[iclass].block_count, + atomic_load32(&heap->size_class_use[iclass].spans_current), + heap->size_class_use[iclass].spans_peak, + ((size_t)heap->size_class_use[iclass].alloc_peak * + (size_t)_memory_size_class[iclass].block_size) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * + _memory_span_size) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * + _memory_span_size) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32( + &heap->size_class_use[iclass].spans_from_reserved) * + _memory_span_size) / + (size_t)(1024 * 1024), + atomic_load32(&heap->size_class_use[iclass].spans_map_calls)); + } + fprintf(file, "Spans Current Peak Deferred PeakMiB Cached ToCacheMiB " + "FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB " + "FromGlobalMiB MmapCalls\n"); + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + if (!atomic_load32(&heap->span_use[iclass].high) && + !atomic_load32(&heap->span_use[iclass].spans_map_calls)) + continue; + fprintf( + file, + "%4u: %8d %8u %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", + (uint32_t)(iclass + 1), atomic_load32(&heap->span_use[iclass].current), + atomic_load32(&heap->span_use[iclass].high), + atomic_load32(&heap->span_use[iclass].spans_deferred), + ((size_t)atomic_load32(&heap->span_use[iclass].high) * + (size_t)_memory_span_size * (iclass + 1)) / + (size_t)(1024 * 1024), +#if ENABLE_THREAD_CACHE + (unsigned int)(!iclass ? 
heap->span_cache.count + : heap->span_large_cache[iclass - 1].count), + ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * + (iclass + 1) * _memory_span_size) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * + (iclass + 1) * _memory_span_size) / + (size_t)(1024 * 1024), +#else + 0, (size_t)0, (size_t)0, +#endif + ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * + (iclass + 1) * _memory_span_size) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * + (iclass + 1) * _memory_span_size) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * + (size_t)_memory_span_size * (iclass + 1)) / + (size_t)(1024 * 1024), + ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * + (size_t)_memory_span_size * (iclass + 1)) / + (size_t)(1024 * 1024), + atomic_load32(&heap->span_use[iclass].spans_map_calls)); + } + fprintf(file, "Full spans: %zu\n", heap->full_span_count); + fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n"); + fprintf( + file, "%17zu %17zu\n", + (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024), + (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024)); +} + +#endif + +void rpmalloc_dump_statistics(void *file) { +#if ENABLE_STATISTICS + for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) { + heap_t *heap = _memory_heaps[list_idx]; + while (heap) { + int need_dump = 0; + for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); + ++iclass) { + if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) { + rpmalloc_assert( + !atomic_load32(&heap->size_class_use[iclass].free_total), + "Heap statistics counter mismatch"); + rpmalloc_assert( + !atomic_load32(&heap->size_class_use[iclass].spans_map_calls), + "Heap statistics counter mismatch"); + continue; + } + need_dump = 1; + } + for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); + ++iclass) { + if (!atomic_load32(&heap->span_use[iclass].high) && + !atomic_load32(&heap->span_use[iclass].spans_map_calls)) + continue; + need_dump = 1; + } + if (need_dump) + _memory_heap_dump_statistics(heap, file); + heap = heap->next_heap; + } + } + fprintf(file, "Global stats:\n"); + size_t huge_current = + (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size; + size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size; + fprintf(file, "HugeCurrentMiB HugePeakMiB\n"); + fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), + huge_peak / (size_t)(1024 * 1024)); + +#if ENABLE_GLOBAL_CACHE + fprintf(file, "GlobalCacheMiB\n"); + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + global_cache_t *cache = _memory_span_cache + iclass; + size_t global_cache = (size_t)cache->count * iclass * _memory_span_size; + + size_t global_overflow_cache = 0; + span_t *span = cache->overflow; + while (span) { + global_overflow_cache += iclass * _memory_span_size; + span = span->next; + } + if (global_cache || global_overflow_cache || cache->insert_count || + cache->extract_count) + fprintf(file, + "%4zu: %8zuMiB (%8zuMiB overflow) %14zu insert %14zu extract\n", + iclass + 1, global_cache / (size_t)(1024 * 1024), + global_overflow_cache / (size_t)(1024 * 1024), + cache->insert_count, cache->extract_count); + } +#endif + + size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size; + size_t mapped_os = + (size_t)atomic_load32(&_mapped_pages_os) * 
_memory_page_size; + size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size; + size_t mapped_total = + (size_t)atomic_load32(&_mapped_total) * _memory_page_size; + size_t unmapped_total = + (size_t)atomic_load32(&_unmapped_total) * _memory_page_size; + fprintf( + file, + "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB\n"); + fprintf(file, "%9zu %11zu %13zu %14zu %16zu\n", + mapped / (size_t)(1024 * 1024), mapped_os / (size_t)(1024 * 1024), + mapped_peak / (size_t)(1024 * 1024), + mapped_total / (size_t)(1024 * 1024), + unmapped_total / (size_t)(1024 * 1024)); + + fprintf(file, "\n"); +#if 0 + int64_t allocated = atomic_load64(&_allocation_counter); + int64_t deallocated = atomic_load64(&_deallocation_counter); + fprintf(file, "Allocation count: %lli\n", allocated); + fprintf(file, "Deallocation count: %lli\n", deallocated); + fprintf(file, "Current allocations: %lli\n", (allocated - deallocated)); + fprintf(file, "Master spans: %d\n", atomic_load32(&_master_spans)); + fprintf(file, "Dangling master spans: %d\n", atomic_load32(&_unmapped_master_spans)); +#endif +#endif + (void)sizeof(file); +} + +#if RPMALLOC_FIRST_CLASS_HEAPS + +extern inline rpmalloc_heap_t *rpmalloc_heap_acquire(void) { + // Must be a pristine heap from newly mapped memory pages, or else memory + // blocks could already be allocated from the heap which would (wrongly) be + // released when heap is cleared with rpmalloc_heap_free_all(). Also heaps + // guaranteed to be pristine from the dedicated orphan list can be used. + heap_t *heap = _rpmalloc_heap_allocate(1); + rpmalloc_assume(heap != NULL); + heap->owner_thread = 0; + _rpmalloc_stat_inc(&_memory_active_heaps); + return heap; +} + +extern inline void rpmalloc_heap_release(rpmalloc_heap_t *heap) { + if (heap) + _rpmalloc_heap_release(heap, 1, 1); +} + +extern inline RPMALLOC_ALLOCATOR void * +rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + return _rpmalloc_allocate(heap, size); +} + +extern inline RPMALLOC_ALLOCATOR void * +rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, + size_t size) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return 0; + } +#endif + return _rpmalloc_aligned_allocate(heap, alignment, size); +} + +extern inline RPMALLOC_ALLOCATOR void * +rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) { + return rpmalloc_heap_aligned_calloc(heap, 0, num, size); +} + +extern inline RPMALLOC_ALLOCATOR void * +rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, + size_t num, size_t size) { + size_t total; +#if ENABLE_VALIDATE_ARGS +#if PLATFORM_WINDOWS + int err = SizeTMult(num, size, &total); + if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#else + int err = __builtin_umull_overflow(num, size, &total); + if (err || (total >= MAX_ALLOC_SIZE)) { + errno = EINVAL; + return 0; + } +#endif +#else + total = num * size; +#endif + void *block = _rpmalloc_aligned_allocate(heap, alignment, total); + if (block) + memset(block, 0, total); + return block; +} + +extern inline RPMALLOC_ALLOCATOR void * +rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, + unsigned int flags) { +#if ENABLE_VALIDATE_ARGS + if (size >= MAX_ALLOC_SIZE) { + errno = EINVAL; + return ptr; + } +#endif + return _rpmalloc_reallocate(heap, ptr, size, 0, flags); +} + +extern inline RPMALLOC_ALLOCATOR void * 
+rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, + size_t alignment, size_t size, + unsigned int flags) { +#if ENABLE_VALIDATE_ARGS + if ((size + alignment < size) || (alignment > _memory_page_size)) { + errno = EINVAL; + return 0; + } +#endif + return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags); +} + +extern inline void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) { + (void)sizeof(heap); + _rpmalloc_deallocate(ptr); +} + +extern inline void rpmalloc_heap_free_all(rpmalloc_heap_t *heap) { + span_t *span; + span_t *next_span; + + _rpmalloc_heap_cache_adopt_deferred(heap, 0); + + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + span = heap->size_class[iclass].partial_span; + while (span) { + next_span = span->next; + _rpmalloc_heap_cache_insert(heap, span); + span = next_span; + } + heap->size_class[iclass].partial_span = 0; + span = heap->full_span[iclass]; + while (span) { + next_span = span->next; + _rpmalloc_heap_cache_insert(heap, span); + span = next_span; + } + + span = heap->size_class[iclass].cache; + if (span) + _rpmalloc_heap_cache_insert(heap, span); + heap->size_class[iclass].cache = 0; + } + memset(heap->size_class, 0, sizeof(heap->size_class)); + memset(heap->full_span, 0, sizeof(heap->full_span)); + + span = heap->large_huge_span; + while (span) { + next_span = span->next; + if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE)) + _rpmalloc_deallocate_huge(span); + else + _rpmalloc_heap_cache_insert(heap, span); + span = next_span; + } + heap->large_huge_span = 0; + heap->full_span_count = 0; + +#if ENABLE_THREAD_CACHE + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + span_cache_t *span_cache; + if (!iclass) + span_cache = &heap->span_cache; + else + span_cache = (span_cache_t *)(heap->span_large_cache + (iclass - 1)); + if (!span_cache->count) + continue; +#if ENABLE_GLOBAL_CACHE + _rpmalloc_stat_add64(&heap->thread_to_global, + span_cache->count * (iclass + 1) * _memory_span_size); + _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, + span_cache->count); + _rpmalloc_global_cache_insert_spans(span_cache->span, iclass + 1, + span_cache->count); +#else + for (size_t ispan = 0; ispan < span_cache->count; ++ispan) + _rpmalloc_span_unmap(span_cache->span[ispan]); +#endif + span_cache->count = 0; + } +#endif + +#if ENABLE_STATISTICS + for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) { + atomic_store32(&heap->size_class_use[iclass].alloc_current, 0); + atomic_store32(&heap->size_class_use[iclass].spans_current, 0); + } + for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) { + atomic_store32(&heap->span_use[iclass].current, 0); + } +#endif +} + +extern inline void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) { + heap_t *prev_heap = get_thread_heap_raw(); + if (prev_heap != heap) { + set_thread_heap(heap); + if (prev_heap) + rpmalloc_heap_release(prev_heap); + } +} + +extern inline rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr) { + // Grab the span, and then the heap from the span + span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask); + if (span) { + return span->heap; + } + return 0; +} + +#endif + +#if ENABLE_PRELOAD || ENABLE_OVERRIDE + +#include "malloc.c" + +#endif + +void rpmalloc_linker_reference(void) { (void)sizeof(_rpmalloc_initialized); } diff --git a/llvm/lib/Support/rpmalloc/rpmalloc.h b/llvm/lib/Support/rpmalloc/rpmalloc.h index 5b7fe1ff4286ba724939ed9b76e27d95f124e6b9..3911c53b779b36c3e52fbbc177c2306b56f46ede 100644 --- 
a/llvm/lib/Support/rpmalloc/rpmalloc.h +++ b/llvm/lib/Support/rpmalloc/rpmalloc.h @@ -1,428 +1,428 @@ -//===---------------------- rpmalloc.h ------------------*- C -*-=============// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This library provides a cross-platform lock free thread caching malloc -// implementation in C11. -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__clang__) || defined(__GNUC__) -#define RPMALLOC_EXPORT __attribute__((visibility("default"))) -#define RPMALLOC_ALLOCATOR -#if (defined(__clang_major__) && (__clang_major__ < 4)) || \ - (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD) -#define RPMALLOC_ATTRIB_MALLOC -#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) -#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) -#else -#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__)) -#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size))) -#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) \ - __attribute__((alloc_size(count, size))) -#endif -#define RPMALLOC_CDECL -#elif defined(_MSC_VER) -#define RPMALLOC_EXPORT -#define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict) -#define RPMALLOC_ATTRIB_MALLOC -#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) -#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) -#define RPMALLOC_CDECL __cdecl -#else -#define RPMALLOC_EXPORT -#define RPMALLOC_ALLOCATOR -#define RPMALLOC_ATTRIB_MALLOC -#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) -#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) -#define RPMALLOC_CDECL -#endif - -//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce -// a very small overhead due to some size calculations not being compile time -// constants -#ifndef RPMALLOC_CONFIGURABLE -#define RPMALLOC_CONFIGURABLE 0 -#endif - -//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* -//! functions). -// Will introduce a very small overhead to track fully allocated spans in heaps -#ifndef RPMALLOC_FIRST_CLASS_HEAPS -#define RPMALLOC_FIRST_CLASS_HEAPS 0 -#endif - -//! Flag to rpaligned_realloc to not preserve content in reallocation -#define RPMALLOC_NO_PRESERVE 1 -//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be -//! done in-place, -// in which case the original pointer is still valid (just like a call to -// realloc which failes to allocate a new block). -#define RPMALLOC_GROW_OR_FAIL 2 - -typedef struct rpmalloc_global_statistics_t { - //! Current amount of virtual memory mapped, all of which might not have been - //! committed (only if ENABLE_STATISTICS=1) - size_t mapped; - //! Peak amount of virtual memory mapped, all of which might not have been - //! committed (only if ENABLE_STATISTICS=1) - size_t mapped_peak; - //! Current amount of memory in global caches for small and medium sizes - //! (<32KiB) - size_t cached; - //! Current amount of memory allocated in huge allocations, i.e larger than - //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1) - size_t huge_alloc; - //! Peak amount of memory allocated in huge allocations, i.e larger than - //! 
LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1) - size_t huge_alloc_peak; - //! Total amount of memory mapped since initialization (only if - //! ENABLE_STATISTICS=1) - size_t mapped_total; - //! Total amount of memory unmapped since initialization (only if - //! ENABLE_STATISTICS=1) - size_t unmapped_total; -} rpmalloc_global_statistics_t; - -typedef struct rpmalloc_thread_statistics_t { - //! Current number of bytes available in thread size class caches for small - //! and medium sizes (<32KiB) - size_t sizecache; - //! Current number of bytes available in thread span caches for small and - //! medium sizes (<32KiB) - size_t spancache; - //! Total number of bytes transitioned from thread cache to global cache (only - //! if ENABLE_STATISTICS=1) - size_t thread_to_global; - //! Total number of bytes transitioned from global cache to thread cache (only - //! if ENABLE_STATISTICS=1) - size_t global_to_thread; - //! Per span count statistics (only if ENABLE_STATISTICS=1) - struct { - //! Currently used number of spans - size_t current; - //! High water mark of spans used - size_t peak; - //! Number of spans transitioned to global cache - size_t to_global; - //! Number of spans transitioned from global cache - size_t from_global; - //! Number of spans transitioned to thread cache - size_t to_cache; - //! Number of spans transitioned from thread cache - size_t from_cache; - //! Number of spans transitioned to reserved state - size_t to_reserved; - //! Number of spans transitioned from reserved state - size_t from_reserved; - //! Number of raw memory map calls (not hitting the reserve spans but - //! resulting in actual OS mmap calls) - size_t map_calls; - } span_use[64]; - //! Per size class statistics (only if ENABLE_STATISTICS=1) - struct { - //! Current number of allocations - size_t alloc_current; - //! Peak number of allocations - size_t alloc_peak; - //! Total number of allocations - size_t alloc_total; - //! Total number of frees - size_t free_total; - //! Number of spans transitioned to cache - size_t spans_to_cache; - //! Number of spans transitioned from cache - size_t spans_from_cache; - //! Number of spans transitioned from reserved state - size_t spans_from_reserved; - //! Number of raw memory map calls (not hitting the reserve spans but - //! resulting in actual OS mmap calls) - size_t map_calls; - } size_use[128]; -} rpmalloc_thread_statistics_t; - -typedef struct rpmalloc_config_t { - //! Map memory pages for the given number of bytes. The returned address MUST - //! be - // aligned to the rpmalloc span size, which will always be a power of two. - // Optionally the function can store an alignment offset in the offset - // variable in case it performs alignment and the returned pointer is offset - // from the actual start of the memory region due to this alignment. The - // alignment offset will be passed to the memory unmap function. The - // alignment offset MUST NOT be larger than 65535 (storable in an uint16_t), - // if it is you must use natural alignment to shift it into 16 bits. If you - // set a memory_map function, you must also set a memory_unmap function or - // else the default implementation will be used for both. This function must - // be thread safe, it can be called by multiple threads simultaneously. - void *(*memory_map)(size_t size, size_t *offset); - //! Unmap the memory pages starting at address and spanning the given number - //! of bytes. 
- // If release is set to non-zero, the unmap is for an entire span range as - // returned by a previous call to memory_map and that the entire range should - // be released. The release argument holds the size of the entire span range. - // If release is set to 0, the unmap is a partial decommit of a subset of the - // mapped memory range. If you set a memory_unmap function, you must also set - // a memory_map function or else the default implementation will be used for - // both. This function must be thread safe, it can be called by multiple - // threads simultaneously. - void (*memory_unmap)(void *address, size_t size, size_t offset, - size_t release); - //! Called when an assert fails, if asserts are enabled. Will use the standard - //! assert() - // if this is not set. - void (*error_callback)(const char *message); - //! Called when a call to map memory pages fails (out of memory). If this - //! callback is - // not set or returns zero the library will return a null pointer in the - // allocation call. If this callback returns non-zero the map call will be - // retried. The argument passed is the number of bytes that was requested in - // the map call. Only used if the default system memory map function is used - // (memory_map callback is not set). - int (*map_fail_callback)(size_t size); - //! Size of memory pages. The page size MUST be a power of two. All memory - //! mapping - // requests to memory_map will be made with size set to a multiple of the - // page size. Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system - // page size is used. - size_t page_size; - //! Size of a span of memory blocks. MUST be a power of two, and in - //! [4096,262144] - // range (unless 0 - set to 0 to use the default span size). Used if - // RPMALLOC_CONFIGURABLE is defined to 1. - size_t span_size; - //! Number of spans to map at each request to map new virtual memory blocks. - //! This can - // be used to minimize the system call overhead at the cost of virtual memory - // address space. The extra mapped pages will not be written until actually - // used, so physical committed memory should not be affected in the default - // implementation. Will be aligned to a multiple of spans that match memory - // page size in case of huge pages. - size_t span_map_count; - //! Enable use of large/huge pages. If this flag is set to non-zero and page - //! size is - // zero, the allocator will try to enable huge pages and auto detect the - // configuration. If this is set to non-zero and page_size is also non-zero, - // the allocator will assume huge pages have been configured and enabled - // prior to initializing the allocator. For Windows, see - // https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support - // For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt - int enable_huge_pages; - //! Respectively allocated pages and huge allocated pages names for systems - // supporting it to be able to distinguish among anonymous regions. - const char *page_name; - const char *huge_page_name; -} rpmalloc_config_t; - -//! Initialize allocator with default configuration -RPMALLOC_EXPORT int rpmalloc_initialize(void); - -//! Initialize allocator with given configuration -RPMALLOC_EXPORT int rpmalloc_initialize_config(const rpmalloc_config_t *config); - -//! Get allocator configuration -RPMALLOC_EXPORT const rpmalloc_config_t *rpmalloc_config(void); - -//! Finalize allocator -RPMALLOC_EXPORT void rpmalloc_finalize(void); - -//! 
Initialize allocator for calling thread -RPMALLOC_EXPORT void rpmalloc_thread_initialize(void); - -//! Finalize allocator for calling thread -RPMALLOC_EXPORT void rpmalloc_thread_finalize(int release_caches); - -//! Perform deferred deallocations pending for the calling thread heap -RPMALLOC_EXPORT void rpmalloc_thread_collect(void); - -//! Query if allocator is initialized for calling thread -RPMALLOC_EXPORT int rpmalloc_is_thread_initialized(void); - -//! Get per-thread statistics -RPMALLOC_EXPORT void -rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats); - -//! Get global statistics -RPMALLOC_EXPORT void -rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats); - -//! Dump all statistics in human readable format to file (should be a FILE*) -RPMALLOC_EXPORT void rpmalloc_dump_statistics(void *file); - -//! Allocate a memory block of at least the given size -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1); - -//! Free the given memory block -RPMALLOC_EXPORT void rpfree(void *ptr); - -//! Allocate a memory block of at least the given size and zero initialize it -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2); - -//! Reallocate the given block to at least the given size -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rprealloc(void *ptr, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Reallocate the given block to at least the given size and alignment, -// with optional control flags (see RPMALLOC_NO_PRESERVE). -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size -// (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize, - unsigned int flags) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(3); - -//! Allocate a memory block of at least the given size and alignment. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size -// (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Allocate a memory block of at least the given size and alignment, and zero -//! initialize it. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size -// (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpaligned_calloc(size_t alignment, size_t num, - size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); - -//! Allocate a memory block of at least the given size and alignment. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size -// (default 64KiB) -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! 
Allocate a memory block of at least the given size and alignment. -// Alignment must be a power of two and a multiple of sizeof(void*), -// and should ideally be less than memory page size. A caveat of rpmalloc -// internals is that this must also be strictly less than the span size -// (default 64KiB) -RPMALLOC_EXPORT int rpposix_memalign(void **memptr, size_t alignment, - size_t size); - -//! Query the usable size of the given memory block (from given pointer to the -//! end of block) -RPMALLOC_EXPORT size_t rpmalloc_usable_size(void *ptr); - -//! Dummy empty function for forcing linker symbol inclusion -RPMALLOC_EXPORT void rpmalloc_linker_reference(void); - -#if RPMALLOC_FIRST_CLASS_HEAPS - -//! Heap type -typedef struct heap_t rpmalloc_heap_t; - -//! Acquire a new heap. Will reuse existing released heaps or allocate memory -//! for a new heap -// if none available. Heap API is implemented with the strict assumption that -// only one single thread will call heap functions for a given heap at any -// given time, no functions are thread safe. -RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_heap_acquire(void); - -//! Release a heap (does NOT free the memory allocated by the heap, use -//! rpmalloc_heap_free_all before destroying the heap). -// Releasing a heap will enable it to be reused by other threads. Safe to pass -// a null pointer. -RPMALLOC_EXPORT void rpmalloc_heap_release(rpmalloc_heap_t *heap); - -//! Allocate a memory block of at least the given size using the given heap. -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(2); - -//! Allocate a memory block of at least the given size using the given heap. The -//! returned -// block will have the requested alignment. Alignment must be a power of two -// and a multiple of sizeof(void*), and should ideally be less than memory page -// size. A caveat of rpmalloc internals is that this must also be strictly less -// than the span size (default 64KiB). -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, - size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(3); - -//! Allocate a memory block of at least the given size using the given heap and -//! zero initialize it. -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, - size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); - -//! Allocate a memory block of at least the given size using the given heap and -//! zero initialize it. The returned -// block will have the requested alignment. Alignment must either be zero, or a -// power of two and a multiple of sizeof(void*), and should ideally be less -// than memory page size. A caveat of rpmalloc internals is that this must also -// be strictly less than the span size (default 64KiB). -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, - size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); - -//! Reallocate the given block to at least the given size. The memory block MUST -//! be allocated -// by the same heap given to this function. -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * -rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, - unsigned int flags) RPMALLOC_ATTRIB_MALLOC - RPMALLOC_ATTRIB_ALLOC_SIZE(3); - -//! Reallocate the given block to at least the given size. The memory block MUST -//! 
be allocated -// by the same heap given to this function. The returned block will have the -// requested alignment. Alignment must be either zero, or a power of two and a -// multiple of sizeof(void*), and should ideally be less than memory page size. -// A caveat of rpmalloc internals is that this must also be strictly less than -// the span size (default 64KiB). -RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *rpmalloc_heap_aligned_realloc( - rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size, - unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4); - -//! Free the given memory block from the given heap. The memory block MUST be -//! allocated -// by the same heap given to this function. -RPMALLOC_EXPORT void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr); - -//! Free all memory allocated by the heap -RPMALLOC_EXPORT void rpmalloc_heap_free_all(rpmalloc_heap_t *heap); - -//! Set the given heap as the current heap for the calling thread. A heap MUST -//! only be current heap -// for a single thread, a heap can never be shared between multiple threads. -// The previous current heap for the calling thread is released to be reused by -// other threads. -RPMALLOC_EXPORT void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap); - -//! Returns which heap the given pointer is allocated on -RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr); - -#endif - -#ifdef __cplusplus -} -#endif +//===---------------------- rpmalloc.h ------------------*- C -*-=============// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This library provides a cross-platform lock free thread caching malloc +// implementation in C11. +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__clang__) || defined(__GNUC__) +#define RPMALLOC_EXPORT __attribute__((visibility("default"))) +#define RPMALLOC_ALLOCATOR +#if (defined(__clang_major__) && (__clang_major__ < 4)) || \ + (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD) +#define RPMALLOC_ATTRIB_MALLOC +#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) +#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) +#else +#define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__)) +#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size))) +#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) \ + __attribute__((alloc_size(count, size))) +#endif +#define RPMALLOC_CDECL +#elif defined(_MSC_VER) +#define RPMALLOC_EXPORT +#define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict) +#define RPMALLOC_ATTRIB_MALLOC +#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) +#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) +#define RPMALLOC_CDECL __cdecl +#else +#define RPMALLOC_EXPORT +#define RPMALLOC_ALLOCATOR +#define RPMALLOC_ATTRIB_MALLOC +#define RPMALLOC_ATTRIB_ALLOC_SIZE(size) +#define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) +#define RPMALLOC_CDECL +#endif + +//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce +// a very small overhead due to some size calculations not being compile time +// constants +#ifndef RPMALLOC_CONFIGURABLE +#define RPMALLOC_CONFIGURABLE 0 +#endif + +//! 
Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* +//! functions). +// Will introduce a very small overhead to track fully allocated spans in heaps +#ifndef RPMALLOC_FIRST_CLASS_HEAPS +#define RPMALLOC_FIRST_CLASS_HEAPS 0 +#endif + +//! Flag to rpaligned_realloc to not preserve content in reallocation +#define RPMALLOC_NO_PRESERVE 1 +//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be +//! done in-place, +// in which case the original pointer is still valid (just like a call to +// realloc which failes to allocate a new block). +#define RPMALLOC_GROW_OR_FAIL 2 + +typedef struct rpmalloc_global_statistics_t { + //! Current amount of virtual memory mapped, all of which might not have been + //! committed (only if ENABLE_STATISTICS=1) + size_t mapped; + //! Peak amount of virtual memory mapped, all of which might not have been + //! committed (only if ENABLE_STATISTICS=1) + size_t mapped_peak; + //! Current amount of memory in global caches for small and medium sizes + //! (<32KiB) + size_t cached; + //! Current amount of memory allocated in huge allocations, i.e larger than + //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1) + size_t huge_alloc; + //! Peak amount of memory allocated in huge allocations, i.e larger than + //! LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1) + size_t huge_alloc_peak; + //! Total amount of memory mapped since initialization (only if + //! ENABLE_STATISTICS=1) + size_t mapped_total; + //! Total amount of memory unmapped since initialization (only if + //! ENABLE_STATISTICS=1) + size_t unmapped_total; +} rpmalloc_global_statistics_t; + +typedef struct rpmalloc_thread_statistics_t { + //! Current number of bytes available in thread size class caches for small + //! and medium sizes (<32KiB) + size_t sizecache; + //! Current number of bytes available in thread span caches for small and + //! medium sizes (<32KiB) + size_t spancache; + //! Total number of bytes transitioned from thread cache to global cache (only + //! if ENABLE_STATISTICS=1) + size_t thread_to_global; + //! Total number of bytes transitioned from global cache to thread cache (only + //! if ENABLE_STATISTICS=1) + size_t global_to_thread; + //! Per span count statistics (only if ENABLE_STATISTICS=1) + struct { + //! Currently used number of spans + size_t current; + //! High water mark of spans used + size_t peak; + //! Number of spans transitioned to global cache + size_t to_global; + //! Number of spans transitioned from global cache + size_t from_global; + //! Number of spans transitioned to thread cache + size_t to_cache; + //! Number of spans transitioned from thread cache + size_t from_cache; + //! Number of spans transitioned to reserved state + size_t to_reserved; + //! Number of spans transitioned from reserved state + size_t from_reserved; + //! Number of raw memory map calls (not hitting the reserve spans but + //! resulting in actual OS mmap calls) + size_t map_calls; + } span_use[64]; + //! Per size class statistics (only if ENABLE_STATISTICS=1) + struct { + //! Current number of allocations + size_t alloc_current; + //! Peak number of allocations + size_t alloc_peak; + //! Total number of allocations + size_t alloc_total; + //! Total number of frees + size_t free_total; + //! Number of spans transitioned to cache + size_t spans_to_cache; + //! Number of spans transitioned from cache + size_t spans_from_cache; + //! 
Number of spans transitioned from reserved state + size_t spans_from_reserved; + //! Number of raw memory map calls (not hitting the reserve spans but + //! resulting in actual OS mmap calls) + size_t map_calls; + } size_use[128]; +} rpmalloc_thread_statistics_t; + +typedef struct rpmalloc_config_t { + //! Map memory pages for the given number of bytes. The returned address MUST + //! be + // aligned to the rpmalloc span size, which will always be a power of two. + // Optionally the function can store an alignment offset in the offset + // variable in case it performs alignment and the returned pointer is offset + // from the actual start of the memory region due to this alignment. The + // alignment offset will be passed to the memory unmap function. The + // alignment offset MUST NOT be larger than 65535 (storable in an uint16_t), + // if it is you must use natural alignment to shift it into 16 bits. If you + // set a memory_map function, you must also set a memory_unmap function or + // else the default implementation will be used for both. This function must + // be thread safe, it can be called by multiple threads simultaneously. + void *(*memory_map)(size_t size, size_t *offset); + //! Unmap the memory pages starting at address and spanning the given number + //! of bytes. + // If release is set to non-zero, the unmap is for an entire span range as + // returned by a previous call to memory_map and that the entire range should + // be released. The release argument holds the size of the entire span range. + // If release is set to 0, the unmap is a partial decommit of a subset of the + // mapped memory range. If you set a memory_unmap function, you must also set + // a memory_map function or else the default implementation will be used for + // both. This function must be thread safe, it can be called by multiple + // threads simultaneously. + void (*memory_unmap)(void *address, size_t size, size_t offset, + size_t release); + //! Called when an assert fails, if asserts are enabled. Will use the standard + //! assert() + // if this is not set. + void (*error_callback)(const char *message); + //! Called when a call to map memory pages fails (out of memory). If this + //! callback is + // not set or returns zero the library will return a null pointer in the + // allocation call. If this callback returns non-zero the map call will be + // retried. The argument passed is the number of bytes that was requested in + // the map call. Only used if the default system memory map function is used + // (memory_map callback is not set). + int (*map_fail_callback)(size_t size); + //! Size of memory pages. The page size MUST be a power of two. All memory + //! mapping + // requests to memory_map will be made with size set to a multiple of the + // page size. Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system + // page size is used. + size_t page_size; + //! Size of a span of memory blocks. MUST be a power of two, and in + //! [4096,262144] + // range (unless 0 - set to 0 to use the default span size). Used if + // RPMALLOC_CONFIGURABLE is defined to 1. + size_t span_size; + //! Number of spans to map at each request to map new virtual memory blocks. + //! This can + // be used to minimize the system call overhead at the cost of virtual memory + // address space. The extra mapped pages will not be written until actually + // used, so physical committed memory should not be affected in the default + // implementation. 
Will be aligned to a multiple of spans that match memory + // page size in case of huge pages. + size_t span_map_count; + //! Enable use of large/huge pages. If this flag is set to non-zero and page + //! size is + // zero, the allocator will try to enable huge pages and auto detect the + // configuration. If this is set to non-zero and page_size is also non-zero, + // the allocator will assume huge pages have been configured and enabled + // prior to initializing the allocator. For Windows, see + // https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support + // For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt + int enable_huge_pages; + //! Respectively allocated pages and huge allocated pages names for systems + // supporting it to be able to distinguish among anonymous regions. + const char *page_name; + const char *huge_page_name; +} rpmalloc_config_t; + +//! Initialize allocator with default configuration +RPMALLOC_EXPORT int rpmalloc_initialize(void); + +//! Initialize allocator with given configuration +RPMALLOC_EXPORT int rpmalloc_initialize_config(const rpmalloc_config_t *config); + +//! Get allocator configuration +RPMALLOC_EXPORT const rpmalloc_config_t *rpmalloc_config(void); + +//! Finalize allocator +RPMALLOC_EXPORT void rpmalloc_finalize(void); + +//! Initialize allocator for calling thread +RPMALLOC_EXPORT void rpmalloc_thread_initialize(void); + +//! Finalize allocator for calling thread +RPMALLOC_EXPORT void rpmalloc_thread_finalize(int release_caches); + +//! Perform deferred deallocations pending for the calling thread heap +RPMALLOC_EXPORT void rpmalloc_thread_collect(void); + +//! Query if allocator is initialized for calling thread +RPMALLOC_EXPORT int rpmalloc_is_thread_initialized(void); + +//! Get per-thread statistics +RPMALLOC_EXPORT void +rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats); + +//! Get global statistics +RPMALLOC_EXPORT void +rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats); + +//! Dump all statistics in human readable format to file (should be a FILE*) +RPMALLOC_EXPORT void rpmalloc_dump_statistics(void *file); + +//! Allocate a memory block of at least the given size +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1); + +//! Free the given memory block +RPMALLOC_EXPORT void rpfree(void *ptr); + +//! Allocate a memory block of at least the given size and zero initialize it +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2); + +//! Reallocate the given block to at least the given size +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rprealloc(void *ptr, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2); + +//! Reallocate the given block to at least the given size and alignment, +// with optional control flags (see RPMALLOC_NO_PRESERVE). +// Alignment must be a power of two and a multiple of sizeof(void*), +// and should ideally be less than memory page size. A caveat of rpmalloc +// internals is that this must also be strictly less than the span size +// (default 64KiB) +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize, + unsigned int flags) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(3); + +//! Allocate a memory block of at least the given size and alignment. 
+// Alignment must be a power of two and a multiple of sizeof(void*), +// and should ideally be less than memory page size. A caveat of rpmalloc +// internals is that this must also be strictly less than the span size +// (default 64KiB) +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2); + +//! Allocate a memory block of at least the given size and alignment, and zero +//! initialize it. +// Alignment must be a power of two and a multiple of sizeof(void*), +// and should ideally be less than memory page size. A caveat of rpmalloc +// internals is that this must also be strictly less than the span size +// (default 64KiB) +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpaligned_calloc(size_t alignment, size_t num, + size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); + +//! Allocate a memory block of at least the given size and alignment. +// Alignment must be a power of two and a multiple of sizeof(void*), +// and should ideally be less than memory page size. A caveat of rpmalloc +// internals is that this must also be strictly less than the span size +// (default 64KiB) +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2); + +//! Allocate a memory block of at least the given size and alignment. +// Alignment must be a power of two and a multiple of sizeof(void*), +// and should ideally be less than memory page size. A caveat of rpmalloc +// internals is that this must also be strictly less than the span size +// (default 64KiB) +RPMALLOC_EXPORT int rpposix_memalign(void **memptr, size_t alignment, + size_t size); + +//! Query the usable size of the given memory block (from given pointer to the +//! end of block) +RPMALLOC_EXPORT size_t rpmalloc_usable_size(void *ptr); + +//! Dummy empty function for forcing linker symbol inclusion +RPMALLOC_EXPORT void rpmalloc_linker_reference(void); + +#if RPMALLOC_FIRST_CLASS_HEAPS + +//! Heap type +typedef struct heap_t rpmalloc_heap_t; + +//! Acquire a new heap. Will reuse existing released heaps or allocate memory +//! for a new heap +// if none available. Heap API is implemented with the strict assumption that +// only one single thread will call heap functions for a given heap at any +// given time, no functions are thread safe. +RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_heap_acquire(void); + +//! Release a heap (does NOT free the memory allocated by the heap, use +//! rpmalloc_heap_free_all before destroying the heap). +// Releasing a heap will enable it to be reused by other threads. Safe to pass +// a null pointer. +RPMALLOC_EXPORT void rpmalloc_heap_release(rpmalloc_heap_t *heap); + +//! Allocate a memory block of at least the given size using the given heap. +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(2); + +//! Allocate a memory block of at least the given size using the given heap. The +//! returned +// block will have the requested alignment. Alignment must be a power of two +// and a multiple of sizeof(void*), and should ideally be less than memory page +// size. A caveat of rpmalloc internals is that this must also be strictly less +// than the span size (default 64KiB). 
+RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, + size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(3); + +//! Allocate a memory block of at least the given size using the given heap and +//! zero initialize it. +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, + size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); + +//! Allocate a memory block of at least the given size using the given heap and +//! zero initialize it. The returned +// block will have the requested alignment. Alignment must either be zero, or a +// power of two and a multiple of sizeof(void*), and should ideally be less +// than memory page size. A caveat of rpmalloc internals is that this must also +// be strictly less than the span size (default 64KiB). +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, + size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3); + +//! Reallocate the given block to at least the given size. The memory block MUST +//! be allocated +// by the same heap given to this function. +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void * +rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, + unsigned int flags) RPMALLOC_ATTRIB_MALLOC + RPMALLOC_ATTRIB_ALLOC_SIZE(3); + +//! Reallocate the given block to at least the given size. The memory block MUST +//! be allocated +// by the same heap given to this function. The returned block will have the +// requested alignment. Alignment must be either zero, or a power of two and a +// multiple of sizeof(void*), and should ideally be less than memory page size. +// A caveat of rpmalloc internals is that this must also be strictly less than +// the span size (default 64KiB). +RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *rpmalloc_heap_aligned_realloc( + rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size, + unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(4); + +//! Free the given memory block from the given heap. The memory block MUST be +//! allocated +// by the same heap given to this function. +RPMALLOC_EXPORT void rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr); + +//! Free all memory allocated by the heap +RPMALLOC_EXPORT void rpmalloc_heap_free_all(rpmalloc_heap_t *heap); + +//! Set the given heap as the current heap for the calling thread. A heap MUST +//! only be current heap +// for a single thread, a heap can never be shared between multiple threads. +// The previous current heap for the calling thread is released to be reused by +// other threads. +RPMALLOC_EXPORT void rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap); + +//! Returns which heap the given pointer is allocated on +RPMALLOC_EXPORT rpmalloc_heap_t *rpmalloc_get_heap_for_ptr(void *ptr); + +#endif + +#ifdef __cplusplus +} +#endif diff --git a/llvm/lib/Support/rpmalloc/rpnew.h b/llvm/lib/Support/rpmalloc/rpnew.h index a18f0799d56d1f2a633897199d53b80f80c54090..d8303c6f95652fecaa187a1d89996008a93bcbbf 100644 --- a/llvm/lib/Support/rpmalloc/rpnew.h +++ b/llvm/lib/Support/rpmalloc/rpnew.h @@ -1,113 +1,113 @@ -//===-------------------------- rpnew.h -----------------*- C -*-=============// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This library provides a cross-platform lock free thread caching malloc -// implementation in C11. -// -//===----------------------------------------------------------------------===// - -#ifdef __cplusplus - -#include -#include - -#ifndef __CRTDECL -#define __CRTDECL -#endif - -extern void __CRTDECL operator delete(void *p) noexcept { rpfree(p); } - -extern void __CRTDECL operator delete[](void *p) noexcept { rpfree(p); } - -extern void *__CRTDECL operator new(std::size_t size) noexcept(false) { - return rpmalloc(size); -} - -extern void *__CRTDECL operator new[](std::size_t size) noexcept(false) { - return rpmalloc(size); -} - -extern void *__CRTDECL operator new(std::size_t size, - const std::nothrow_t &tag) noexcept { - (void)sizeof(tag); - return rpmalloc(size); -} - -extern void *__CRTDECL operator new[](std::size_t size, - const std::nothrow_t &tag) noexcept { - (void)sizeof(tag); - return rpmalloc(size); -} - -#if (__cplusplus >= 201402L || _MSC_VER >= 1916) - -extern void __CRTDECL operator delete(void *p, std::size_t size) noexcept { - (void)sizeof(size); - rpfree(p); -} - -extern void __CRTDECL operator delete[](void *p, std::size_t size) noexcept { - (void)sizeof(size); - rpfree(p); -} - -#endif - -#if (__cplusplus > 201402L || defined(__cpp_aligned_new)) - -extern void __CRTDECL operator delete(void *p, - std::align_val_t align) noexcept { - (void)sizeof(align); - rpfree(p); -} - -extern void __CRTDECL operator delete[](void *p, - std::align_val_t align) noexcept { - (void)sizeof(align); - rpfree(p); -} - -extern void __CRTDECL operator delete(void *p, std::size_t size, - std::align_val_t align) noexcept { - (void)sizeof(size); - (void)sizeof(align); - rpfree(p); -} - -extern void __CRTDECL operator delete[](void *p, std::size_t size, - std::align_val_t align) noexcept { - (void)sizeof(size); - (void)sizeof(align); - rpfree(p); -} - -extern void *__CRTDECL operator new(std::size_t size, - std::align_val_t align) noexcept(false) { - return rpaligned_alloc(static_cast(align), size); -} - -extern void *__CRTDECL operator new[](std::size_t size, - std::align_val_t align) noexcept(false) { - return rpaligned_alloc(static_cast(align), size); -} - -extern void *__CRTDECL operator new(std::size_t size, std::align_val_t align, - const std::nothrow_t &tag) noexcept { - (void)sizeof(tag); - return rpaligned_alloc(static_cast(align), size); -} - -extern void *__CRTDECL operator new[](std::size_t size, std::align_val_t align, - const std::nothrow_t &tag) noexcept { - (void)sizeof(tag); - return rpaligned_alloc(static_cast(align), size); -} - -#endif - -#endif +//===-------------------------- rpnew.h -----------------*- C -*-=============// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This library provides a cross-platform lock free thread caching malloc +// implementation in C11. 
+// +//===----------------------------------------------------------------------===// + +#ifdef __cplusplus + +#include +#include + +#ifndef __CRTDECL +#define __CRTDECL +#endif + +extern void __CRTDECL operator delete(void *p) noexcept { rpfree(p); } + +extern void __CRTDECL operator delete[](void *p) noexcept { rpfree(p); } + +extern void *__CRTDECL operator new(std::size_t size) noexcept(false) { + return rpmalloc(size); +} + +extern void *__CRTDECL operator new[](std::size_t size) noexcept(false) { + return rpmalloc(size); +} + +extern void *__CRTDECL operator new(std::size_t size, + const std::nothrow_t &tag) noexcept { + (void)sizeof(tag); + return rpmalloc(size); +} + +extern void *__CRTDECL operator new[](std::size_t size, + const std::nothrow_t &tag) noexcept { + (void)sizeof(tag); + return rpmalloc(size); +} + +#if (__cplusplus >= 201402L || _MSC_VER >= 1916) + +extern void __CRTDECL operator delete(void *p, std::size_t size) noexcept { + (void)sizeof(size); + rpfree(p); +} + +extern void __CRTDECL operator delete[](void *p, std::size_t size) noexcept { + (void)sizeof(size); + rpfree(p); +} + +#endif + +#if (__cplusplus > 201402L || defined(__cpp_aligned_new)) + +extern void __CRTDECL operator delete(void *p, + std::align_val_t align) noexcept { + (void)sizeof(align); + rpfree(p); +} + +extern void __CRTDECL operator delete[](void *p, + std::align_val_t align) noexcept { + (void)sizeof(align); + rpfree(p); +} + +extern void __CRTDECL operator delete(void *p, std::size_t size, + std::align_val_t align) noexcept { + (void)sizeof(size); + (void)sizeof(align); + rpfree(p); +} + +extern void __CRTDECL operator delete[](void *p, std::size_t size, + std::align_val_t align) noexcept { + (void)sizeof(size); + (void)sizeof(align); + rpfree(p); +} + +extern void *__CRTDECL operator new(std::size_t size, + std::align_val_t align) noexcept(false) { + return rpaligned_alloc(static_cast(align), size); +} + +extern void *__CRTDECL operator new[](std::size_t size, + std::align_val_t align) noexcept(false) { + return rpaligned_alloc(static_cast(align), size); +} + +extern void *__CRTDECL operator new(std::size_t size, std::align_val_t align, + const std::nothrow_t &tag) noexcept { + (void)sizeof(tag); + return rpaligned_alloc(static_cast(align), size); +} + +extern void *__CRTDECL operator new[](std::size_t size, std::align_val_t align, + const std::nothrow_t &tag) noexcept { + (void)sizeof(tag); + return rpaligned_alloc(static_cast(align), size); +} + +#endif + +#endif diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i32.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i32.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i64.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i64.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll b/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Bitcode/DISubprogram-v5.ll.bc b/llvm/test/Bitcode/DISubprogram-v5.ll.bc old mode 100755 new mode 
100644 diff --git a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py old mode 100644 new mode 100755 diff --git a/llvm/test/CodeGen/AArch64/dag-ReplaceAllUsesOfValuesWith.ll b/llvm/test/CodeGen/AArch64/dag-ReplaceAllUsesOfValuesWith.ll old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/AArch64/shrink-wrap.ll b/llvm/test/CodeGen/AArch64/shrink-wrap.ll old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/AArch64/taildup-ssa-update-pr62712.mir b/llvm/test/CodeGen/AArch64/taildup-ssa-update-pr62712.mir old mode 100644 new mode 100755 diff --git a/llvm/test/CodeGen/MLRegalloc/Inputs/input.ll b/llvm/test/CodeGen/MLRegAlloc/Inputs/input.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/Inputs/input.ll rename to llvm/test/CodeGen/MLRegAlloc/Inputs/input.ll diff --git a/llvm/test/CodeGen/MLRegalloc/Inputs/interactive_main.py b/llvm/test/CodeGen/MLRegAlloc/Inputs/interactive_main.py similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/Inputs/interactive_main.py rename to llvm/test/CodeGen/MLRegAlloc/Inputs/interactive_main.py diff --git a/llvm/test/CodeGen/MLRegalloc/Inputs/reference-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/Inputs/reference-log-noml.txt rename to llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt diff --git a/llvm/test/CodeGen/MLRegalloc/Inputs/reference-prio-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/Inputs/reference-prio-log-noml.txt rename to llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt diff --git a/llvm/test/CodeGen/MLRegalloc/Inputs/two-large-fcts.ll b/llvm/test/CodeGen/MLRegAlloc/Inputs/two-large-fcts.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/Inputs/two-large-fcts.ll rename to llvm/test/CodeGen/MLRegAlloc/Inputs/two-large-fcts.ll diff --git a/llvm/test/CodeGen/MLRegalloc/default-eviction-advisor.ll b/llvm/test/CodeGen/MLRegAlloc/default-eviction-advisor.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/default-eviction-advisor.ll rename to llvm/test/CodeGen/MLRegAlloc/default-eviction-advisor.ll diff --git a/llvm/test/CodeGen/MLRegalloc/default-priority-advisor.ll b/llvm/test/CodeGen/MLRegAlloc/default-priority-advisor.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/default-priority-advisor.ll rename to llvm/test/CodeGen/MLRegAlloc/default-priority-advisor.ll diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/dev-mode-extra-features-logging.ll rename to llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-log-2-fcts.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-log-2-fcts.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/dev-mode-log-2-fcts.ll rename to llvm/test/CodeGen/MLRegAlloc/dev-mode-log-2-fcts.ll diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-logging.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/dev-mode-logging.ll rename to llvm/test/CodeGen/MLRegAlloc/dev-mode-logging.ll diff --git a/llvm/test/CodeGen/MLRegalloc/dev-mode-prio-logging.ll 
b/llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/dev-mode-prio-logging.ll rename to llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll diff --git a/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll b/llvm/test/CodeGen/MLRegAlloc/dev-rel-equivalence.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll rename to llvm/test/CodeGen/MLRegAlloc/dev-rel-equivalence.ll diff --git a/llvm/test/CodeGen/MLRegalloc/empty-log.ll b/llvm/test/CodeGen/MLRegAlloc/empty-log.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/empty-log.ll rename to llvm/test/CodeGen/MLRegAlloc/empty-log.ll diff --git a/llvm/test/CodeGen/MLRegalloc/interactive-mode.ll b/llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/interactive-mode.ll rename to llvm/test/CodeGen/MLRegAlloc/interactive-mode.ll diff --git a/llvm/test/CodeGen/MLRegalloc/lit.local.cfg b/llvm/test/CodeGen/MLRegAlloc/lit.local.cfg similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/lit.local.cfg rename to llvm/test/CodeGen/MLRegAlloc/lit.local.cfg diff --git a/llvm/test/CodeGen/MLRegalloc/rel-codepath.ll b/llvm/test/CodeGen/MLRegAlloc/rel-codepath.ll similarity index 100% rename from llvm/test/CodeGen/MLRegalloc/rel-codepath.ll rename to llvm/test/CodeGen/MLRegAlloc/rel-codepath.ll diff --git a/llvm/test/CodeGen/RISCV/inline-asm-S-constraint.ll b/llvm/test/CodeGen/RISCV/inline-asm-s-constraint.ll similarity index 100% rename from llvm/test/CodeGen/RISCV/inline-asm-S-constraint.ll rename to llvm/test/CodeGen/RISCV/inline-asm-s-constraint.ll diff --git a/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll b/llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/X86/pr47000.ll b/llvm/test/CodeGen/X86/pr47000.ll old mode 100755 new mode 100644 diff --git a/llvm/test/CodeGen/X86/vecloadextract.ll b/llvm/test/CodeGen/X86/vecloadextract.ll old mode 100755 new mode 100644 diff --git a/llvm/test/DebugInfo/COFF/numeric-leaves.ll b/llvm/test/DebugInfo/COFF/numeric-leaves.ll old mode 100755 new mode 100644 diff --git a/llvm/test/DebugInfo/Inputs/dwarfdump-gdbindex-v8.elf-x86-64 b/llvm/test/DebugInfo/Inputs/dwarfdump-gdbindex-v8.elf-x86-64 old mode 100644 new mode 100755 diff --git a/llvm/test/DebugInfo/X86/debug-info-template-parameter.ll b/llvm/test/DebugInfo/X86/debug-info-template-parameter.ll old mode 100755 new mode 100644 diff --git a/llvm/test/LTO/X86/libcall-overridden-via-alias.ll b/llvm/test/LTO/X86/libcall-overridden-via-alias.ll old mode 100755 new mode 100644 diff --git a/llvm/test/MC/COFF/lset0.s b/llvm/test/MC/COFF/lset0.s old mode 100755 new mode 100644 diff --git a/llvm/test/MC/X86/MMX-32.s b/llvm/test/MC/X86/MMX-32.s old mode 100755 new mode 100644 diff --git a/llvm/test/MC/X86/X87-32.s b/llvm/test/MC/X86/X87-32.s old mode 100755 new mode 100644 diff --git a/llvm/test/MC/X86/X87-64.s b/llvm/test/MC/X86/X87-64.s old mode 100755 new mode 100644 diff --git a/llvm/test/Other/Inputs/block-info-only.bc b/llvm/test/Other/Inputs/block-info-only.bc old mode 100755 new mode 100644 diff --git a/llvm/test/Other/Inputs/no-block-info.bc b/llvm/test/Other/Inputs/no-block-info.bc old mode 100755 new mode 100644 diff --git 
a/llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/Inline/inlined-loop-metadata-inseltpoison.ll b/llvm/test/Transforms/Inline/inlined-loop-metadata-inseltpoison.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/Inline/inlined-loop-metadata.ll b/llvm/test/Transforms/Inline/inlined-loop-metadata.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopRotate/catchret.ll b/llvm/test/Transforms/LoopRotate/catchret.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopSimplify/do-preheader-dbg-inseltpoison.ll b/llvm/test/Transforms/LoopSimplify/do-preheader-dbg-inseltpoison.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopSimplify/do-preheader-dbg.ll b/llvm/test/Transforms/LoopSimplify/do-preheader-dbg.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopSimplify/for-preheader-dbg.ll b/llvm/test/Transforms/LoopSimplify/for-preheader-dbg.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll b/llvm/test/Transforms/LoopVectorize/fix-reduction-dbg.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/LoopVectorize/pr38800.ll b/llvm/test/Transforms/LoopVectorize/pr38800.ll old mode 100755 new mode 100644 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe old mode 100644 new mode 100755 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe old mode 100644 new mode 100755 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe old mode 100644 new mode 100755 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe old mode 100644 new mode 100755 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe old mode 100644 new mode 100755 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh old mode 100644 new mode 100755 diff --git a/llvm/test/Transforms/PGOProfile/Inputs/update_thinlto_indirect_call_promotion_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_thinlto_indirect_call_promotion_inputs.sh old mode 100644 new mode 100755 diff --git a/llvm/test/tools/dsymutil/Inputs/accel-imported-declaration.macho-arm64 b/llvm/test/tools/dsymutil/Inputs/accel-imported-declaration.macho-arm64 old mode 100644 new mode 100755 diff --git a/llvm/test/tools/dsymutil/Inputs/discriminator.arm64.dylib b/llvm/test/tools/dsymutil/Inputs/discriminator.arm64.dylib old mode 100644 new mode 100755 diff --git a/llvm/test/tools/dsymutil/Inputs/discriminator_repeated.arm64.dylib b/llvm/test/tools/dsymutil/Inputs/discriminator_repeated.arm64.dylib old mode 100644 new mode 100755 diff --git a/llvm/test/tools/dsymutil/Inputs/invalid.o b/llvm/test/tools/dsymutil/Inputs/invalid.o old mode 100755 new mode 100644 diff --git 
a/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/collision/main.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/dwarf5/dwarf5-addrx-0x0-last.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/dwarf5/dwarf5-addrx-0x0-last.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/dwarf5/dwarf5-addrx.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/dwarf5/dwarf5-addrx.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/missing/foobar.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/missing/foobar.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/warning/test.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/warning/test.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf4-macro.out b/llvm/test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf4-macro.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf5-macro.out b/llvm/test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf5-macro.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf5.out b/llvm/test/tools/llvm-dwarfutil/ELF/X86/Inputs/dwarf5.out
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-dwp/Inputs/search_dwos/main b/llvm/test/tools/llvm-dwp/Inputs/search_dwos/main
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-libtool-darwin/Inputs/DependencyDump.py b/llvm/test/tools/llvm-libtool-darwin/Inputs/DependencyDump.py
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-objdump/Inputs/embedded-source b/llvm/test/tools/llvm-objdump/Inputs/embedded-source
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64.dylib b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64.dylib
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64_32.dylib b/llvm/test/tools/llvm-objdump/MachO/AArch64/Inputs/rel-method-lists-arm64_32.dylib
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-objdump/MachO/Inputs/chained-fixups.macho-x86_64 b/llvm/test/tools/llvm-objdump/MachO/Inputs/chained-fixups.macho-x86_64
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofexe
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh b/llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.perfbin b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.perfbin
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc b/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc
index c700b587af6483655c844ed387c08df0e92a3416..bb79dca399c219ac4d1adb87ed7593318d60438e 100644
--- a/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc
+++ b/llvm/test/tools/llvm-rc/Inputs/dialog-with-menu.rc
@@ -1,16 +1,16 @@
-101 DIALOG 0, 0, 362, 246
-STYLE 0x40l | 0x0004l | 0x0008l | 0x0800l | 0x00020000l |
-    0x00010000l | 0x80000000l | 0x10000000l | 0x02000000l | 0x00C00000l |
-    0x00080000l | 0x00040000l
-CAPTION "MakeNSISW"
-MENU 104
-FONT 8, "MS Shell Dlg"
-BEGIN
-    CONTROL "",202,"RichEdit20A",0x0004l | 0x0040l |
-        0x0100l | 0x0800l | 0x00008000 |
-        0x00010000l | 0x00800000l | 0x00200000l,7,22,348,190
-    CONTROL "",-1,"Static",0x00000010l,7,220,346,1
-    LTEXT "",200,7,230,200,12,0x08000000l
-    DEFPUSHBUTTON "Test &Installer",203,230,226,60,15,0x08000000l | 0x00010000l
-    PUSHBUTTON "&Close",2,296,226,49,15,0x00010000l
-END
+101 DIALOG 0, 0, 362, 246
+STYLE 0x40l | 0x0004l | 0x0008l | 0x0800l | 0x00020000l |
+    0x00010000l | 0x80000000l | 0x10000000l | 0x02000000l | 0x00C00000l |
+    0x00080000l | 0x00040000l
+CAPTION "MakeNSISW"
+MENU 104
+FONT 8, "MS Shell Dlg"
+BEGIN
+    CONTROL "",202,"RichEdit20A",0x0004l | 0x0040l |
+        0x0100l | 0x0800l | 0x00008000 |
+        0x00010000l | 0x00800000l | 0x00200000l,7,22,348,190
+    CONTROL "",-1,"Static",0x00000010l,7,220,346,1
+    LTEXT "",200,7,230,200,12,0x08000000l
+    DEFPUSHBUTTON "Test &Installer",203,230,226,60,15,0x08000000l | 0x00010000l
+    PUSHBUTTON "&Close",2,296,226,49,15,0x00010000l
+END
diff --git a/llvm/test/tools/llvm-reduce/Inputs/sleep-and-check-stores.py b/llvm/test/tools/llvm-reduce/Inputs/sleep-and-check-stores.py
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-reduce/Inputs/test-crash-vars.py b/llvm/test/tools/llvm-reduce/Inputs/test-crash-vars.py
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-split/scc-const-alias.ll b/llvm/test/tools/llvm-split/scc-const-alias.ll
old mode 100755
new mode 100644
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/split-dwarf-dwp-invalid b/llvm/test/tools/llvm-symbolizer/Inputs/split-dwarf-dwp-invalid
old mode 100644
new mode 100755
diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so b/llvm/test/tools/llvm-symbolizer/Inputs/symbols.so
old mode 100644
new mode 100755
diff --git a/llvm/unittests/DebugInfo/CodeView/GUIDFormatTest.cpp b/llvm/unittests/DebugInfo/CodeView/GUIDFormatTest.cpp
old mode 100755
new mode 100644
diff --git a/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp
old mode 100755
new mode 100644
diff --git a/llvm/utils/filecheck_lint/filecheck_lint.py b/llvm/utils/filecheck_lint/filecheck_lint.py
old mode 100644
new mode 100755
diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py
old mode 100644
new mode 100755
diff --git a/llvm/utils/lit/tests/discovery.py b/llvm/utils/lit/tests/discovery.py
old mode 100644
new mode 100755
diff --git a/llvm/utils/relative_lines.py b/llvm/utils/relative_lines.py
old mode 100644
new mode 100755
diff --git a/llvm/utils/release/bump-version.py b/llvm/utils/release/bump-version.py
old mode 100644
new mode 100755
diff --git a/llvm/utils/release/get-llvm-version.sh b/llvm/utils/release/get-llvm-version.sh
old mode 100644
new mode 100755
diff --git a/llvm/utils/update_any_test_checks.py b/llvm/utils/update_any_test_checks.py
old mode 100644
new mode 100755
diff --git a/llvm/utils/update_test_body.py b/llvm/utils/update_test_body.py
old mode 100644
new mode 100755
diff --git a/mlir/docs/Dialects/OpenACCDialect.md b/mlir/docs/Dialects/OpenACCDialect.md
old mode 100644
new mode 100755
diff --git a/mlir/test/Dialect/SparseTensor/binary_valued.mlir b/mlir/test/Dialect/SparseTensor/binary_valued.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir b/mlir/test/Dialect/SparseTensor/minipipeline_vector.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_one.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_one.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Dialect/SparseTensor/spy_sddmm_bsr.mlir b/mlir/test/Dialect/SparseTensor/spy_sddmm_bsr.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block_majors.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block_majors.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block3d.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_55.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_55.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_empty.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_empty.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_print_3d.mlir
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir
old mode 100755
new mode 100644
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
old mode 100755
new mode 100644
diff --git a/mlir/test/Integration/data/block.mtx b/mlir/test/Integration/data/block.mtx
old mode 100644
new mode 100755
diff --git a/mlir/test/Integration/data/ds.mtx b/mlir/test/Integration/data/ds.mtx
old mode 100644
new mode 100755
diff --git a/mlir/utils/spirv/refresh_inst.sh b/mlir/utils/spirv/refresh_inst.sh
old mode 100644
new mode 100755
diff --git a/offload/test/mapping/target_uses_allocator.c b/offload/test/mapping/target_uses_allocator.c
old mode 100644
new mode 100755
diff --git a/offload/utils/generate_microtask_cases.py b/offload/utils/generate_microtask_cases.py
old mode 100644
new mode 100755
diff --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.F90.var
similarity index 100%
rename from openmp/runtime/src/include/omp_lib.f90.var
rename to openmp/runtime/src/include/omp_lib.F90.var