diff --git a/clang/pylib/__init__.py b/clang/pylib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..68130d5941d9bc0758f325a132ac0585432e5a66
--- /dev/null
+++ b/clang/pylib/__init__.py
@@ -0,0 +1,3 @@
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
diff --git a/clang/pylib/clang/PRESUBMIT.py b/clang/pylib/clang/PRESUBMIT.py
new file mode 100644
index 0000000000000000000000000000000000000000..34dbacfdc595cd39936a88f7864a7444238635dd
--- /dev/null
+++ b/clang/pylib/clang/PRESUBMIT.py
@@ -0,0 +1,14 @@
+# Copyright 2019 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+
+def CheckChangeOnCommit(input_api, output_api):
+  results = []
+
+  # Run the unit tests.
+  results.extend(
+      input_api.canned_checks.RunUnitTestsInDirectory(input_api, output_api,
+                                                      '.', [r'^.+_test\.py$']))
+
+  return results
diff --git a/clang/pylib/clang/__init__.py b/clang/pylib/clang/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..68130d5941d9bc0758f325a132ac0585432e5a66
--- /dev/null
+++ b/clang/pylib/clang/__init__.py
@@ -0,0 +1,3 @@
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
diff --git a/clang/pylib/clang/compile_db.py b/clang/pylib/clang/compile_db.py
new file mode 100755
index 0000000000000000000000000000000000000000..b6d054128a37af75936c570d16d2adaa06b061a8
--- /dev/null
+++ b/clang/pylib/clang/compile_db.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+
+_RSP_RE = re.compile(r' (@(.+?\.rsp)) ')
+_CLANG_WRAPPER_CMD_LINE_RE = re.compile(
+    r'''
+        (
+            (?P<rewrapper>.*rewrapper(\.exe)?"?\s+)
+            # rewrapper may have args between it and clang.
+            # Assume the args do not contain spaces.
+            (?P<rewrapper_args>\-\S+\s+)*
+        )?
+        # Assume the path to clang does not contain spaces.
+        (?P<clang>\S*clang\S*)
+        \s+
+        (?P<args>.*)
+    ''', re.VERBOSE)
+_debugging = False
+
+
+def _IsTargettingWindows(target_os):
+  if target_os is not None:
+    # Available choices are based on: gn help target_os
+    assert target_os in [
+        'android',
+        'chromeos',
+        'fuchsia',
+        'ios',
+        'linux',
+        'mac',
+        'nacl',
+        'win',
+    ]
+    return target_os == 'win'
+  return sys.platform == 'win32'
+
+
+def _FilterFlags(command, additional_filtered_flags):
+  # Dictionary from flags to filter, to the number of additional arguments
+  # each flag consumes (so we can remove any flag parameters).
+  flags_to_filter = {
+      # These are Visual Studio-specific arguments not recognized or used by
+      # some third-party clang tooling. They only suppress or activate
+      # graphical output anyway.
+      '/nologo': 0,
+      '/showIncludes': 0,
+      # Drop frontend-only arguments, which generally aren't needed by clang
+      # tooling.
+      '-Xclang': 1,
+      # This is used for profile-guided optimizations. It is not needed by
+      # tools, and clangd complains it cannot find the referenced profile
+      # file.
+      '-fprofile-sample-use': 1,
+      # This flag is only usable together with -fprofile-sample-use, which is
+      # excluded above. Exclude it too to avoid an
+      # unused-command-line-argument error.
+      '-fsample-profile-use-profi': 1,
+  }
+  # Add user-added flags. We only support flags with no parameters here.
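+  # For example, passing additional_filtered_flags=['-fmy-flag'] (a
+  # hypothetical flag name) would add {'-fmy-flag': 0} here, so '-fmy-flag'
+  # is dropped without consuming any of the arguments that follow it.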
+ if additional_filtered_flags: + flags_to_filter.update((flag, 0) for flag in additional_filtered_flags) + + filtered_command_parts = [] + parts_to_consume = 0 + for command_part in command.split(): + # Consume flag parameters. + if parts_to_consume > 0: + parts_to_consume -= 1 + continue + # Handle -flag=parameter syntax. We only support a single parameter here. + split_flag = command_part.split("=", 1) + if len(split_flag) == 2 and split_flag[0] in flags_to_filter: + expected_params = flags_to_filter[split_flag[0]] + if expected_params == 1: + continue + elif _debugging: + print("Expecting %s to have %d parameters, but got 1" % + (split_flag[0], expected_params)) + print("The flag will be kept in the command!") + # Handle regular parameters. + if command_part in flags_to_filter: + parts_to_consume = flags_to_filter[command_part] + continue + # This command part is not in the filter list, nor should be consumed as a + # flag parameter. + filtered_command_parts.append(command_part) + + return ' '.join(filtered_command_parts) + + +def _ProcessCommand(command, filtered_args, target_os): + # If the driver mode is not already set then define it. Driver mode is + # automatically included in the compile db by clang starting with release + # 9.0.0. + driver_mode = '' + # Only specify for Windows. Other platforms do fine without it. + if _IsTargettingWindows(target_os) and '--driver-mode' not in command: + driver_mode = '--driver-mode=cl' + + # Removes rewrapper(.exe). On Windows inserts --driver-mode=cl as the + # first arg. + # + # Deliberately avoid shlex.split() here, because it doesn't work predictably + # for Windows commands (specifically, it doesn't parse args the same way that + # Clang does on Windows). + # + # Instead, use a regex, with the simplifying assumption that the path to + # clang-cl.exe contains no spaces. + match = _CLANG_WRAPPER_CMD_LINE_RE.fullmatch(command) + if match: + match_dict = match.groupdict() + command = ' '.join([match_dict['clang'], driver_mode, match_dict['args']]) + elif _debugging: + print('Compile command didn\'t match expected regex!') + print('Command:', command) + print('Regex:', _CLANG_WRAPPER_CMD_LINE_RE.pattern) + + return _FilterFlags(command, filtered_args) + + +def _ProcessEntry(entry, filtered_args, target_os): + """Transforms one entry in a compile db to be more clang-tool friendly. + + Expands the contents of the response file, if any, and performs any + transformations needed to make the compile DB easier to use for third-party + tooling. + """ + # Expand the contents of the response file, if any. + # http://llvm.org/bugs/show_bug.cgi?id=21634 + try: + match = _RSP_RE.search(entry['command']) + if match: + rsp_path = os.path.join(entry['directory'], match.group(2)) + rsp_contents = open(rsp_path).read() + entry['command'] = ''.join([ + entry['command'][:match.start(1)], rsp_contents, + entry['command'][match.end(1):] + ]) + except IOError: + if _debugging: + print('Couldn\'t read response file for %s' % entry['file']) + + entry['command'] = _ProcessCommand(entry['command'], filtered_args, target_os) + + return entry + + +def ProcessCompileDatabase(compile_db, filtered_args, target_os=None): + """Make the compile db generated by ninja more clang-tool friendly. + + Args: + compile_db: The compile database parsed as a Python dictionary. + + Returns: + A postprocessed compile db that clang tooling can use. 
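+
+  Example (hypothetical entry, shown for illustration only): an input
+  command of
+    'rewrapper clang++ -Xclang -fuse-ctor-homing -c ../../a.cc'
+  comes back as 'clang++ -c ../../a.cc' on a non-Windows host: the
+  rewrapper prefix is stripped and the frontend-only flags are filtered
+  out (on Windows, --driver-mode=cl would also be inserted).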
+ """ + compile_db = [_ProcessEntry(e, filtered_args, target_os) for e in compile_db] + + if not _IsTargettingWindows(target_os): + return compile_db + + if _debugging: + print('Read in %d entries from the compile db' % len(compile_db)) + original_length = len(compile_db) + + # Filter out NaCl stuff. The clang tooling chokes on them. + # TODO(dcheng): This doesn't appear to do anything anymore, remove? + compile_db = [ + e for e in compile_db if '_nacl.cc.pdb' not in e['command'] + and '_nacl_win64.cc.pdb' not in e['command'] + ] + if _debugging: + print('Filtered out %d entries...' % (original_length - len(compile_db))) + + # TODO(dcheng): Also filter out multiple commands for the same file. Not sure + # how that happens, but apparently it's an issue on Windows. + return compile_db + + +def GetNinjaPath(): + ninja_executable = 'ninja.exe' if sys.platform == 'win32' else 'ninja' + return os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', + '..', '..', 'third_party', 'ninja', ninja_executable) + + +# FIXME: This really should be a build target, rather than generated at runtime. +def GenerateWithNinja(path, targets=None): + """Generates a compile database using ninja. + + Args: + path: The build directory to generate a compile database for. + targets: Additional targets to pass to ninja. + + Returns: + List of the contents of the compile database. + """ + # TODO(dcheng): Ensure that clang is enabled somehow. + + # First, generate the compile database. + ninja_path = GetNinjaPath() + if not os.path.exists(ninja_path): + ninja_path = shutil.which('ninja') + if targets is None: + targets = [] + json_compile_db = subprocess.check_output( + [ninja_path, '-C', path] + targets + + ['-t', 'compdb', 'cc', 'cxx', 'objc', 'objcxx']) + return json.loads(json_compile_db) + + +def Read(path): + """Reads a compile database into memory. + + Args: + path: Directory that contains the compile database. + """ + with open(os.path.join(path, 'compile_commands.json'), 'rb') as db: + return json.load(db) diff --git a/clang/pylib/clang/compile_db_test.py b/clang/pylib/clang/compile_db_test.py new file mode 100755 index 0000000000000000000000000000000000000000..6858e0994840c9803ebeebce437486a215faf6a9 --- /dev/null +++ b/clang/pylib/clang/compile_db_test.py @@ -0,0 +1,150 @@ +#!/usr/bin/env vpython3 +# Copyright 2019 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + + +"""Tests for compile_db.""" + +import sys +import unittest + +import compile_db + + +# Input compile DB. +_TEST_COMPILE_DB = [ + # Verifies that rewrapper.exe is removed. + { + 'command': r'C:\rewrapper.exe C:\clang-cl.exe /blah', + }, + # Verifies a rewrapper path containing a space. + { + 'command': r'"C:\Program Files\rewrapper.exe" C:\clang-cl.exe /blah', + }, + # Includes a string define. + { + 'command': r'clang-cl.exe /blah "-DCR_CLANG_REVISION=\"346388-1\""', + }, + # Includes a string define with a space in it. + { + 'command': r'clang-cl.exe /blah -D"MY_DEFINE=\"MY VALUE\""', + }, +] + +# Expected compile DB after processing for windows. 
+_EXPECTED_COMPILE_DB = [ + { + 'command': r'C:\clang-cl.exe --driver-mode=cl /blah', + }, + { + 'command': r'C:\clang-cl.exe --driver-mode=cl /blah', + }, + { + 'command': r'clang-cl.exe --driver-mode=cl /blah ' + r'"-DCR_CLANG_REVISION=\"346388-1\""', + }, + { + 'command': r'clang-cl.exe --driver-mode=cl /blah ' + r'-D"MY_DEFINE=\"MY VALUE\""', + }, +] + + +class CompileDbTest(unittest.TestCase): + + def setUp(self): + self.maxDiff = None + + def testProcessNotOnWindows(self): + sys.platform = 'linux2' + processed_compile_db = compile_db.ProcessCompileDatabase( + _TEST_COMPILE_DB, []) + + # Assert no changes were made. + try: + # assertItemsEqual is renamed assertCountEqual in Python3. + self.assertCountEqual(processed_compile_db, _TEST_COMPILE_DB) + except AttributeError: + self.assertItemsEqual(processed_compile_db, _TEST_COMPILE_DB) + + def testProcessForWindows_HostPlatformBased(self): + sys.platform = 'win32' + processed_compile_db = compile_db.ProcessCompileDatabase( + _TEST_COMPILE_DB, []) + + # Check each entry individually to improve readability of the output. + for actual, expected in zip(processed_compile_db, _EXPECTED_COMPILE_DB): + self.assertDictEqual(actual, expected) + + def testProcessForWindows_TargetOsBased(self): + sys.platform = 'linux2' + processed_compile_db = compile_db.ProcessCompileDatabase(_TEST_COMPILE_DB, + [], + target_os='win') + + # Check each entry individually to improve readability of the output. + for actual, expected in zip(processed_compile_db, _EXPECTED_COMPILE_DB): + self.assertDictEqual(actual, expected) + + def testFrontendArgsFiltered(self): + sys.platform = 'linux2' + input_db = [{ + 'command': + r'clang -g -Xclang -fuse-ctor-homing -funroll-loops test.cc' + }] + self.assertEquals(compile_db.ProcessCompileDatabase(input_db, []), + [{ + 'command': r'clang -g -funroll-loops test.cc' + }]) + + def testProfileSampleUseFiltered(self): + sys.platform = 'linux2' + input_db = [{ + 'command': + r'clang -g -fprofile-sample-use=../path/to.prof -funroll-loops test.cc' + }] + self.assertEquals(compile_db.ProcessCompileDatabase(input_db, []), + [{ + 'command': r'clang -g -funroll-loops test.cc' + }]) + + def testFilterArgs(self): + sys.platform = 'linux2' + input_db = [{'command': r'clang -g -ffile-compilation-dir=. 
-O3 test.cc'}] + self.assertEquals( + compile_db.ProcessCompileDatabase( + input_db, + ['-ffile-compilation-dir=.', '-frandom-flag-that-does-not-exist']), + [{ + 'command': r'clang -g -O3 test.cc' + }]) + + def testRewrapperRemoved(self): + sys.platform = 'linux2' + input_db = [{ + 'command': + r'./buildtools/reclient/rewrapper ./bin/clang++ -O3 test.cc', + }] + self.assertEquals(compile_db.ProcessCompileDatabase(input_db, []), + [{ + 'command': r'./bin/clang++ -O3 test.cc' + }]) + + def testRewrapperArgsRemoved(self): + sys.platform = 'linux2' + input_db = [{ + 'command': + r'./buildtools/reclient/rewrapper' + r' -cfg=./buildtools/reclient_cfgs/.../rewrapper_linux.cfg' + r' -exec_root=/chromium/src/' + r' ./bin/clang++ -O3 test.cc', + }] + self.assertEquals(compile_db.ProcessCompileDatabase(input_db, []), + [{ + 'command': r'./bin/clang++ -O3 test.cc' + }]) + + +if __name__ == '__main__': + unittest.main() diff --git a/clang/pylib/clang/plugin_testing.py b/clang/pylib/clang/plugin_testing.py new file mode 100755 index 0000000000000000000000000000000000000000..18b6e9b891bea8d6519da0fc8770ab09b44be174 --- /dev/null +++ b/clang/pylib/clang/plugin_testing.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# Copyright 2015 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from __future__ import print_function + +import glob +import os +import re +import subprocess +import sys + + +class ClangPluginTest(object): + """Test harness for clang plugins.""" + + def __init__(self, + test_base, + clang_path, + plugin_names, + reset_results, + filename_regex=None): + """Constructor. + + Args: + test_base: Path to the directory containing the tests. + clang_path: Path to the clang binary. + plugin_names: Names of the plugins. + reset_results: If true, resets expected results to the actual test output. + filename_regex: If present, only runs tests that match the regex pattern. + """ + self._test_base = test_base + self._clang_path = clang_path + self._plugin_names = plugin_names + self._reset_results = reset_results + self._filename_regex = filename_regex + + def AdjustClangArguments(self, clang_cmd): + """Tests can override this to customize the command line for clang.""" + pass + + def Run(self): + """Runs the tests. + + The working directory is temporarily changed to self._test_base while + running the tests. + + Returns: the number of failing tests. + """ + print('Using clang %s...' % self._clang_path) + + os.chdir(self._test_base) + + clang_cmd = [self._clang_path, '-std=c++20'] + + # Use the traditional diagnostics format (see crbug.com/1450229). + clang_cmd.extend([ + '-fno-diagnostics-show-line-numbers', '-fcaret-diagnostics-max-lines=1' + ]) + + for p in self._plugin_names: + clang_cmd.extend(['-Xclang', '-add-plugin', '-Xclang', p]) + self.AdjustClangArguments(clang_cmd) + + if not any('-fsyntax-only' in arg for arg in clang_cmd): + clang_cmd.append('-c') + + passing = [] + failing = [] + tests = glob.glob('*.cpp') + glob.glob('*.mm') + for test in tests: + if self._filename_regex and not re.search(self._filename_regex, test): + continue + + sys.stdout.write('Testing %s... ' % test) + test_name, _ = os.path.splitext(test) + + cmd = clang_cmd[:] + try: + # Some tests need to run with extra flags. 
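+        # e.g. a hypothetical foo.flags file placed next to foo.cpp and
+        # containing "-Wno-unused" would append that flag when compiling
+        # foo.cpp.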
+ cmd.extend(open('%s.flags' % test_name).read().split()) + except IOError: + pass + cmd.append(test) + + print("cmd", cmd) + failure_message = self.RunOneTest(test_name, cmd) + if failure_message: + print('failed: %s' % failure_message) + failing.append(test_name) + else: + print('passed!') + passing.append(test_name) + + print('Ran %d tests: %d succeeded, %d failed' % ( + len(passing) + len(failing), len(passing), len(failing))) + for test in failing: + print(' %s' % test) + return len(failing) + + def RunOneTest(self, test_name, cmd): + try: + actual = subprocess.check_output(cmd, + stderr=subprocess.STDOUT, + universal_newlines=True) + except subprocess.CalledProcessError as e: + # Some plugin tests intentionally trigger compile errors, so just ignore + # an exit code that indicates failure. + actual = e.output + except Exception as e: + return 'could not execute %s (%s)' % (cmd, e) + + return self.ProcessOneResult(test_name, actual) + + def ProcessOneResult(self, test_name, actual): + """Tests can override this for custom result processing.""" + # On Windows, clang emits CRLF as the end of line marker. Normalize it to LF + # to match posix systems. + actual = actual.replace('\r\n', '\n') + + result_file = '%s.txt%s' % (test_name, '' if self._reset_results else + '.actual') + try: + expected = open('%s.txt' % test_name).read() + except IOError: + open(result_file, 'w').write(actual) + return 'no expected file found' + + # Normalize backslashes to forward-slashes to avoid failure on Windows + actual = actual.replace('\\', '/') + expected = expected.replace('\\', '/') + + if expected != actual: + open(result_file, 'w').write(actual) + error = 'expected and actual differed\n' + error += 'Actual:\n' + actual + error += 'Expected:\n' + expected + return error diff --git a/clang/scripts/OWNERS b/clang/scripts/OWNERS new file mode 100644 index 0000000000000000000000000000000000000000..c3a87c5b37d6ea14de869a7bbbe730d5b5d18b57 --- /dev/null +++ b/clang/scripts/OWNERS @@ -0,0 +1,8 @@ +# This is the list of Chromium committers responsible for updating clang. +aeubanks@google.com +akhuang@google.com +ayzhao@google.com +hans@chromium.org +rnk@chromium.org +thakis@chromium.org +zequanwu@google.com diff --git a/clang/scripts/README.md b/clang/scripts/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e977c5ef026eef11720886e4f831b4af2d414eac --- /dev/null +++ b/clang/scripts/README.md @@ -0,0 +1 @@ +See https://chromium.googlesource.com/chromium/src/+/main/docs/clang.md diff --git a/clang/scripts/analyze_includes.py b/clang/scripts/analyze_includes.py new file mode 100755 index 0000000000000000000000000000000000000000..a2affe92d8db23812850e337b24f236e40a1fcc6 --- /dev/null +++ b/clang/scripts/analyze_includes.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python3 +# Copyright 2021 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""This script is used to analyze #include graphs. + +It produces the .js file that accompanies include-analysis.html. 
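+
+The build log must come from a build with show_includes=true (as in the gn
+args below): each compile step then prints its include tree as lines of
+dots followed by a path ('. a.h', '.. b.h', and so on), which is what
+parse_build() consumes.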
+ +Usage: + +$ gn gen --args="show_includes=true symbol_level=0 enable_precompiled_headers=false" out/Debug +$ autoninja -C out/Debug -v chrome | tee /tmp/build_log +$ analyze_includes.py --target=chrome --revision=$(git rev-parse --short HEAD) \ + --json-out=/tmp/include-analysis.js /tmp/build_log + +(If you have reclient access, add use_reclient=true to the gn args, but not on +Windows due to crbug.com/1223741#c9) + +The script takes roughly half an hour on a fast machine for the chrome build +target, which is considered fast enough for batch job purposes for now. + +If --json-out is not provided, the script exits after printing some statistics +to stdout. This is significantly faster than generating the full JSON data. For +example: + +$ autoninja -C out/Debug -v chrome | analyze_includes.py - 2>/dev/null +build_size 270237664463 +""" + +import argparse +import json +import os +import pathlib +import re +import sys +import unittest +from collections import defaultdict +from datetime import datetime + + +def parse_build(build_log, root_filter=None): + """Parse the build_log (generated as in the Usage note above) to capture the + include graph. Returns a (roots, includes) pair, where roots is a list of root + nodes in the graph (the source files) and includes is a dict from filename to + list of filenames included by that filename.""" + build_dir = '.' + file_stack = [] + includes = {} + roots = set() + + # Note: A file might include different files for different compiler + # invocations depending on -D flags. For such cases, includes[file] will be + # the union of those includes. + + # Normalize paths. + normalized = {} + + def norm(fn): + if fn not in normalized: + x = fn.replace('\\\\', '\\') + # Use Path.resolve() rather than path.realpath() to get the canonical + # upper/lower-case version of the path on Windows. + p = pathlib.Path(os.path.join(build_dir, x)).resolve() + x = os.path.relpath(p) + x = x.replace(os.path.sep, '/') + normalized[fn] = x + return normalized[fn] + + # ninja: Entering directory `out/foo' + ENTER_DIR_RE = re.compile(r'ninja: Entering directory `(.*?)\'$') + # [M/N] clang... -c foo.cc -o foo.o ... + # [M/N] .../clang... -c foo.cc -o foo.o ... + # [M/N] clang-cl.exe /c foo.cc /Fofoo.o ... + # [M/N] ...\clang-cl.exe /c foo.cc /Fofoo.o ... + COMPILE_RE = re.compile(r'\[\d+/\d+\] (.*[/\\])?clang.* [/-]c (\S*)') + # . a.h + # .. b.h + # . c.h + INCLUDE_RE = re.compile(r'(\.+) (.*)$') + + skipping_root = False + + for line in build_log: + m = INCLUDE_RE.match(line) + if m: + if skipping_root: + continue + prev_depth = len(file_stack) - 1 + depth = len(m.group(1)) + filename = norm(m.group(2)) + includes.setdefault(filename, set()) + + if depth > prev_depth: + if sys.platform != 'win32': + # TODO(crbug.com/40187759): Always assert. + assert depth == prev_depth + 1 + elif depth > prev_depth + 1: + # Until the bug is fixed, skip these includes. 
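+          # The include tree in the log can skip a depth level on Windows
+          # (crbug.com/40187759), e.g. a '...' line arriving right after a
+          # '.' line; such lines (and their subtrees) are dropped from the
+          # graph here.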
+ print('missing include under', file_stack[0]) + continue + else: + for _ in range(prev_depth - depth + 1): + file_stack.pop() + + includes[file_stack[-1]].add(filename) + file_stack.append(filename) + continue + + m = COMPILE_RE.match(line) + if m: + skipping_root = False + filename = norm(m.group(2)) + if root_filter and not root_filter.match(filename): + skipping_root = True + continue + roots.add(filename) + file_stack = [filename] + includes.setdefault(filename, set()) + continue + + m = ENTER_DIR_RE.match(line) + if m: + build_dir = m.group(1) + continue + + return roots, includes + + +class TestParseBuild(unittest.TestCase): + def test_basic(self): + x = [ + 'ninja: Entering directory `out/foo\'', + '[1/3] clang -c ../../a.cc -o a.o', + '. ../../a.h', + '[2/3] clang -c gen/c.c -o a.o', + ] + (roots, includes) = parse_build(x) + self.assertEqual(roots, set(['a.cc', 'out/foo/gen/c.c'])) + self.assertEqual(set(includes.keys()), + set(['a.cc', 'a.h', 'out/foo/gen/c.c'])) + self.assertEqual(includes['a.cc'], set(['a.h'])) + self.assertEqual(includes['a.h'], set()) + self.assertEqual(includes['out/foo/gen/c.c'], set()) + + def test_more(self): + x = [ + 'ninja: Entering directory `out/foo\'', + '[20/99] clang -c ../../a.cc -o a.o', + '. ../../a.h', + '. ../../b.h', + '.. ../../c.h', + '... ../../d.h', + '. ../../e.h', + ] + (roots, includes) = parse_build(x) + self.assertEqual(roots, set(['a.cc'])) + self.assertEqual(includes['a.cc'], set(['a.h', 'b.h', 'e.h'])) + self.assertEqual(includes['b.h'], set(['c.h'])) + self.assertEqual(includes['c.h'], set(['d.h'])) + self.assertEqual(includes['d.h'], set()) + self.assertEqual(includes['e.h'], set()) + + def test_multiple(self): + x = [ + 'ninja: Entering directory `out/foo\'', + '[123/234] clang -c ../../a.cc -o a.o', + '. ../../a.h', + '[124/234] clang -c ../../b.cc -o b.o', + '. ../../b.h', + ] + (roots, includes) = parse_build(x) + self.assertEqual(roots, set(['a.cc', 'b.cc'])) + self.assertEqual(includes['a.cc'], set(['a.h'])) + self.assertEqual(includes['b.cc'], set(['b.h'])) + + def test_root_filter(self): + x = [ + 'ninja: Entering directory `out/foo\'', + '[9/100] clang -c ../../a.cc -o a.o', + '. ../../a.h', + '[10/100] clang -c ../../b.cc -o b.o', + '. ../../b.h', + ] + (roots, includes) = parse_build(x, re.compile(r'^a.cc$')) + self.assertEqual(roots, set(['a.cc'])) + self.assertEqual(set(includes.keys()), set(['a.cc', 'a.h'])) + self.assertEqual(includes['a.cc'], set(['a.h'])) + + def test_windows(self): + x = [ + 'ninja: Entering directory `out/foo\'', + '[1/3] path\\clang-cl.exe /c ../../a.cc /Foa.o', + '. ../../a.h', + '[2/3] clang-cl.exe /c gen/c.c /Foa.o', + ] + (roots, includes) = parse_build(x) + self.assertEqual(roots, set(['a.cc', 'out/foo/gen/c.c'])) + self.assertEqual(set(includes.keys()), + set(['a.cc', 'a.h', 'out/foo/gen/c.c'])) + self.assertEqual(includes['a.cc'], set(['a.h'])) + self.assertEqual(includes['a.h'], set()) + self.assertEqual(includes['out/foo/gen/c.c'], set()) + + +def post_order_nodes(root, child_nodes): + """Generate the nodes reachable from root (including root itself) in + post-order traversal order. child_nodes maps each node to its children.""" + visited = set() + + def walk(n): + if n in visited: + return + visited.add(n) + + for c in child_nodes[n]: + for x in walk(c): + yield x + yield n + + return walk(root) + + +def compute_doms(root, includes): + """Compute the dominators for all nodes reachable from root. Node A dominates + node B if all paths from the root to B go through A. 
Returns a dict from + filename to the set of dominators of that filename (including itself). + + The implementation follows the "simple" version of Lengauer & Tarjan "A Fast + Algorithm for Finding Dominators in a Flowgraph" (TOPLAS 1979). + """ + + parent = {} + ancestor = {} + vertex = [] + label = {} + semi = {} + pred = defaultdict(list) + bucket = defaultdict(list) + dom = {} + + def dfs(v): + semi[v] = len(vertex) + vertex.append(v) + label[v] = v + + for w in includes[v]: + if w not in semi: + parent[w] = v + dfs(w) + pred[w].append(v) + + def compress(v): + if ancestor[v] in ancestor: + compress(ancestor[v]) + if semi[label[ancestor[v]]] < semi[label[v]]: + label[v] = label[ancestor[v]] + ancestor[v] = ancestor[ancestor[v]] + + def evaluate(v): + if v not in ancestor: + return v + compress(v) + return label[v] + + def link(v, w): + ancestor[w] = v + + # Step 1: Initialization. + dfs(root) + + for w in reversed(vertex[1:]): + # Step 2: Compute semidominators. + for v in pred[w]: + u = evaluate(v) + if semi[u] < semi[w]: + semi[w] = semi[u] + + bucket[vertex[semi[w]]].append(w) + link(parent[w], w) + + # Step 3: Implicitly define the immediate dominator for each node. + for v in bucket[parent[w]]: + u = evaluate(v) + dom[v] = u if semi[u] < semi[v] else parent[w] + bucket[parent[w]] = [] + + # Step 4: Explicitly define the immediate dominator for each node. + for w in vertex[1:]: + if dom[w] != vertex[semi[w]]: + dom[w] = dom[dom[w]] + + # Get the full dominator set for each node. + all_doms = {} + all_doms[root] = {root} + + def dom_set(node): + if node not in all_doms: + # node's dominators is itself and the dominators of its immediate + # dominator. + all_doms[node] = {node} + all_doms[node].update(dom_set(dom[node])) + + return all_doms[node] + + return {n: dom_set(n) for n in vertex} + + +class TestComputeDoms(unittest.TestCase): + def test_basic(self): + includes = {} + includes[1] = [2] + includes[2] = [1] + includes[3] = [2] + includes[4] = [1] + includes[5] = [4, 3] + root = 5 + + doms = compute_doms(root, includes) + + self.assertEqual(doms[1], set([5, 1])) + self.assertEqual(doms[2], set([5, 2])) + self.assertEqual(doms[3], set([5, 3])) + self.assertEqual(doms[4], set([5, 4])) + self.assertEqual(doms[5], set([5])) + + def test_larger(self): + # Fig. 1 in the Lengauer-Tarjan paper. + includes = {} + includes['a'] = ['d'] + includes['b'] = ['a', 'd', 'e'] + includes['c'] = ['f', 'g'] + includes['d'] = ['l'] + includes['e'] = ['h'] + includes['f'] = ['i'] + includes['g'] = ['i', 'j'] + includes['h'] = ['k', 'e'] + includes['i'] = ['k'] + includes['j'] = ['i'] + includes['k'] = ['i', 'r'] + includes['l'] = ['h'] + includes['r'] = ['a', 'b', 'c'] + root = 'r' + + doms = compute_doms(root, includes) + + # Fig. 2 in the Lengauer-Tarjan paper. + self.assertEqual(doms['a'], set(['a', 'r'])) + self.assertEqual(doms['b'], set(['b', 'r'])) + self.assertEqual(doms['c'], set(['c', 'r'])) + self.assertEqual(doms['d'], set(['d', 'r'])) + self.assertEqual(doms['e'], set(['e', 'r'])) + self.assertEqual(doms['f'], set(['f', 'c', 'r'])) + self.assertEqual(doms['g'], set(['g', 'c', 'r'])) + self.assertEqual(doms['h'], set(['h', 'r'])) + self.assertEqual(doms['i'], set(['i', 'r'])) + self.assertEqual(doms['j'], set(['j', 'g', 'c', 'r'])) + self.assertEqual(doms['k'], set(['k', 'r'])) + self.assertEqual(doms['l'], set(['l', 'd', 'r'])) + self.assertEqual(doms['r'], set(['r'])) + + +def trans_size(root, includes, sizes): + """Compute the transitive size of a file, i.e. 
the size of the file itself and + all its transitive includes.""" + return sum([sizes[n] for n in post_order_nodes(root, includes)]) + + +def log(*args, **kwargs): + """Log output to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + +def analyze(target, revision, build_log_file, json_file, root_filter): + log('Parsing build log...') + (roots, includes) = parse_build(build_log_file, root_filter) + + log('Getting file sizes...') + sizes = {name: os.path.getsize(name) for name in includes} + + log('Computing transitive sizes...') + trans_sizes = {n: trans_size(n, includes, sizes) for n in includes} + + build_size = sum([trans_sizes[n] for n in roots]) + + print('build_size', build_size) + + if json_file is None: + log('--json-out not set; exiting.') + return 0 + + log('Counting prevalence...') + prevalence = {name: 0 for name in includes} + for r in roots: + for n in post_order_nodes(r, includes): + prevalence[n] += 1 + + # Map from file to files that include it. + log('Building reverse include map...') + included_by = {k: set() for k in includes} + for k in includes: + for i in includes[k]: + included_by[i].add(k) + + log('Computing added sizes...') + + # Split each src -> dst edge in includes into src -> (src,dst) -> dst, so that + # we can compute how much each include graph edge adds to the size by doing + # dominance analysis on the (src,dst) nodes. + augmented_includes = {} + for src in includes: + augmented_includes[src] = set() + for dst in includes[src]: + augmented_includes[src].add((src, dst)) + augmented_includes[(src, dst)] = {dst} + + added_sizes = {node: 0 for node in augmented_includes} + for r in roots: + doms = compute_doms(r, augmented_includes) + for node in doms: + if node not in sizes: + # Skip the (src,dst) pseudo nodes. + continue + for dom in doms[node]: + added_sizes[dom] += sizes[node] + + # Assign a number to each filename for tighter JSON representation. + names = [] + name2nr = {} + for n in sorted(includes.keys()): + name2nr[n] = len(names) + names.append(n) + + def nr(name): + return name2nr[name] + + log('Writing output...') + + # Provide a JS object for convenient inclusion in the HTML file. + # If someone really wants a proper JSON file, maybe we can reconsider this. + json_file.write('data = ') + + json.dump( + { + 'target': target, + 'revision': revision, + 'date': datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC'), + 'files': names, + 'roots': [nr(x) for x in sorted(roots)], + 'includes': [[nr(x) for x in sorted(includes[n])] for n in names], + 'included_by': [[nr(x) for x in included_by[n]] for n in names], + 'sizes': [sizes[n] for n in names], + 'tsizes': [trans_sizes[n] for n in names], + 'asizes': [added_sizes[n] for n in names], + 'esizes': [[added_sizes[(s, d)] for d in sorted(includes[s])] + for s in names], + 'prevalence': [prevalence[n] for n in names], + }, json_file) + + log('All done!') + + +def main(): + result = unittest.main(argv=sys.argv[:1], exit=False, verbosity=2).result + if len(result.failures) > 0 or len(result.errors) > 0: + return 1 + + parser = argparse.ArgumentParser(description='Analyze an #include graph.') + parser.add_argument('build_log', + type=argparse.FileType('r', errors='replace'), + help='The build log to analyze (- for stdin).') + parser.add_argument('--target', + help='The target that was built (e.g. chrome).') + parser.add_argument('--revision', + help='The revision that was built (e.g. 
016588d4ee20).') + parser.add_argument( + '--json-out', + type=argparse.FileType('w'), + help='Write full analysis data to a JSON file (- for stdout).') + parser.add_argument('--root-filter', + help='Regex to filter which root files are analyzed.') + args = parser.parse_args() + + if args.json_out and not (args.target and args.revision): + print('error: --json-out requires both --target and --revision to be set') + return 1 + + try: + root_filter = re.compile(args.root_filter) if args.root_filter else None + except Exception: + print('error: --root-filter is not a valid regex') + return 1 + + analyze(args.target, args.revision, args.build_log, args.json_out, + root_filter) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/clang/scripts/apply_edits.py b/clang/scripts/apply_edits.py new file mode 100755 index 0000000000000000000000000000000000000000..909439473229e4dc65e52b4516bc8c00a80781d6 --- /dev/null +++ b/clang/scripts/apply_edits.py @@ -0,0 +1,475 @@ +#!/usr/bin/env vpython3 +# Copyright 2013 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""Applies edits generated by a clang tool that was run on Chromium code. + +Synopsis: + + cat run_tool.out | extract_edits.py | apply_edits.py + +For example - to apply edits only to WTF sources: + + ... | apply_edits.py out/gn third_party/WebKit/Source/wtf + +In addition to filters specified on the command line, the tool also skips edits +that apply to files that are not covered by git. +""" + +import argparse +import collections +import functools +import multiprocessing +import os +import os.path +import re +import subprocess +import sys + +script_dir = os.path.dirname(os.path.realpath(__file__)) +tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib')) +sys.path.insert(0, tool_dir) + +from clang import compile_db + +Edit = collections.namedtuple('Edit', + ('edit_type', 'offset', 'length', 'replacement')) + + +def _GetFilesFromGit(paths=None): + """Gets the list of files in the git repository. + + Args: + paths: Prefix filter for the returned paths. May contain multiple entries. + """ + args = [] + if sys.platform == 'win32': + args.append('git.bat') + else: + args.append('git') + args.append('ls-files') + if paths: + args.extend(paths) + command = subprocess.Popen(args, stdout=subprocess.PIPE) + output, _ = command.communicate() + output = output.decode('utf-8') + return [os.path.realpath(p) for p in output.splitlines()] + + +def _ParseEditsFromStdin(build_directory): + """Extracts generated list of edits from the tool's stdout. + + The expected format is documented at the top of this file. + + Args: + build_directory: Directory that contains the compile database. Used to + normalize the filenames. + + Returns: + A dictionary mapping filenames to the associated edits. 
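+
+  Example (hypothetical input line, shown for illustration only):
+    'r:::path/to/file.cc:::42:::7:::new_text'
+  is parsed into Edit('r', 42, 7, b'new_text'), keyed by the resolved path.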
+ """ + path_to_resolved_path = {} + def _ResolvePath(path): + if path in path_to_resolved_path: + return path_to_resolved_path[path] + + if not os.path.isfile(path): + resolved_path = os.path.realpath(os.path.join(build_directory, path)) + else: + resolved_path = os.path.realpath(path) + + if not os.path.isfile(resolved_path): + sys.stderr.write('Edit applies to a non-existent file: %s\n' % path) + resolved_path = None + + path_to_resolved_path[path] = resolved_path + return resolved_path + + edits = collections.defaultdict(list) + for line in sys.stdin: + line = line.rstrip("\n\r") + try: + edit_type, path, offset, length, replacement = line.split(':::', 4) + replacement = replacement.replace('\0', '\n') + path = _ResolvePath(path) + if not path: continue + edits[path].append( + Edit(edit_type, int(offset), int(length), + replacement.encode("utf-8"))) + except ValueError: + sys.stderr.write('Unable to parse edit: %s\n' % line) + return edits + + +_PLATFORM_SUFFIX = \ + r'(?:_(?:android|aura|chromeos|ios|linux|mac|ozone|posix|win|x11))?' +_TEST_SUFFIX = \ + r'(?:_(?:browser|interactive_ui|ui|unit)?test)?' +_suffix_regex = re.compile(_PLATFORM_SUFFIX + _TEST_SUFFIX) + + +def _FindPrimaryHeaderBasename(filepath): + """ Translates bar/foo.cc -> foo + bar/foo_posix.cc -> foo + bar/foo_unittest.cc -> foo + bar/foo.h -> None + """ + dirname, filename = os.path.split(filepath) + basename, extension = os.path.splitext(filename) + if extension == '.h': + return None + basename = _suffix_regex.sub('', basename) + return basename + + +_SYSTEM_INCLUDE_INSERTION_POINT_REGEX_TEMPLATE = r''' + ^(?! # Match the start of the first line that is + # not one of the following: + + \s+ # 1. Line starting with whitespace + # (this includes blank lines and continuations of + # C comments that start with whitespace/indentation) + + | // # 2a. A C++ comment + | /\* # 2b. A C comment + | \* # 2c. A continuation of a C comment + # (see also rule 1. above) + + | \xef \xbb \xbf # 3. "Lines" starting with BOM character + + # 4. Include guards (Chromium-style) + | \#ifndef \s+ [A-Z0-9_]+_H ( | _ | __ ) \b \s* $ + | \#define \s+ [A-Z0-9_]+_H ( | _ | __ ) \b \s* $ + + # 4b. Include guards (anything that repeats): + # - the same has to repeat in both the #ifndef and the #define + # - #define has to be "simple" - either: + # - either: #define GUARD + # - or : #define GUARD 1 + | \#ifndef \s+ (?P [A-Za-z0-9_]* ) \s* $ ( \n | \r )* ^ + \#define \s+ (?P=guard) \s* ( | 1 \s* ) $ + | \#define \s+ (?P=guard) \s* ( | 1 \s* ) $ # Skipping previous line. + # 5. A C/C++ system include + | \#include \s* < .* > + ''' + +_INCLUDE_INSERTION_POINT_REGEX_TEMPLATE = r''' + # 6. A primary header include + # (%%s should be the basename returned by _FindPrimaryHeaderBasename). + # + # TODO(lukasza): Do not allow any directory below - require the top-level + # directory to be the same and at least one itermediate dirname to be the + # same. + | \#include \s* " + [^"]* \b # Allowing any directory + %s[^"/]*\.h " # Matching both basename.h and basename_posix.h + ) +''' + + +_NEWLINE_CHARACTERS = [ord('\n'), ord('\r')] + + +def _FindStartOfPreviousLine(contents, index): + """ Requires that `index` points to the start of a line. + Returns an index to the start of the previous line. + """ + assert (index > 0) + assert (contents[index - 1] in _NEWLINE_CHARACTERS) + + # Go back over the newline characters associated with the *single* end of a + # line just before `index`, despite of whether end of a line is designated by + # "\r", "\n" or "\r\n". 
Examples: + # 1. "... \r\n \r\n ... + # 2. "... \n \n ... + index = index - 1 + if index > 0 and contents[index - 1] in _NEWLINE_CHARACTERS and \ + contents[index - 1] != contents[index]: + index = index - 1 + + # Go back until `index` points right after an end of a line (or at the + # beginning of the `contents`). + while index > 0 and contents[index - 1] not in _NEWLINE_CHARACTERS: + index = index - 1 + + return index + + +def _SkipOverPreviousComment(contents, index): + """ Returns `index`, possibly moving it earlier so that it skips over comment + lines appearing in `contents` just before the old `index. + + Example: + // Comment + // Comment + bar + """ + # If `index` points at the start of the file, or `index` doesn't point at the + # beginning of a line, then don't skip anything and just return `index`. + if index == 0 or contents[index - 1] not in _NEWLINE_CHARACTERS: + return index + + # Is the previous line a non-comment? If so, just return `index`. + new_index = _FindStartOfPreviousLine(contents, index) + prev_text = contents[new_index:index] + _COMMENT_START_REGEX = b"^ \s* ( // | \* )" + if not re.search(_COMMENT_START_REGEX, prev_text, re.VERBOSE): + return index + + # Otherwise skip over the previous line + continue skipping via recursion. + return _SkipOverPreviousComment(contents, new_index) + + +def _InsertIncludeHeader(filepath, header_line_to_add, contents, is_system): + """ Mutates |contents| (contents of |filepath|) to #include + the |header_to_add + """ + # Don't add the header if it is already present. + replacement_text = header_line_to_add + if replacement_text in contents: + return contents + replacement_text += b"\n" + + # Find the right insertion point. + # + # Note that we depend on a follow-up |git cl format| for the right order of + # headers. Therefore we just need to find the right header group (e.g. skip + # system headers and the primary header). + primary_header_basename = _FindPrimaryHeaderBasename(filepath) + if primary_header_basename is None: + primary_header_basename = ':this:should:never:match:' + regex_text = _SYSTEM_INCLUDE_INSERTION_POINT_REGEX_TEMPLATE + if is_system: + regex_text += ')' + else: + regex_text += (_INCLUDE_INSERTION_POINT_REGEX_TEMPLATE % + primary_header_basename) + + match = re.search(regex_text.encode("utf-8"), contents, + re.MULTILINE | re.VERBOSE) + assert (match is not None) + insertion_point = _SkipOverPreviousComment(contents, match.start()) + + # Extra empty line is required if the addition is not adjacent to other + # includes. + if not contents[insertion_point:].startswith(b"#include"): + replacement_text += b"\n" + + # Make the edit. 
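+  # (By this point replacement_text ends with at least one b"\n", so the
+  # splice below keeps the line that previously started at insertion_point
+  # on its own line.)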
+ return contents[:insertion_point] + replacement_text + \ + contents[insertion_point:] + + +def _ApplyReplacement(filepath, contents, edit, last_edit): + assert (edit.edit_type == 'r') + assert ((last_edit is None) or (last_edit.edit_type == 'r')) + + if last_edit is not None: + if edit.offset == last_edit.offset and edit.length == last_edit.length: + assert (edit.replacement != last_edit.replacement) + raise ValueError( + ('Conflicting replacement text: ' + + '%s at offset %d, length %d: "%s" != "%s"\n') % + (filepath, edit.offset, edit.length, edit.replacement.decode("utf-8"), + last_edit.replacement.decode("utf-8"))) + + if edit.offset + edit.length > last_edit.offset: + raise ValueError( + ('Overlapping replacements: ' + + '%s at offset %d, length %d: "%s" and ' + + 'offset %d, length %d: "%s"\n') % + (filepath, edit.offset, edit.length, edit.replacement.decode("utf-8"), + last_edit.offset, last_edit.length, + last_edit.replacement.decode("utf-8"))) + + start = edit.offset + end = edit.offset + edit.length + original_contents = contents + contents = contents[:start] + edit.replacement + contents[end:] + if not edit.replacement: + contents = _ExtendDeletionIfElementIsInList(original_contents, contents, + edit.offset, edit.length) + return contents + + +def _ApplyIncludeHeader(filepath, contents, edit, last_edit, is_system): + header_line_to_add = '#include ' + name = edit.replacement.decode("utf-8") + if is_system: + header_line_to_add += '<%s>' % name + else: + header_line_to_add += '"%s"' % name + return _InsertIncludeHeader(filepath, header_line_to_add.encode("utf-8"), + contents, is_system) + + +def _ApplySingleEdit(filepath, contents, edit, last_edit): + if edit.edit_type == 'r': + return _ApplyReplacement(filepath, contents, edit, last_edit) + elif edit.edit_type == 'include-user-header': + return _ApplyIncludeHeader(filepath, contents, edit, last_edit, False) + elif edit.edit_type == 'include-system-header': + return _ApplyIncludeHeader(filepath, contents, edit, last_edit, True) + else: + raise ValueError('Unrecognized edit directive "%s": %s\n' % + (edit.edit_type, filepath)) + return contents + + +def _ApplyEditsToSingleFileContents(filepath, contents, edits): + # Sort the edits and iterate through them in reverse order. Sorting allows + # duplicate edits to be quickly skipped, while reversing means that + # subsequent edits don't need to have their offsets updated with each edit + # applied. + # + # Note that after sorting in reverse, the 'i' directives will come after 'r' + # directives. + edits.sort(reverse=True) + + edit_count = 0 + error_count = 0 + last_edit = None + for edit in edits: + if edit == last_edit: + continue + try: + contents = _ApplySingleEdit(filepath, contents, edit, last_edit) + last_edit = edit + edit_count += 1 + except ValueError as err: + sys.stderr.write(str(err) + '\n') + error_count += 1 + + return (contents, edit_count, error_count) + + +def _ApplyEditsToSingleFile(filepath, edits): + with open(filepath, 'rb+') as f: + contents = f.read() + (contents, edit_count, + error_count) = _ApplyEditsToSingleFileContents(filepath, contents, edits) + f.seek(0) + f.truncate() + f.write(contents) + return (edit_count, error_count) + + +def _ApplyEdits(edits): + """Apply the generated edits. + + Args: + edits: A dict mapping filenames to Edit instances that apply to that file. 
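+
+  Returns:
+    The number of errors encountered, negated (i.e. 0 when every edit
+    applied cleanly); main() passes this through as the process exit status.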
+ """ + edit_count = 0 + error_count = 0 + done_files = 0 + for k, v in edits.items(): + tmp_edit_count, tmp_error_count = _ApplyEditsToSingleFile(k, v) + edit_count += tmp_edit_count + error_count += tmp_error_count + done_files += 1 + percentage = (float(done_files) / len(edits)) * 100 + sys.stdout.write('Applied %d edits (%d errors) to %d files [%.2f%%]\r' % + (edit_count, error_count, done_files, percentage)) + + sys.stdout.write('\n') + return -error_count + + +_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' '))) + + +def _ExtendDeletionIfElementIsInList(original_contents, contents, offset, + length): + """Extends the range of a deletion if the deleted element was part of a list. + + This rewriter helper makes it easy for refactoring tools to remove elements + from a list. Even if a matcher callback knows that it is removing an element + from a list, it may not have enough information to accurately remove the list + element; for example, another matcher callback may end up removing an adjacent + list element, or all the list elements may end up being removed. + + With this helper, refactoring tools can simply remove the list element and not + worry about having to include the comma in the replacement. + + Args: + original_contents: A bytearray before the deletion was applied. + contents: A bytearray with the deletion already applied. + offset: The offset in the bytearray where the deleted range used to be. + length: The length in the bytearray where the deleted range used to be. + """ + char_before = char_after = None + left_trim_count = 0 + for byte in reversed(contents[:offset]): + left_trim_count += 1 + if byte in _WHITESPACE_BYTES: + continue + if byte in (ord(','), ord(':'), ord('('), ord('{')): + char_before = chr(byte) + break + + right_trim_count = 0 + for byte in contents[offset:]: + right_trim_count += 1 + if byte in _WHITESPACE_BYTES: + continue + if byte == ord(','): + char_after = chr(byte) + break + + def notify(left_offset, right_offset): + (start, end) = (offset, offset + length) + deleted = original_contents[start:end].decode('utf-8') + (start, end) = (start - left_offset, end + right_offset) + extended = original_contents[start:end].decode('utf-8') + (start, end) = (max(0, start - 5), end + 5) + context = original_contents[start:end].decode('utf-8') + sys.stdout.write('Extended deletion of "%s" to "%s" in "...%s..."\n' % + (deleted, extended, context)) + + if char_before: + if char_after: + notify(0, right_trim_count) + return contents[:offset] + contents[offset + right_trim_count:] + elif char_before in (',', ':'): + notify(left_trim_count, 0) + return contents[:offset - left_trim_count] + contents[offset:] + return contents + + +def main(): + parser = argparse.ArgumentParser( + epilog=""" +Reads edit directives from stdin and applies them to all files under +Git control, modulo the path filters. + +See docs/clang_tool_refactoring.md for details. + +When an edit direct has an empty replacement text (e.g., +"r:::path/to/file/to/edit:::offset1:::length1:::") and the script detects that +the deleted text is part of a "list" (e.g., function parameters, initializers), +the script extends the deletion to remove commas, etc. as needed. A way to +suppress this behavior is to replace the text with a single space or similar +(e.g., "r:::path/to/file/to/edit:::offset1:::length1::: "). 
+""", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + '-p', + required=True, + help='path to the build dir (dir that edit paths are relative to)') + parser.add_argument( + 'path_filter', + nargs='*', + help='optional paths to filter what files the tool is run on') + args = parser.parse_args() + + filenames = set(_GetFilesFromGit(args.path_filter)) + edits = _ParseEditsFromStdin(args.p) + return _ApplyEdits( + {k: v + for k, v in edits.items() if os.path.realpath(k) in filenames}) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/clang/scripts/apply_edits_test.py b/clang/scripts/apply_edits_test.py new file mode 100755 index 0000000000000000000000000000000000000000..91da03da2b52f83f5563a03d8c1c746d411f2b6c --- /dev/null +++ b/clang/scripts/apply_edits_test.py @@ -0,0 +1,765 @@ +#!/usr/bin/env vpython3 +# Copyright 2020 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import unittest + +import apply_edits + + +def _FindPHB(filepath): + return apply_edits._FindPrimaryHeaderBasename(filepath) + + +class FindPrimaryHeaderBasenameTest(unittest.TestCase): + def testNoOpOnHeader(self): + self.assertIsNone(_FindPHB('bar.h')) + self.assertIsNone(_FindPHB('foo/bar.h')) + + def testStripDirectories(self): + self.assertEqual('bar', _FindPHB('foo/bar.cc')) + + def testStripPlatformSuffix(self): + self.assertEqual('bar', _FindPHB('bar_posix.cc')) + self.assertEqual('bar', _FindPHB('bar_unittest.cc')) + + def testStripTestSuffix(self): + self.assertEqual('bar', _FindPHB('bar_browsertest.cc')) + self.assertEqual('bar', _FindPHB('bar_unittest.cc')) + + def testStripPlatformAndTestSuffix(self): + self.assertEqual('bar', _FindPHB('bar_uitest_aura.cc')) + self.assertEqual('bar', _FindPHB('bar_linux_unittest.cc')) + + def testNoSuffixStrippingWithoutUnderscore(self): + self.assertEqual('barunittest', _FindPHB('barunittest.cc')) + + +def _ApplyEdit(old_contents_string, + edit, + contents_filepath="some_file.cc", + last_edit=None): + if last_edit is not None: + assert (last_edit > edit) # Test or prod caller should ensure. + ba = bytearray() + ba.extend(old_contents_string.encode('utf-8')) + return apply_edits._ApplySingleEdit(contents_filepath, + old_contents_string.encode("utf-8"), edit, + last_edit).decode("utf-8") + + +def _InsertHeader(old_contents, + contents_filepath='foo/impl.cc', + new_header_path='new/header.h'): + edit = apply_edits.Edit("include-user-header", -1, -1, + new_header_path.encode("utf-8")) + return _ApplyEdit(old_contents, edit, contents_filepath) + + +class InsertIncludeHeaderTest(unittest.TestCase): + def _assertEqualContents(self, expected, actual): + if expected != actual: + print("####################### EXPECTED:") + print(expected) + print("####################### ACTUAL:") + print(actual) + print("####################### END.") + self.assertEqual(expected, actual) + + def testSkippingCppComments(self): + old_contents = ''' +// Copyright info here. + +#include "old/header.h" + ''' + expected_new_contents = ''' +// Copyright info here. + +#include "new/header.h" +#include "old/header.h" + ''' + new_header_line = '#include "new/header.h' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingCppComments_DocCommentForStruct(self): + """ This is a regression test for https://crbug.com/1175684 """ + old_contents = ''' +// Copyright blah blah... 
+ +#ifndef SANDBOX_LINUX_SYSTEM_HEADERS_LINUX_FILTER_H_ +#define SANDBOX_LINUX_SYSTEM_HEADERS_LINUX_FILTER_H_ + +#include + +// Doc comment for a struct. +// Multiline. +struct sock_filter { + uint16_t code; +}; + ''' + expected_new_contents = ''' +// Copyright blah blah... + +#ifndef SANDBOX_LINUX_SYSTEM_HEADERS_LINUX_FILTER_H_ +#define SANDBOX_LINUX_SYSTEM_HEADERS_LINUX_FILTER_H_ + +#include + +#include "new/header.h" + +// Doc comment for a struct. +// Multiline. +struct sock_filter { + uint16_t code; +}; + ''' + new_header_line = '#include "new/header.h' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingCppComments_DocCommentForStruct2(self): + """ This is a regression test for https://crbug.com/1175684 """ + old_contents = ''' +// Copyright blah blah... + +// Doc comment for a struct. +struct sock_filter { + uint16_t code; +}; + ''' + expected_new_contents = ''' +// Copyright blah blah... + +#include "new/header.h" + +// Doc comment for a struct. +struct sock_filter { + uint16_t code; +}; + ''' + new_header_line = '#include "new/header.h' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingCppComments_DocCommentForStruct3(self): + """ This is a regression test for https://crbug.com/1175684 """ + old_contents = ''' +// Doc comment for a struct. +struct sock_filter { + uint16_t code; +}; + ''' + expected_new_contents = ''' +#include "new/header.h" + +// Doc comment for a struct. +struct sock_filter { + uint16_t code; +}; + ''' + new_header_line = '#include "new/header.h' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingCppComments_DocCommentForInclude(self): + """ This is a regression test for https://crbug.com/1175684 """ + old_contents = ''' +// Copyright blah blah... + +// System includes. +#include + +// Doc comment for a struct. +struct sock_filter { + uint16_t code; +}; + ''' + expected_new_contents = ''' +// Copyright blah blah... + +// System includes. +#include + +#include "new/header.h" + +// Doc comment for a struct. +struct sock_filter { + uint16_t code; +}; + ''' + new_header_line = '#include "new/header.h' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingCppComments_DocCommentForWholeFile(self): + """ This is a regression test for https://crbug.com/1175684 """ + old_contents = ''' +// Copyright blah blah... + +// Doc comment for the whole file. + +struct sock_filter { + uint16_t code; +}; + ''' + expected_new_contents = ''' +// Copyright blah blah... + +// Doc comment for the whole file. + +#include "new/header.h" + +struct sock_filter { + uint16_t code; +}; + ''' + new_header_line = '#include "new/header.h' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingOldStyleComments(self): + old_contents = ''' +/* Copyright + * info here. + */ + +#include "old/header.h" + ''' + expected_new_contents = ''' +/* Copyright + * info here. + */ + +#include "new/header.h" +#include "old/header.h" + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingOldStyleComments_NoWhitespaceAtLineStart(self): + old_contents = ''' +/* Copyright +* info here. +*/ + +#include "old/header.h" + ''' + expected_new_contents = ''' +/* Copyright +* info here. 
+*/ + +#include "new/header.h" +#include "old/header.h" + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingSystemHeaders(self): + old_contents = ''' +#include +#include // blah + +#include "old/header.h" + ''' + expected_new_contents = ''' +#include +#include // blah + +#include "new/header.h" +#include "old/header.h" + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingPrimaryHeader(self): + old_contents = ''' +// Copyright info here. + +#include "foo/impl.h" + +#include "old/header.h" + ''' + expected_new_contents = ''' +// Copyright info here. + +#include "foo/impl.h" + +#include "new/header.h" +#include "old/header.h" + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSimilarNonPrimaryHeader_WithPrimaryHeader(self): + old_contents = ''' +// Copyright info here. + +#include "primary/impl.h" // This is the primary header. + +#include "unrelated/impl.h" // This is *not* the primary header. +#include "zzz/foo.h" + ''' + expected_new_contents = ''' +// Copyright info here. + +#include "primary/impl.h" // This is the primary header. + +#include "unrelated/impl.h" // This is *not* the primary header. +#include "new/header.h" +#include "zzz/foo.h" + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSimilarNonPrimaryHeader_NoPrimaryHeader(self): + old_contents = ''' +// Copyright info here. + +#include "unrelated/impl.h" // This is *not* the primary header. +#include "zzz/foo.h" + ''' + expected_new_contents = ''' +// Copyright info here. + +#include "unrelated/impl.h" // This is *not* the primary header. +#include "new/header.h" +#include "zzz/foo.h" + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingIncludeGuards(self): + old_contents = ''' +#ifndef FOO_IMPL_H_ +#define FOO_IMPL_H_ + +#include "old/header.h" + +#endif FOO_IMPL_H_ + ''' + expected_new_contents = ''' +#ifndef FOO_IMPL_H_ +#define FOO_IMPL_H_ + +#include "new/header.h" +#include "old/header.h" + +#endif FOO_IMPL_H_ + ''' + self._assertEqualContents( + expected_new_contents, + _InsertHeader(old_contents, 'foo/impl.h', 'new/header.h')) + + def testSkippingIncludeGuards2(self): + # This test is based on base/third_party/valgrind/memcheck.h + old_contents = ''' +#ifndef __MEMCHECK_H +#define __MEMCHECK_H + +#include "old/header.h" + +#endif + ''' + expected_new_contents = ''' +#ifndef __MEMCHECK_H +#define __MEMCHECK_H + +#include "new/header.h" +#include "old/header.h" + +#endif + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + def testSkippingIncludeGuards3(self): + # This test is based on base/third_party/xdg_mime/xdgmime.h + old_contents = ''' +#ifndef __XDG_MIME_H__ +#define __XDG_MIME_H__ + +#include "old/header.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef void (*XdgMimeCallback) (void *user_data); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* __XDG_MIME_H__ */ + ''' + expected_new_contents = ''' +#ifndef __XDG_MIME_H__ +#define __XDG_MIME_H__ + +#include "new/header.h" +#include "old/header.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +typedef void (*XdgMimeCallback) (void *user_data); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* __XDG_MIME_H__ */ + ''' + self._assertEqualContents(expected_new_contents, + _InsertHeader(old_contents)) + + 
+  @unittest.skip(
+      "Failing test due to regex (in apply_edits.py) not working as expected, please fix."
+  )
+  def testSkippingIncludeGuards4(self):
+    # This test is based on ash/first_run/desktop_cleaner.h and/or
+    # components/subresource_filter/core/common/scoped_timers.h and/or
+    # device/gamepad/abstract_haptic_gamepad.h
+    old_contents = '''
+#ifndef ASH_FIRST_RUN_DESKTOP_CLEANER_
+#define ASH_FIRST_RUN_DESKTOP_CLEANER_
+
+#include "old/header.h"
+
+namespace ash {
+} // namespace ash
+
+#endif // ASH_FIRST_RUN_DESKTOP_CLEANER_
+    '''
+    expected_new_contents = '''
+#ifndef ASH_FIRST_RUN_DESKTOP_CLEANER_
+#define ASH_FIRST_RUN_DESKTOP_CLEANER_
+
+#include "new/header.h"
+#include "old/header.h"
+
+namespace ash {
+} // namespace ash
+
+#endif // ASH_FIRST_RUN_DESKTOP_CLEANER_
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  @unittest.skip(
+      "Failing test due to regex (in apply_edits.py) not working as expected, please fix."
+  )
+  def testSkippingIncludeGuards5(self):
+    # This test is based on third_party/weston/include/GLES2/gl2.h (the |extern
+    # "C"| part has been removed to make the test trickier to handle right -
+    # otherwise it is easy to see that the header has to be included before the
+    # |extern "C"| part).
+    #
+    # The tricky parts below include:
+    # 1. upper + lower case characters allowed in the guard name
+    # 2. Having to recognize that GL_APIENTRYP is *not* a guard
+    old_contents = '''
+#ifndef __gles2_gl2_h_
+#define __gles2_gl2_h_ 1
+
+#include <GLES2/gl2platform.h>
+
+#ifndef GL_APIENTRYP
+#define GL_APIENTRYP GL_APIENTRY*
+#endif
+
+#endif
+    '''
+    expected_new_contents = '''
+#ifndef __gles2_gl2_h_
+#define __gles2_gl2_h_ 1
+
+#include <GLES2/gl2platform.h>
+
+#include "new/header.h"
+
+#ifndef GL_APIENTRYP
+#define GL_APIENTRYP GL_APIENTRY*
+#endif
+
+#endif
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  @unittest.skip(
+      "Failing test due to regex (in apply_edits.py) not working as expected, please fix."
+  )
+  def testSkippingIncludeGuards6(self):
+    # This test is based on ios/third_party/blink/src/html_token.h
+    old_contents = '''
+#ifndef HTMLToken_h
+#define HTMLToken_h
+
+#include <stddef.h>
+#include <stdint.h>
+
+// ...
+
+#endif
+    '''
+    expected_new_contents = '''
+#ifndef HTMLToken_h
+#define HTMLToken_h
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "new/header.h"
+
+// ...
+
+#endif
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testNoOpIfAlreadyPresent(self):
+    # This tests that the new header won't be inserted (and duplicated)
+    # if it is already included.
+    old_contents = '''
+// Copyright info here.
+
+#include "old/header.h"
+#include "new/header.h"
+#include "new/header2.h"
+    '''
+    expected_new_contents = '''
+// Copyright info here.
+
+#include "old/header.h"
+#include "new/header.h"
+#include "new/header2.h"
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testNoOpIfAlreadyPresent_WithTrailingComment(self):
+    # This tests that the new header won't be inserted (and duplicated)
+    # if it is already included.
+    old_contents = '''
+// Copyright info here.
+
+#include "old/header.h"
+#include "new/header.h" // blah
+#include "new/header2.h"
+    '''
+    expected_new_contents = '''
+// Copyright info here.
+
+#include "old/header.h"
+#include "new/header.h" // blah
+#include "new/header2.h"
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testNoOldHeaders(self):
+    # This tests that an extra new line is inserted after the new header
+    # when there are no old headers immediately below.
+    old_contents = '''
+#include <vector>
+
+struct S {};
+    '''
+    expected_new_contents = '''
+#include <vector>
+
+#include "new/header.h"
+
+struct S {};
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testPlatformIfDefs(self):
+    # This test is based on
+    # //base/third_party/double_conversion/double-conversion/utils.h
+    # We need to insert the new header in a non-conditional part.
+    old_contents = '''
+#ifndef DOUBLE_CONVERSION_UTILS_H_
+#define DOUBLE_CONVERSION_UTILS_H_
+
+#include <cstdlib>
+#include <cstring>
+
+#ifndef DOUBLE_CONVERSION_UNREACHABLE
+#ifdef _MSC_VER
+void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();
+inline void abort_noreturn() { abort(); }
+#define DOUBLE_CONVERSION_UNREACHABLE() (abort_noreturn())
+#else
+#define DOUBLE_CONVERSION_UNREACHABLE() (abort())
+#endif
+#endif
+
+namespace double_conversion {
+    '''
+    expected_new_contents = '''
+#ifndef DOUBLE_CONVERSION_UTILS_H_
+#define DOUBLE_CONVERSION_UTILS_H_
+
+#include <cstdlib>
+#include <cstring>
+
+#include "new/header.h"
+
+#ifndef DOUBLE_CONVERSION_UNREACHABLE
+#ifdef _MSC_VER
+void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();
+inline void abort_noreturn() { abort(); }
+#define DOUBLE_CONVERSION_UNREACHABLE() (abort_noreturn())
+#else
+#define DOUBLE_CONVERSION_UNREACHABLE() (abort())
+#endif
+#endif
+
+namespace double_conversion {
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testNoOldIncludesAndIfDefs(self):
+    # Artificial test: no old #includes + some #ifdefs. The main focus of the
+    # test is ensuring that the new header will be inserted into the
+    # unconditional part of the file.
+    old_contents = '''
+#ifndef NDEBUG
+#include "base/logging.h"
+#endif
+
+void foo();
+    '''
+    expected_new_contents = '''
+#include "new/header.h"
+
+#ifndef NDEBUG
+#include "base/logging.h"
+#endif
+
+void foo();
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testNoOldIncludesAndIfDefs2(self):
+    # Artificial test: no old #includes + some #ifdefs. The main focus of the
+    # test is ensuring that the new header will be inserted into the
+    # unconditional part of the file.
+    old_contents = '''
+#if BUILDFLAG(IS_WIN)
+#include "foo_win.h"
+#endif
+
+void foo();
+    '''
+    expected_new_contents = '''
+#include "new/header.h"
+
+#if BUILDFLAG(IS_WIN)
+#include "foo_win.h"
+#endif
+
+void foo();
+    '''
+    self._assertEqualContents(expected_new_contents,
+                              _InsertHeader(old_contents))
+
+  def testUtf8BomMarker(self):
+    # Test based on
+    # //chrome/browser/ui/views/payments/payment_sheet_view_controller.cc
+    # which at some point began as follows:
+    # 00000000: efbb bf2f 2f20 436f 7079 7269 6768 7420  ...// Copyright
+    #
+    # Previous versions of apply_edits.py would not skip the BOM marker when
+    # figuring out where to insert the new include header.
+    old_contents = u'''\ufeff// Copyright
+
+#include "old/header.h"
+    '''
+    expected_new_contents = u'''\ufeff// Copyright
+
+#include "new/header.h"
+#include "old/header.h"
+    '''
+    actual = bytearray()
+    actual.extend(old_contents.encode('utf-8'))
+    expected = bytearray()
+    expected.extend(expected_new_contents.encode('utf-8'))
+    # Test sanity check (i.e. not an assertion about code under test).
+    utf8_bom = [0xef, 0xbb, 0xbf]
+    self._assertEqualContents(list(actual[0:3]), utf8_bom)
+    self._assertEqualContents(list(expected[0:3]), utf8_bom)
+    # Actual test.
+    edit = apply_edits.Edit('include-user-header', -1, -1, b"new/header.h")
+    actual = apply_edits._ApplySingleEdit("foo/impl.cc", actual, edit, None)
+    self._assertEqualContents(expected, actual)
+
+
+def _CreateReplacement(content_string, old_substring, new_substring):
+  """ Test helper for creating an Edit object with the right offset, etc. """
+  b_content_string = content_string.encode("utf-8")
+  b_old_string = old_substring.encode("utf-8")
+  b_new_string = new_substring.encode("utf-8")
+  offset = b_content_string.find(b_old_string)
+  return apply_edits.Edit('r', offset, len(b_old_string), b_new_string)
+
+
+class ApplyReplacementTest(unittest.TestCase):
+  def testBasics(self):
+    old_text = "123 456 789"
+    r = _CreateReplacement(old_text, "456", "foo")
+    new_text = _ApplyEdit(old_text, r)
+    self.assertEqual("123 foo 789", new_text)
+
+  def testMiddleListElementRemoval(self):
+    old_text = "(123, 456, 789) // foobar"
+    r = _CreateReplacement(old_text, "456", "")
+    new_text = _ApplyEdit(old_text, r)
+    self.assertEqual("(123, 789) // foobar", new_text)
+
+  def testFinalElementRemoval(self):
+    old_text = "(123, 456, 789) // foobar"
+    r = _CreateReplacement(old_text, "789", "")
+    new_text = _ApplyEdit(old_text, r)
+    self.assertEqual("(123, 456) // foobar", new_text)
+
+  def testConflictingReplacement(self):
+    old_text = "123 456 789"
+    last = _CreateReplacement(old_text, "456", "foo")
+    edit = _CreateReplacement(old_text, "456", "bar")
+    expected_msg_regex = 'Conflicting replacement text'
+    expected_msg_regex += '.*some_file.cc at offset 4, length 3'
+    expected_msg_regex += '.*"bar" != "foo"'
+    with self.assertRaisesRegex(ValueError, expected_msg_regex):
+      _ApplyEdit(old_text, edit, last_edit=last)
+
+  def testUnrecognizedEditDirective(self):
+    old_text = "123 456 789"
+    edit = apply_edits.Edit('unknown_directive', 123, 456, "foo")
+    expected_msg_regex = 'Unrecognized edit directive "unknown_directive"'
+    expected_msg_regex += '.*some_file.cc'
+    with self.assertRaisesRegex(ValueError, expected_msg_regex):
+      _ApplyEdit(old_text, edit)
+
+  def testOverlappingReplacement(self):
+    old_text = "123 456 789"
+    last = _CreateReplacement(old_text, "456 789", "foo")
+    edit = _CreateReplacement(old_text, "123 456", "bar")
+    expected_msg_regex = 'Overlapping replacements'
+    expected_msg_regex += '.*some_file.cc'
+    expected_msg_regex += '.*offset 0, length 7.*"bar"'
+    expected_msg_regex += '.*offset 4, length 7.*"foo"'
+    with self.assertRaisesRegex(ValueError, expected_msg_regex):
+      _ApplyEdit(old_text, edit, last_edit=last)
+
+
+if __name__ == '__main__':
+  unittest.main()
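A point that is easy to miss in the replacement tests above: the offset and length carried by apply_edits.Edit count bytes in the UTF-8 encoded file, not characters. _CreateReplacement computes them with find() on encoded byte strings, and the BOM test exists precisely because the three BOM bytes shift every subsequent offset. A minimal, self-contained illustration:

# Byte offsets vs. character offsets, as exercised by testUtf8BomMarker.
text = u'\ufeff// Copyright'
encoded = text.encode('utf-8')

# The BOM is one character but three bytes (0xef 0xbb 0xbf), so the
# comment starts at character 1 yet at byte 3.
assert text.find(u'// Copyright') == 1
assert encoded.find(b'// Copyright') == 3
assert list(encoded[0:3]) == [0xef, 0xbb, 0xbf]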
diff --git a/clang/scripts/apply_fixits.py b/clang/scripts/apply_fixits.py
new file mode 100755
index 0000000000000000000000000000000000000000..fb975d3753215c4e85bd0407a53773d497dfabbe
--- /dev/null
+++ b/clang/scripts/apply_fixits.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env vpython3
+# Copyright 2015 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Applies fixits generated by clang. clang's -Xclang -fixit-recompile flag
+# automatically applies fixits and recompiles the result, but this does not work
+# well with parallel clang invocations.
+#
+# Usage:
+# 1. Enable parseable fixits and disable warnings as errors. Instructions for
+# doing this vary based on the build environment, but for GN, warnings as
+# errors can be disabled by setting treat_warnings_as_errors = false.
+# Enabling parseable fixits requires editing build/config/compiler/BUILD.gn
+# and adding `-fdiagnostics-parseable-fixits` to cflags.
+# 2. Build everything and capture the output:
+# ninja -C <build directory> &> generated-fixits
+# 3. Apply the fixits with this script:
+# python apply_fixits.py -p <build directory> < generated-fixits
+
+from __future__ import print_function
+
+import argparse
+import collections
+import fileinput
+import os
+import re
+import sys
+
+# fix-it:"../../base/threading/sequenced_worker_pool.h":{341:3-341:11}:""
+# Note that the file path is relative to the build directory.
+_FIXIT_RE = re.compile(r'^fix-it:"(?P<file>.+?)":'
+                       r'{(?P<start_line>\d+?):(?P<start_col>\d+?)-'
+                       r'(?P<end_line>\d+?):(?P<end_col>\d+?)}:'
+                       r'"(?P<text>.*?)"$')
+
+FixIt = collections.namedtuple(
+    'FixIt', ('start_line', 'start_col', 'end_line', 'end_col', 'text'))
+
+
+def main():
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '-p',
+      required=True,
+      help='path to the build directory to complete relative paths in fixits')
+  args = parser.parse_args()
+
+  fixits = collections.defaultdict(list)
+  for line in fileinput.input(['-']):
+    if not line.startswith('fix-it:'):
+      continue
+    m = _FIXIT_RE.match(line)
+    if not m:
+      continue
+    # The negative line numbers are a hack to sort fixits in line order but
+    # reverse column order. Applying the fixits in reverse order makes things
+    # simpler, since column offsets won't have to be adjusted as the text is
+    # changed.
+    fixits[m.group('file')].append(FixIt(
+        int(m.group('start_line')), -int(m.group('start_col')), int(m.group(
+            'end_line')), -int(m.group('end_col')), m.group('text')))
+  for k, v in fixits.items():
+    v.sort()
+    with open(os.path.join(args.p, k), mode='r+', encoding='utf-8') as f:
+      lines = f.readlines()
+      last_fixit = None
+      line_offset = 0
+      for fixit in v:
+        if fixit == last_fixit:
+          continue
+        last_fixit = fixit
+
+        # The line/column numbers emitted in fixit hints start at 1, so offset
+        # them appropriately. Also apply unary `-` to all column numbers to
+        # reverse the hack above.
+        prefix = lines[fixit.start_line + line_offset - 1][:-fixit.start_col -
+                                                           1]
+        suffix = lines[fixit.end_line + line_offset - 1][-fixit.end_col - 1:]
+
+        lines[fixit.start_line + line_offset - 1] = prefix + fixit.text + suffix
+
+        del lines[fixit.start_line + line_offset + 1 - 1:fixit.end_line +
+                  line_offset + 1 - 1]
+        line_offset -= fixit.end_line - fixit.start_line
+
+      f.seek(0)
+      f.truncate()
+      f.writelines(lines)
+
+
+if __name__ == '__main__':
+  sys.exit(main())
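apply_fixits.py rests on two small tricks worth seeing in isolation: the named groups in _FIXIT_RE, and the negated column numbers, which make a plain tuple sort order fixits by ascending line but descending column so edits can be applied back-to-front within a line. A self-contained illustration, reusing the sample fix-it line from the script's own comment:

import re

# Mirrors _FIXIT_RE above; the named groups are what m.group('file') etc. read.
fixit_re = re.compile(r'^fix-it:"(?P<file>.+?)":'
                      r'{(?P<start_line>\d+?):(?P<start_col>\d+?)-'
                      r'(?P<end_line>\d+?):(?P<end_col>\d+?)}:'
                      r'"(?P<text>.*?)"$')

line = ('fix-it:"../../base/threading/sequenced_worker_pool.h"'
        ':{341:3-341:11}:""')
m = fixit_re.match(line)
assert m.group('file') == '../../base/threading/sequenced_worker_pool.h'
assert (m.group('start_line'), m.group('start_col')) == ('341', '3')
assert m.group('text') == ''

# Negating the columns makes a plain tuple sort yield "ascending line,
# descending column", so fixits later in a line are applied first and
# earlier column offsets stay valid:
fixits = [(341, -3), (341, -11), (120, -7)]
assert sorted(fixits) == [(120, -7), (341, -11), (341, -3)]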
diff --git a/clang/scripts/build.py b/clang/scripts/build.py
new file mode 100755
index 0000000000000000000000000000000000000000..02e98f6d7b58772535027057303dc02dcab0ec67
--- /dev/null
+++ b/clang/scripts/build.py
@@ -0,0 +1,1556 @@
+#!/usr/bin/env python3
+# Copyright 2019 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script is used to build clang binaries. It is used by package.py to
+create the prebuilt binaries downloaded by update.py and used by developers.
+
+The expectation is that update.py downloads prebuilt binaries for everyone, and
+nobody should run this script as part of normal development.
+
+DEAR MAC USER: YOU NEED XCODE INSTALLED TO BUILD LLVM/CLANG WITH THIS SCRIPT.
+The Xcode command line tools that are installed as part of the Chromium
+development setup process are not sufficient. CMake will fail to configure, as
+the non-system Clang we use will not find any standard library headers. To use
+this build script on Mac:
+1. Download Xcode. (Visit http://go/xcode for googlers.)
+2. Install to /Applications
+3. sudo xcode-select --switch /Applications/Xcode.app
+"""
+
+import argparse
+import glob
+import io
+import json
+import multiprocessing
+import os
+import shlex
+import platform
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import urllib.request
+
+from update import (CDS_URL, CHROMIUM_DIR, CLANG_REVISION, LLVM_BUILD_DIR,
+                    FORCE_HEAD_REVISION_FILE, PACKAGE_VERSION, RELEASE_VERSION,
+                    STAMP_FILE, THIS_DIR, DownloadUrl, DownloadAndUnpack,
+                    DownloadAndUnpackPackage, EnsureDirExists, GetDefaultHostOs,
+                    ReadStampFile, RmTree, WriteStampFile)
+
+# Path constants. (All of these should be absolute paths.)
+THIRD_PARTY_DIR = os.path.join(CHROMIUM_DIR, 'third_party')
+LLVM_DIR = os.path.join(THIRD_PARTY_DIR, 'llvm')
+COMPILER_RT_DIR = os.path.join(LLVM_DIR, 'compiler-rt')
+LLVM_GIT_URL = ('https://chromium.googlesource.com/external/' +
+                'github.com/llvm/llvm-project')
+LLVM_BOOTSTRAP_DIR = os.path.join(THIRD_PARTY_DIR, 'llvm-bootstrap')
+LLVM_BOOTSTRAP_INSTALL_DIR = os.path.join(THIRD_PARTY_DIR,
+                                          'llvm-bootstrap-install')
+LLVM_INSTRUMENTED_DIR = os.path.join(THIRD_PARTY_DIR, 'llvm-instrumented')
+LLVM_PROFDATA_FILE = os.path.join(LLVM_INSTRUMENTED_DIR, 'profdata.prof')
+LLVM_BUILD_TOOLS_DIR = os.path.abspath(
+    os.path.join(LLVM_DIR, '..', 'llvm-build-tools'))
+ANDROID_NDK_DIR = os.path.join(CHROMIUM_DIR, 'third_party',
+                               'android_toolchain', 'ndk')
+ANDROID_NDK_TOOLCHAIN_RELATIVE_DIR = os.path.join('toolchains', 'llvm',
+                                                  'prebuilt', 'linux-x86_64')
+ANDROID_NDK_TOOLCHAIN_DIR = os.path.join(ANDROID_NDK_DIR,
+                                         ANDROID_NDK_TOOLCHAIN_RELATIVE_DIR)
+FUCHSIA_SDK_DIR = os.path.join(CHROMIUM_DIR, 'third_party', 'fuchsia-sdk',
+                               'sdk')
+PINNED_CLANG_DIR = os.path.join(LLVM_BUILD_TOOLS_DIR, 'pinned-clang')
+
+BUG_REPORT_URL = ('https://crbug.com in the Tools>LLVM component,'
+                  ' run tools/clang/scripts/process_crashreports.py'
+                  ' (only if inside Google) to upload crash related files,')
+
+LIBXML2_VERSION = 'libxml2-v2.9.12'
+ZSTD_VERSION = 'zstd-1.5.5'
+
+win_sdk_dir = None
+def GetWinSDKDir():
+  """Get the location of the current SDK."""
+  global win_sdk_dir
+  if win_sdk_dir:
+    return win_sdk_dir
+
+  # Don't let vs_toolchain overwrite our environment.
+  environ_bak = dict(os.environ)
+
+  sys.path.append(os.path.join(CHROMIUM_DIR, 'build'))
+  import vs_toolchain
+  win_sdk_dir = vs_toolchain.SetEnvironmentAndGetSDKDir()
+  msvs_version = vs_toolchain.GetVisualStudioVersion()
+
+  if bool(int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', '1'))):
+    dia_path = os.path.join(win_sdk_dir, '..', 'DIA SDK', 'bin', 'amd64')
+  else:
+    if 'GYP_MSVS_OVERRIDE_PATH' not in os.environ:
+      vs_path = vs_toolchain.DetectVisualStudioPath()
+    else:
+      vs_path = os.environ['GYP_MSVS_OVERRIDE_PATH']
+    dia_path = os.path.join(vs_path, 'DIA SDK', 'bin', 'amd64')
+
+  os.environ.clear()
+  os.environ.update(environ_bak)
+  return win_sdk_dir
+
+
+def RunCommand(command, setenv=False, env=None, fail_hard=True):
+  """Run command and return success (True) or failure; or if fail_hard is
+  True, exit on failure. 
If setenv is True, runs the command in a + shell with the msvc tools for x64 architecture.""" + + if setenv and sys.platform == 'win32': + command = [os.path.join(CHROMIUM_DIR, 'tools', 'win', 'setenv.bat'), '&&' + ] + command + + # https://docs.python.org/2/library/subprocess.html: + # "On Unix with shell=True [...] if args is a sequence, the first item + # specifies the command string, and any additional items will be treated as + # additional arguments to the shell itself. That is to say, Popen does the + # equivalent of: + # Popen(['/bin/sh', '-c', args[0], args[1], ...])" + # + # We want to pass additional arguments to command[0], not to the shell, + # so manually join everything into a single string. + # Annoyingly, for "svn co url c:\path", shlex.quote() thinks that it should + # quote c:\path but svn can't handle quoted paths on Windows. Since on + # Windows follow-on args are passed to args[0] instead of the shell, don't + # do the single-string transformation there. + if sys.platform != 'win32': + command = ' '.join([shlex.quote(c) for c in command]) + print('Running', command) + if subprocess.call(command, env=env, shell=True) == 0: + return True + print('Failed.') + if fail_hard: + sys.exit(1) + return False + + +def CopyFile(src, dst): + """Copy a file from src to dst.""" + print("Copying %s to %s" % (src, dst)) + shutil.copy(src, dst) + + +def CopyDirectoryContents(src, dst): + """Copy the files from directory src to dst.""" + dst = os.path.realpath(dst) # realpath() in case dst ends in /.. + EnsureDirExists(dst) + for f in os.listdir(src): + CopyFile(os.path.join(src, f), dst) + + +def CheckoutGitRepo(name, git_url, commit, dir): + """Checkout the git repo at a certain git commit in dir. Any local + modifications in dir will be lost.""" + + print(f'Checking out {name} {commit} into {dir}') + + # Try updating the current repo if it exists and has no local diff. + if os.path.isdir(dir): + os.chdir(dir) + # git diff-index --exit-code returns 0 when there is no diff. + # Also check that the first commit is reachable. + if (RunCommand(['git', 'diff-index', '--exit-code', 'HEAD'], + fail_hard=False) + and RunCommand(['git', 'fetch'], fail_hard=False) + and RunCommand(['git', 'checkout', commit], fail_hard=False) + and RunCommand(['git', 'clean', '-f'], fail_hard=False)): + return + + # If we can't use the current repo, delete it. + os.chdir(CHROMIUM_DIR) # Can't remove dir if we're in it. + print('Removing %s.' 
% dir) + RmTree(dir) + + clone_cmd = ['git', 'clone', git_url, dir] + + if RunCommand(clone_cmd, fail_hard=False): + os.chdir(dir) + if RunCommand(['git', 'checkout', commit], fail_hard=False): + return + + print('CheckoutGitRepo failed.') + sys.exit(1) + + +def GitCherryPick(git_repository, git_remote, commit): + print(f'Cherry-picking {commit} in {git_repository} from {git_remote}') + git_cmd = ['git', '-C', git_repository] + RunCommand(git_cmd + ['remote', 'add', 'github', git_remote], fail_hard=False) + RunCommand(git_cmd + ['fetch', '--recurse-submodules=no', 'github', commit]) + is_ancestor = RunCommand(git_cmd + + ['merge-base', '--is-ancestor', commit, 'HEAD'], + fail_hard=False) + if is_ancestor: + print('Commit already an ancestor; skipping.') + return + RunCommand([ + 'git', '-C', git_repository, 'cherry-pick', '--keep-redundant-commits', + commit + ]) + + +def GetLatestLLVMCommit(): + """Get the latest commit hash in the LLVM monorepo.""" + main = json.loads( + urllib.request.urlopen('https://chromium.googlesource.com/external/' + + 'github.com/llvm/llvm-project/' + + '+/refs/heads/main?format=JSON').read().decode( + "utf-8").replace(")]}'", "")) + return main['commit'] + + +def GetCommitDescription(commit): + """Get the output of `git describe`. + + Needs to be called from inside the git repository dir.""" + git_exe = 'git.bat' if sys.platform.startswith('win') else 'git' + return subprocess.check_output([ + git_exe, 'describe', '--long', '--abbrev=8', '--match=*llvmorg-*-init', + commit + ], universal_newlines=True).rstrip() + + +def AddCMakeToPath(): + """Download CMake and add it to PATH.""" + if sys.platform == 'win32': + zip_name = 'cmake-3.26.4-windows-x86_64.zip' + dir_name = ['cmake-3.26.4-windows-x86_64', 'bin'] + elif sys.platform == 'darwin': + zip_name = 'cmake-3.26.4-macos-universal.tar.gz' + dir_name = ['cmake-3.26.4-macos-universal', 'CMake.app', 'Contents', 'bin'] + else: + zip_name = 'cmake-3.26.4-linux-x86_64.tar.gz' + dir_name = ['cmake-3.26.4-linux-x86_64', 'bin'] + + cmake_dir = os.path.join(LLVM_BUILD_TOOLS_DIR, *dir_name) + if not os.path.exists(cmake_dir): + DownloadAndUnpack(CDS_URL + '/tools/' + zip_name, LLVM_BUILD_TOOLS_DIR) + os.environ['PATH'] = cmake_dir + os.pathsep + os.environ.get('PATH', '') + + +def AddGitForWindowsToPath(): + """Download Git for Windows and add it to PATH. 
+
+  Git for Windows provides command line utilities (not Git) for tests."""
+  assert sys.platform == 'win32'
+
+  git_dir = os.path.join(LLVM_BUILD_TOOLS_DIR, 'git-for-windows')
+  version = '2.47.0'
+  stamp_file = os.path.join(git_dir, 'stamp')
+  if ReadStampFile(stamp_file) == version:
+    print('Git for Windows already up to date.')
+  else:
+    archive_name = 'PortableGit-%s-64-bit.zip' % version
+    DownloadAndUnpack(CDS_URL + '/tools/' + archive_name, git_dir)
+    WriteStampFile(version, stamp_file)
+
+  os.environ['PATH'] = os.path.join(
+      git_dir, 'usr', 'bin') + os.pathsep + os.environ.get('PATH', '')
+
+
+def AddZlibToPath(dry_run=False):
+  """Download and build zlib, and add to PATH."""
+  zlib_dir = os.path.join(LLVM_BUILD_TOOLS_DIR, 'zlib-1.2.11')
+  if dry_run:
+    return zlib_dir
+
+  if os.path.exists(zlib_dir):
+    RmTree(zlib_dir)
+  zip_name = 'zlib-1.2.11.tar.gz'
+  DownloadAndUnpack(CDS_URL + '/tools/' + zip_name, LLVM_BUILD_TOOLS_DIR)
+  os.chdir(zlib_dir)
+  zlib_files = [
+      'adler32', 'compress', 'crc32', 'deflate', 'gzclose', 'gzlib', 'gzread',
+      'gzwrite', 'inflate', 'infback', 'inftrees', 'inffast', 'trees',
+      'uncompr', 'zutil'
+  ]
+  cl_flags = [
+      '/nologo', '/O2', '/DZLIB_DLL', '/c', '/D_CRT_SECURE_NO_DEPRECATE',
+      '/D_CRT_NONSTDC_NO_DEPRECATE'
+  ]
+  RunCommand(['cl.exe'] + [f + '.c' for f in zlib_files] + cl_flags,
+             setenv=True)
+  RunCommand(['lib.exe'] + [f + '.obj'
+                            for f in zlib_files] + ['/nologo', '/out:zlib.lib'],
+             setenv=True)
+  # Remove the test directory so it isn't found when trying to find
+  # test.exe.
+  shutil.rmtree('test')
+
+  os.environ['PATH'] = zlib_dir + os.pathsep + os.environ.get('PATH', '')
+  return zlib_dir
+
+
+class LibXmlDirs:
+  def __init__(self):
+    self.unzip_dir = LLVM_BUILD_TOOLS_DIR
+    # When unpacked in `unzip_dir`, this will be the directory where the
+    # sources are found.
+    self.src_dir = os.path.join(self.unzip_dir, LIBXML2_VERSION)
+    # The lib is built in a directory under its sources.
+    self.build_dir = os.path.join(self.src_dir, 'build')
+    # The lib is installed in a directory under where it's built.
+    self.install_dir = os.path.join(self.build_dir, 'install')
+    # The full path to installed include files.
+    self.include_dir = os.path.join(self.install_dir, 'include', 'libxml2')
+    # The full path to installed lib files.
+    self.lib_dir = os.path.join(self.install_dir, 'lib')
+
+
+def GetLibXml2Dirs():
+  """Gets the set of directories where LibXml2 is located.
+
+  Includes the directories where the source is unpacked, where it is built,
+  and installed."""
+  return LibXmlDirs()
+
+
+def BuildLibXml2():
+  """Download and build libxml2"""
+  # The .tar.gz on GCS was uploaded as follows.
+  # The gitlab page has more up-to-date packages than http://xmlsoft.org/,
+  # and the official releases on xmlsoft.org are only available over ftp too.
+  # $ VER=v2.9.12
+  # $ curl -O \
+  #     https://gitlab.gnome.org/GNOME/libxml2/-/archive/$VER/libxml2-$VER.tar.gz
+  # $ gsutil cp -n -a public-read libxml2-$VER.tar.gz \
+  #     gs://chromium-browser-clang/tools
+
+  dirs = GetLibXml2Dirs()
+  if os.path.exists(dirs.src_dir):
+    RmTree(dirs.src_dir)
+  zip_name = LIBXML2_VERSION + '.tar.gz'
+  DownloadAndUnpack(CDS_URL + '/tools/' + zip_name, dirs.unzip_dir)
+  os.mkdir(dirs.build_dir)
+  os.chdir(dirs.build_dir)
+
+  # Disable everything except WITH_TREE and WITH_OUTPUT, both needed by LLVM's
+  # WindowsManifestMerger.
+  # Also enable WITH_THREADS, else libxml doesn't compile on Linux.
+  RunCommand(
+      [
+          'cmake',
+          '-GNinja',
+          '-DCMAKE_BUILD_TYPE=Release',
+          '-DCMAKE_INSTALL_PREFIX=install',
+          '-DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded',  # /MT to match LLVM.
+          '-DBUILD_SHARED_LIBS=OFF',
+          '-DLIBXML2_WITH_C14N=OFF',
+          '-DLIBXML2_WITH_CATALOG=OFF',
+          '-DLIBXML2_WITH_DEBUG=OFF',
+          '-DLIBXML2_WITH_DOCB=OFF',
+          '-DLIBXML2_WITH_FTP=OFF',
+          '-DLIBXML2_WITH_HTML=OFF',
+          '-DLIBXML2_WITH_HTTP=OFF',
+          '-DLIBXML2_WITH_ICONV=OFF',
+          '-DLIBXML2_WITH_ICU=OFF',
+          '-DLIBXML2_WITH_ISO8859X=OFF',
+          '-DLIBXML2_WITH_LEGACY=OFF',
+          '-DLIBXML2_WITH_LZMA=OFF',
+          '-DLIBXML2_WITH_MEM_DEBUG=OFF',
+          '-DLIBXML2_WITH_MODULES=OFF',
+          '-DLIBXML2_WITH_OUTPUT=ON',
+          '-DLIBXML2_WITH_PATTERN=OFF',
+          '-DLIBXML2_WITH_PROGRAMS=OFF',
+          '-DLIBXML2_WITH_PUSH=OFF',
+          '-DLIBXML2_WITH_PYTHON=OFF',
+          '-DLIBXML2_WITH_READER=OFF',
+          '-DLIBXML2_WITH_REGEXPS=OFF',
+          '-DLIBXML2_WITH_RUN_DEBUG=OFF',
+          '-DLIBXML2_WITH_SAX1=OFF',
+          '-DLIBXML2_WITH_SCHEMAS=OFF',
+          '-DLIBXML2_WITH_SCHEMATRON=OFF',
+          '-DLIBXML2_WITH_TESTS=OFF',
+          '-DLIBXML2_WITH_THREADS=ON',
+          '-DLIBXML2_WITH_THREAD_ALLOC=OFF',
+          '-DLIBXML2_WITH_TREE=ON',
+          '-DLIBXML2_WITH_VALID=OFF',
+          '-DLIBXML2_WITH_WRITER=OFF',
+          '-DLIBXML2_WITH_XINCLUDE=OFF',
+          '-DLIBXML2_WITH_XPATH=OFF',
+          '-DLIBXML2_WITH_XPTR=OFF',
+          '-DLIBXML2_WITH_ZLIB=OFF',
+          '..',
+      ],
+      setenv=True)
+  RunCommand(['ninja', 'install'], setenv=True)
+
+  if sys.platform == 'win32':
+    libxml2_lib = os.path.join(dirs.lib_dir, 'libxml2s.lib')
+  else:
+    libxml2_lib = os.path.join(dirs.lib_dir, 'libxml2.a')
+  extra_cmake_flags = [
+      '-DLLVM_ENABLE_LIBXML2=FORCE_ON',
+      '-DLIBXML2_INCLUDE_DIR=' + dirs.include_dir.replace('\\', '/'),
+      '-DLIBXML2_LIBRARIES=' + libxml2_lib.replace('\\', '/'),
+      '-DLIBXML2_LIBRARY=' + libxml2_lib.replace('\\', '/'),
+
+      # This hermetic libxml2 has enough features enabled for lld-link, but not
+      # for the libxml2 usage in libclang. We don't need libxml2 support in
+      # libclang, so just turn that off.
+      '-DCLANG_ENABLE_LIBXML2=NO',
+  ]
+  extra_cflags = ['-DLIBXML_STATIC']
+
+  return extra_cmake_flags, extra_cflags
+
+
+class ZStdDirs:
+  """
+  The set of directories where zstd is located.
+
+  Includes the directories where the source is unpacked, where it is built,
+  and installed.
+  """
+  def __init__(self):
+    self.unzip_dir = LLVM_BUILD_TOOLS_DIR
+    # When unpacked in `unzip_dir`, this will be the directory where the
+    # sources are found.
+    self.src_dir = os.path.join(self.unzip_dir, ZSTD_VERSION)
+    # The lib is built in a directory under its sources. Note, zstd uses
+    # build/cmake for cmake.
+    self.build_dir = os.path.join(self.src_dir, 'cmake_build')
+    # The lib is installed in a directory under where it's built.
+    self.install_dir = os.path.join(self.build_dir, 'install')
+    # The full path to installed include files.
+    self.include_dir = os.path.join(self.install_dir, 'include')
+    # The full path to installed lib files.
+    self.lib_dir = os.path.join(self.install_dir, 'lib')
+
+
+def BuildZStd():
+  """Download and build zstd lib"""
+  # The zstd-1.5.5.tar.gz was downloaded from
+  # https://github.com/facebook/zstd/releases/
+  # and uploaded as follows.
+  # $ gsutil cp -n -a public-read zstd-$VER.tar.gz \
+  #     gs://chromium-browser-clang/tools
+
+  dirs = ZStdDirs()
+  if os.path.exists(dirs.src_dir):
+    RmTree(dirs.src_dir)
+  zip_name = ZSTD_VERSION + '.tar.gz'
+  DownloadAndUnpack(CDS_URL + '/tools/' + zip_name, dirs.unzip_dir)
+  os.mkdir(dirs.build_dir)
+  os.chdir(dirs.build_dir)
+
+  RunCommand(
+      [
+          'cmake',
+          '-GNinja',
+          '-DCMAKE_BUILD_TYPE=Release',
+          '-DCMAKE_INSTALL_PREFIX=install',
+          '-DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded',  # /MT to match LLVM.
+          '-DZSTD_BUILD_SHARED=OFF',
+          '../build/cmake',
+      ],
+      setenv=True)
+  RunCommand(['ninja', 'install'], setenv=True)
+
+  if sys.platform == 'win32':
+    zstd_lib = os.path.join(dirs.lib_dir, 'zstd_static.lib')
+  else:
+    zstd_lib = os.path.join(dirs.lib_dir, 'libzstd.a')
+  extra_cmake_flags = [
+      '-DLLVM_ENABLE_ZSTD=ON',
+      '-DLLVM_USE_STATIC_ZSTD=ON',
+      '-Dzstd_INCLUDE_DIR=' + dirs.include_dir.replace('\\', '/'),
+      '-Dzstd_LIBRARY=' + zstd_lib.replace('\\', '/'),
+  ]
+  extra_cflags = []
+
+  return extra_cmake_flags, extra_cflags
+
+
+def DownloadPinnedClang():
+  PINNED_CLANG_VERSION = 'llvmorg-17-init-16420-g0c545a44-1'
+  DownloadAndUnpackPackage('clang', PINNED_CLANG_DIR, GetDefaultHostOs(),
+                           PINNED_CLANG_VERSION)
+
+
+def VerifyVersionOfBuiltClangMatchesVERSION():
+  """Checks that `clang --version` outputs RELEASE_VERSION. If this
+  fails, update.RELEASE_VERSION is out-of-date and needs to be updated (possibly
+  in an `if args.llvm_force_head_revision:` block in update.main() first)."""
+  clang = os.path.join(LLVM_BUILD_DIR, 'bin', 'clang')
+  if sys.platform == 'win32':
+    clang += '-cl.exe'
+  version_out = subprocess.check_output([clang, '--version'],
+                                        universal_newlines=True)
+  version_out = re.match(r'clang version ([0-9]+)', version_out).group(1)
+  if version_out != RELEASE_VERSION:
+    print(('unexpected clang version %s (not %s), '
+           'update RELEASE_VERSION in update.py')
+          % (version_out, RELEASE_VERSION))
+    sys.exit(1)
+
+
+def VerifyZlibSupport():
+  """Check that clang was built with zlib support enabled."""
+  clang = os.path.join(LLVM_BUILD_DIR, 'bin', 'clang')
+  test_file = '/dev/null'
+  if sys.platform == 'win32':
+    clang += '.exe'
+    test_file = 'nul'
+
+  print('Checking for zlib support')
+  clang_out = subprocess.check_output([
+      clang, '-target', 'x86_64-unknown-linux-gnu', '-gz', '-c', '-###', '-x',
+      'c', test_file
+  ],
+                                      stderr=subprocess.STDOUT,
+                                      universal_newlines=True)
+  if (re.search(r'--compress-debug-sections', clang_out)):
+    print('OK')
+  else:
+    print(('Failed to detect zlib support!\n\n(driver output: %s)') % clang_out)
+    sys.exit(1)
+
+
+def VerifyZStdSupport():
+  """Check that lld was built with zstd support enabled."""
+  lld = os.path.join(LLVM_BUILD_DIR, 'bin')
+  if sys.platform == 'win32':
+    lld = os.path.join(lld, 'lld-link.exe')
+  elif sys.platform == 'linux':
+    lld = os.path.join(lld, 'ld.lld')
+  else:
+    print('zstd support check cannot be performed on the unsupported ' \
+          'platform ' + sys.platform)
+    return
+
+  print('Checking for zstd support')
+  lld_out = subprocess.run([lld, '--compress-debug-sections=zstd'],
+                           check=False,
+                           stdout=subprocess.PIPE,
+                           stderr=subprocess.STDOUT,
+                           universal_newlines=True).stdout
+  if '--compress-debug-sections: zstd is not available' in lld_out:
+    print(('Failed to detect zstd support!\n\n(driver output: %s)') % lld_out)
+    sys.exit(1)
+  else:
+    print('OK')
+
+
+def DownloadDebianSysroot(platform_name, skip_download=False):
+  # Download sysroots. 
This uses basically Chromium's sysroots, but with + # minor changes: + # - glibc version bumped to 2.18 to make __cxa_thread_atexit_impl + # work (clang can require 2.18; chromium currently doesn't) + # - libcrypt.so.1 reversioned so that crypt() is picked up from glibc + # The sysroot was built at + # https://chromium-review.googlesource.com/c/chromium/src/+/5506275/1 + # and the hashes here are from sysroots.json in that CL. + toolchain_bucket = 'https://commondatastorage.googleapis.com/chrome-linux-sysroot/' + + hashes = { + # hash from https://chromium-review.googlesource.com/c/chromium/src/+/5506275/1/build/linux/sysroot_scripts/sysroots.json#3 + 'amd64': 'dec7a3a0fc5b83b909cba1b6d119077e0429a138eadef6bf5a0f2e03b1904631', + # hash from https://chromium-review.googlesource.com/c/chromium/src/+/5506275/1/build/linux/sysroot_scripts/sysroots.json#21 + 'i386': 'b53933120bb08ffc38140a817e3f0f99782254a6bf9622271574fa004e8783a4', + # hash from https://chromium-review.googlesource.com/c/chromium/src/+/5506275/1/build/linux/sysroot_scripts/sysroots.json#15 + 'arm': 'fe81e7114b97440262bce004caf02c1514732e2fa7f99693b2836932ad1c4626', + # hash from https://chromium-review.googlesource.com/c/chromium/src/+/5506275/1/build/linux/sysroot_scripts/sysroots.json#21 + 'arm64': '308e23faba3174bd01accfe358467b8a40fad4db4c49ef629da30219f65a275f', + } + + toolchain_name = f'debian_bullseye_{platform_name}_sysroot' + output = os.path.join(LLVM_BUILD_TOOLS_DIR, toolchain_name) + U = toolchain_bucket + hashes[platform_name] + if not skip_download: + DownloadAndUnpack(U, output) + + return output + + +def compiler_rt_cmake_flags(*, sanitizers, profile): + # Don't set -DCOMPILER_RT_BUILD_BUILTINS=ON/OFF as it interferes with the + # runtimes logic of building builtins. + args = [ + # Build crtbegin/crtend. It's just two tiny TUs, so just enable this + # everywhere, even though we only need it on Linux. + 'COMPILER_RT_BUILD_CRT=ON', + 'COMPILER_RT_BUILD_LIBFUZZER=OFF', + # Turn off ctx_profile because it depends on the sanitizer libraries, + # which we don't always build. + 'COMPILER_RT_BUILD_CTX_PROFILE=OFF', + 'COMPILER_RT_BUILD_MEMPROF=OFF', + 'COMPILER_RT_BUILD_ORC=OFF', + 'COMPILER_RT_BUILD_PROFILE=' + ('ON' if profile else 'OFF'), + 'COMPILER_RT_BUILD_SANITIZERS=' + ('ON' if sanitizers else 'OFF'), + 'COMPILER_RT_BUILD_XRAY=OFF', + # See crbug.com/1205046: don't build scudo (and others we don't need). + 'COMPILER_RT_SANITIZERS_TO_BUILD=asan;dfsan;msan;hwasan;tsan;cfi', + # We explicitly list all targets we want to build, do not autodetect + # targets. 
+ 'COMPILER_RT_DEFAULT_TARGET_ONLY=ON', + ] + return args + + +def gn_arg(v): + if v == 'True': + return True + if v == 'False': + return False + raise argparse.ArgumentTypeError('Expected one of %r or %r' % ( + 'True', 'False')) + + +def main(): + parser = argparse.ArgumentParser(description='Build Clang.') + parser.add_argument('--bootstrap', + action='store_true', + help='first build clang with CC, then with itself.') + parser.add_argument('--disable-asserts', action='store_true', + help='build with asserts disabled') + parser.add_argument('--host-cc', + help='build with host C compiler, requires --host-cxx as ' + 'well') + parser.add_argument('--host-cxx', + help='build with host C++ compiler, requires --host-cc ' + 'as well') + parser.add_argument('--pgo', action='store_true', help='build with PGO') + parser.add_argument('--thinlto', + action='store_true', + help='build with ThinLTO') + parser.add_argument('--bolt', action='store_true', help='build with BOLT') + parser.add_argument('--llvm-force-head-revision', action='store_true', + help='build the latest revision') + parser.add_argument('--run-tests', action='store_true', + help='run tests after building') + parser.add_argument('--skip-build', action='store_true', + help='do not build anything') + parser.add_argument('--skip-checkout', action='store_true', + help='do not create or update any checkouts') + parser.add_argument('--build-dir', + help='Override build directory') + parser.add_argument('--install-dir', + help='override the install directory for the final ' + 'compiler. If not specified, no install happens for ' + 'the compiler.') + parser.add_argument('--no-tools', + action='store_true', + help='don\'t build any chromium tools or ' + 'clang-extra-tools. Overrides --extra-tools.') + parser.add_argument('--extra-tools', nargs='*', default=[], + help='select additional chrome tools to build') + parser.add_argument('--no-runtimes', + action='store_true', + help='don\'t build compiler-rt, sanitizer and profile ' + 'runtimes. This is incompatible with --pgo. On Mac, ' + 'compiler-rt is always built regardless.') + parser.add_argument('--use-system-cmake', + action='store_true', + help='use the cmake from PATH instead of downloading ' + 'and using prebuilt cmake binaries') + parser.add_argument('--tf-path', + help='path to python tensorflow pip package. ' + 'Used for embedding an MLGO model') + parser.add_argument( + '--with-ml-inliner-model', + help='path to MLGO inliner model to embed. 
Setting to ' + '\'default\', will download an official model which was ' + 'trained for Chrome on Android', + default='default' if sys.platform.startswith('linux') else '') + parser.add_argument('--with-android', type=gn_arg, nargs='?', const=True, + help='build the Android ASan runtime (linux only)', + default=sys.platform.startswith('linux')) + parser.add_argument('--pic', + action='store_true', + help='Uses PIC when building LLVM') + parser.add_argument('--with-fuchsia', + type=gn_arg, + nargs='?', + const=True, + help='build the Fuchsia runtimes (linux only)', + default=sys.platform.startswith('linux')) + parser.add_argument('--without-android', action='store_false', + help='don\'t build Android ASan runtime (linux only)', + dest='with_android') + parser.add_argument('--without-fuchsia', action='store_false', + help='don\'t build Fuchsia clang_rt runtime (linux/mac)', + dest='with_fuchsia', + default=sys.platform in ('linux2', 'darwin')) + parser.add_argument('--with-ccache', + action='store_true', + help='Use ccache to build the stage 1 compiler') + parser.add_argument('--without-zstd', + dest='with_zstd', + action='store_false', + help='Disable zstd in the build') + + args = parser.parse_args() + + global CLANG_REVISION, PACKAGE_VERSION, LLVM_BUILD_DIR + + if (args.pgo or args.thinlto) and not args.bootstrap: + print('--pgo/--thinlto requires --bootstrap') + return 1 + if args.with_android and not os.path.exists(ANDROID_NDK_DIR): + print('Android NDK not found at ' + ANDROID_NDK_DIR) + print('The Android NDK is needed to build a Clang whose -fsanitize=address') + print('works on Android. See ') + print('https://www.chromium.org/developers/how-tos/android-build-instructions') + print('for how to install the NDK, or pass --without-android.') + return 1 + if args.no_runtimes and args.pgo: + print('--pgo requires runtimes, can\'t use --no-runtimes') + return 1 + + if args.with_fuchsia and not os.path.exists(FUCHSIA_SDK_DIR): + print('Fuchsia SDK not found at ' + FUCHSIA_SDK_DIR) + print('The Fuchsia SDK is needed to build libclang_rt for Fuchsia.') + print('Install the Fuchsia SDK by adding fuchsia to the ') + print('target_os section in your .gclient and running hooks, ') + print('or pass --without-fuchsia.') + print( + 'https://chromium.googlesource.com/chromium/src/+/main/docs/fuchsia/build_instructions.md' + ) + print('for general Fuchsia build instructions.') + return 1 + + if args.with_ml_inliner_model and not sys.platform.startswith('linux'): + print('--with-ml-inliner-model only supports linux hosts') + return 1 + + # Don't buffer stdout, so that print statements are immediately flushed. + # LLVM tests print output without newlines, so with buffering they won't be + # immediately printed. + major, _, _, _, _ = sys.version_info + if major == 3: + # Python3 only allows unbuffered output for binary streams. This + # workaround comes from https://stackoverflow.com/a/181654/4052492. 
+    sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0),
+                                  write_through=True)
+  else:
+    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
+
+
+  if args.build_dir:
+    LLVM_BUILD_DIR = args.build_dir
+
+  if args.llvm_force_head_revision:
+    checkout_revision = GetLatestLLVMCommit()
+  else:
+    checkout_revision = CLANG_REVISION
+
+  if not args.skip_checkout:
+    CheckoutGitRepo('LLVM monorepo', LLVM_GIT_URL, checkout_revision, LLVM_DIR)
+
+  if sys.platform == 'win32' and not args.llvm_force_head_revision:
+    # Apply https://github.com/zmodem/llvm-project/commit/802b816836f1 which
+    # adds printfs to the win/asan runtime which get printed at high verbosity
+    # level or on errors such as CHECK failure.
+    # TODO(crbug.com/341936875): Remove once debugging is done.
+    GitCherryPick(LLVM_DIR, 'https://github.com/zmodem/llvm-project.git',
+                  '802b816836f1dcf9544f250ee5c6977b4cb2bb41')
+
+    # Apply https://github.com/zmodem/llvm-project/commit/89a723c438a5 which
+    # should fix the issue of win/asan failing to allocate memory for
+    # trampoline functions.
+    # TODO(crbug.com/341936875): Land this upstream and remove after debugging.
+    GitCherryPick(LLVM_DIR, 'https://github.com/zmodem/llvm-project.git',
+                  '89a723c438a50a34507a71159ba37f6e60afcea9')
+
+    # Apply https://github.com/zmodem/llvm-project/commit/72112845b8e3 which
+    # fixes an issue in the previous patch and adds more printfs.
+    # TODO(crbug.com/341936875): Remove after debugging.
+    GitCherryPick(LLVM_DIR, 'https://github.com/zmodem/llvm-project.git',
+                  '72112845b8e37ba5296858d0224f916f0afbf88b')
+
+    # Apply https://github.com/zmodem/llvm-project/commit/723a2efebddf which
+    # tries to speed up the runtime by removing calls to GetModuleFileName and
+    # VPrintfs for contiguous_containers.
+    # TODO(crbug.com/341936875): Remove after debugging.
+    GitCherryPick(LLVM_DIR, 'https://github.com/zmodem/llvm-project.git',
+                  '723a2efebddf250b58c2dd3bd064c1cd0f57b85f')
+
+    # Apply https://github.com/zmodem/llvm-project/commit/a86b7e95a8a7 which
+    # adds proper clamping of {min,max}_addr in AllocateTrampolineRegion.
+    # TODO(crbug.com/341936875): Remove after debugging.
+    GitCherryPick(LLVM_DIR, 'https://github.com/zmodem/llvm-project.git',
+                  'a86b7e95a8a7a7de750d19e4d189e9a9497e31e8')
+
+  if args.llvm_force_head_revision:
+    CLANG_REVISION = GetCommitDescription(checkout_revision)
+    PACKAGE_VERSION = '%s-0' % CLANG_REVISION
+
+  print('Locally building clang %s...' % PACKAGE_VERSION)
+  WriteStampFile('', STAMP_FILE)
+  WriteStampFile('', FORCE_HEAD_REVISION_FILE)
+
+  if not args.use_system_cmake:
+    AddCMakeToPath()
+
+  if sys.platform == 'win32':
+    # CMake on Windows doesn't like depot_tools's ninja.bat wrapper.
+    ninja_dir = os.path.join(THIRD_PARTY_DIR, 'ninja')
+    os.environ['PATH'] = ninja_dir + os.pathsep + os.environ.get('PATH', '')
+
+  if args.skip_build:
+    return 0
+
+  # The variable "lld" is only used on Windows because only there does setting
+  # CMAKE_LINKER have an effect: On Windows, the linker is called directly,
+  # while elsewhere it's called through the compiler driver, and we pass
+  # -fuse-ld=lld there to make the compiler driver call the linker (by setting
+  # LLVM_ENABLE_LLD). 
+ cc, cxx, lld = None, None, None + + cflags = [] + cxxflags = [] + ldflags = [] + + targets = 'AArch64;ARM;LoongArch;Mips;PowerPC;RISCV;SystemZ;WebAssembly;X86' + projects = 'clang;lld' + if not args.no_tools: + projects += ';clang-tools-extra' + if args.bolt: + projects += ';bolt' + + runtimes = '' + # On macOS, we always need to build compiler-rt because dsymutil's link needs + # libclang_rt.osx.a. + if not args.no_runtimes or sys.platform == 'darwin': + runtimes = 'compiler-rt' + + pic_default = sys.platform == 'win32' + pic_mode = 'ON' if args.pic or pic_default else 'OFF' + + base_cmake_args = [ + '-GNinja', + '-DCMAKE_BUILD_TYPE=Release', + '-DLLVM_ENABLE_ASSERTIONS=%s' % ('OFF' if args.disable_asserts else 'ON'), + f'-DLLVM_ENABLE_PROJECTS={projects}', + f'-DLLVM_ENABLE_RUNTIMES={runtimes}', + f'-DLLVM_TARGETS_TO_BUILD={targets}', + f'-DLLVM_ENABLE_PIC={pic_mode}', + '-DLLVM_ENABLE_TERMINFO=OFF', + '-DLLVM_ENABLE_Z3_SOLVER=OFF', + '-DCLANG_PLUGIN_SUPPORT=OFF', + '-DCLANG_ENABLE_STATIC_ANALYZER=OFF', + '-DCLANG_ENABLE_ARCMT=OFF', + '-DBUG_REPORT_URL=' + BUG_REPORT_URL, + # See crbug.com/1126219: Use native symbolizer instead of DIA + '-DLLVM_ENABLE_DIA_SDK=OFF', + # Link all binaries with lld. Effectively passes -fuse-ld=lld to the + # compiler driver. On Windows, cmake calls the linker directly, so there + # the same is achieved by passing -DCMAKE_LINKER=$lld below. + '-DLLVM_ENABLE_LLD=ON', + # The default value differs per platform, force it off everywhere. + '-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF', + # Don't use curl. + '-DLLVM_ENABLE_CURL=OFF', + # Build libclang.a as well as libclang.so + '-DLIBCLANG_BUILD_STATIC=ON', + # The Rust build (on Mac ARM at least if not others) depends on the + # FileCheck tool which is built but not installed by default, this + # puts it in the path for the Rust build to find and matches the + # `bootstrap` tool: + # https://github.com/rust-lang/rust/blob/021861aea8de20c76c7411eb8ada7e8235e3d9b5/src/bootstrap/src/core/build_steps/llvm.rs#L348 + '-DLLVM_INSTALL_UTILS=ON', + '-DLLVM_ENABLE_ZSTD=%s' % ('ON' if args.with_zstd else 'OFF'), + ] + + if sys.platform == 'darwin': + isysroot = subprocess.check_output(['xcrun', '--show-sdk-path'], + universal_newlines=True).rstrip() + base_cmake_args += ['-DLLVM_ENABLE_UNWIND_TABLES=OFF'] + + ccache_cmake_args = [] + if args.with_ccache: + ccache_cmake_args.append('-DCMAKE_C_COMPILER_LAUNCHER=ccache') + ccache_cmake_args.append('-DCMAKE_CXX_COMPILER_LAUNCHER=ccache') + + if args.host_cc or args.host_cxx: + assert args.host_cc and args.host_cxx, \ + "--host-cc and --host-cxx need to be used together" + cc = args.host_cc + cxx = args.host_cxx + else: + if not args.skip_checkout: + DownloadPinnedClang() + if sys.platform == 'win32': + cc = os.path.join(PINNED_CLANG_DIR, 'bin', 'clang-cl.exe') + cxx = os.path.join(PINNED_CLANG_DIR, 'bin', 'clang-cl.exe') + lld = os.path.join(PINNED_CLANG_DIR, 'bin', 'lld-link.exe') + # CMake has a hard time with backslashes in compiler paths: + # https://stackoverflow.com/questions/13050827 + cc = cc.replace('\\', '/') + cxx = cxx.replace('\\', '/') + lld = lld.replace('\\', '/') + else: + cc = os.path.join(PINNED_CLANG_DIR, 'bin', 'clang') + cxx = os.path.join(PINNED_CLANG_DIR, 'bin', 'clang++') + + if sys.platform.startswith('linux'): + base_cmake_args += [ '-DLLVM_STATIC_LINK_CXX_STDLIB=ON' ] + + if sys.platform.startswith('linux'): + sysroot_amd64 = DownloadDebianSysroot('amd64', args.skip_checkout) + sysroot_i386 = DownloadDebianSysroot('i386', args.skip_checkout) + 
sysroot_arm = DownloadDebianSysroot('arm', args.skip_checkout) + sysroot_arm64 = DownloadDebianSysroot('arm64', args.skip_checkout) + + # Add the sysroot to base_cmake_args. + if platform.machine() == 'aarch64': + base_cmake_args.append('-DCMAKE_SYSROOT=' + sysroot_arm64) + else: + # amd64 is the default toolchain. + base_cmake_args.append('-DCMAKE_SYSROOT=' + sysroot_amd64) + + if sys.platform == 'win32': + AddGitForWindowsToPath() + + base_cmake_args.append('-DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded') + + # Require zlib compression. + zlib_dir = AddZlibToPath() + cflags.append('-I' + zlib_dir) + cxxflags.append('-I' + zlib_dir) + ldflags.append('-LIBPATH:' + zlib_dir) + + # Use rpmalloc. For faster ThinLTO linking. + base_cmake_args.append('-DLLVM_ENABLE_RPMALLOC=ON') + + # Set a sysroot to make the build more hermetic. + base_cmake_args.append('-DLLVM_WINSYSROOT="%s"' % + os.path.dirname(os.path.dirname(GetWinSDKDir()))) + + # Statically link libxml2 to make lld-link not require mt.exe on Windows, + # and to make sure lld-link output on other platforms is identical to + # lld-link on Windows (for cross-builds). + libxml_cmake_args, libxml_cflags = BuildLibXml2() + base_cmake_args += libxml_cmake_args + cflags += libxml_cflags + cxxflags += libxml_cflags + + if args.with_zstd: + # Statically link zstd to make lld support zstd compression for debug info. + zstd_cmake_args, zstd_cflags = BuildZStd() + base_cmake_args += zstd_cmake_args + cflags += zstd_cflags + cxxflags += zstd_cflags + + lit_excludes = [] + if sys.platform.startswith('linux'): + lit_excludes += [ + # fstat and sunrpc tests fail due to sysroot/host mismatches + # (crbug.com/1459187). + '^MemorySanitizer-.* f?stat(at)?(64)?.cpp$', + '^.*Sanitizer-.*sunrpc.*cpp$', + # sysroot/host glibc version mismatch, crbug.com/1506551 + '^.*Sanitizer.*mallinfo2.cpp$', + ] + elif sys.platform == 'darwin': + lit_excludes += [ + # Fails on macOS 14, crbug.com/332589870 + '^.*Sanitizer.*Darwin/malloc_zone.cpp$', + # Fails with a recent ld, crbug.com/332589870 + '^.*ContinuousSyncMode/darwin-proof-of-concept.c$', + '^.*instrprof-darwin-exports.c$', + # Fails on our mac builds, crbug.com/346289767 + '^.*Interpreter/pretty-print.c$', + ] + if platform.machine() == 'arm64': + lit_excludes += [ + # TODO(https://crbug.com/40270881): fix and re-enable + '^.*tools/dsymutil.*$', + '^.*AddressSanitizer-arm64-darwin.*$', + '^.*SanitizerCommon-lsan-arm64-Darwin.*$', + '^.*SanitizerCommon-ubsan-arm64-Darwin.*Posix/dedup_token_length_test.cpp$', + ] + + test_env = None + if lit_excludes: + test_env = os.environ.copy() + test_env['LIT_FILTER_OUT'] = '|'.join(lit_excludes) + + if args.bootstrap: + print('Building bootstrap compiler') + if os.path.exists(LLVM_BOOTSTRAP_DIR): + RmTree(LLVM_BOOTSTRAP_DIR) + EnsureDirExists(LLVM_BOOTSTRAP_DIR) + os.chdir(LLVM_BOOTSTRAP_DIR) + + runtimes = [] + if args.pgo or sys.platform == 'darwin': + # Need libclang_rt.profile for PGO. + # On macOS, the bootstrap toolchain needs to have compiler-rt because + # dsymutil's link needs libclang_rt.osx.a. Only the x86_64 osx + # libraries are needed though, and only libclang_rt (i.e. + # COMPILER_RT_BUILD_BUILTINS). + runtimes.append('compiler-rt') + + bootstrap_targets = 'X86' + if sys.platform == 'darwin': + # Need ARM and AArch64 for building the ios clang_rt. 
+ bootstrap_targets += ';ARM;AArch64' + bootstrap_args = base_cmake_args + ccache_cmake_args + [ + '-DLLVM_TARGETS_TO_BUILD=' + bootstrap_targets, + '-DLLVM_ENABLE_PROJECTS=clang;lld', + '-DLLVM_ENABLE_RUNTIMES=' + ';'.join(runtimes), + '-DCMAKE_INSTALL_PREFIX=' + LLVM_BOOTSTRAP_INSTALL_DIR, + '-DCMAKE_C_FLAGS=' + ' '.join(cflags), + '-DCMAKE_CXX_FLAGS=' + ' '.join(cxxflags), + '-DCMAKE_EXE_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_SHARED_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_MODULE_LINKER_FLAGS=' + ' '.join(ldflags), + # Ignore args.disable_asserts for the bootstrap compiler. + '-DLLVM_ENABLE_ASSERTIONS=ON', + ] + # PGO needs libclang_rt.profile but none of the other compiler-rt stuff. + bootstrap_args.extend([ + '-D' + f + for f in compiler_rt_cmake_flags(sanitizers=False, profile=args.pgo) + ]) + if sys.platform == 'darwin': + bootstrap_args.extend([ + '-DCOMPILER_RT_ENABLE_IOS=OFF', + '-DCOMPILER_RT_ENABLE_WATCHOS=OFF', + '-DCOMPILER_RT_ENABLE_TVOS=OFF', + ]) + if platform.machine() == 'arm64': + bootstrap_args.extend(['-DDARWIN_osx_ARCHS=arm64']) + else: + bootstrap_args.extend(['-DDARWIN_osx_ARCHS=x86_64']) + + if cc is not None: bootstrap_args.append('-DCMAKE_C_COMPILER=' + cc) + if cxx is not None: bootstrap_args.append('-DCMAKE_CXX_COMPILER=' + cxx) + if lld is not None: bootstrap_args.append('-DCMAKE_LINKER=' + lld) + RunCommand(['cmake'] + bootstrap_args + [os.path.join(LLVM_DIR, 'llvm')], + setenv=True) + RunCommand(['ninja'], setenv=True) + if args.run_tests: + RunCommand(['ninja', 'check-all'], env=test_env, setenv=True) + RunCommand(['ninja', 'install'], setenv=True) + + if sys.platform == 'win32': + cc = os.path.join(LLVM_BOOTSTRAP_INSTALL_DIR, 'bin', 'clang-cl.exe') + cxx = os.path.join(LLVM_BOOTSTRAP_INSTALL_DIR, 'bin', 'clang-cl.exe') + lld = os.path.join(LLVM_BOOTSTRAP_INSTALL_DIR, 'bin', 'lld-link.exe') + # CMake has a hard time with backslashes in compiler paths: + # https://stackoverflow.com/questions/13050827 + cc = cc.replace('\\', '/') + cxx = cxx.replace('\\', '/') + lld = lld.replace('\\', '/') + else: + cc = os.path.join(LLVM_BOOTSTRAP_INSTALL_DIR, 'bin', 'clang') + cxx = os.path.join(LLVM_BOOTSTRAP_INSTALL_DIR, 'bin', 'clang++') + + print('Bootstrap compiler installed.') + + if args.pgo: + print('Building instrumented compiler') + if os.path.exists(LLVM_INSTRUMENTED_DIR): + RmTree(LLVM_INSTRUMENTED_DIR) + EnsureDirExists(LLVM_INSTRUMENTED_DIR) + os.chdir(LLVM_INSTRUMENTED_DIR) + + # Disable -Wbackend-plugin for PGO builds as these warnings are harmless + # (https://llvm.org/docs/HowToBuildWithPGO.html#building-clang-with-pgo) + cflags += ['-Wno-backend-plugin'] + cxxflags += ['-Wno-backend-plugin'] + + instrument_args = base_cmake_args + [ + '-DLLVM_ENABLE_PROJECTS=clang', + '-DCMAKE_C_FLAGS=' + ' '.join(cflags), + '-DCMAKE_CXX_FLAGS=' + ' '.join(cxxflags), + '-DCMAKE_EXE_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_SHARED_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_MODULE_LINKER_FLAGS=' + ' '.join(ldflags), + # Build with instrumentation. + '-DLLVM_BUILD_INSTRUMENTED=IR', + ] + # Build with the bootstrap compiler. 
+ if cc is not None: instrument_args.append('-DCMAKE_C_COMPILER=' + cc) + if cxx is not None: instrument_args.append('-DCMAKE_CXX_COMPILER=' + cxx) + if lld is not None: instrument_args.append('-DCMAKE_LINKER=' + lld) + + RunCommand(['cmake'] + instrument_args + [os.path.join(LLVM_DIR, 'llvm')], + setenv=True) + RunCommand(['ninja', 'clang'], setenv=True) + print('Instrumented compiler built.') + + # Train by building some C++ code. + # + # pgo_training-1.ii is a preprocessed (on Linux) version of + # src/third_party/blink/renderer/core/layout/layout_object.cc, selected + # because it's a large translation unit in Blink, which is normally the + # slowest part of Chromium to compile. Using this, we get ~20% shorter + # build times for Linux, Android, and Mac, which is also what we got when + # training by actually building a target in Chromium. (For comparison, a + # C++-y "Hello World" program only resulted in 14% faster builds.) + # See https://crbug.com/966403#c16 for all numbers. + # + # Although the training currently only exercises Clang, it does involve LLVM + # internals, and so LLD also benefits when used for ThinLTO links. + # + # NOTE: Tidy uses binaries built with this profile, but doesn't seem to + # gain much from it. If tidy's execution time becomes a concern, it might + # be good to investigate that. + # + # TODO(hans): Enhance the training, perhaps by including preprocessed code + # from more platforms, and by doing some linking so that lld can benefit + # from PGO as well. Perhaps the training could be done asynchronously by + # dedicated buildbots that upload profiles to the cloud. + training_source = 'pgo_training-1.ii' + with open(training_source, 'wb') as f: + DownloadUrl(CDS_URL + '/' + training_source, f) + train_cmd = [os.path.join(LLVM_INSTRUMENTED_DIR, 'bin', 'clang++'), + '-target', 'x86_64-unknown-unknown', '-O2', '-g', '-std=c++14', + '-fno-exceptions', '-fno-rtti', '-w', '-c', training_source] + if sys.platform == 'darwin': + train_cmd.extend(['-isysroot', isysroot]) + RunCommand(train_cmd, setenv=True) + + # Merge profiles. + profdata = os.path.join(LLVM_BOOTSTRAP_INSTALL_DIR, 'bin', 'llvm-profdata') + RunCommand( + [profdata, 'merge', '-output=' + LLVM_PROFDATA_FILE] + + glob.glob(os.path.join(LLVM_INSTRUMENTED_DIR, 'profiles', '*.profraw')), + setenv=True) + print('Profile generated.') + + deployment_target = '10.12' + + # If building at head, define a macro that plugins can use for #ifdefing + # out code that builds at head, but not at CLANG_REVISION or vice versa. + if args.llvm_force_head_revision: + cflags += ['-DLLVM_FORCE_HEAD_REVISION'] + cxxflags += ['-DLLVM_FORCE_HEAD_REVISION'] + + # Build PDBs for archival on Windows. Don't use RelWithDebInfo since it + # has different optimization defaults than Release. + # Also disable stack cookies (/GS-) for performance. + if sys.platform == 'win32': + cflags += ['/Zi', '/GS-'] + cxxflags += ['/Zi', '/GS-'] + ldflags += ['/DEBUG', '/OPT:REF', '/OPT:ICF'] + + deployment_env = None + if deployment_target: + deployment_env = os.environ.copy() + deployment_env['MACOSX_DEPLOYMENT_TARGET'] = deployment_target + + print('Building final compiler.') + + # Keep static relocations in the executable for BOLT to analyze. Resolve all + # symbols on program start to allow BOLT's PLT optimization. 
+  if args.bolt:
+    ldflags += ['-Wl,--emit-relocs', '-Wl,-znow']
+
+  chrome_tools = []
+  if not args.no_tools:
+    default_tools = [
+        'plugins', 'blink_gc_plugin', 'raw_ptr_plugin', 'translation_unit'
+    ]
+    chrome_tools = list(set(default_tools + args.extra_tools))
+  if cc is not None: base_cmake_args.append('-DCMAKE_C_COMPILER=' + cc)
+  if cxx is not None: base_cmake_args.append('-DCMAKE_CXX_COMPILER=' + cxx)
+  if lld is not None: base_cmake_args.append('-DCMAKE_LINKER=' + lld)
+  final_install_dir = args.install_dir if args.install_dir else LLVM_BUILD_DIR
+  cmake_args = base_cmake_args + [
+      '-DCMAKE_C_FLAGS=' + ' '.join(cflags),
+      '-DCMAKE_CXX_FLAGS=' + ' '.join(cxxflags),
+      '-DCMAKE_EXE_LINKER_FLAGS=' + ' '.join(ldflags),
+      '-DCMAKE_SHARED_LINKER_FLAGS=' + ' '.join(ldflags),
+      '-DCMAKE_MODULE_LINKER_FLAGS=' + ' '.join(ldflags),
+      '-DCMAKE_INSTALL_PREFIX=' + final_install_dir,
+  ]
+  if not args.no_tools:
+    cmake_args.extend([
+        '-DLLVM_EXTERNAL_PROJECTS=chrometools',
+        '-DLLVM_EXTERNAL_CHROMETOOLS_SOURCE_DIR=' +
+        os.path.join(CHROMIUM_DIR, 'tools', 'clang'),
+        '-DCHROMIUM_TOOLS=%s' % ';'.join(chrome_tools)
+    ])
+  if args.pgo:
+    cmake_args.append('-DLLVM_PROFDATA_FILE=' + LLVM_PROFDATA_FILE)
+  if args.thinlto:
+    cmake_args.append('-DLLVM_ENABLE_LTO=Thin')
+  if sys.platform == 'win32':
+    cmake_args.append('-DLLVM_ENABLE_ZLIB=FORCE_ON')
+
+  # The default LLVM_DEFAULT_TARGET_TRIPLE depends on the host machine.
+  # Set it explicitly to make the build of clang more hermetic, and also to
+  # set it to arm64 when cross-building clang for mac/arm.
+  if sys.platform == 'darwin':
+    if platform.machine() == 'arm64':
+      cmake_args.append('-DLLVM_DEFAULT_TARGET_TRIPLE=arm64-apple-darwin')
+    else:
+      cmake_args.append('-DLLVM_DEFAULT_TARGET_TRIPLE=x86_64-apple-darwin')
+  elif sys.platform.startswith('linux'):
+    if platform.machine() == 'aarch64':
+      cmake_args.append(
+          '-DLLVM_DEFAULT_TARGET_TRIPLE=aarch64-unknown-linux-gnu')
+    elif platform.machine() == 'riscv64':
+      cmake_args.append(
+          '-DLLVM_DEFAULT_TARGET_TRIPLE=riscv64-unknown-linux-gnu')
+    elif platform.machine() == 'loongarch64':
+      cmake_args.append(
+          '-DLLVM_DEFAULT_TARGET_TRIPLE=loongarch64-unknown-linux-gnu')
+    else:
+      cmake_args.append('-DLLVM_DEFAULT_TARGET_TRIPLE=x86_64-unknown-linux-gnu')
+    cmake_args.append('-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON')
+  elif sys.platform == 'win32':
+    cmake_args.append('-DLLVM_DEFAULT_TARGET_TRIPLE=x86_64-pc-windows-msvc')
+
+  # Map from triple to {
+  #   "args": list of CMake vars without '-D' common to builtins and runtimes
+  #   "profile": bool    # build profile runtime
+  #   "sanitizers": bool # build sanitizer runtimes
+  # }
+  runtimes_triples_args = {}
+
+  if sys.platform.startswith('linux'):
+    runtimes_triples_args['i386-unknown-linux-gnu'] = {
+        "args": [
+            'CMAKE_SYSROOT=%s' % sysroot_i386,
+            # TODO(crbug.com/40242553): pass proper flags to i386 tests so they compile correctly
+            'LLVM_INCLUDE_TESTS=OFF',
+        ],
+        "profile":
+        True,
+        "sanitizers":
+        True,
+    }
+    runtimes_triples_args['x86_64-unknown-linux-gnu'] = {
+        "args": [
+            'CMAKE_SYSROOT=%s' % sysroot_amd64,
+        ],
+        "profile": True,
+        "sanitizers": True,
+    }
+    # Using "armv7a-unknown-linux-gnueabihf" confuses the compiler-rt
+    # builtins build, since compiler-rt/cmake/builtin-config-ix.cmake
+    # doesn't include "armv7a" in its `ARM32` list.
+    # TODO(thakis): It seems to work for everything else though, see try
+    # results on
+    # https://chromium-review.googlesource.com/c/chromium/src/+/3702739/4
+    # Maybe it should work for builtins too? 
+    runtimes_triples_args['armv7-unknown-linux-gnueabihf'] = {
+        "args": [
+            'CMAKE_SYSROOT=%s' % sysroot_arm,
+            # Can't run tests on x86 host.
+            'LLVM_INCLUDE_TESTS=OFF',
+        ],
+        "profile":
+        True,
+        "sanitizers":
+        True,
+    }
+    runtimes_triples_args['aarch64-unknown-linux-gnu'] = {
+        "args": [
+            'CMAKE_SYSROOT=%s' % sysroot_arm64,
+            # Can't run tests on x86 host.
+            'LLVM_INCLUDE_TESTS=OFF',
+        ],
+        "profile":
+        True,
+        "sanitizers":
+        True,
+    }
+  elif sys.platform == 'win32':
+    sysroot = os.path.dirname(os.path.dirname(GetWinSDKDir()))
+    runtimes_triples_args['i386-pc-windows-msvc'] = {
+        "args": [
+            'LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF',
+            'LLVM_WINSYSROOT="%s"' % sysroot,
+        ],
+        "profile":
+        True,
+        "sanitizers":
+        False,
+    }
+    runtimes_triples_args['x86_64-pc-windows-msvc'] = {
+        "args": [
+            'LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF',
+            'LLVM_WINSYSROOT="%s"' % sysroot,
+        ],
+        "profile":
+        True,
+        "sanitizers":
+        True,
+    }
+    runtimes_triples_args['aarch64-pc-windows-msvc'] = {
+        "args": [
+            'LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF',
+            'LLVM_WINSYSROOT="%s"' % sysroot,
+            # Can't run tests on x86 host.
+            'LLVM_INCLUDE_TESTS=OFF',
+        ],
+        "profile":
+        True,
+        "sanitizers":
+        False,
+    }
+  elif sys.platform == 'darwin':
+    # compiler-rt is built for all platforms/arches with a single
+    # configuration, so we should only specify one target triple. 'default'
+    # is specially handled.
+    runtimes_triples_args['default'] = {
+        "args": [
+            'SANITIZER_MIN_OSX_VERSION=' + deployment_target,
+            'COMPILER_RT_ENABLE_MACCATALYST=ON',
+            'COMPILER_RT_ENABLE_IOS=ON',
+            'COMPILER_RT_ENABLE_WATCHOS=ON',
+            'COMPILER_RT_ENABLE_TVOS=OFF',
+            'COMPILER_RT_ENABLE_XROS=ON',
+            'DARWIN_ios_ARCHS=arm64',
+            'DARWIN_iossim_ARCHS=arm64;x86_64',
+            'DARWIN_osx_ARCHS=arm64;x86_64',
+            'DARWIN_watchos_BUILTIN_ARCHS=arm64',
+            'DARWIN_watchossim_BUILTIN_ARCHS=arm64;x86_64',
+        ],
+        "sanitizers":
+        True,
+        "profile":
+        True
+    }
+
+  if args.with_android:
+    for target_arch in ['aarch64', 'arm', 'i686', 'riscv64', 'x86_64']:
+      toolchain_dir = ANDROID_NDK_TOOLCHAIN_DIR
+      target_triple = target_arch
+      if target_arch == 'arm':
+        target_triple = 'armv7'
+      api_level = '21'
+      if target_arch == 'riscv64':
+        api_level = '35'
+      target_triple += '-linux-android' + api_level
+      android_cflags = [
+          '--sysroot=%s/sysroot' % toolchain_dir,
+
+          # We don't have an unwinder ready, and don't need it either.
+          '--unwindlib=none',
+      ]
+
+      if target_arch == 'aarch64':
+        # Use PAC/BTI instructions for AArch64.
+        android_cflags += ['-mbranch-protection=standard']
+
+      android_args = [
+          'LLVM_ENABLE_RUNTIMES=compiler-rt',
+          # On Android, we want DWARF info for the builtins for unwinding. See
+          # crbug.com/1311807.
+          'CMAKE_BUILD_TYPE=RelWithDebInfo',
+          'CMAKE_C_FLAGS=' + ' '.join(android_cflags),
+          'CMAKE_CXX_FLAGS=' + ' '.join(android_cflags),
+          'CMAKE_ASM_FLAGS=' + ' '.join(android_cflags),
+          'COMPILER_RT_USE_BUILTINS_LIBRARY=ON',
+          'SANITIZER_CXX_ABI=libcxxabi',
+          'CMAKE_SHARED_LINKER_FLAGS=-Wl,-u__cxa_demangle',
+          'ANDROID=1',
+          'LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF',
+          'LLVM_INCLUDE_TESTS=OFF',
+          # This prevents static_asserts from firing in 32-bit builds.
+          # TODO: remove once we only support API >=24.
+          'ANDROID_NATIVE_API_LEVEL=' + api_level,
+      ]
+      runtimes_triples_args[target_triple] = {
+          "args": android_args,
+          "sanitizers": True,
+          "profile": True
+      }
+
+  if args.with_fuchsia:
+    # Fuchsia links against libclang_rt.builtins-<arch>.a instead of libgcc.a.
+    for target_arch in ['aarch64', 'x86_64']:
+      fuchsia_arch_name = {'aarch64': 'arm64', 'x86_64': 'x64'}[target_arch]
+      toolchain_dir = os.path.join(
+          FUCHSIA_SDK_DIR, 'arch', fuchsia_arch_name, 'sysroot')
+      target_triple = target_arch + '-unknown-fuchsia'
+      # Build the Fuchsia profile and asan runtimes. This is done after the rt
+      # builtins have been created because the CMake build runs link checks
+      # that require that the builtins already exist to succeed.
+      build_profile = target_arch == 'x86_64'
+      # Build the asan runtime only on non-Mac platforms. Macs are excluded
+      # because the asan install changes library RPATHs, which CMake only
+      # supports on ELF platforms, and MacOS uses Mach-O instead of ELF.
+      build_sanitizers = build_profile and sys.platform != 'darwin'
+      # TODO(thakis): Might have to pass -B here once sysroot contains
+      # binaries (e.g. gas for arm64?)
+      fuchsia_args = [
+          'LLVM_ENABLE_RUNTIMES=compiler-rt',
+          'CMAKE_SYSTEM_NAME=Fuchsia',
+          'CMAKE_SYSROOT=%s' % toolchain_dir,
+          # TODO(thakis|scottmg): Use PER_TARGET_RUNTIME_DIR for all platforms.
+          # https://crbug.com/882485.
+          'LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON',
+      ]
+      if build_sanitizers:
+        fuchsia_args.append('SANITIZER_NO_UNDEFINED_SYMBOLS=OFF')
+
+      runtimes_triples_args[target_triple] = {
+          "args": fuchsia_args,
+          "sanitizers": build_sanitizers,
+          "profile": build_profile
+      }
+
+  # Embed the MLGO inliner model. If tf_path is not specified, a vpython3 env
+  # will be created which contains the necessary source files for compilation.
+  # MLGO is only officially supported on Linux. This condition is checked at
+  # the top of main().
+  if args.with_ml_inliner_model:
+    if args.with_ml_inliner_model == 'default':
+      model_path = ('https://commondatastorage.googleapis.com/'
+                    'chromium-browser-clang/tools/mlgo_model3.tgz')
+    else:
+      model_path = args.with_ml_inliner_model
+    if not args.tf_path:
+      tf_path = subprocess.check_output(
+          ['vpython3', os.path.join(THIS_DIR, 'get_tensorflow.py')],
+          universal_newlines=True).rstrip()
+    else:
+      tf_path = args.tf_path
+    print('Embedding MLGO inliner model at %s using Tensorflow at %s' %
+          (model_path, tf_path))
+    cmake_args += [
+        '-DLLVM_INLINER_MODEL_PATH=%s' % model_path,
+        '-DTENSORFLOW_AOT_PATH=%s' % tf_path,
+        # Disable Regalloc model generation since it is unused.
+        '-DLLVM_RAEVICT_MODEL_PATH=none'
+    ]
+
+  # Convert FOO=BAR CMake flags per triple into
+  # -DBUILTINS_$triple_FOO=BAR/-DRUNTIMES_$triple_FOO=BAR and build up
+  # -DLLVM_BUILTIN_TARGETS/-DLLVM_RUNTIME_TARGETS.
+  all_triples = ''
+  for triple in sorted(runtimes_triples_args.keys()):
+    all_triples += triple + ';'
+    for arg in runtimes_triples_args[triple]["args"]:
+      assert not arg.startswith('-')
+      # 'default' is specially handled to pass through relevant CMake flags.
+      if triple == 'default':
+        cmake_args.append('-D' + arg)
+      else:
+        cmake_args.append('-DRUNTIMES_' + triple + '_' + arg)
+        cmake_args.append('-DBUILTINS_' + triple + '_' + arg)
+    if not args.no_runtimes:
+      profile = runtimes_triples_args[triple]["profile"]
+      sanitizers = runtimes_triples_args[triple]["sanitizers"]
+    else:
+      profile = False
+      sanitizers = False
+    for arg in compiler_rt_cmake_flags(profile=profile, sanitizers=sanitizers):
+      # 'default' is specially handled to pass through relevant CMake flags.
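+      # (e.g. arg 'FOO=BAR' for triple 'x86_64-unknown-linux-gnu' becomes
+      # -DRUNTIMES_x86_64-unknown-linux-gnu_FOO=BAR)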
+ if triple == 'default': + cmake_args.append('-D' + arg) + else: + cmake_args.append('-DRUNTIMES_' + triple + '_' + arg) + + cmake_args.append('-DLLVM_BUILTIN_TARGETS=' + all_triples) + cmake_args.append('-DLLVM_RUNTIME_TARGETS=' + all_triples) + + if not args.bootstrap: + cmake_args.extend(ccache_cmake_args) + + if os.path.exists(LLVM_BUILD_DIR): + RmTree(LLVM_BUILD_DIR) + EnsureDirExists(LLVM_BUILD_DIR) + os.chdir(LLVM_BUILD_DIR) + RunCommand(['cmake'] + cmake_args + [os.path.join(LLVM_DIR, 'llvm')], + setenv=True, + env=deployment_env) + RunCommand(['ninja'], setenv=True) + + if chrome_tools: + # If any Chromium tools were built, install those now. + RunCommand(['ninja', 'cr-install'], setenv=True) + + if args.bolt: + print('Performing BOLT post-link optimizations.') + bolt_profiles_dir = os.path.join(LLVM_BUILD_DIR, 'bolt-profiles') + os.mkdir(bolt_profiles_dir) + + # Instrument. + RunCommand([ + 'bin/llvm-bolt', 'bin/clang', '-o', 'bin/clang-bolt.inst', + '-instrument', '--instrumentation-file-append-pid', + '--instrumentation-file=' + + os.path.join(bolt_profiles_dir, 'prof.fdata') + ]) + RunCommand([ + 'ln', '-s', + os.path.join(LLVM_BUILD_DIR, 'bin', 'clang-bolt.inst'), + os.path.join(LLVM_BUILD_DIR, 'bin', 'clang++-bolt.inst') + ]) + + # Train by building a part of Clang. + os.mkdir('bolt-training') + os.chdir('bolt-training') + bolt_train_cmake_args = base_cmake_args + [ + '-DLLVM_TARGETS_TO_BUILD=X86', + '-DLLVM_ENABLE_PROJECTS=clang', + '-DCMAKE_C_FLAGS=' + ' '.join(cflags), + '-DCMAKE_CXX_FLAGS=' + ' '.join(cxxflags), + '-DCMAKE_EXE_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_SHARED_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_MODULE_LINKER_FLAGS=' + ' '.join(ldflags), + '-DCMAKE_C_COMPILER=' + + os.path.join(LLVM_BUILD_DIR, 'bin/clang-bolt.inst'), + '-DCMAKE_CXX_COMPILER=' + + os.path.join(LLVM_BUILD_DIR, 'bin/clang++-bolt.inst'), + '-DCMAKE_ASM_COMPILER=' + + os.path.join(LLVM_BUILD_DIR, 'bin/clang-bolt.inst'), + '-DCMAKE_ASM_COMPILER_ID=Clang', + ] + RunCommand(['cmake'] + bolt_train_cmake_args + + [os.path.join(LLVM_DIR, 'llvm')]) + RunCommand([ + 'ninja', 'tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.o' + ]) + os.chdir(LLVM_BUILD_DIR) + + # Optimize. + RunCommand([ + sys.executable, + os.path.join(LLVM_DIR, 'clang', 'utils', 'perf-training', + 'perf-helper.py'), 'merge-fdata', 'bin/merge-fdata', + 'merged.fdata', bolt_profiles_dir + ]) + RunCommand([ + 'bin/llvm-bolt', 'bin/clang', '-o', 'bin/clang-bolt.opt', '-data', + 'merged.fdata', '-reorder-blocks=ext-tsp', '-reorder-functions=hfsort+', + '-split-functions', '-split-all-cold', '-split-eh', '-dyno-stats', + '-icf=1', '-use-gnu-stack', '-use-old-text' + ]) + + # Overwrite clang, preserving its timestamp so ninja doesn't rebuild it. + RunCommand(['touch', '-r', 'bin/clang', 'bin/clang-bolt.opt']) + RunCommand(['mv', 'bin/clang-bolt.opt', 'bin/clang']) + + VerifyVersionOfBuiltClangMatchesVERSION() + VerifyZlibSupport() + if args.with_zstd: + VerifyZStdSupport() + + # Run tests. 
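+  # (cr-check-all covers the Chromium-side tools and plugins; check-all is
+  # LLVM's own test suite.)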
+  if chrome_tools and (args.run_tests or args.llvm_force_head_revision):
+    RunCommand(['ninja', '-C', LLVM_BUILD_DIR, 'cr-check-all'], setenv=True)
+
+  if args.run_tests:
+    RunCommand(['ninja', '-C', LLVM_BUILD_DIR, 'check-all'],
+               env=test_env,
+               setenv=True)
+  if args.install_dir:
+    RunCommand(['ninja', 'install'], setenv=True)
+
+  WriteStampFile(PACKAGE_VERSION, STAMP_FILE)
+  WriteStampFile(PACKAGE_VERSION, FORCE_HEAD_REVISION_FILE)
+  print('Clang build was successful.')
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/build_clang_tools_extra.py b/clang/scripts/build_clang_tools_extra.py
new file mode 100755
index 0000000000000000000000000000000000000000..c987f696550962c551fecfe4d2dae1cda6c21973
--- /dev/null
+++ b/clang/scripts/build_clang_tools_extra.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+# Copyright 2019 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""A script for fetching the LLVM monorepo and building clang-tools-extra
+binaries.
+
+Example: build clangd and clangd-indexer
+
+  tools/clang/scripts/build_clang_tools_extra.py --fetch out/Release clangd \
+      clangd-indexer
+"""
+
+import argparse
+import errno
+import os
+import subprocess
+import sys
+import update
+
+from build import (CheckoutGitRepo, LLVM_GIT_URL)
+
+
+def GetCheckoutDir(out_dir):
+  """Returns the absolute path to the checked-out llvm repo."""
+  return os.path.join(out_dir, 'tools', 'clang', 'third_party', 'llvm')
+
+
+def GetBuildDir(out_dir):
+  return os.path.join(GetCheckoutDir(out_dir), 'build')
+
+
+def CreateDirIfNotExists(dir):
+  if not os.path.exists(dir):
+    os.makedirs(dir)
+
+
+def FetchLLVM(checkout_dir, revision):
+  """Clone the llvm repo into |checkout_dir|, or update it if it already
+  exists."""
+  CreateDirIfNotExists(os.path.dirname(checkout_dir))
+  cwd = os.getcwd()
+  CheckoutGitRepo('LLVM monorepo', LLVM_GIT_URL, revision, checkout_dir)
+  os.chdir(cwd)
+
+
+def BuildTargets(build_dir, targets):
+  """Build targets from the llvm repo at |build_dir|."""
+  CreateDirIfNotExists(build_dir)
+
+  # From that dir, run cmake.
+  cmake_args = [
+      'cmake',
+      '-GNinja',
+      '-DLLVM_ENABLE_PROJECTS=clang;clang-tools-extra',
+      '-DCMAKE_BUILD_TYPE=Release',
+      '-DLLVM_ENABLE_ASSERTIONS=On',
+      '../llvm',
+  ]
+  subprocess.check_call(cmake_args, cwd=build_dir)
+
+  ninja_commands = ['ninja'] + targets
+  subprocess.check_call(ninja_commands, cwd=build_dir)
+
+
+def main():
+  parser = argparse.ArgumentParser(description='Build clang_tools_extra.')
+  parser.add_argument('--fetch', action='store_true', help='fetch LLVM source')
+  parser.add_argument(
+      '--revision', help='LLVM revision to use', default=update.CLANG_REVISION)
+  parser.add_argument('OUT_DIR', help='where we put the LLVM source repository')
+  parser.add_argument('TARGETS', nargs='+', help='targets being built')
+  args = parser.parse_args()
+
+  if args.fetch:
+    print('Fetching LLVM source')
+    FetchLLVM(GetCheckoutDir(args.OUT_DIR), args.revision)
+
+  print('Building targets: %s' % ', '.join(args.TARGETS))
+  BuildTargets(GetBuildDir(args.OUT_DIR), args.TARGETS)
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/build_file.py b/clang/scripts/build_file.py
new file mode 100755
index 0000000000000000000000000000000000000000..bc5884b65bcef3ec503da03bdbb9db3fa7cca0eb
--- /dev/null
+++ b/clang/scripts/build_file.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env vpython3
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from __future__ import print_function
+
+import argparse
+import json
+import os
+import re
+import shlex
+import sys
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
+sys.path.insert(0, tool_dir)
+
+from clang import compile_db
+
+_PROBABLY_CLANG_RE = re.compile(r'clang(?:\+\+)?$')
+
+
+def ParseArgs():
+  parser = argparse.ArgumentParser(
+      description='Utility to build one Chromium file for debugging clang')
+  parser.add_argument('-p', required=True, help='path to the compile database')
+  parser.add_argument('--generate-compdb',
+                      action='store_true',
+                      help='regenerate the compile database')
+  parser.add_argument('--prefix',
+                      help='optional prefix to prepend, e.g. --prefix=lldb')
+  parser.add_argument(
+      '--compiler',
+      help='compiler to override the compiler specified in the compile db')
+  parser.add_argument('--suffix',
+                      help='optional suffix to append, e.g.' +
+                      ' --suffix="-Xclang -ast-dump -fsyntax-only"')
+  parser.add_argument('target_file', help='file to build')
+  return parser.parse_args()
+
+
+def BuildIt(record, prefix, compiler, suffix):
+  """Builds the file in the provided compile DB record.
+
+  Args:
+    prefix: Optional prefix to prepend to the build command.
+    compiler: Optional compiler to override the compiler specified in the
+        record.
+    suffix: Optional suffix to append to the build command.
+  """
+  raw_args = shlex.split(record['command'])
+  # The compile command might have some goop in front of it, e.g. if the build
+  # is using reclient, so shift arguments off the front until raw_args[0] looks
+  # like a clang invocation.
+  while raw_args:
+    if _PROBABLY_CLANG_RE.search(raw_args[0]):
+      break
+    raw_args = raw_args[1:]
+  if not raw_args:
+    print('error: command %s does not appear to invoke clang!' %
+          record['command'])
+    return 2
+  args = []
+  if prefix:
+    args.extend(shlex.split(prefix))
+  if compiler:
+    raw_args[0] = compiler
+  args.extend(raw_args)
+  if suffix:
+    args.extend(shlex.split(suffix))
+  print('Running %s' % ' '.join(args))
+  os.execv(args[0], args)
+
+
+def main():
+  args = ParseArgs()
+  os.chdir(args.p)
+  if args.generate_compdb:
+    with open('compile_commands.json', 'w') as f:
+      f.write(compile_db.GenerateWithNinja('.'))
+  db = compile_db.Read('.')
+  for record in db:
+    if os.path.normpath(os.path.join(args.p,
+                                     record['file'])) == args.target_file:
+      return BuildIt(record, args.prefix, args.compiler, args.suffix)
+  print('error: could not find %s in compile DB!' % args.target_file)
+  return 1
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/clang_tidy_tool.py b/clang/scripts/clang_tidy_tool.py
new file mode 100755
index 0000000000000000000000000000000000000000..d0b93a6517049212d17d6e31e557915f9d88e03c
--- /dev/null
+++ b/clang/scripts/clang_tidy_tool.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env vpython3
+# Copyright 2019 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+r"""Automatically fetch, build, and run clang-tidy from source.
+
+This script seeks to automate the steps detailed in docs/clang_tidy.md.
+
+Example: the following command disables clang-tidy's default checks (-*) and
+enables the clang static analyzer checks.
+
+  tools/clang/scripts/clang_tidy_tool.py \\
+      --checks='-*,clang-analyzer-*,-clang-analyzer-alpha*' \\
+      --header-filter='.*' \\
+      out/Release chrome
+
+The same, but checking only the changes in a diff:
+
+  git diff -U5 | tools/clang/scripts/clang_tidy_tool.py \\
+      --diff \\
+      --checks='-*,clang-analyzer-*,-clang-analyzer-alpha*' \\
+      --header-filter='.*' \\
+      out/Release chrome
+"""
+
+from __future__ import print_function
+
+import argparse
+import os
+import subprocess
+import sys
+import update
+
+import build_clang_tools_extra
+
+
+def GetBinaryPath(build_dir, binary):
+  if sys.platform == 'win32':
+    binary += '.exe'
+  return os.path.join(build_dir, 'bin', binary)
+
+
+def BuildNinjaTarget(out_dir, ninja_target):
+  args = ['autoninja', '-C', out_dir, ninja_target]
+  subprocess.check_call(args, shell=sys.platform == 'win32')
+
+
+def GenerateCompDb(out_dir):
+  gen_compdb_script = os.path.join(
+      os.path.dirname(__file__), 'generate_compdb.py')
+  comp_db_file_path = os.path.join(out_dir, 'compile_commands.json')
+  args = [
+      sys.executable,
+      gen_compdb_script,
+      '-p',
+      out_dir,
+      '-o',
+      comp_db_file_path,
+  ]
+  subprocess.check_call(args)
+
+  # The resulting compile DB includes /showIncludes, which causes clang-tidy
+  # to output a lot of unnecessary text to the console.
+  with open(comp_db_file_path, 'r') as comp_db_file:
+    comp_db_data = comp_db_file.read()
+
+  # The trailing space on /showIncludes helps keep the flags single-spaced.
+  comp_db_data = comp_db_data.replace('/showIncludes ', '')
+
+  with open(comp_db_file_path, 'w') as comp_db_file:
+    comp_db_file.write(comp_db_data)
+
+
+def RunClangTidy(checks, header_filter, auto_fix, clang_src_dir,
+                 clang_build_dir, out_dir, ninja_target):
+  """Invoke the |run-clang-tidy.py| script."""
+  run_clang_tidy_script = os.path.join(
+      clang_src_dir, 'clang-tools-extra', 'clang-tidy', 'tool',
+      'run-clang-tidy.py')
+
+  clang_tidy_binary = GetBinaryPath(clang_build_dir, 'clang-tidy')
+  clang_apply_rep_binary = GetBinaryPath(clang_build_dir,
+                                         'clang-apply-replacements')
+
+  args = [
+      sys.executable,
+      run_clang_tidy_script,
+      '-quiet',
+      '-p',
+      out_dir,
+      '-clang-tidy-binary',
+      clang_tidy_binary,
+      '-clang-apply-replacements-binary',
+      clang_apply_rep_binary,
+  ]
+
+  if checks:
+    args.append('-checks={}'.format(checks))
+
+  if header_filter:
+    args.append('-header-filter={}'.format(header_filter))
+
+  if auto_fix:
+    args.append('-fix')
+
+  args.append(ninja_target)
+  subprocess.check_call(args)
+
+
+def RunClangTidyDiff(checks, auto_fix, clang_src_dir, clang_build_dir, out_dir):
+  """Invoke the |clang-tidy-diff.py| script over the diff from stdin."""
+  clang_tidy_diff_script = os.path.join(
+      clang_src_dir, 'clang-tools-extra', 'clang-tidy', 'tool',
+      'clang-tidy-diff.py')
+
+  clang_tidy_binary = GetBinaryPath(clang_build_dir, 'clang-tidy')
+
+  args = [
+      clang_tidy_diff_script,
+      '-quiet',
+      '-p1',
+      '-path',
+      out_dir,
+      '-clang-tidy-binary',
+      clang_tidy_binary,
+  ]
+
+  if checks:
+    args.append('-checks={}'.format(checks))
+
+  if auto_fix:
+    args.append('-fix')
+
+  subprocess.check_call(args)
+
+
+def main():
+  script_name = sys.argv[0]
+
+  parser = argparse.ArgumentParser(
+      formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__)
+  parser.add_argument(
+      '--fetch',
+      nargs='?',
+      const=update.CLANG_REVISION,
+      help='Fetch and build clang sources')
+  parser.add_argument(
+      '--build',
+      action='store_true',
+      help='build clang sources to get clang-tidy')
+  parser.add_argument(
+      '--diff',
+      action='store_true',
+      default=False,
+      help='read a diff from stdin and check it')
+  parser.add_argument('--clang-src-dir', type=str,
+                      help='override llvm and clang checkout location')
+  parser.add_argument('--clang-build-dir',
+                      type=str,
+                      help='override clang build dir location')
+  parser.add_argument('--checks', help='passed to clang-tidy')
+  parser.add_argument('--header-filter', help='passed to clang-tidy')
+  parser.add_argument(
+      '--auto-fix',
+      action='store_true',
+      help='tell clang-tidy to auto-fix errors')
+  parser.add_argument('OUT_DIR', help='where we are building Chrome')
+  parser.add_argument('NINJA_TARGET', help='ninja target')
+  args = parser.parse_args()
+
+  steps = []
+
+  # If the user hasn't provided a clang checkout and build dir, check out and
+  # build clang-tidy where update.py would.
+  if not args.clang_src_dir:
+    args.clang_src_dir = build_clang_tools_extra.GetCheckoutDir(args.OUT_DIR)
+  if not args.clang_build_dir:
+    args.clang_build_dir = build_clang_tools_extra.GetBuildDir(args.OUT_DIR)
+  elif (args.clang_build_dir and not
+        os.path.isfile(GetBinaryPath(args.clang_build_dir, 'clang-tidy'))):
+    sys.exit('clang-tidy binary doesn\'t exist at ' +
+             GetBinaryPath(args.clang_build_dir, 'clang-tidy'))
+
+  if args.fetch:
+    steps.append(('Fetching LLVM sources',
+                  lambda: build_clang_tools_extra.FetchLLVM(
+                      args.clang_src_dir, args.fetch)))
+
+  if args.build:
+    steps.append(('Building clang-tidy',
+                  lambda: build_clang_tools_extra.BuildTargets(
+                      args.clang_build_dir,
+                      ['clang-tidy', 'clang-apply-replacements'])))
+
+  steps += [
+      ('Building ninja target: %s' % args.NINJA_TARGET,
+       lambda: BuildNinjaTarget(args.OUT_DIR, args.NINJA_TARGET)),
+      ('Generating compilation DB', lambda: GenerateCompDb(args.OUT_DIR))
+  ]
+  if args.diff:
+    steps += [
+        ('Running clang-tidy on diff',
+         lambda: RunClangTidyDiff(args.checks, args.auto_fix,
+                                  args.clang_src_dir, args.clang_build_dir,
+                                  args.OUT_DIR)),
+    ]
+  else:
+    steps += [
+        ('Running clang-tidy',
+         lambda: RunClangTidy(args.checks, args.header_filter,
+                              args.auto_fix, args.clang_src_dir,
+                              args.clang_build_dir, args.OUT_DIR,
+                              args.NINJA_TARGET)),
+    ]
+
+  # Run the steps in sequence.
+  for i, (msg, step_func) in enumerate(steps):
+    # Print a progress message.
+    print('-- %s %s' % (script_name, '-' * (80 - len(script_name) - 4)))
+    print('-- [%d/%d] %s' % (i + 1, len(steps), msg))
+    print(80 * '-')
+
+    step_func()
+
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/compiler_inputs_size.py b/clang/scripts/compiler_inputs_size.py
new file mode 100755
index 0000000000000000000000000000000000000000..a003b61a8a581e8689d942a0a76b8bf2596b5c7a
--- /dev/null
+++ b/clang/scripts/compiler_inputs_size.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+# Copyright 2022 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""This script measures the compiler input size for a build target and each
+translation unit therein. The input size of a translation unit is the sum size
+of the source file and all files #included by it.
+
+As input, the script takes the output of "ninja -t commands <target>" and
+"ninja -t deps", which tell the script which source files are involved in the
+build and what the dependencies of each file are.
+
+In order to get accurate dependency information, the target must first have
+been built. Also, the gn flag 'system_headers_in_deps' must be enabled to
+capture dependencies on system headers.
+
+Unlike analyze_includes.py, this script does not compute the full include
+graph, which means it runs significantly faster in exchange for providing more
+limited information. On a fast machine, it should take less than a minute for
+the 'chrome' target.
+
+This currently doesn't work on Windows due to different deps handling.
+
+Example usage: (Remove use_remoteexec=true if you don't have reclient access.)
+
+$ gn gen out/Debug --args="system_headers_in_deps=true enable_nacl=false
+    symbol_level=0 use_remoteexec=true"
+$ autoninja -C out/Debug chrome
+$ tools/clang/scripts/compiler_inputs_size.py out/Debug \
+    <(ninja -C out/Debug -t commands chrome) <(ninja -C out/Debug -t deps)
+apps/app_lifetime_monitor.cc 9,034,754
+apps/app_lifetime_monitor_factory.cc 5,863,660
+apps/app_restore_service.cc 9,198,130
+[...]
+
+Total: 233,482,538,553
+"""
+
+import argparse
+import doctest
+import os
+import pathlib
+import re
+import sys
+
+norm_paths = {}  # Memoization cache for norm_path().
+
+
+def norm_path(build_dir, filename):
+  if not filename in norm_paths:
+    p = pathlib.Path(os.path.join(build_dir, filename)).resolve()
+    x = os.path.relpath(p)
+    norm_paths[filename] = x
+  return norm_paths[filename]
+
+
+file_sizes = {}  # Memoization cache for size().
+
+
+def size(filename):
+  """Get the size of a file."""
+  if not filename in file_sizes:
+    file_sizes[filename] = os.path.getsize(filename)
+  return file_sizes[filename]
+
+
+def parse_deps(build_dir, deps_output):
+  r"""Parse the output of 'ninja -t deps', which contains information about
+  which source files each object file depends on.
+
+  Returns a dict of sets, e.g. {'foo.cc': {'foo.h', 'bar.h'}}.
+
+  >>> deps = parse_deps(
+  ...     'dir1/dir2',
+  ...     'obj/foo.o: #deps 3, deps mtime 123456789 (VALID)\n'
+  ...     '    ../../foo.cc\n'
+  ...     '    ../../foo.h\n'
+  ...     '    ../../bar.h\n'
+  ...     '\n'
+  ...     'obj/bar.o: #deps 3, deps mtime 123456789 (VALID)\n'
+  ...     '    ../../bar.cc\n'
+  ...     '    ../../bar.h\n'
+  ...     '    gen.h\n'
+  ...     '\n'.splitlines(keepends=True))
+  >>> sorted(deps.keys())
+  ['bar.cc', 'foo.cc']
+  >>> sorted(deps['foo.cc'])
+  ['bar.h', 'foo.h']
+  >>> sorted(deps['bar.cc'])
+  ['bar.h', 'dir1/dir2/gen.h']
+
+  >>> deps = parse_deps(
+  ...     'dir1/dir2',
+  ...     'obj/foo.o: #deps 2, deps mtime 123456789 (STALE)\n'
+  ...     '    ../../foo.cc\n'
+  ...     '    ../../foo.h\n'
+  ...     '\n'
+  ...     'obj/bar.o: #deps 2, deps mtime 123456789 (VALID)\n'
+  ...     '    ../../bar.cc\n'
+  ...     '    ../../bar.h\n'
+  ...     '\n'.splitlines(keepends=True))
+  >>> sorted(deps.keys())
+  ['bar.cc']
+
+  >>> deps = parse_deps(
+  ...     'dir1/dir2',
+  ...     'obj/x86/foo.o: #deps 2, deps mtime 123456789 (VALID)\n'
+  ...     '    ../../foo.cc\n'
+  ...     '    ../../foo.h\n'
+  ...     '\n'
+  ...     'obj/arm/foo.o: #deps 2, deps mtime 123456789 (VALID)\n'
+  ...     '    ../../foo.cc\n'
+  ...     '    ../../foo_arm.h\n'
+  ...     '\n'.splitlines(keepends=True))
+  >>> sorted(deps['foo.cc'])
+  ['foo.h', 'foo_arm.h']
+  """
+
+  # obj/foo.o: #deps 3, deps mtime 123456789 (VALID)
+  #     ../../foo.cc
+  #     ../../foo.h
+  #     ../../bar.h
+  #
+  HEADER_RE = re.compile(r'.*: #deps (\d+), deps mtime \d+ \((VALID|STALE)\)')
+
+  deps = dict()
+  deps_iter = iter(deps_output)
+  while True:
+    # Read the deps header line.
+    try:
+      line = next(deps_iter)
+    except StopIteration:
+      break
+    m = HEADER_RE.match(line)
+    if not m:
+      raise Exception("Unexpected deps header line: '%s'" % line)
+    num_deps = int(m.group(1))
+    if m.group(2) == 'STALE':
+      # A deps entry is stale if the .o file doesn't exist or if it's newer
+      # than the deps entry. Skip such entries.
+      for _ in range(num_deps + 1):
+        next(deps_iter)
+      continue
+
+    # Read the main file line.
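+    # ('ninja -t deps' indents each path with four spaces, which is why the
+    # code below strips the first four characters with line[4:].)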
+    line = next(deps_iter)
+    if not line.startswith(' '):
+      raise Exception("Unexpected deps main file line '%s'" % line)
+    main_file = norm_path(build_dir, line[4:].rstrip('\n'))
+    deps.setdefault(main_file, set())
+
+    # Read the deps lines.
+    for _ in range(num_deps - 1):
+      line = next(deps_iter)
+      if not line.startswith(' '):
+        raise Exception("Unexpected deps file line '%s'" % line)
+      dep_file = norm_path(build_dir, line[4:].rstrip('\n'))
+      deps[main_file].add(dep_file)
+
+    # Read the blank line.
+    line = next(deps_iter)
+    if line != '\n':
+      raise Exception("Expected a blank line but got '%s'" % line)
+
+  return deps
+
+
+def parse_commands(build_dir, commands_output):
+  r"""Parse the output of 'ninja -t commands <target>' to extract which source
+  files are involved in building that target. Returns a set, e.g. {'foo.cc',
+  'bar.cc'}.
+
+  >>> sorted(parse_commands('dir1/dir2',
+  ...     '/x/rewrapper ../y/clang++ -a -b -c ../../foo.cc -o foo.o\n'
+  ...     'clang -x blah -c ../../bar.c -o bar.o\n'
+  ...     'clang-cl.exe /Fobaz.o /c baz.cc\n'.splitlines(keepends=True)))
+  ['bar.c', 'dir1/dir2/baz.cc', 'foo.cc']
+  """
+  COMPILE_RE = re.compile(r'.*clang.* [/-]c (\S+)')
+  files = set()
+  for line in commands_output:
+    m = COMPILE_RE.match(line)
+    if m:
+      files.add(norm_path(build_dir, m.group(1)))
+  return files
+
+
+def main():
+  if doctest.testmod()[0]:
+    return 1
+
+  parser = argparse.ArgumentParser(description='Measure compiler input sizes '
+                                   'for a build target.')
+  parser.add_argument('build_dir', type=str, help='Chromium build dir')
+  parser.add_argument('commands',
+                      type=argparse.FileType('r', errors='ignore'),
+                      help='File with the output of "ninja -t commands"')
+  parser.add_argument('deps',
+                      type=argparse.FileType('r', errors='ignore'),
+                      help='File with the output of "ninja -t deps"')
+  args = parser.parse_args()
+
+  if sys.platform == 'win32':
+    print('Not currently supported on Windows due to different deps handling.')
+    return 1
+
+  if not os.path.isdir(args.build_dir):
+    print("Invalid build dir: '%s'" % args.build_dir)
+    return 1
+
+  deps = parse_deps(args.build_dir, args.deps)
+  if not deps:
+    print('Error: empty deps file.')
+    return 1
+
+  files = parse_commands(args.build_dir, args.commands)
+  if not files:
+    print('Error: empty commands file.')
+    return 1
+
+  total = 0
+  for f in sorted(files):
+    if f not in deps:
+      raise Exception("Missing deps for '%s'" % f)
+    s = size(f) + sum(size(d) for d in deps[f])
+    print('{} {}'.format(f, s))
+    total += s
+
+  print()
+  print('Total: {}'.format(total))
+
+  if not any(file.endswith('stddef.h') for file in file_sizes):
+    print('Warning: did not see stddef.h.')
+    print('Was the build configured with system_headers_in_deps=true?')
+
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/compiler_inputs_size_diff.py b/clang/scripts/compiler_inputs_size_diff.py
new file mode 100755
index 0000000000000000000000000000000000000000..ad2314befb70c216185e8d9f0df1c0eb0e97bda7
--- /dev/null
+++ b/clang/scripts/compiler_inputs_size_diff.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+# Copyright 2024 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Calculate deltas between results of compiler_inputs_size.py.
+
+As input, the script takes the outputs of compiler_inputs_size.py produced from
+two different builds. The output is a list of translation units and deltas in
+bytes sorted by deltas in decreasing order.
TUs that didn't change in size are +omitted. + +Example usage: + +$ tools/clang/scripts/compiler_inputs_size_diff.py before.txt after.txt +Before: 343.41 GiB (368729376427) +After: 345.43 GiB (370904487700) +Delta: +2.03 GiB (+2175111273) +Delta %: 0.59% +[...] +third_party/blink/renderer/core/dom/node.cc +601503 +third_party/blink/renderer/core/frame/frame.cc +560405 +third_party/blink/renderer/core/fetch/body.cc -75 +third_party/blink/renderer/core/url/dom_url.cc -75 +third_party/blink/renderer/modules/ml/ml.cc -75 +chrome/browser/ui/browser.cc -287 +content/browser/browser_interface_binders.cc -287 +[...] + +Test this code with: +$ python3 -m doctest -v tools/clang/scripts/compiler_inputs_size_diff.py +""" + +import argparse +import re +import sys +from typing import Dict, Iterable + +LINE_RE = re.compile(r'(.+) (\d+)\n') + + +def parse_report_into_tu_size_dict(lines: Iterable[str]) -> Dict[str, int]: + r"""Parse a report from compiler_inputs_size.py into a dictionary. + + The report is expected to have one line per translation unit followed by + a blank line and then a line with the total size. + + Args: + lines: An iterable of lines from the report. + + Returns: + A dictionary mapping translation unit paths (including a "total" key) to + their sizes in bytes. + + >>> parse_report_into_tu_size_dict( + ... 'foo.cc 1234\n' + ... 'bar.cc 5678\n' + ... '\n' + ... 'Total: 6912\n'.splitlines(keepends=True)) + {'foo.cc': 1234, 'bar.cc': 5678, 'total': 6912} + """ + sizes = {} + lines_iter = iter(lines) + for line in lines_iter: + m = LINE_RE.match(line) + if not m: + assert (line == '\n') + line = next(lines_iter) + sizes['total'] = int(line.rstrip().split(' ')[1]) + break + sizes[m.group(1)] = int(m.group(2)) + return sizes + + +def diff_tu_sizes(d1: Dict[str, int], d2: Dict[str, int]) -> Dict[str, int]: + r"""Calculate the size diff for each translation unit between two reports. + + Args: + d1: dict mapping translation unit paths to their sizes from the + first report. + d2: dict mapping translation unit paths to their sizes from the + second report. + + Returns: + A dictionary mapping translation unit paths to their size differences. + Includes entries for all TUs present in either report. + + >>> diff_tu_sizes( + ... {'foo.cc': 1234, 'bar.cc': 5678}, + ... {'foo.cc': 1200, 'baz.cc': 9012}) + {'foo.cc': -34, 'bar.cc': -5678, 'baz.cc': 9012} + """ + size_diffs = {} + for path, size in d1.items(): + size_diffs[path] = d2.get(path, 0) - size + remaining_keys = set(d2) - set(d1) + for path in remaining_keys: + size_diffs[path] = d2[path] + return size_diffs + + +def bytes_to_human(bytes: int, sign: bool = False) -> str: + """Converts a number of bytes to a human-readable string. + + Args: + bytes: The number of bytes to convert. + sign: whether to prefix with the sign if positive. + + Returns: + A human-readable string representation of the number of bytes. + """ + units = ['B', 'KiB', 'MiB', 'GiB', 'TiB'] + i = 0 + while bytes >= 1024 and i < len(units) - 1: + bytes /= 1024 + i += 1 + if sign: + return f'{bytes:+.2f} {units[i]}' + return f'{bytes:.2f} {units[i]}' + + +def print_diff(before: Dict[str, int], after: Dict[str, int]): + r"""Print the diff between two compiler_inputs_size.py reports. + + >>> print_diff( + ... {'foo.cc': 1234, 'bar.cc': 5678, 'total': 6912}, + ... 
{'foo.cc': 1200, 'bar.cc': 5678, 'baz.cc': 1012, 'total': 7912}) + Before: 6.75 KiB (6912) + After: 7.73 KiB (7912) + Delta: +1000.00 B (+1000) + Delta %: 14.47% + baz.cc +1012 + foo.cc -34 + """ + size_diffs = diff_tu_sizes(before, after) + max_path_length = max(len(k) for k in size_diffs) + if max_path_length > 100: + max_path_length = 100 + size_diffs = sorted([(k, v) for (k, v) in size_diffs.items() if v != 0], + key=lambda x: -x[1]) + + before_total = before['total'] + after_total = after['total'] + delta = after_total - before_total + print(f'Before: {bytes_to_human(before_total)} ({before_total})') + print(f'After: {bytes_to_human(after_total)} ({after_total})') + print(f'Delta: {bytes_to_human(delta, sign=True)} ({(delta):+d})') + print(f'Delta %: {(delta) / before_total * 100:.2f}%') + for name, size in size_diffs: + if name == 'total': + continue + print('{} {:+d}'.format(name.ljust(max_path_length), size)) + + +def main(): + parser = argparse.ArgumentParser( + description='Calculate deltas between results of compiler_inputs_size.py') + parser.add_argument('before', + type=argparse.FileType('r'), + help='First report from compiler_inputs_size.py.') + parser.add_argument('after', + type=argparse.FileType('r'), + help='Second report from compiler_inputs_size.py.') + args = parser.parse_args() + + before = parse_report_into_tu_size_dict(args.before) + after = parse_report_into_tu_size_dict(args.after) + print_diff(before, after) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/clang/scripts/dashboard.py b/clang/scripts/dashboard.py new file mode 100755 index 0000000000000000000000000000000000000000..3470945d110a2fbba21ca937cafaccbfe41f17bc --- /dev/null +++ b/clang/scripts/dashboard.py @@ -0,0 +1,328 @@ +#!/usr/bin/env python3 +# Copyright 2023 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import argparse +import json +import os +import re +import subprocess +import sys +import time +import urllib.request + +THIS_DIR = os.path.abspath(os.path.dirname(__file__)) +CHROMIUM_REPO = os.path.abspath(os.path.join(THIS_DIR, '..', '..', '..')) +LLVM_REPO = '' # This gets filled in by main(). +RUST_REPO = '' # This gets filled in by main(). + +# This script produces the dashboard at +# https://commondatastorage.googleapis.com/chromium-browser-clang/toolchain-dashboard.html +# +# Usage: +# +# ./dashboard.py > /tmp/toolchain-dashboard.html +# gsutil.py cp -a public-read /tmp/toolchain-dashboard.html gs://chromium-browser-clang/ + +#TODO: Add libc++ graph. +#TODO: Plot 30-day moving averages. +#TODO: Overview with current age of each toolchain component. +#TODO: Tables of last N rolls for each component. +#TODO: Link to next roll bug, count of blockers, etc. 
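+# Overall flow: clang_rolls() and rust_rolls() parse the `git log -p` history
+# of the roll scripts into {commit timestamp: upstream revision} dicts;
+# roll_ages() turns those into (date string, age in days) pairs, which
+# print_dashboard() emits as the dashboard page.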
+
+
+def timestamp_to_str(timestamp):
+  '''Return a string representation of a Unix timestamp.'''
+  return time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp))
+
+
+def get_git_timestamp(repo, revision):
+  '''Get the Unix timestamp of a git commit.'''
+  out = subprocess.check_output([
+      'git', '-C', repo, 'show', '--date=unix', '--pretty=fuller',
+      '--no-patch', revision
+  ]).decode('utf-8')
+  DATE_RE = re.compile(r'^CommitDate: (\d+)$', re.MULTILINE)
+  m = DATE_RE.search(out)
+  return int(m.group(1))
+
+
+svn2git_dict = None
+
+
+def svn2git(svn_rev):
+  global svn2git_dict
+  if not svn2git_dict:
+    # The JSON was generated with:
+    # $ ( echo '{' && git rev-list 40c47680eb2a1cb9bb7f8598c319335731bd5204 | while read commit ; do SVNREV=$(git log --format=%B -n 1 $commit | grep '^llvm-svn: [0-9]*$' | awk '{print $2 }') ; [[ ! -z '$SVNREV' ]] && echo "\"$SVNREV\": \"$commit\"," ; done && echo '}' ) | tee /tmp/llvm_svn2git.json
+    # and manually removing the trailing comma of the last entry.
+    with urllib.request.urlopen(
+        'https://commondatastorage.googleapis.com/chromium-browser-clang/llvm_svn2git.json'
+    ) as url:
+      svn2git_dict = json.load(url)
+    # For branch commits, use the most recent commit to main instead.
+    svn2git_dict['324578'] = '93505707b6d3ec117e555c5a48adc2cc56470e38'
+    svn2git_dict['149886'] = '60fc2425457f43f38edf5b310551f996f4f42df8'
+    svn2git_dict['145240'] = '12330650f843cf7613444e345a4ecfcf06923761'
+  return svn2git_dict[svn_rev]
+
+
+def clang_rolls():
+  '''Return a dict from timestamp to clang revision rolled in at that time.'''
+  FIRST_ROLL = 'd78457ce2895e5b98102412983a979f1896eca90'
+  log = subprocess.check_output([
+      'git', '-C', CHROMIUM_REPO, 'log', '--date=unix', '--pretty=fuller',
+      '-p', f'{FIRST_ROLL}..origin/main', '--',
+      'tools/clang/scripts/update.py', 'tools/clang/scripts/update.sh'
+  ]).decode('utf-8')
+
+  # AuthorDate is when a commit was first authored; CommitDate (part of
+  # --pretty=fuller) is when a commit was last updated. We use the latter
+  # since it's more likely to reflect when the commit became part of upstream.
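+  # Example lines these regexes match:
+  #   CommitDate: 1700000000
+  #   +CLANG_REVISION = 'llvmorg-17-init-12345-gabcdef012345'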
+  DATE_RE = re.compile(r'^CommitDate: (\d+)$')
+  VERSION_RE = re.compile(
+      r'^\+CLANG_REVISION = \'llvmorg-\d+-init-\d+-g([0-9a-f]+)\'$')
+  VERSION_RE_OLD = re.compile(r'^\+CLANG_REVISION = \'([0-9a-f]{10,})\'$')
+  # +CLANG_REVISION=125186
+  VERSION_RE_SVN = re.compile(r'^\+CLANG_REVISION ?= ?\'?(\d{1,6})\'?$')
+
+  rolls = {}
+  date = None
+  for line in log.splitlines():
+    m = DATE_RE.match(line)
+    if m:
+      date = int(m.group(1))
+      continue
+
+    rev = None
+    if m := VERSION_RE.match(line):
+      rev = m.group(1)
+    elif m := VERSION_RE_OLD.match(line):
+      rev = m.group(1)
+    elif m := VERSION_RE_SVN.match(line):
+      rev = svn2git(m.group(1))
+
+    if rev:
+      assert (date)
+      rolls[date] = rev
+      date = None
+
+  return rolls
+
+
+def roll_ages(rolls, upstream_repo):
+  '''Given a dict from timestamps to upstream revisions, return a list of
+  pairs of timestamp string and *upstream revision age* in days at that
+  timestamp.'''
+
+  ages = []
+  def add(timestamp, rev):
+    ages.append(
+        (timestamp_to_str(timestamp),
+         (timestamp - get_git_timestamp(upstream_repo, rev)) / (3600 * 24)))
+
+  assert (rolls)
+  prev_roll_rev = None
+  for roll_time, roll_rev in sorted(rolls.items()):
+    if prev_roll_rev:
+      add(roll_time - 1, prev_roll_rev)
+    add(roll_time, roll_rev)
+    prev_roll_rev = roll_rev
+  add(time.time(), prev_roll_rev)
+
+  return ages
+
+
+def rust_rolls():
+  '''Return a dict from timestamp to Rust revision rolled in at that time.'''
+  FIRST_ROLL = 'c77dda41d8904b6c03083cd939733d9f754b0aeb'
+  # Some rolls used CIPD version numbers (dates) instead of Git hashes.
+  CIPD_ROLLS = {
+      '20220914': '63b8d9b6898ec926f9eafa372506b6722d583694',
+      '20221101': 'b7d9af278cc7e2d3bc8845156a0ab405a3536724',
+      '20221118': '9db23f8d30e8d00e2e5e18b51f7bb8e582520600',
+      '20221207': 'a09e8c55c663d2b070f99ab0fdadbcc2c45656b2',
+      '20221209': '9553a4d439ffcf239c12142a78aa9923058e8a78',
+      '20230117': '925dc37313853f15dc21e42dc869b024fe488ef3',
+  }
+  log = subprocess.check_output([
+      'git', '-C', CHROMIUM_REPO, 'log', '--date=unix', '--pretty=fuller',
+      '-p', f'{FIRST_ROLL}..origin/main', '--', 'tools/rust/update_rust.py'
+  ]).decode('utf-8')
+
+  # AuthorDate is when a commit was first authored; CommitDate (part of
+  # --pretty=fuller) is when a commit was last updated. We use the latter
+  # since it's more likely to reflect when the commit became part of upstream.
+  DATE_RE = re.compile(r'^CommitDate: (\d+)$')
+  VERSION_RE = re.compile(r'^\+RUST_REVISION = \'([0-9a-f]+)\'$')
+
+  rolls = {}
+  date = None
+  for line in log.splitlines():
+    m = DATE_RE.match(line)
+    if m:
+      date = int(m.group(1))
+      continue
+
+    rev = None
+    if m := VERSION_RE.match(line):
+      rev = m.group(1)
+      if rev in CIPD_ROLLS:
+        rev = CIPD_ROLLS[rev]
+
+    if rev:
+      assert (date)
+      rolls[date] = rev
+      date = None
+
+  return rolls
+
+
+def print_dashboard():
+  print('''<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Chromium Toolchain Dashboard</title>
+</head>
+<body>
+<h1>Chromium Toolchain Dashboard (go/chrome-clang-dash)</h1>
+''')
+
+  print(f'<p>Last updated: {timestamp_to_str(time.time())} UTC</p>')
+
+  print('<h2>Clang</h2>')
+  print('<pre>')
+  for when, age in roll_ages(clang_rolls(), LLVM_REPO):
+    print('%s %.1f' % (when, age))
+  print('</pre>')
+
+  print('<h2>Rust</h2>')
+  print('<pre>')
+  for when, age in roll_ages(rust_rolls(), RUST_REPO):
+    print('%s %.1f' % (when, age))
+  print('</pre>')
+
+  print('''
+</body>
+</html>
+''')
+
+
+def main():
+  parser = argparse.ArgumentParser(
+      description='Generate Chromium toolchain dashboard.')
+  parser.add_argument('--llvm-dir',
+                      help='LLVM repository directory.',
+                      default=os.path.join(CHROMIUM_REPO, '..', '..',
+                                           'llvm-project'))
+  parser.add_argument('--rust-dir',
+                      help='Rust repository directory.',
+                      default=os.path.join(CHROMIUM_REPO, '..', '..', 'rust'))
+  args = parser.parse_args()
+
+  global LLVM_REPO
+  LLVM_REPO = args.llvm_dir
+  if not os.path.isdir(os.path.join(LLVM_REPO, '.git')):
+    print(f'Invalid LLVM repository path: {LLVM_REPO}')
+    return 1
+
+  global RUST_REPO
+  RUST_REPO = args.rust_dir
+  if not os.path.isdir(os.path.join(RUST_REPO, '.git')):
+    print(f'Invalid Rust repository path: {RUST_REPO}')
+    return 1
+
+  print_dashboard()
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/expand_thin_archives.py b/clang/scripts/expand_thin_archives.py
new file mode 100755
index 0000000000000000000000000000000000000000..ed2fd694638227b4c0c96b6334381edbdb808fd5
--- /dev/null
+++ b/clang/scripts/expand_thin_archives.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+# Copyright 2019 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Library and tool to expand command lines that mention thin archives
+# into command lines that mention the contained object files.
+
+import argparse
+import sys
+
+from remote_link import RemoteLinkWindows
+from remote_ld import RemoteLinkUnix
+
+
+def main(argv):
+  ap = argparse.ArgumentParser(
+      description=('Expand command lines that mention thin archives into '
+                   'command lines that mention the contained object files.'),
+      usage='%(prog)s [options] -- command line')
+  ap.add_argument('-o',
+                  '--output',
+                  help=('Write the new command line to the named file '
+                        'instead of standard output.'))
+  ap.add_argument('-p', '--linker-prefix',
+                  help='String to prefix linker flags with.',
+                  default='')
+  ap.add_argument('cmdline',
+                  nargs=argparse.REMAINDER,
+                  help='Command line to expand. Should be preceded by \'--\'.')
+  args = ap.parse_args(argv[1:])
+  if not args.cmdline:
+    ap.print_help(sys.stderr)
+    return 1
+
+  cmdline = args.cmdline
+  if cmdline[0] == '--':
+    cmdline = cmdline[1:]
+  linker_prefix = args.linker_prefix
+
+  if linker_prefix == '-Wl,':
+    linker = RemoteLinkUnix()
+  else:
+    linker = RemoteLinkWindows()
+
+  rsp_expanded = list(linker.expand_args_rsps(cmdline))
+  expanded_args = list(linker.expand_thin_archives(rsp_expanded))
+
+  if args.output:
+    output = open(args.output, 'w')
+  else:
+    output = sys.stdout
+  for arg in expanded_args:
+    output.write('%s\n' % (arg,))
+  if args.output:
+    output.close()
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))
diff --git a/clang/scripts/extract_edits.py b/clang/scripts/extract_edits.py
new file mode 100755
index 0000000000000000000000000000000000000000..b93a712c3a85e35ebf294d284eca85448dff077b
--- /dev/null
+++ b/clang/scripts/extract_edits.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env vpython3
+# Copyright 2016 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Script to extract edits from clang tool output.
+
+If a clang tool emits edits, then the edits should look like this:
+    ...
+    ==== BEGIN EDITS ====
+    <edit1>
+    <edit2>
+    ...
+    ==== END EDITS ====
+    ...
+
+extract_edits.py takes input that is concatenated from multiple tool
+invocations and extracts just the edits.
+In other words, given the following input:
+    ...
+    ==== BEGIN EDITS ====
+    <edit1>
+    <edit2>
+    ==== END EDITS ====
+    ...
+    ==== BEGIN EDITS ====
+    <edit3>
+    <edit1>
+    ==== END EDITS ====
+    ...
+extract_edits.py would emit the following output:
+    <edit1>
+    <edit2>
+    <edit3>
+
+This python script is mainly needed on Windows.
+On unix this script can be replaced with running sed as follows:
+
+    $ cat run_tool.debug.out \
+        | sed '/^==== BEGIN EDITS ====$/,/^==== END EDITS ====$/{//!b};d' \
+        | sort | uniq
+"""
+
+from __future__ import print_function
+
+import sys
+
+
+def main():
+  # TODO(dcheng): extract_edits.py should normalize paths. Doing this in
+  # apply_edits.py is too late, as a common use case is to apply edits from
+  # many different platforms.
+  unique_lines = set()
+  inside_marker_lines = False
+  for line in sys.stdin:
+    line = line.rstrip("\n\r")
+    if line == '==== BEGIN EDITS ====':
+      inside_marker_lines = True
+      continue
+    if line == '==== END EDITS ====':
+      inside_marker_lines = False
+      continue
+    if inside_marker_lines and line not in unique_lines:
+      unique_lines.add(line)
+      print(line)
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/generate_compdb.py b/clang/scripts/generate_compdb.py
new file mode 100755
index 0000000000000000000000000000000000000000..78cc9e54bc3a0aa3cd23974883683b0a9965e67c
--- /dev/null
+++ b/clang/scripts/generate_compdb.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+# Copyright 2014 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Generates compile DBs that are more amenable for clang tooling."""
+
+import argparse
+import json
+import os
+import sys
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
+sys.path.insert(0, tool_dir)
+
+from clang import compile_db
+
+
+def main(argv):
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--filter_arg',
+      nargs='*',
+      help='Additional argument(s) to filter out from compilation database.')
+  parser.add_argument(
+      '-o',
+      help='File to write the compilation database to. Defaults to stdout')
+  parser.add_argument('-p', required=True, help='Path to build directory')
+  parser.add_argument(
+      '--target_os',
+      choices=[
+          'android',
+          'chromeos',
+          'fuchsia',
+          'ios',
+          'linux',
+          'mac',
+          'nacl',
+          'win',
+      ],
+      help='Target OS - see `gn help target_os`. Set to "win" when ' +
+      'cross-compiling for Windows from Linux or another host')
+  parser.add_argument('targets',
+                      nargs='*',
+                      help='Additional targets to pass to ninja')
+
+  args = parser.parse_args()
+
+  compdb_text = json.dumps(compile_db.ProcessCompileDatabase(
+      compile_db.GenerateWithNinja(args.p, args.targets), args.filter_arg,
+      args.target_os),
+                           indent=2)
+  if args.o is None:
+    print(compdb_text)
+  else:
+    with open(args.o, 'w') as f:
+      f.write(compdb_text)
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))
diff --git a/clang/scripts/get_tensorflow.py b/clang/scripts/get_tensorflow.py
new file mode 100755
index 0000000000000000000000000000000000000000..ae3d4c3e3e1dd28397d031e48bf9e4e3db231a4a
--- /dev/null
+++ b/clang/scripts/get_tensorflow.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env vpython3
+# Copyright 2022 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+# Credits to the Fuchsia Authors for creating this file.
+"""This script is used to fetch the tensorflow pip package (via vpython) for
+use by MLGO during the LLVM compile. The vpython spec is hand-created, with
+the help of pip. The transitive dependencies can be retrieved by using
+pipdeptree, a pip package, by running `pipdeptree -p tensorflow`.
+"""
+# [VPYTHON:BEGIN]
+# python_version: "3.8"
+# wheel: <
+#   name: "infra/python/wheels/absl-py-py3"
+#   version: "version:2.1.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/astunparse-py2_py3"
+#   version: "version:1.6.3"
+# >
+# wheel: <
+#   name: "infra/python/wheels/cachetools-py3"
+#   version: "version:4.2.2"
+# >
+# wheel: <
+#   name: "infra/python/wheels/certifi-py2_py3"
+#   version: "version:2021.5.30"
+# >
+# wheel: <
+#   name: "infra/python/wheels/charset_normalizer-py3"
+#   version: "version:2.0.4"
+# >
+# wheel: <
+#   name: "infra/python/wheels/flatbuffers-py3"
+#   version: "version:24.3.25"
+# >
+# wheel: <
+#   name: "infra/python/wheels/gast-py3"
+#   version: "version:0.4.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/google-auth-oauthlib-py3"
+#   version: "version:1.0.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/google-auth-py3"
+#   version: "version:2.16.2"
+# >
+# wheel: <
+#   name: "infra/python/wheels/google-pasta-py3"
+#   version: "version:0.2.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/grpcio/${vpython_platform}"
+#   version: "version:1.57.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/h5py/${vpython_platform}"
+#   version: "version:3.11.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/idna-py3"
+#   version: "version:3.2"
+# >
+# wheel: <
+#   name: "infra/python/wheels/importlib-metadata-py3"
+#   version: "version:8.0.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/jax-py3"
+#   version: "version:0.4.13"
+# >
+# wheel: <
+#   name: "infra/python/wheels/keras-py3"
+#   version: "version:2.12.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/libclang/${vpython_platform}"
+#   version: "version:18.1.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/markdown-py3"
+#   version: "version:3.3.4"
+# >
+# wheel: <
+#   name: "infra/python/wheels/markupsafe/${vpython_platform}"
+#   version: "version:2.1.5"
+# >
+# wheel: <
+#   name: "infra/python/wheels/ml_dtypes/${vpython_platform}"
+#   version: "version:0.2.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/numpy/${vpython_platform}"
+#   version: "version:1.22.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/oauthlib-py2_py3"
+#   version: "version:3.2.2"
+# >
+# wheel: <
+#   name: "infra/python/wheels/opt-einsum-py3"
+#   version: "version:3.3.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/packaging-py3"
+#   version: "version:24.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/protobuf-py3"
+#   version: "version:4.25.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/pyasn1-py2_py3"
+#   version: "version:0.4.8"
+# >
+# wheel: <
+#   name: "infra/python/wheels/pyasn1_modules-py2_py3"
+#   version: "version:0.2.8"
+# >
+# wheel: <
+#   name: "infra/python/wheels/requests-py3"
+#   version: "version:2.31.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/requests-oauthlib-py2_py3"
+#   version: "version:2.0.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/rsa-py3"
+#   version: "version:4.7.2"
+# >
+# wheel: <
+#   name: "infra/python/wheels/scipy/${vpython_platform}"
+#   version: "version:1.10.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/setuptools-py3"
+#   version: "version:70.3.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/six-py2_py3"
+#   version: "version:1.16.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/tensorboard-py3"
+#   version: "version:2.12.3"
+# >
+# wheel: <
+#   name: "infra/python/wheels/tensorboard-data-server-py3"
+#   version: "version:0.7.2"
+# >
+# wheel: <
+#   name: "infra/python/wheels/tensorflow/${vpython_platform}"
+#   version: "version:2.12.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/tensorflow-estimator-py3"
+#   version: "version:2.12.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/tensorflow-io-gcs-filesystem/${vpython_platform}"
+#   version: "version:0.34.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/termcolor-py2_py3"
+#   version: "version:2.4.0"
+# >
+# wheel: <
+#   name: "infra/python/wheels/typing-extensions-py3"
+#   version: "version:4.0.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/urllib3-py2_py3"
+#   version: "version:1.26.6"
+# >
+# wheel: <
+#   name: "infra/python/wheels/werkzeug-py3"
+#   version: "version:3.0.3"
+# >
+# wheel: <
+#   name: "infra/python/wheels/wheel-py2_py3"
+#   version: "version:0.37.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/wrapt/${vpython_platform}"
+#   version: "version:1.14.1"
+# >
+# wheel: <
+#   name: "infra/python/wheels/zipp-py3"
+#   version: "version:3.7.0"
+# >
+
+# [VPYTHON:END]
+import importlib.util
+import os
+
+spec = importlib.util.find_spec("tensorflow")
+print(os.path.dirname(spec.origin))
diff --git a/clang/scripts/include-analysis.html b/clang/scripts/include-analysis.html
new file mode 100644
index 0000000000000000000000000000000000000000..44120fabf002df5fa0fb085627e7e8d8689004ae
--- /dev/null
+++ b/clang/scripts/include-analysis.html
@@ -0,0 +1,356 @@
+<!doctype html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>Chrome #include Analysis</title>
+</head>
+<body>
+<h1>Chrome #include Analysis (go/chrome-includes) <small>Beta</small></h1>
+
+<p>Build target: x (Linux). Revision: x. Analyzed on: x.</p>
+
+<p>Number of translation units: x. Total build size (sum of expanded
+translation unit sizes): x bytes.</p>
+
+<p>Number of files: x. Total file size: x bytes.</p>
+
+<h2>Archive</h2>
+
+<h2>Per-File Analysis</h2>
+
+<table>
+  <tr>
+    <th>#</th>
+    <th>Filename</th>
+    <th>Individual Size (B) ⓘ</th>
+    <th>Expanded Size (B) ⓘ</th>
+    <th>Added Size (B) ⓘ</th>
+    <th>Occurrences ⓘ</th>
+    <th>Directly Included In ⓘ</th>
+    <th>Direct Includes ⓘ</th>
+  </tr>
+</table>
+
+<p>File size does not correlate perfectly with compile time, but can serve as
+a rough guide to what files are slow to compile.</p>
+
+<p>Analysis by analyze_includes.py.</p>
+</body>
+</html>
diff --git a/clang/scripts/package.py b/clang/scripts/package.py
new file mode 100755
index 0000000000000000000000000000000000000000..9b59f7d750178a15bb59fbcdb445c8e6914c34a9
--- /dev/null
+++ b/clang/scripts/package.py
@@ -0,0 +1,740 @@
+#!/usr/bin/env python3
+# Copyright 2015 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script will check out llvm and clang, and then package the results up
+into a number of tarballs."""
+
+import argparse
+import fnmatch
+import itertools
+import lzma
+import multiprocessing.dummy
+import os
+import platform
+import shutil
+import subprocess
+import sys
+import tarfile
+import time
+
+# Path constants.
+THIS_DIR = os.path.dirname(__file__)
+CHROMIUM_DIR = os.path.abspath(os.path.join(THIS_DIR, '..', '..', '..'))
+THIRD_PARTY_DIR = os.path.join(THIS_DIR, '..', '..', '..', 'third_party')
+BUILDTOOLS_DIR = os.path.join(THIS_DIR, '..', '..', '..', 'buildtools')
+LLVM_DIR = os.path.join(THIRD_PARTY_DIR, 'llvm')
+LLVM_BOOTSTRAP_DIR = os.path.join(THIRD_PARTY_DIR, 'llvm-bootstrap')
+LLVM_BOOTSTRAP_INSTALL_DIR = os.path.join(THIRD_PARTY_DIR,
+                                          'llvm-bootstrap-install')
+LLVM_BUILD_DIR = os.path.join(THIRD_PARTY_DIR, 'llvm-build')
+LLVM_RELEASE_DIR = os.path.join(LLVM_BUILD_DIR, 'Release+Asserts')
+EU_STRIP = os.path.join(BUILDTOOLS_DIR, 'third_party', 'eu-strip', 'bin',
+                        'eu-strip')
+
+DEFAULT_GCS_BUCKET = 'chromium-browser-clang-staging'
+
+
+def Tee(output, logfile):
+  logfile.write(output)
+  print(output, end=' ')
+
+
+def TeeCmd(cmd, logfile, fail_hard=True):
+  """Runs cmd and writes the output to both stdout and logfile."""
+  # Reading from PIPE can deadlock if one buffer is full but we wait on a
+  # different one. To work around this, pipe the subprocess's stderr to
+  # its stdout buffer and don't give it a stdin.
+  # shell=True is required in cmd.exe since depot_tools has an svn.bat, and
+  # bat files only work with shell=True set.
+  proc = subprocess.Popen(cmd, bufsize=1, shell=sys.platform == 'win32',
+                          stdin=open(os.devnull), stdout=subprocess.PIPE,
+                          stderr=subprocess.STDOUT)
+  for line in iter(proc.stdout.readline, ''):
+    Tee(str(line.decode()), logfile)
+    if proc.poll() is not None:
+      break
+  exit_code = proc.wait()
+  if exit_code != 0 and fail_hard:
+    print('Failed:', cmd)
+    sys.exit(1)
+
+
+def PrintTarProgress(tarinfo):
+  print('Adding', tarinfo.name)
+  return tarinfo
+
+
+def GetGsutilPath():
+  if 'find_depot_tools' not in sys.modules:
+    sys.path.insert(0, os.path.join(CHROMIUM_DIR, 'build'))
+    global find_depot_tools
+    import find_depot_tools
+  depot_path = find_depot_tools.add_depot_tools_to_path()
+  if depot_path is None:
+    print('depot_tools not found in PATH. 
' + 'Follow the instructions in this document ' + 'http://dev.chromium.org/developers/how-tos/install-depot-tools' + ' to install depot_tools and then try again.') + sys.exit(1) + gsutil_path = os.path.join(depot_path, 'gsutil.py') + return gsutil_path + + +def RunGsutil(args): + return subprocess.call([sys.executable, GetGsutilPath()] + args) + + +def PackageInArchive(directory_path, archive_path): + bin_dir_path = os.path.join(directory_path, 'bin') + if sys.platform != 'win32' and os.path.exists(bin_dir_path): + for f in os.listdir(bin_dir_path): + file_path = os.path.join(bin_dir_path, f) + if not os.path.islink(file_path): + if sys.platform == 'darwin': + subprocess.call(['strip', '-x', file_path]) + else: + subprocess.call(['strip', file_path]) + + with tarfile.open(archive_path + '.tar.xz', + 'w:xz', + preset=9 | lzma.PRESET_EXTREME) as tar_xz: + for f in sorted(os.listdir(directory_path)): + tar_xz.add(os.path.join(directory_path, f), + arcname=f, + filter=PrintTarProgress) + + +def MaybeUpload(do_upload, + gcs_bucket, + filename, + gcs_platform, + extra_gsutil_args=[]): + gsutil_args = ['cp'] + extra_gsutil_args + [ + '-n', filename, + 'gs://%s/%s/' % (gcs_bucket, gcs_platform) + ] + if do_upload: + print('Uploading %s to Google Cloud Storage...' % filename) + exit_code = RunGsutil(gsutil_args) + if exit_code != 0: + print("gsutil failed, exit_code: %s" % exit_code) + sys.exit(exit_code) + else: + print('To upload, run:') + print('gsutil %s' % ' '.join(gsutil_args)) + + +def UploadPDBsToSymbolServer(binaries): + assert sys.platform == 'win32' + # Upload PDB and binary to the symbol server on Windows. Put them into the + # chromium-browser-symsrv bucket, since chrome devs have that in their + # _NT_SYMBOL_PATH already. Executable and PDB must be at paths following a + # certain pattern for the Microsoft debuggers to be able to load them. + # Executable: + # chromium-browser-symsrv/clang-cl.exe/ABCDEFAB01234/clang-cl.ex_ + # ABCDEFAB is the executable's timestamp in %08X format, 01234 is the + # executable's image size in %x format. tools/symsrc/img_fingerprint.py + # can compute this ABCDEFAB01234 string for us, so use that. + # The .ex_ instead of .exe at the end means that the file is compressed. + # PDB: + # gs://chromium-browser-symsrv/clang-cl.exe.pdb/AABBCCDD/clang-cl.exe.pd_ + # AABBCCDD here is computed from the output of + # dumpbin /all mybinary.exe | find "Format: RSDS" + # but tools/symsrc/pdb_fingerprint_from_img.py can compute it already, so + # again just use that. + sys.path.insert(0, os.path.join(CHROMIUM_DIR, 'tools', 'symsrc')) + import img_fingerprint, pdb_fingerprint_from_img + + files = [] + for binary_path in binaries: + binary_path = os.path.join(LLVM_RELEASE_DIR, binary_path) + binary_id = img_fingerprint.GetImgFingerprint(binary_path) + (pdb_id, pdb_path) = pdb_fingerprint_from_img.GetPDBInfoFromImg(binary_path) + files += [(binary_path, binary_id), (pdb_path, pdb_id)] + + # The build process builds clang.exe and then copies it to clang-cl.exe + # (both are the same binary and they behave differently on what their + # filename is). Hence, the pdb is at clang.pdb, not at clang-cl.pdb. + # Likewise, lld-link.exe's PDB file is called lld.pdb. + + # Compress and upload. 
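+  # makecab names its output by replacing the last character of the input
+  # file name with '_' (e.g. clang-cl.exe -> clang-cl.ex_), which is what
+  # the f[:-1] + '_' computation below relies on.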
+ def compress(t): + subprocess.check_call( + ['makecab', '/D', 'CompressionType=LZX', '/D', 'CompressionMemory=21', + t[0], '/L', os.path.dirname(t[0])], stdout=open(os.devnull, 'w')) + multiprocessing.dummy.Pool().map(compress, files) + for f, f_id in files: + f_cab = f[:-1] + '_' + dest = '%s/%s/%s' % (os.path.basename(f), f_id, os.path.basename(f_cab)) + print('Uploading %s to Google Cloud Storage...' % dest) + gsutil_args = ['cp', '-n', '-a', 'public-read', f_cab, + 'gs://chromium-browser-symsrv/' + dest] + exit_code = RunGsutil(gsutil_args) + if exit_code != 0: + print("gsutil failed, exit_code: %s" % exit_code) + sys.exit(exit_code) + + +def replace_version(want, version): + """Replaces $V with provided version.""" + return set([w.replace('$V', version) for w in want]) + + +def main(): + parser = argparse.ArgumentParser(description='build and package clang') + parser.add_argument('--upload', action='store_true', + help='Upload the target archive to Google Cloud Storage.') + parser.add_argument( + '--bucket', + default=DEFAULT_GCS_BUCKET, + help='Google Cloud Storage bucket where the target archive is uploaded') + parser.add_argument('--revision', + help='LLVM revision to use. Default: based on update.py') + args = parser.parse_args() + + if args.revision: + # Use upload_revision.py to set the revision first. + cmd = [ + sys.executable, + os.path.join(THIS_DIR, 'upload_revision.py'), + '--no-git', # Just run locally, don't upload anything. + '--clang-git-hash=' + args.revision + ] + subprocess.run(cmd, check=True) + + # This needs to happen after upload_revision.py modifies update.py. + from update import PACKAGE_VERSION, RELEASE_VERSION, STAMP_FILE, STAMP_FILENAME + + expected_stamp = PACKAGE_VERSION + pdir = 'clang-' + expected_stamp + print(pdir) + + if sys.platform == 'darwin': + # When we need to run this script on an arm machine, we need to add a + # --build-mac-intel switch to pick which clang to build, pick the + # 'Mac_arm64' here when there's no flag and 'Mac' when --build-mac-intel is + # passed. Also update the build script to explicitly pass a default triple + # then. + if platform.machine() == 'arm64': + gcs_platform = 'Mac_arm64' + else: + gcs_platform = 'Mac' + elif sys.platform == 'win32': + gcs_platform = 'Win' + else: + gcs_platform = 'Linux_x64' + + with open('buildlog.txt', 'w', encoding='utf-8') as log: + Tee('Starting build\n', log) + + # Do a clobber build. + shutil.rmtree(LLVM_BOOTSTRAP_DIR, ignore_errors=True) + shutil.rmtree(LLVM_BOOTSTRAP_INSTALL_DIR, ignore_errors=True) + shutil.rmtree(LLVM_BUILD_DIR, ignore_errors=True) + + build_cmd = [ + sys.executable, + os.path.join(THIS_DIR, 'build.py'), '--bootstrap', '--disable-asserts', + '--run-tests', '--pgo' + ] + if sys.platform != 'darwin': + build_cmd.append('--thinlto') + if sys.platform.startswith('linux'): + build_cmd.append('--bolt') + + TeeCmd(build_cmd, log) + + stamp = open(STAMP_FILE).read().rstrip() + if stamp != expected_stamp: + print('Actual stamp (%s) != expected stamp (%s).' % (stamp, expected_stamp)) + return 1 + + shutil.rmtree(pdir, ignore_errors=True) + + # Track of all files that should be part of runtime package. '$V' is replaced + # by RELEASE_VERSION, and no glob is supported. + runtime_packages = None + runtime_package_name = None + + # Copy a list of files to the directory we're going to tar up. + # This supports the same patterns that the fnmatch module understands. + # '$V' is replaced by RELEASE_VERSION further down. 
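+  # For example (version value hypothetical): with RELEASE_VERSION == '17',
+  # 'lib/clang/$V/include/*' becomes 'lib/clang/17/include/*', which then
+  # fnmatch()es paths such as lib/clang/17/include/stddef.h.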
+ exe_ext = '.exe' if sys.platform == 'win32' else '' + want = set([ + STAMP_FILENAME, + 'bin/llvm-pdbutil' + exe_ext, + 'bin/llvm-symbolizer' + exe_ext, + 'bin/llvm-undname' + exe_ext, + # Copy built-in headers (lib/clang/3.x.y/include). + 'lib/clang/$V/include/*', + 'lib/clang/$V/share/asan_*list.txt', + 'lib/clang/$V/share/cfi_*list.txt', + ]) + if sys.platform == 'win32': + want.update([ + 'bin/clang-cl.exe', + 'bin/lld-link.exe', + 'bin/llvm-ml.exe', + ]) + else: + want.update([ + 'bin/clang', + + # Add LLD. + 'bin/lld', + + # Add llvm-ar for LTO. + 'bin/llvm-ar', + + # llvm-ml for Windows cross builds. + 'bin/llvm-ml', + + # Add llvm-readobj (symlinked from llvm-readelf) for extracting SONAMEs. + 'bin/llvm-readobj', + ]) + if sys.platform != 'darwin': + # The Fuchsia runtimes are only built on non-Mac platforms. + want.update([ + 'lib/clang/$V/lib/aarch64-unknown-fuchsia/libclang_rt.builtins.a', + 'lib/clang/$V/lib/x86_64-unknown-fuchsia/libclang_rt.builtins.a', + 'lib/clang/$V/lib/x86_64-unknown-fuchsia/libclang_rt.profile.a', + 'lib/clang/$V/lib/x86_64-unknown-fuchsia/libclang_rt.asan.so', + 'lib/clang/$V/lib/x86_64-unknown-fuchsia/libclang_rt.asan-preinit.a', + 'lib/clang/$V/lib/x86_64-unknown-fuchsia/libclang_rt.asan_static.a', + ]) + if sys.platform == 'darwin': + runtime_package_name = 'clang-mac-runtime-library' + runtime_packages = set([ + # AddressSanitizer runtime. + 'lib/clang/$V/lib/darwin/libclang_rt.asan_ios_dynamic.dylib', + 'lib/clang/$V/lib/darwin/libclang_rt.asan_iossim_dynamic.dylib', + 'lib/clang/$V/lib/darwin/libclang_rt.asan_osx_dynamic.dylib', + + # Builtin libraries for the _IsOSVersionAtLeast runtime function. + 'lib/clang/$V/lib/darwin/libclang_rt.ios.a', + 'lib/clang/$V/lib/darwin/libclang_rt.iossim.a', + 'lib/clang/$V/lib/darwin/libclang_rt.osx.a', + 'lib/clang/$V/lib/darwin/libclang_rt.watchos.a', + 'lib/clang/$V/lib/darwin/libclang_rt.watchossim.a', + 'lib/clang/$V/lib/darwin/libclang_rt.xros.a', + 'lib/clang/$V/lib/darwin/libclang_rt.xrossim.a', + + # Profile runtime (used by profiler and code coverage). + 'lib/clang/$V/lib/darwin/libclang_rt.profile_iossim.a', + 'lib/clang/$V/lib/darwin/libclang_rt.profile_osx.a', + + # UndefinedBehaviorSanitizer runtime. + 'lib/clang/$V/lib/darwin/libclang_rt.ubsan_iossim_dynamic.dylib', + 'lib/clang/$V/lib/darwin/libclang_rt.ubsan_osx_dynamic.dylib', + ]) + want.update(runtime_packages) + want.update([ + # Add llvm-objcopy for its use as install_name_tool. + 'bin/llvm-objcopy', + ]) + elif sys.platform.startswith('linux'): + want.update([ + # pylint: disable=line-too-long + + # Add llvm-objcopy for partition extraction on Android. + 'bin/llvm-objcopy', + + # Add llvm-nm. + 'bin/llvm-nm', + + # AddressSanitizer C runtime (pure C won't link with *_cxx). + 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.asan.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.asan.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.asan.a.syms', + 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.asan_static.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.asan_static.a', + + # AddressSanitizer C++ runtime. + 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.asan_cxx.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.asan_cxx.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.asan_cxx.a.syms', + + # AddressSanitizer Android runtime. 
+ 'lib/clang/$V/lib/linux/libclang_rt.asan-aarch64-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.asan-arm-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.asan-i686-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.asan-riscv64-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.asan_static-aarch64-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.asan_static-arm-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.asan_static-i686-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.asan_static-riscv64-android.a', + + # Builtins for Android. + 'lib/clang/$V/lib/linux/libclang_rt.builtins-aarch64-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.builtins-arm-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.builtins-i686-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.builtins-x86_64-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.builtins-riscv64-android.a', + + # Builtins for Linux and Lacros. + 'lib/clang/$V/lib/aarch64-unknown-linux-gnu/libclang_rt.builtins.a', + 'lib/clang/$V/lib/armv7-unknown-linux-gnueabihf/libclang_rt.builtins.a', + 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.builtins.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.builtins.a', + + # crtstart/crtend for Linux and Lacros. + 'lib/clang/$V/lib/aarch64-unknown-linux-gnu/clang_rt.crtbegin.o', + 'lib/clang/$V/lib/aarch64-unknown-linux-gnu/clang_rt.crtend.o', + 'lib/clang/$V/lib/armv7-unknown-linux-gnueabihf/clang_rt.crtbegin.o', + 'lib/clang/$V/lib/armv7-unknown-linux-gnueabihf/clang_rt.crtend.o', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/clang_rt.crtbegin.o', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/clang_rt.crtend.o', + + # HWASAN Android runtime. + 'lib/clang/$V/lib/linux/libclang_rt.hwasan-aarch64-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.hwasan-preinit-aarch64-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.hwasan-riscv64-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.hwasan-preinit-riscv64-android.a', + + # MemorySanitizer C runtime (pure C won't link with *_cxx). + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.msan.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.msan.a.syms', + + # MemorySanitizer C++ runtime. + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.msan_cxx.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.msan_cxx.a.syms', + + # Profile runtime (used by profiler and code coverage). + 'lib/clang/$V/lib/aarch64-unknown-linux-gnu/libclang_rt.profile.a', + 'lib/clang/$V/lib/armv7-unknown-linux-gnueabihf/libclang_rt.profile.a', + 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.profile.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.profile.a', + 'lib/clang/$V/lib/linux/libclang_rt.profile-i686-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.profile-x86_64-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.profile-aarch64-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.profile-arm-android.a', + 'lib/clang/$V/lib/linux/libclang_rt.profile-riscv64-android.a', + + # ThreadSanitizer C runtime (pure C won't link with *_cxx). + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.tsan.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.tsan.a.syms', + + # ThreadSanitizer C++ runtime. + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.tsan_cxx.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.tsan_cxx.a.syms', + + # UndefinedBehaviorSanitizer C runtime (pure C won't link with *_cxx). 
+ 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.ubsan_standalone.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.ubsan_standalone.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.ubsan_standalone.a.syms', + + # UndefinedBehaviorSanitizer C++ runtime. + 'lib/clang/$V/lib/i386-unknown-linux-gnu/libclang_rt.ubsan_standalone_cxx.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.ubsan_standalone_cxx.a', + 'lib/clang/$V/lib/x86_64-unknown-linux-gnu/libclang_rt.ubsan_standalone_cxx.a.syms', + + # UndefinedBehaviorSanitizer Android runtime, needed for CFI. + 'lib/clang/$V/lib/linux/libclang_rt.ubsan_standalone-aarch64-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.ubsan_standalone-arm-android.so', + 'lib/clang/$V/lib/linux/libclang_rt.ubsan_standalone-riscv64-android.so', + + # Ignorelist for MemorySanitizer (used on Linux only). + 'lib/clang/$V/share/msan_*list.txt', + + # pylint: enable=line-too-long + ]) + elif sys.platform == 'win32': + runtime_package_name = 'clang-win-runtime-library' + + runtime_packages = set([ + # pylint: disable=line-too-long + 'bin/llvm-symbolizer.exe', + + # AddressSanitizer runtime. + 'lib/clang/$V/lib/windows/clang_rt.asan_dynamic-x86_64.dll', + 'lib/clang/$V/lib/windows/clang_rt.asan_dynamic-x86_64.lib', + + # Thunk for AddressSanitizer for component builds. + 'lib/clang/$V/lib/windows/clang_rt.asan_dynamic_runtime_thunk-x86_64.lib', + + # Thunk for AddressSanitizer for static builds. + 'lib/clang/$V/lib/windows/clang_rt.asan_static_runtime_thunk-x86_64.lib', + + # Builtins for C/C++. + 'lib/clang/$V/lib/windows/clang_rt.builtins-i386.lib', + 'lib/clang/$V/lib/windows/clang_rt.builtins-x86_64.lib', + 'lib/clang/$V/lib/windows/clang_rt.builtins-aarch64.lib', + + # Profile runtime (used by profiler and code coverage). + 'lib/clang/$V/lib/windows/clang_rt.profile-i386.lib', + 'lib/clang/$V/lib/windows/clang_rt.profile-x86_64.lib', + 'lib/clang/$V/lib/windows/clang_rt.profile-aarch64.lib', + + # UndefinedBehaviorSanitizer C runtime (pure C won't link with *_cxx). + 'lib/clang/$V/lib/windows/clang_rt.ubsan_standalone-x86_64.lib', + + # UndefinedBehaviorSanitizer C++ runtime. + 'lib/clang/$V/lib/windows/clang_rt.ubsan_standalone_cxx-x86_64.lib', + + # pylint: enable=line-too-long + ]) + want.update(runtime_packages) + + # reclient is a tool for executing programs remotely. When uploading the + # binary to be executed, it needs to know which other files the binary depends + # on. This can include shared libraries, as well as other dependencies not + # explicitly mentioned in the source code (those would be found by reclient's + # include scanner) such as sanitizer ignore lists. + # + # These paths are written relative to the package root, and will be rebased + # to wherever the reclient config file is written when added to the file. + reclient_inputs = { + 'clang': + set([ + # Note: These have to match the `want` list exactly. `want` uses + # a glob, so these must too. + 'lib/clang/$V/share/asan_*list.txt', + 'lib/clang/$V/share/cfi_*list.txt', + ]), + } + if sys.platform == 'win32': + # TODO(crbug.com/335997052): Remove this again once we have a compiler + # flag that tells clang-cl to not auto-add it (and then explicitly pass + # it via GN). 
+ reclient_inputs['clang'].update([ + 'lib/clang/$V/lib/windows/clang_rt.ubsan_standalone-x86_64.lib', + 'lib/clang/$V/lib/windows/clang_rt.ubsan_standalone_cxx-x86_64.lib', + 'lib/clang/$V/lib/windows/clang_rt.profile-i386.lib', + 'lib/clang/$V/lib/windows/clang_rt.profile-x86_64.lib', + 'lib/clang/$V/lib/windows/clang_rt.profile-aarch64.lib', + ]) + + # Check that all non-glob wanted files exist on disk. + want = replace_version(want, RELEASE_VERSION) + found_all_wanted_files = True + for w in want: + if '*' in w: continue + if os.path.exists(os.path.join(LLVM_RELEASE_DIR, w)): continue + print('wanted file "%s" but it did not exist' % w, file=sys.stderr) + found_all_wanted_files = False + + if not found_all_wanted_files: + return 1 + + # Check that all reclient inputs are in the package. + for tool in reclient_inputs: + reclient_inputs[tool] = set( + [i.replace('$V', RELEASE_VERSION) for i in reclient_inputs[tool]]) + missing = reclient_inputs[tool] - want + if missing: + print('reclient inputs not part of package: ', missing, file=sys.stderr) + return 1 + + reclient_input_strings = {t: '' for t in reclient_inputs} + + # TODO(thakis): Try walking over want and copying the files in there instead + # of walking the directory and doing fnmatch() against want. + for root, dirs, files in os.walk(LLVM_RELEASE_DIR): + dirs.sort() # Walk dirs in sorted order. + # root: third_party/llvm-build/Release+Asserts/lib/..., rel_root: lib/... + rel_root = root[len(LLVM_RELEASE_DIR)+1:] + rel_files = [os.path.join(rel_root, f) for f in files] + wanted_files = list(set(itertools.chain.from_iterable( + fnmatch.filter(rel_files, p) for p in want))) + if wanted_files: + # Guaranteed to not yet exist at this point: + os.makedirs(os.path.join(pdir, rel_root)) + for f in sorted(wanted_files): + src = os.path.join(LLVM_RELEASE_DIR, f) + dest = os.path.join(pdir, f) + shutil.copy(src, dest) + # Strip libraries. + if 'libclang_rt.builtins' in f and 'android' in f: + # Keep the builtins' DWARF info for unwinding. + pass + elif sys.platform == 'darwin' and f.endswith('.dylib'): + subprocess.call(['strip', '-x', dest]) + elif (sys.platform.startswith('linux') and + os.path.splitext(f)[1] in ['.so', '.a']): + subprocess.call([EU_STRIP, '-g', dest]) + # If this is an reclient input, add it to the inputs file(s). + for tool, inputs in reclient_inputs.items(): + if any(fnmatch.fnmatch(f, i) for i in inputs): + rel_input = os.path.relpath(dest, os.path.join(pdir, 'bin')) + reclient_input_strings[tool] += ('%s\n' % rel_input) + + # Write the reclient inputs files. + if sys.platform != 'win32': + reclient_input_strings['clang++'] = reclient_input_strings['clang'] + reclient_input_strings['clang-cl'] = reclient_input_strings['clang'] + else: + reclient_input_strings['clang-cl.exe'] = reclient_input_strings.pop('clang') + for tool, string in reclient_input_strings.items(): + filename = os.path.join(pdir, 'bin', '%s_remote_toolchain_inputs' % tool) + print('%s:\n%s' % (filename, string)) + with open(filename, 'w') as f: + f.write(string) + + # Set up symlinks. 
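+  # clang, clang++, and clang-cl are the same binary; as with clang-cl.exe
+  # in the symbol-server comment above, the driver picks its behavior from
+  # the name it is invoked as, so symlinks suffice and keep the package small.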
+ if sys.platform != 'win32': + os.symlink('clang', os.path.join(pdir, 'bin', 'clang++')) + os.symlink('clang', os.path.join(pdir, 'bin', 'clang-cl')) + os.symlink('lld', os.path.join(pdir, 'bin', 'ld.lld')) + os.symlink('lld', os.path.join(pdir, 'bin', 'ld64.lld')) + os.symlink('lld', os.path.join(pdir, 'bin', 'lld-link')) + os.symlink('lld', os.path.join(pdir, 'bin', 'wasm-ld')) + os.symlink('llvm-readobj', os.path.join(pdir, 'bin', 'llvm-readelf')) + + if sys.platform.startswith('linux') or sys.platform == 'darwin': + os.symlink('llvm-objcopy', os.path.join(pdir, 'bin', 'llvm-strip')) + os.symlink('llvm-objcopy', + os.path.join(pdir, 'bin', 'llvm-install-name-tool')) + + # Make `--target=*-cros-linux-gnu` work with + # LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON. + for arch, abi in [('armv7', 'gnueabihf'), ('aarch64', 'gnu'), + ('x86_64', 'gnu')]: + old = '%s-unknown-linux-%s' % (arch, abi) + new = old.replace('unknown', 'cros').replace('armv7', 'armv7a') + os.symlink( + old, os.path.join(pdir, 'lib', 'clang', RELEASE_VERSION, 'lib', new)) + + # Create main archive. + PackageInArchive(pdir, pdir) + MaybeUpload(args.upload, args.bucket, pdir + '.tar.xz', gcs_platform) + + # Upload build log next to it. + os.rename('buildlog.txt', pdir + '-buildlog.txt') + MaybeUpload(args.upload, + args.bucket, + pdir + '-buildlog.txt', + gcs_platform, + extra_gsutil_args=['-z', 'txt']) + os.remove(pdir + '-buildlog.txt') + + # Zip up runtime libraries, if specified. + if runtime_package_name and len(runtime_packages) > 0: + runtime_dir = f'{runtime_package_name}-{stamp}' + shutil.rmtree(runtime_dir, ignore_errors=True) + for f in sorted(replace_version(runtime_packages, RELEASE_VERSION)): + os.makedirs(os.path.dirname(os.path.join(runtime_dir, f)), exist_ok=True) + shutil.copy( + os.path.join(pdir, f), + os.path.join(runtime_dir, f), + ) + PackageInArchive(runtime_dir, runtime_dir) + MaybeUpload(args.upload, args.bucket, f'{runtime_dir}.tar.xz', gcs_platform) + + # Zip up llvm-code-coverage for code coverage. + code_coverage_dir = 'llvm-code-coverage-' + stamp + shutil.rmtree(code_coverage_dir, ignore_errors=True) + os.makedirs(os.path.join(code_coverage_dir, 'bin')) + for filename in ['llvm-cov', 'llvm-profdata']: + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', filename + exe_ext), + os.path.join(code_coverage_dir, 'bin')) + PackageInArchive(code_coverage_dir, code_coverage_dir) + MaybeUpload(args.upload, args.bucket, code_coverage_dir + '.tar.xz', + gcs_platform) + + # Zip up llvm-objdump and related tools for sanitizer coverage and Supersize. + objdumpdir = 'llvmobjdump-' + stamp + shutil.rmtree(objdumpdir, ignore_errors=True) + os.makedirs(os.path.join(objdumpdir, 'bin')) + for filename in [ + 'llvm-bcanalyzer', 'llvm-cxxfilt', 'llvm-dwarfdump', 'llvm-nm', + 'llvm-objdump' + ]: + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', filename + exe_ext), + os.path.join(objdumpdir, 'bin')) + llvmobjdump_stamp_file_base = 'llvmobjdump_build_revision' + llvmobjdump_stamp_file = os.path.join(objdumpdir, llvmobjdump_stamp_file_base) + with open(llvmobjdump_stamp_file, 'w') as f: + f.write(expected_stamp) + f.write('\n') + if sys.platform != 'win32': + os.symlink('llvm-objdump', os.path.join(objdumpdir, 'bin', 'llvm-otool')) + PackageInArchive(objdumpdir, objdumpdir) + MaybeUpload(args.upload, args.bucket, objdumpdir + '.tar.xz', gcs_platform) + + # Zip up clang-tidy for users who opt into it, and Tricium. 
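+  # Each of the tool packages below follows the same naming pattern: with a
+  # hypothetical stamp of 'llvmorg-17-init-1234-gabcdef01-1', the next block
+  # produces clang-tidy-llvmorg-17-init-1234-gabcdef01-1.tar.xz.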
+ clang_tidy_dir = 'clang-tidy-' + stamp + shutil.rmtree(clang_tidy_dir, ignore_errors=True) + os.makedirs(os.path.join(clang_tidy_dir, 'bin')) + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', 'clang-tidy' + exe_ext), + os.path.join(clang_tidy_dir, 'bin')) + PackageInArchive(clang_tidy_dir, clang_tidy_dir) + MaybeUpload(args.upload, args.bucket, clang_tidy_dir + '.tar.xz', + gcs_platform) + + # Zip up clangd and related tools for users who opt into it. + clangd_dir = 'clangd-' + stamp + shutil.rmtree(clangd_dir, ignore_errors=True) + os.makedirs(os.path.join(clangd_dir, 'bin')) + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', 'clangd' + exe_ext), + os.path.join(clangd_dir, 'bin')) + shutil.copy( + os.path.join(LLVM_RELEASE_DIR, 'bin', 'clang-include-cleaner' + exe_ext), + os.path.join(clangd_dir, 'bin')) + PackageInArchive(clangd_dir, clangd_dir) + MaybeUpload(args.upload, args.bucket, clangd_dir + '.tar.xz', gcs_platform) + + # Zip up clang-format so we can update it (separately from the clang roll). + clang_format_dir = 'clang-format-' + stamp + shutil.rmtree(clang_format_dir, ignore_errors=True) + os.makedirs(os.path.join(clang_format_dir, 'bin')) + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', 'clang-format' + exe_ext), + os.path.join(clang_format_dir, 'bin')) + PackageInArchive(clang_format_dir, clang_format_dir) + MaybeUpload(args.upload, args.bucket, clang_format_dir + '.tar.xz', + gcs_platform) + + if sys.platform == 'darwin': + # dsymutil isn't part of the main zip, and it gets periodically + # deployed to CIPD (manually, not as part of clang rolls) for use in the + # Mac build toolchain. + dsymdir = 'dsymutil-' + stamp + shutil.rmtree(dsymdir, ignore_errors=True) + os.makedirs(os.path.join(dsymdir, 'bin')) + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', 'dsymutil'), + os.path.join(dsymdir, 'bin')) + PackageInArchive(dsymdir, dsymdir) + MaybeUpload(args.upload, args.bucket, dsymdir + '.tar.xz', gcs_platform) + + # Zip up the translation_unit tool. + translation_unit_dir = 'translation_unit-' + stamp + shutil.rmtree(translation_unit_dir, ignore_errors=True) + os.makedirs(os.path.join(translation_unit_dir, 'bin')) + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', 'translation_unit' + + exe_ext), + os.path.join(translation_unit_dir, 'bin')) + PackageInArchive(translation_unit_dir, translation_unit_dir) + MaybeUpload(args.upload, args.bucket, translation_unit_dir + '.tar.xz', + gcs_platform) + + # Zip up the libclang binaries. 
+ libclang_dir = 'libclang-' + stamp + shutil.rmtree(libclang_dir, ignore_errors=True) + os.makedirs(os.path.join(libclang_dir, 'bin')) + os.makedirs(os.path.join(libclang_dir, 'bindings', 'python', 'clang')) + if sys.platform == 'win32': + shutil.copy(os.path.join(LLVM_RELEASE_DIR, 'bin', 'libclang.dll'), + os.path.join(libclang_dir, 'bin')) + py_bindings_dir = os.path.join(LLVM_DIR, 'clang', 'bindings', 'python', + 'clang') + for filename in os.listdir(py_bindings_dir): + shutil.copy(os.path.join(py_bindings_dir, filename), + os.path.join(libclang_dir, 'bindings', 'python', 'clang')) + PackageInArchive(libclang_dir, libclang_dir) + MaybeUpload(args.upload, args.bucket, libclang_dir + '.tar.xz', gcs_platform) + + if sys.platform == 'win32' and args.upload: + binaries = [f for f in want if f.endswith('.exe') or f.endswith('.dll')] + assert 'bin/clang-cl.exe' in binaries + assert 'bin/lld-link.exe' in binaries + start = time.time() + UploadPDBsToSymbolServer(binaries) + end = time.time() + print('symbol upload took', end - start, 'seconds') + + # FIXME: Warn if the file already exists on the server. + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/clang/scripts/process_crashreports.py b/clang/scripts/process_crashreports.py new file mode 100755 index 0000000000000000000000000000000000000000..ff835a47307f208af6555f3e6ad3ad8959457986 --- /dev/null +++ b/clang/scripts/process_crashreports.py @@ -0,0 +1,127 @@ +#!/usr/bin/env vpython3 +# Copyright 2019 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""Looks for crash reports in tools/clang/crashreports and uploads them to GCS. +""" + +from __future__ import print_function + +import argparse +import datetime +import getpass +import glob +import os +import shutil +import subprocess +import sys +import tarfile +import tempfile + + +GCS_BUCKET = 'chrome-clang-crash-reports' +THIS_DIR = os.path.dirname(__file__) +CRASHREPORTS_DIR = os.path.join(THIS_DIR, '..', 'crashreports') +GSUTIL = os.path.join( + THIS_DIR, '..', '..', '..', 'third_party', 'depot_tools', 'gsutil.py') + + +def ProcessCrashreport(base, source): + """Zip up all files belonging to a crash base name and upload them to GCS.""" + sys.stdout.write('processing %s... ' % base) + sys.stdout.flush() + + # Note that this will include the .sh and other files: + files = glob.glob(os.path.join(CRASHREPORTS_DIR, base + '.*')) + + # Path design. + # - For each crash, it should be easy to see which platform it was on, + # and which configuration it happened for. + # - Crash prefixes should be regular so that a second bot could download + # crash reports and auto-triage them. + # - Ideally the assert reason would be easily visible too, but clang doesn't + # write that to disk. + # Prepend with '/v1' so that we can move to other schemes in the future if + # needed. + # /v1/yyyy-mm-dd/botname-basename.tgz + now = datetime.datetime.now() + dest = 'gs://%s/v1/%04d/%02d/%02d/%s-%s.tgz' % ( + GCS_BUCKET, now.year, now.month, now.day, source, base) + + # zipfile.ZipFile() defaults to Z_DEFAULT_COMPRESSION (6) and that can't + # be overridden until Python 3.7. tarfile always uses compression level 9, + # so use tarfile. + tmp_name = None + try: + with tempfile.NamedTemporaryFile(delete=False, suffix='.tgz') as tmp: + tmp_name = tmp.name + sys.stdout.write('compressing... 
') + sys.stdout.flush() + with tarfile.open(mode='w:gz', fileobj=tmp) as tgz: + for f in files: + tgz.add(f, os.path.basename(f)) + sys.stdout.write('uploading... ') + sys.stdout.flush() + subprocess.check_call([sys.executable, GSUTIL, '-q', 'cp', tmp_name, dest]) + print('done') + print(' %s' % dest) + except subprocess.CalledProcessError as e: + print('upload failed; if it was due to missing permissions, try running') + print('download_from_google_storage --config') + print('and then try again') + finally: + if tmp_name: + os.remove(tmp_name) + + +def DeleteCrashFiles(): + for root, dirs, files in os.walk(CRASHREPORTS_DIR, topdown=True): + for d in dirs: + print('removing dir', d) + shutil.rmtree(os.path.join(root, d)) + for f in files: + if f != '.gitignore': + print('removing', f) + os.remove(os.path.join(root, f)) + del dirs[:] # Abort os.walk() after one level. + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--delete', dest='delete', action='store_true', + help='Delete all crashreports after processing them ' + '(default)') + parser.add_argument('--no-delete', dest='delete', action='store_false', + help='Do not delete crashreports after processing them') + parser.set_defaults(delete=True) + parser.add_argument('--source', default='user-' + getpass.getuser(), + help='Source of the crash -- usually a bot name. ' + 'Leave empty to use your username.') + args = parser.parse_args() + # When clang notices that it crashes, it tries to write a .sh file containing + # the command used to invoke clang, a source file containing the whole + # input source code with an extension matching the input file (.c, .cpp, ...), + # and potentially other temp files and directories. + # If generating the unified input source file fails, the .sh file won't + # be written. (see Driver::generateCompilationDiagnostics()). + # As a heuristic, find all .sh files in the crashreports directory, then + # zip each up along with all other files that have the same basename with + # different extensions. + clang_reproducers = glob.glob(os.path.join(CRASHREPORTS_DIR, '*.sh')) + # lld reproducers just leave a .tar + lld_reproducers = glob.glob( + os.path.join(CRASHREPORTS_DIR, 'linker-crash*.tar')) + for reproducer in clang_reproducers + lld_reproducers: + base = os.path.splitext(os.path.basename(reproducer))[0] + ProcessCrashreport(base, args.source) + + if args.delete: + DeleteCrashFiles() + + +if __name__ == '__main__': + try: + main() + except Exception as e: + print('got exception:', e) diff --git a/clang/scripts/remote_ld.py b/clang/scripts/remote_ld.py new file mode 100755 index 0000000000000000000000000000000000000000..5207070872e60db69fd1381104b20834c83ec2fe --- /dev/null +++ b/clang/scripts/remote_ld.py @@ -0,0 +1,63 @@ +#! /usr/bin/env python3 +# Copyright 2020 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# Linker wrapper that performs distributed ThinLTO on Reclient. +# +# Usage: Pass the original link command as parameters to this script. +# E.g. original: clang++ -o foo foo.o +# Becomes: remote_ld clang++ -o foo foo.o + +import os +import re +import sys + +import remote_link + + +class RemoteLinkUnix(remote_link.RemoteLinkBase): + # Target-platform-specific constants. 
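+  # gen_ninja() composes these constants; e.g. WL + TLTO + '-index-only' + SEP
+  # yields '-Wl,-plugin-opt=thinlto-index-only=<out>' for this Unix subclass,
+  # versus '-thinlto-index-only:<out>' in the lld-link (Windows) subclass.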
+ WL = '-Wl,' + TLTO = '-plugin-opt=thinlto' + SEP = '=' + DATA_SECTIONS = '-fdata-sections' + FUNCTION_SECTIONS = '-ffunction-sections' + GROUP_RE = re.compile(WL + '--(?:end|start)-group') + MACHINE_RE = re.compile('-m([0-9]+)') + OBJ_PATH = '-plugin-opt=obj-path' + SEP + OBJ_SUFFIX = '.o' + PREFIX_REPLACE = TLTO + '-prefix-replace' + SEP + XIR = '-x ir ' + + ALLOWLIST = { + 'chrome', + } + + def analyze_args(self, args, *posargs, **kwargs): + # TODO(crbug.com/40113922): Builds are unreliable when all targets use + # distributed ThinLTO, so we only enable it for some targets. + # For other targets, we fall back to local ThinLTO. We must use ThinLTO + # because we build with -fsplit-lto-unit, which requires code generation + # be done for each object and target. + # Returning None from this function causes the original, non-distributed + # linker command to be invoked. + if args.output is None: + return None + if not (args.allowlist or os.path.basename(args.output) in self.ALLOWLIST): + return None + return super(RemoteLinkUnix, self).analyze_args(args, *posargs, **kwargs) + + def process_output_param(self, args, i): + """ + If args[i] is a parameter that specifies the output file, + returns (output_name, new_i). Else, returns (None, new_i). + """ + if args[i] == '-o': + return (os.path.normpath(args[i + 1]), i + 2) + else: + return (None, i + 1) + + +if __name__ == '__main__': + sys.exit(RemoteLinkUnix().main(sys.argv)) diff --git a/clang/scripts/remote_link.py b/clang/scripts/remote_link.py new file mode 100755 index 0000000000000000000000000000000000000000..82bfd750e0088977e85b69283115ea304d8961dc --- /dev/null +++ b/clang/scripts/remote_link.py @@ -0,0 +1,689 @@ +#! /usr/bin/env python3 +# Copyright 2020 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# Linker wrapper that performs distributed ThinLTO on Reclient. +# +# Usage: Pass the original link command as parameters to this script. +# E.g. original: lld-link -out:foo foo.obj +# Becomes: remote_link.py lld-link -out:foo foo.obj + +import argparse +import errno +import io +import os +import re +import shlex +import subprocess +import sys +from collections import namedtuple +from pipes import quote as shquote +from tempfile import NamedTemporaryFile + +# Type returned by analyze_args. +AnalyzeArgsResult = namedtuple('AnalyzeArgsResult', [ + 'output', 'linker', 'compiler', 'splitfile', 'index_inputs', 'index_params', + 'codegen', 'codegen_params', 'final_inputs', 'final_params' +]) + + +def autoninja(): + """ + Returns the name of the autoninja executable to invoke. + """ + name = os.path.normpath( + os.path.join(os.path.dirname(__file__), '..', '..', '..', 'third_party', + 'depot_tools', 'autoninja')) + if os.name == 'nt': + return name + '.bat' + else: + return name + + +def ensure_dir(path): + """ + Creates path as a directory if it does not already exist. + """ + if not path: + return + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def ensure_file(path): + """ + Creates an empty file at path if it does not already exist. + Also creates directories as needed. 
+ """ + ensure_dir(os.path.dirname(path)) + try: + fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644) + os.close(fd) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def exe_suffix(): + if os.name == 'nt': + return '.exe' + else: + return '' + + +def is_bitcode_file(path): + """ + Returns True if path contains a LLVM bitcode file, False if not. + """ + with open(path, 'rb') as f: + return f.read(4) == b'BC\xc0\xde' + + +def is_thin_archive(path): + """ + Returns True if path refers to a thin archive (ar file), False if not. + """ + with open(path, 'rb') as f: + return f.read(8) == b'!\n' + + +def names_in_archive(path, ar_path): + """ + Yields the member names in the archive file at path. + """ + proc = subprocess.run([ar_path, "t", path], stdout=subprocess.PIPE) + for line in proc.stdout.splitlines(): + # Using UTF-8 here gives us a fighting chance if someone decides to use + # non-US-ASCII characters in a file name, and backslashreplace gives us + # a human-readable representation of any undecodable bytes we might + # encounter. + yield line.decode('UTF-8', 'backslashreplace').rstrip() + + +def ninjaenc(s): + """ + Encodes string s for use in ninja files. + """ + return s.replace('$', '$$') + + +def ninjajoin(l): + """ + Encodes list of strings l to a string encoded for use in a ninja file. + """ + return ' '.join(map(ninjaenc, l)) + + +def parse_args(args): + """ + Parses the command line and returns a structure with the results. + """ + # The basic invocation is to pass in the command line that would be used + # for a local ThinLTO link. Optionally, this may be preceded by options + # that set some values for this script. If these optional options are + # present, they must be followed by '--'. + ap = argparse.ArgumentParser() + ap.add_argument('--generate', + action='store_true', + help='generate ninja file, but do not invoke it.') + ap.add_argument('--wrapper', help='path to remote exec wrapper.') + ap.add_argument('--jobs', '-j', help='maximum number of concurrent jobs.') + ap.add_argument('--no-wrapper', + action='store_true', + help='do not use remote exec wrapper.') + ap.add_argument('--allowlist', + action='store_true', + help='act as if the target is on the allow list.') + ap.add_argument('--ar-path', help='path to ar or llvm-ar.', required=True) + try: + splitpos = args.index('--') + except: + raise Exception("Must separate linker args from wrapper args using --") + parsed = ap.parse_args(args[1:splitpos]) + rest = args[(splitpos + 1):] + parsed.linker = rest[0] + parsed.linker_args = rest[1:] + return parsed + + +def report_run(cmd, *args, **kwargs): + """ + Runs a command using subprocess.check_call, first writing the command line + to standard error. + """ + sys.stderr.write('%s: %s\n' % (sys.argv[0], ' '.join(map(shquote, cmd)))) + sys.stderr.flush() + return subprocess.check_call(cmd, *args, **kwargs) + + +class RemoteLinkBase(object): + """ + Base class used by RemoteLinkUnix and RemoteLinkWindows. + """ + # Defaults. + wrapper = 'rewrapper' + jobs = None + + # These constants should work across platforms. + DATA_SECTIONS_RE = re.compile('-f(no-)?data-sections|[-/]Gw(-)?', + re.IGNORECASE) + FUNCTION_SECTIONS_RE = re.compile('-f(no-)?function-sections|[-/]Gy(-)?', + re.IGNORECASE) + LIB_RE = re.compile('.*\\.(?:a|r?lib)', re.IGNORECASE) + # LTO_RE matches flags we want to pass in the thin link step but not in the + # native link step. 
+ # Continue to pass -flto and -fsanitize flags in the native link even though + # they're not normally necessary because clang needs them to build with CFI. + LTO_RE = re.compile('|'.join(( + '-Wl,-plugin-opt=.*', + '-Wl,--lto.*', + '-Wl,--thin.*', + ))) + MLLVM_RE = re.compile('(?:-Wl,)?([-/]mllvm)[:=,]?(.*)', re.IGNORECASE) + OBJ_RE = re.compile('(.*)\\.(o(?:bj)?)', re.IGNORECASE) + + def _no_codegen(self, args): + """ + Helper function for the case where no distributed code generation + is necessary. It invokes the original command, unless --generate + was passed, in which case it informs the user that no code generation + is necessary. + """ + if args.generate: + sys.stderr.write( + 'No code generation required; no ninja file generated.\n') + return 5 # Indicates no code generation required. + return subprocess.call([args.linker] + args.linker_args) + + def transform_codegen_param(self, param): + return self.transform_codegen_param_common(param) + + def transform_codegen_param_common(self, param): + """ + If param is a parameter relevant to code generation, returns the + parameter in a form that is suitable to pass to clang. For values + of param that are not relevant to code generation, returns None. + """ + match = self.MACHINE_RE.match(param) + if match and match.group(1).lower() in ['x86', 'i386', 'arm', '32']: + return ['-m32'] + match = self.MLLVM_RE.match(param) + if match: + if match.group(2): + return ['-mllvm', match.group(2)] + else: + return ['-mllvm'] + if (param.startswith('-f') and not param.startswith('-flto') + and not param.startswith('-fsanitize') + and not param.startswith('-fthinlto') + and not param.startswith('-fwhole-program')): + return [param] + if param.startswith('-g'): + return [param] + if param.startswith('-m'): + # Note: -mllvm is handled separately above. + return [param] + if param.startswith('--target'): + return [param] + return None + + def output_path(self, args): + """ + Analyzes command line arguments in args and returns the output + path if one is specified by args. If no output path is specified + by args, returns None. + """ + i = 2 + while i < len(args): + output, next_i = self.process_output_param(args, i) + if output is not None: + return output + i = next_i + return None + + def write_rsp(self, path, params): + """ + Writes params to a newly created response file at path. + """ + ensure_dir(os.path.basename(path)) + with open(path, 'wb') as f: + f.write('\n'.join(map(self.rspenc, params)).encode('UTF-8')) + + def rspenc(self, param): + """ + Encodes param for use in an rsp file. + """ + return param.replace('\\%', '%') + + def expand_rsp(self, rspname): + """ + Returns the parameters found in the response file at rspname. + """ + with open(rspname) as f: + return shlex.split(f.read()) + + def expand_args_rsps(self, args): + """ + Yields args, expanding @rsp file references into the commands mentioned + in the rsp file. + """ + result = [] + for arg in args: + if len(arg) > 0 and arg[0] == '@': + for x in self.expand_rsp(arg[1:]): + yield x + else: + yield arg + + def expand_archives(self, args, ar_path): + """ + Yields the parameters in args, with archives replaced by a sequence + of '--start-lib', the member names, and '--end-lib'. This is used to get a + command line where members of archives are mentioned explicitly, but we + still get the same semantics as using archive files, namely that the object + files are only linked in if they provided needed symbol definitions. 
+ Most of the archives encountered in the Chromium link process are + "thin archives", which means they're just directories of files that are + already on disk - and all we need to do is enumerate those files instead + of actually extracting anything. Occasionally we encounter a real archive + and its contents need to be extracted. + """ + for arg in args: + if arg.startswith("./"): + arg = arg[2:] + if self.LIB_RE.match(arg) and os.path.exists(arg): + yield (self.WL + '--start-lib') + if is_thin_archive(arg): + for name in names_in_archive(arg, ar_path): + yield (name) + else: + arg_encoded = arg.replace("..", "parent_dir") + extractdir = os.path.join("expanded_archives", arg_encoded) + if not os.path.exists(extractdir): + os.makedirs(extractdir) + subprocess.run([ar_path, "--output", extractdir, "x", arg]) + for name in names_in_archive(arg, ar_path): + yield (os.path.join(extractdir, name)) + yield (self.WL + '--end-lib') + else: + yield (arg) + + def analyze_args(self, args, gen_dir, common_dir, use_common_objects, + ar_path): + """ + Analyzes the command line arguments in args. + If no ThinLTO code generation is necessary, returns None. + Else, returns an AnalyzeArgsResult value. + + Args: + args: the command line as returned by parse_args(). + gen_dir: directory in which to generate files specific to this target. + common_dir: directory for file shared among targets. + use_common_objects: if True, native object files are shared with + other targets. + """ + # If we're invoking the NaCl toolchain, don't do distributed code + # generation. + if os.path.basename(args.linker).startswith('pnacl-'): + return None + + rsp_expanded = list(self.expand_args_rsps(args.linker_args)) + expanded_args = list(self.expand_archives(rsp_expanded, ar_path)) + + return self.analyze_expanded_args(expanded_args, args.output, args.linker, + gen_dir, common_dir, use_common_objects) + + def analyze_expanded_args(self, args, output, linker, gen_dir, common_dir, + use_common_objects): + """ + Helper function for analyze_args. This is called by analyze_args after + expanding rsp files and determining which files are bitcode files, and + produces codegen_params, final_params, and index_params. + + This function interacts with the filesystem through os.path.exists, + is_bitcode_file, and ensure_file. + """ + if 'clang' in os.path.basename(linker): + compiler = linker + else: + compiler_dir = os.path.dirname(linker) + if compiler_dir: + compiler_dir += '/' + else: + compiler_dir = '' + compiler = compiler_dir + 'clang-cl' + exe_suffix() + + if use_common_objects: + obj_dir = common_dir + else: + obj_dir = gen_dir + + common_index = common_dir + '/empty.thinlto.bc' + index_inputs = set() + index_params = [] + codegen = [] + codegen_params = [ + '-Wno-unused-command-line-argument', + '-Wno-override-module', + ] + final_inputs = set() + final_params = [] + in_mllvm = [False] + + # Defaults that match those for local linking. + optlevel = [2] + data_sections = [True] + function_sections = [True] + + def extract_opt_level(param): + """ + If param is a parameter that specifies the LTO optimization level, + returns the level. If not, returns None. + """ + match = re.match('(?:-Wl,)?--lto-O(.+)', param) + if match: + return match.group(1) + match = re.match('[-/]opt:.*lldlto=([^:]*)', param, re.IGNORECASE) + if match: + return match.group(1) + return None + + def process_param(param): + """ + Common code for processing a single parameter from the either the + command line or an rsp file. 
+ """ + + def helper(): + """ + This exists so that we can use return instead of + nested if statements to use the first matching case. + """ + # After -mllvm, just pass on the param. + if in_mllvm[0]: + if param.startswith('-Wl,'): + codegen_params.append(param[4:]) + else: + codegen_params.append(param) + in_mllvm[0] = False + return + + # Check for params that specify LTO optimization level. + o = extract_opt_level(param) + if o is not None: + optlevel[0] = o + return + + # Check for params that affect code generation. + cg_param = self.transform_codegen_param(param) + if cg_param: + codegen_params.extend(cg_param) + # No return here, we still want to check for -mllvm. + + # Check for -mllvm. + match = self.MLLVM_RE.match(param) + if match and not match.group(2): + # Next parameter will be the thing to pass to LLVM. + in_mllvm[0] = True + + # Parameters that override defaults disable the defaults; the + # final value is set by passing through the parameter. + if self.DATA_SECTIONS_RE.match(param): + data_sections[0] = False + if self.FUNCTION_SECTIONS_RE.match(param): + function_sections[0] = False + + helper() + if self.GROUP_RE.match(param): + return + index_params.append(param) + if os.path.exists(param): + index_inputs.add(param) + match = self.OBJ_RE.match(param) + if match and is_bitcode_file(param): + native = obj_dir + '/' + match.group(1) + '.' + match.group(2) + if use_common_objects: + index = common_index + else: + index = obj_dir + '/' + param + '.thinlto.bc' + ensure_file(index) + codegen.append((os.path.normpath(native), param, index)) + else: + final_inputs.add(param) + final_params.append(param) + elif not self.LTO_RE.match(param): + final_params.append(param) + + index_params.append(self.WL + self.PREFIX_REPLACE + ';' + obj_dir + '/') + i = 0 + while i < len(args): + x = args[i] + if not self.GROUP_RE.match(x): + outfile, next_i = self.process_output_param(args, i) + if outfile is not None: + index_params.extend(args[i:next_i]) + final_params.extend(args[i:next_i]) + i = next_i - 1 + else: + process_param(x) + i += 1 + + # If we are not doing ThinLTO codegen, just invoke the original command. + if len(codegen) < 1: + return None + + codegen_params.append('-O' + str(optlevel[0])) + if data_sections[0]: + codegen_params.append(self.DATA_SECTIONS) + if function_sections[0]: + codegen_params.append(self.FUNCTION_SECTIONS) + + if use_common_objects: + splitfile = None + for tup in codegen: + final_params.append(tup[0]) + index_inputs = [] + else: + splitfile = gen_dir + '/' + output + '.split' + self.OBJ_SUFFIX + final_params.append(splitfile) + index_params.append(self.WL + self.OBJ_PATH + splitfile) + used_obj_file = gen_dir + '/' + os.path.basename(output) + '.objs' + final_params.append('@' + used_obj_file) + + return AnalyzeArgsResult( + output=output, + linker=linker, + compiler=compiler, + splitfile=splitfile, + index_inputs=index_inputs, + index_params=index_params, + codegen=codegen, + codegen_params=codegen_params, + final_inputs=final_inputs, + final_params=final_params, + ) + + def gen_ninja(self, ninjaname, params, gen_dir): + """ + Generates a ninja build file at path ninjaname, using original command line + params and with objs being a list of bitcode files for which to generate + native code. 
+ """ + if self.wrapper: + wrapper_prefix = ninjaenc(self.wrapper) + ' ' + else: + wrapper_prefix = '' + base = gen_dir + '/' + os.path.basename(params.output) + ensure_dir(gen_dir) + ensure_dir(os.path.dirname(ninjaname)) + codegen_cmd = ('%s%s -c %s -fthinlto-index=$index %s$bitcode -o $native' % + (wrapper_prefix, ninjaenc(params.compiler), + ninjajoin(params.codegen_params), self.XIR)) + if params.index_inputs: + used_obj_file = base + '.objs' + index_rsp = base + '.index.rsp' + ensure_dir(os.path.dirname(used_obj_file)) + if params.splitfile: + ensure_dir(os.path.dirname(params.splitfile)) + # We use grep here to only codegen native objects which are actually + # used by the native link step. Ninja 1.10 introduced a dyndep feature + # which allows for a more elegant implementation, but Chromium still + # uses an older ninja version which doesn't have this feature. + codegen_cmd = '( ! grep -qF $native %s || %s)' % ( + ninjaenc(used_obj_file), codegen_cmd) + + with open(ninjaname, 'w') as f: + if params.index_inputs: + self.write_rsp(index_rsp, params.index_params) + f.write('\nrule index\n command = %s %s %s @$rspfile\n' % + (ninjaenc(params.linker), + ninjaenc(self.WL + self.TLTO + '-index-only' + self.SEP) + + '$out', self.WL + self.TLTO + '-emit-imports-files')) + + f.write(('\nrule native-link\n command = %s @$rspname' + '\n rspfile = $rspname\n rspfile_content = $params\n') % + (ninjaenc(params.linker), )) + + f.write('\nrule codegen\n command = %s && touch $out\n' % + (codegen_cmd, )) + + native_link_deps = [] + if params.index_inputs: + f.write( + ('\nbuild %s | %s : index %s\n' + ' rspfile = %s\n' + ' rspfile_content = %s\n') % + (ninjaenc(used_obj_file), ninjajoin( + [x[2] for x in params.codegen]), ninjajoin(params.index_inputs), + ninjaenc(index_rsp), ninjajoin(params.index_params))) + native_link_deps.append(used_obj_file) + + for tup in params.codegen: + obj, bitcode, index = tup + stamp = obj + '.stamp' + native_link_deps.append(obj) + f.write( + ('\nbuild %s : codegen %s %s\n' + ' bitcode = %s\n' + ' index = %s\n' + ' native = %s\n' + '\nbuild %s : phony %s\n') % tuple( + map(ninjaenc, + (stamp, bitcode, index, bitcode, index, obj, obj, stamp)))) + + f.write(('\nbuild %s : native-link %s\n' + ' rspname = %s\n params = %s\n') % + (ninjaenc(params.output), + ninjajoin(list(params.final_inputs) + native_link_deps), + ninjaenc(base + '.final.rsp'), ninjajoin(params.final_params))) + + f.write('\ndefault %s\n' % (ninjaenc(params.output), )) + + def do_main(self, argv): + """ + This function contains the main code to run. Not intended to be called + directly. Call main instead, which returns exit status for failing + subprocesses. + """ + args = parse_args(argv) + args.output = self.output_path(argv[1:]) + if args.output is None: + return self._no_codegen(args) + if args.wrapper: + self.wrapper = args.wrapper + if args.no_wrapper: + self.wrapper = None + if args.jobs: + self.jobs = int(args.jobs) + + basename = os.path.basename(args.output) + # Only generate tailored native object files for targets on the allow list. + # TODO: Find a better way to structure this. There are three different + # ways we can perform linking: Local ThinLTO, distributed ThinLTO, + # and distributed ThinLTO with common object files. + # We expect the distributed ThinLTO variants to be faster, but + # common object files cannot be used when -fsplit-lto-unit is in effect. + # Currently, we don't detect this situation. 
We could, but it might + # be better to instead move this logic out of this script and into + # the build system. + use_common_objects = not (args.allowlist or basename in self.ALLOWLIST) + common_dir = 'common_objs' + gen_dir = 'lto.' + basename + params = self.analyze_args(args, gen_dir, common_dir, use_common_objects, + args.ar_path) + # If we determined that no distributed code generation need be done, just + # invoke the original command. + if params is None: + return self._no_codegen(args) + if use_common_objects: + objs = [x[0] for x in params.codegen] + ensure_file(common_dir + '/empty.thinlto.bc') + ninjaname = gen_dir + '/build.ninja' + self.gen_ninja(ninjaname, params, gen_dir) + if args.generate: + sys.stderr.write('Generated ninja file %s\n' % (shquote(ninjaname), )) + else: + cmd = [autoninja(), '-f', ninjaname] + if self.jobs: + cmd.extend(['-j', str(self.jobs)]) + report_run(cmd) + return 0 + + def main(self, argv): + try: + return self.do_main(argv) + except subprocess.CalledProcessError as e: + return e.returncode + + +class RemoteLinkWindows(RemoteLinkBase): + # Target-platform-specific constants. + WL = '' + TLTO = '-thinlto' + SEP = ':' + DATA_SECTIONS = '-Gw' + FUNCTION_SECTIONS = '-Gy' + GROUP_RE = re.compile(WL + '--(?:end|start)-group') + MACHINE_RE = re.compile('[-/]machine:(.*)', re.IGNORECASE) + OBJ_PATH = '-lto-obj-path' + SEP + OBJ_SUFFIX = '.obj' + OUTPUT_RE = re.compile('[-/]out:(.*)', re.IGNORECASE) + PREFIX_REPLACE = TLTO + '-prefix-replace' + SEP + XIR = '' + + ALLOWLIST = { + 'chrome.exe', + 'chrome.dll', + 'chrome_child.dll', + # TODO: The following targets are on the allow list because the + # common objects flow does not link them successfully. This should + # be fixed, after which they can be removed from the list. + 'tls_edit.exe', + } + + def transform_codegen_param(self, param): + # In addition to parameters handled by transform_codegen_param_common, + # we pass on parameters that start in 'G' or 'Q', which are + # MSVC-style parameters that affect code generation. + if len(param) >= 2 and param[0] in ['-', '/'] and param[1] in ['G', 'Q']: + return [param] + return self.transform_codegen_param_common(param) + + def process_output_param(self, args, i): + """ + If args[i] is a parameter that specifies the output file, + returns (output_name, new_i). Else, returns (None, new_i). + """ + m = self.OUTPUT_RE.match(args[i]) + if m: + return (os.path.normpath(m.group(1)), i + 1) + else: + return (None, i + 1) + + +if __name__ == '__main__': + sys.exit(RemoteLinkWindows().main(sys.argv)) diff --git a/clang/scripts/remote_link_integration_tests.py b/clang/scripts/remote_link_integration_tests.py new file mode 100755 index 0000000000000000000000000000000000000000..01a2b5306b78ae257a0d817e5f0207e9901dd69b --- /dev/null +++ b/clang/scripts/remote_link_integration_tests.py @@ -0,0 +1,626 @@ +#! /usr/bin/env python3 +# Copyright 2020 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# Integration tests for remote_link. +# +# Usage: +# +# Ensure that rewrapper, llvm-objdump, and llvm-dwarfdump are in your +# PATH. +# Then run: +# +# tools/clang/scripts/remote_link_integration_tests.py +# +# See also remote_link_unit_tests.py, which contains unit tests and +# instructions for generating coverage information. 
+ +import remote_ld +import remote_link + +from io import StringIO +import os +import re +import shlex +import subprocess +import unittest +from unittest import mock + +from remote_link_test_utils import named_directory, working_directory + +# Path constants. +CHROMIUM_DIR = os.path.abspath( + os.path.join(os.path.dirname(__file__), '..', '..', '..')) +LLVM_BIN_DIR = os.path.join(CHROMIUM_DIR, 'third_party', 'llvm-build', + 'Release+Asserts', 'bin') + + +def _create_inputs(path): + """ + Creates input files under path. + """ + with open(os.path.join(path, 'main.cpp'), 'w') as f: + f.write('extern int foo();\n' + 'int main(int argc, char *argv[]) {\n return foo();\n}\n') + with open(os.path.join(path, 'foo.cpp'), 'w') as f: + f.write('int foo() {\n return 12;\n}\n') + with open(os.path.join(path, 'bar.cpp'), 'w') as f: + f.write('int bar() {\n return 9;\n}\n') + + +def _lto_args(generate_bitcode): + """ + Returns list of arguments to clang to generate bitcode or not. + """ + if generate_bitcode: + return ['-flto=thin'] + else: + return [] + + +class RemoteLinkUnixAllowMain(remote_ld.RemoteLinkUnix): + """ + Same as remote_ld.RemoteLinkUnix, but has "main" on the allow list. + """ + + def __init__(self, *args, **kwargs): + super(RemoteLinkUnixAllowMain, self).__init__(*args, **kwargs) + self.ALLOWLIST = {'main'} + + +class RemoteLinkWindowsAllowMain(remote_link.RemoteLinkWindows): + """ + Same as remote_ld.RemoteLinkWindows, but has "main" on the allow list. + """ + + def __init__(self, *args, **kwargs): + super(RemoteLinkWindowsAllowMain, self).__init__(*args, **kwargs) + self.ALLOWLIST = {'main.exe'} + + +class RemoteLinkIntegrationTest(unittest.TestCase): + def clangcl(self): + return os.path.join(LLVM_BIN_DIR, 'clang-cl' + remote_link.exe_suffix()) + + def lld_link(self): + return os.path.join(LLVM_BIN_DIR, 'lld-link' + remote_link.exe_suffix()) + + def llvmar(self): + return os.path.join(LLVM_BIN_DIR, 'llvm-ar' + remote_link.exe_suffix()) + + def test_distributed_lto_common_objs(self): + with named_directory() as d, working_directory(d): + _create_inputs(d) + os.makedirs('obj') + subprocess.check_call([ + self.clangcl(), '-c', '-Os', '-flto=thin', 'main.cpp', + '-Foobj/main.obj' + ]) + subprocess.check_call([ + self.clangcl(), '-c', '-Os', '-flto=thin', 'foo.cpp', '-Foobj/foo.obj' + ]) + subprocess.check_call([ + self.clangcl(), '-c', '-Os', '-flto=thin', 'bar.cpp', '-Foobj/bar.obj' + ]) + subprocess.check_call([ + self.llvmar(), 'crsT', 'obj/foobar.lib', 'obj/bar.obj', 'obj/foo.obj' + ]) + with open('main.rsp', 'w') as f: + f.write('obj/main.obj\n' 'obj/foobar.lib\n') + with open('my_reclient.sh', 'w') as f: + f.write('#! /bin/sh\n\nrewrapper "$@"\n') + os.chmod('my_reclient.sh', 0o755) + rc = remote_link.RemoteLinkWindows().main([ + 'remote_link.py', '--wrapper', './my_reclient.sh', '--ar-path', + self.llvmar(), '--', + self.lld_link(), '-nodefaultlib', '-entry:main', '-out:main.exe', + '@main.rsp' + ]) + # Should succeed. + self.assertEqual(rc, 0) + # Check codegen parameters. 
+
+  def test_distributed_lto_allowlist(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      os.makedirs('obj')
+      subprocess.check_call([
+          self.clangcl(), '-c', '-Os', '-flto=thin', '-m32', 'main.cpp',
+          '-Foobj/main.obj'
+      ])
+      subprocess.check_call([
+          self.clangcl(), '-c', '-Os', '-flto=thin', '-m32', 'foo.cpp',
+          '-Foobj/foo.obj'
+      ])
+      subprocess.check_call([
+          self.clangcl(), '-c', '-Os', '-flto=thin', '-m32', 'bar.cpp',
+          '-Foobj/bar.obj'
+      ])
+      subprocess.check_call([
+          self.llvmar(), 'crsT', 'obj/foobar.lib', 'obj/bar.obj', 'obj/foo.obj'
+      ])
+      with open('main.rsp', 'w') as f:
+        f.write('obj/main.obj\n'
+                'obj/foobar.lib\n')
+      rc = RemoteLinkWindowsAllowMain().main([
+          'remote_link.py', '--wrapper', 'rewrapper', '--ar-path',
+          self.llvmar(), '--',
+          self.lld_link(), '-nodefaultlib', '-entry:main', '-machine:X86',
+          '-opt:lldlto=2', '-mllvm:-import-instr-limit=10', '-out:main.exe',
+          '@main.rsp'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # Check codegen parameters.
+      with open(os.path.join(d, 'lto.main.exe', 'build.ninja')) as f:
+        buildrules = f.read()
+      codegen_match = re.search('^rule codegen\\b.*?^[^ ]', buildrules,
+                                re.MULTILINE | re.DOTALL)
+      self.assertIsNotNone(codegen_match)
+      codegen_text = codegen_match.group(0)
+      self.assertIn('rewrapper', codegen_text)
+      self.assertIn('-m32', codegen_text)
+      self.assertIn('-mllvm -import-instr-limit=10', codegen_text)
+      self.assertNotIn('-flto', codegen_text)
+      self.assertIn('build lto.main.exe/obj/main.obj.stamp : codegen ',
+                    buildrules)
+      self.assertIn('build lto.main.exe/obj/foo.obj.stamp : codegen ',
+                    buildrules)
+      link_match = re.search('^build main.exe : native-link\\b.*?^[^ ]',
+                             buildrules, re.MULTILINE | re.DOTALL)
+      self.assertIsNotNone(link_match)
+      link_text = link_match.group(0)
+      self.assertIn('main.exe.split.obj', link_text)
+      # Check that main does not call foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main.exe'])
+      # There are no symbols in the disassembly, but we're expecting a single
+      # function, with no calls or jmps.
+      self.assertNotIn(b'jmp', disasm)
+      self.assertNotIn(b'call', disasm)
+
+  def test_override_allowlist(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      os.makedirs('obj')
+      subprocess.check_call([
+          self.clangcl(), '-c', '-O2', '-flto=thin', 'main.cpp',
+          '-Foobj/main.obj'
+      ])
+      subprocess.check_call([
+          self.clangcl(), '-c', '-O2', '-flto=thin', 'foo.cpp', '-Foobj/foo.obj'
+      ])
+      rc = remote_link.RemoteLinkWindows().main([
+          'remote_link.py', '--generate', '--allowlist', '--ar-path',
+          self.llvmar(), '--',
+          self.lld_link(), '-nodefaultlib', '-entry:main', '-opt:lldlto=2',
+          '-out:main.exe', 'obj/main.obj', 'obj/foo.obj'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # Check that we have rules for main and foo, and that they are
+      # not common objects.
+      with open(os.path.join(d, 'lto.main.exe', 'build.ninja')) as f:
+        buildrules = f.read()
+      codegen_match = re.search(r'^rule codegen\b.*?^[^ ]', buildrules,
+                                re.MULTILINE | re.DOTALL)
+      self.assertIsNotNone(codegen_match)
+      codegen_text = codegen_match.group(0)
+      self.assertNotIn('-flto', codegen_text)
+      self.assertIn('build lto.main.exe/obj/main.obj.stamp : codegen ',
+                    buildrules)
+      self.assertIn('build lto.main.exe/obj/foo.obj.stamp : codegen ',
+                    buildrules)
+      link_match = re.search(r'^build main.exe : native-link\b.*?^[^ ]',
+                             buildrules, re.MULTILINE | re.DOTALL)
+      self.assertIsNotNone(link_match)
+
+
+class RemoteLdIntegrationTest(unittest.TestCase):
+  def clangxx(self):
+    return os.path.join(LLVM_BIN_DIR, 'clang++' + remote_link.exe_suffix())
+
+  def llvmar(self):
+    return os.path.join(LLVM_BIN_DIR, 'llvm-ar' + remote_link.exe_suffix())
+
+  def test_nonlto(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', 'main.cpp', '-o', 'main.o'])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', 'foo.cpp', '-o', 'foo.o'])
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '--wrapper', 'rewrapper', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', 'main.o', 'foo.o', '-o', 'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # lto.main directory should not be present.
+      self.assertFalse(os.path.exists(os.path.join(d, 'lto.main')))
+      # Check that main calls foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+      main_idx = disasm.index(b'<main>:\n')
+      after_main_idx = disasm.index(b'\n\n', main_idx)
+      main_disasm = disasm[main_idx:after_main_idx]
+      self.assertIn(b'foo', main_disasm)
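+
+  # These disassembly checks scan llvm-objdump's text output, which contains
+  # label lines such as (addresses illustrative):
+  #
+  #   0000000000201700 <main>:
+  #     201700: 55    pushq %rbp
+  #
+  # so disasm.index(b'<main>:\n') finds the start of main's body, and the
+  # following blank line ends it.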
+
+  def test_fallback_lto(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', 'main.cpp', '-o', 'main.o'
+      ])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', '-flto=thin', 'foo.cpp', '-o', 'foo.o'])
+      rc = remote_ld.RemoteLinkUnix().main([
+          'remote_ld.py', '--wrapper', 'rewrapper', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', 'main.o', 'foo.o', '-o',
+          'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # lto.main directory should not be present.
+      self.assertFalse(os.path.exists(os.path.join(d, 'lto.main')))
+      # Check that main does not call foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+      main_idx = disasm.index(b'<main>:\n')
+      after_main_idx = disasm.index(b'\n\n', main_idx)
+      main_disasm = disasm[main_idx:after_main_idx]
+      self.assertNotIn(b'foo', main_disasm)
+
+  def test_distributed_lto(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', 'main.cpp', '-o', 'main.o'
+      ])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', '-flto=thin', 'foo.cpp', '-o', 'foo.o'])
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '-j', '16', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', 'main.o', 'foo.o', '-o',
+          'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # build.ninja file should have rewrapper invocations in it.
+      with open(os.path.join(d, 'lto.main', 'build.ninja')) as f:
+        buildrules = f.read()
+      self.assertIn('rewrapper ', buildrules)
+      self.assertIn('build lto.main/main.o.stamp : codegen ', buildrules)
+      self.assertIn('build lto.main/foo.o.stamp : codegen ', buildrules)
+      # Check that main does not call foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+      main_idx = disasm.index(b'<main>:\n')
+      after_main_idx = disasm.index(b'\n\n', main_idx)
+      main_disasm = disasm[main_idx:after_main_idx]
+      self.assertNotIn(b'foo', main_disasm)
+
+  def test_distributed_lto_thin_archive_same_dir(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', 'main.cpp', '-o', 'main.o'
+      ])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', '-flto=thin', 'foo.cpp', '-o', 'foo.o'])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', '-flto=thin', 'bar.cpp', '-o', 'bar.o'])
+      subprocess.check_call(
+          [self.llvmar(), 'crsT', 'libfoobar.a', 'bar.o', 'foo.o'])
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', 'main.o', 'libfoobar.a',
+          '-o', 'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # build.ninja file should have rewrapper invocations in it.
+      with open(os.path.join(d, 'lto.main', 'build.ninja')) as f:
+        buildrules = f.read()
+      self.assertIn('rewrapper ', buildrules)
+      self.assertIn('build lto.main/main.o.stamp : codegen ', buildrules)
+      self.assertIn('build lto.main/foo.o.stamp : codegen ', buildrules)
+      # Check that main does not call foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+      main_idx = disasm.index(b'<main>:\n')
+      after_main_idx = disasm.index(b'\n\n', main_idx)
+      main_disasm = disasm[main_idx:after_main_idx]
+      self.assertNotIn(b'foo', main_disasm)
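+
+  # Note: the 'T' in llvm-ar's 'crsT' creates a *thin* archive, which stores
+  # paths to the member objects rather than embedding their contents, so the
+  # members can be referenced in place during distributed codegen.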
+
+  def test_distributed_lto_thin_archive_subdir(self):
+    self.run_archive_test(bitcode_archive=True,
+                          bitcode_main=True,
+                          thin_archive=True)
+
+  def test_distributed_machine_code_thin_archive_bitcode_main_subdir(self):
+    self.run_archive_test(bitcode_archive=False,
+                          bitcode_main=True,
+                          thin_archive=True)
+
+  def test_distributed_machine_code_thin_archive_subdir(self):
+    self.run_archive_test(bitcode_archive=False,
+                          bitcode_main=False,
+                          thin_archive=True)
+
+  def test_distributed_bitcode_thick_archive_subdir(self):
+    self.run_archive_test(bitcode_archive=True,
+                          bitcode_main=True,
+                          thin_archive=False)
+
+  def test_distributed_machine_code_thick_archive_subdir(self):
+    self.run_archive_test(bitcode_archive=False,
+                          bitcode_main=False,
+                          thin_archive=False)
+
+  def test_distributed_machine_code_thick_archive_bitcode_main_subdir(self):
+    self.run_archive_test(bitcode_archive=False,
+                          bitcode_main=True,
+                          thin_archive=False)
+
+  def run_archive_test(self, bitcode_archive, bitcode_main, thin_archive):
+    """
+    Runs a test to ensure correct remote linking handling of an archive.
+
+    Arguments:
+      bitcode_archive: whether the archive should contain bitcode (True)
+        or machine code (False).
+      bitcode_main: whether the main object, outside the archive, should
+        contain bitcode (True) or machine code (False).
+      thin_archive: whether to create a thin archive instead of a regular
+        archive.
+    """
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      os.makedirs('obj')
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', 'main.cpp', '-o', 'obj/main.o'] +
+          _lto_args(bitcode_main))
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', 'foo.cpp', '-o', 'obj/foo.o'] +
+          _lto_args(bitcode_archive))
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', 'bar.cpp', '-o', 'obj/bar.o'] +
+          _lto_args(bitcode_archive))
+      archive_creation_arg = 'crs'
+      if thin_archive:
+        archive_creation_arg = 'crsT'
+      subprocess.check_call([
+          self.llvmar(), archive_creation_arg, 'obj/libfoobar.a', 'obj/bar.o',
+          'obj/foo.o'
+      ])
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', 'obj/main.o',
+          'obj/libfoobar.a', '-o', 'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      if bitcode_main or bitcode_archive:
+        # build.ninja file should have rewrapper invocations in it.
+        with open(os.path.join(d, 'lto.main', 'build.ninja')) as f:
+          buildrules = f.read()
+        self.assertIn('rewrapper ', buildrules)
+        if bitcode_main:
+          self.assertIn('build lto.main/obj/main.o.stamp : codegen ',
+                        buildrules)
+        if bitcode_archive:
+          if thin_archive:
+            self.assertIn('build lto.main/obj/foo.o.stamp : codegen ',
+                          buildrules)
+          else:
+            self.assertIn(
+                'build lto.main/expanded_archives/obj/libfoobar.a/' +
+                'foo.o.stamp : codegen ', buildrules)
+      # Check that main does not call foo.
+      if bitcode_archive:
+        disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+        main_idx = disasm.index(b'<main>:\n')
+        after_main_idx = disasm.index(b'\n\n', main_idx)
+        main_disasm = disasm[main_idx:after_main_idx]
+        self.assertNotIn(b'foo', main_disasm)
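+
+  # As the assertions above reflect, members of regular (non-thin) archives
+  # cannot be referenced in place, so they are expected to be unpacked under
+  # lto.<output>/expanded_archives/ before codegen is scheduled for them.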
+
+  def test_debug_params(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      os.makedirs('obj')
+      subprocess.check_call([
+          self.clangxx(), '-c', '-g', '-gsplit-dwarf', '-flto=thin', 'main.cpp',
+          '-o', 'obj/main.o'
+      ])
+      subprocess.check_call([
+          self.clangxx(), '-c', '-g', '-gsplit-dwarf', '-flto=thin', 'foo.cpp',
+          '-o', 'obj/foo.o'
+      ])
+      with open('main.rsp', 'w') as f:
+        f.write('obj/main.o\n'
+                'obj/foo.o\n')
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', '-g', '-gsplit-dwarf',
+          '-Wl,--lto-O2', '-o', 'main', '@main.rsp'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # Check debug info present, refers to .dwo file, and does not
+      # contain full debug info for foo.cpp.
+      dbginfo = subprocess.check_output(
+          ['llvm-dwarfdump', '-debug-info',
+           'main']).decode('utf-8', 'backslashreplace')
+      self.assertRegex(dbginfo, '\\bDW_AT_GNU_dwo_name\\b.*\\.dwo"')
+      self.assertNotRegex(dbginfo, '\\bDW_AT_name\\b.*foo\\.cpp"')
+
+  def test_distributed_lto_params(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      os.makedirs('obj')
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', '-m32', '-fsplit-lto-unit',
+          '-fwhole-program-vtables', 'main.cpp', '-o', 'obj/main.o'
+      ])
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', '-m32', '-fsplit-lto-unit',
+          '-fwhole-program-vtables', 'foo.cpp', '-o', 'obj/foo.o'
+      ])
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', '-m32', '-fsplit-lto-unit',
+          '-fwhole-program-vtables', 'bar.cpp', '-o', 'obj/bar.o'
+      ])
+      subprocess.check_call(
+          [self.llvmar(), 'crsT', 'obj/libfoobar.a', 'obj/bar.o', 'obj/foo.o'])
+      with open('main.rsp', 'w') as f:
+        f.write('-fsplit-lto-unit\n'
+                '-fwhole-program-vtables\n'
+                'obj/main.o\n'
+                'obj/libfoobar.a\n')
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '--ar-path',
+          self.llvmar(), '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', '-m32', '-Wl,-mllvm',
+          '-Wl,-generate-type-units', '-Wl,--lto-O2', '-o', 'main',
+          '-Wl,--start-group', '@main.rsp', '-Wl,--end-group'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # Check codegen parameters.
+      with open(os.path.join(d, 'lto.main', 'build.ninja')) as f:
+        buildrules = f.read()
+      codegen_match = re.search('^rule codegen\\b.*?^[^ ]', buildrules,
+                                re.MULTILINE | re.DOTALL)
+      self.assertIsNotNone(codegen_match)
+      codegen_text = codegen_match.group(0)
+      self.assertIn('rewrapper', codegen_text)
+      self.assertIn('-m32', codegen_text)
+      self.assertIn('-mllvm -generate-type-units', codegen_text)
+      self.assertNotIn('-flto', codegen_text)
+      self.assertIn('build lto.main/obj/main.o.stamp : codegen ', buildrules)
+      self.assertIn('build lto.main/obj/foo.o.stamp : codegen ', buildrules)
+      link_match = re.search('^build main : native-link\\b.*?^[^ ]',
+                             buildrules, re.MULTILINE | re.DOTALL)
+      self.assertIsNotNone(link_match)
+      link_text = link_match.group(0)
+      self.assertIn('main.split.o', link_text)
+      # Check that main does not call foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+      main_idx = disasm.index(b'<main>:\n')
+      after_main_idx = disasm.index(b'\n\n', main_idx)
+      main_disasm = disasm[main_idx:after_main_idx]
+      self.assertNotIn(b'foo', main_disasm)
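+
+  # Note how '-Wl,-mllvm -Wl,-generate-type-units' on the driver link line
+  # surfaces as '-mllvm -generate-type-units' in the codegen rule above:
+  # backend flags forwarded through the linker also have to be applied when
+  # code generation happens out-of-process.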
+
+  def test_no_rewrapper(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', 'main.cpp', '-o', 'main.o'
+      ])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', '-flto=thin', 'foo.cpp', '-o', 'foo.o'])
+      rc = RemoteLinkUnixAllowMain().main([
+          'remote_ld.py', '--ar-path',
+          self.llvmar(), '--no-wrapper', '-j', '16', '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', 'main.o', 'foo.o', '-o',
+          'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # build.ninja file should not have rewrapper invocations in it.
+      with open(os.path.join(d, 'lto.main', 'build.ninja')) as f:
+        buildrules = f.read()
+      self.assertNotIn('rewrapper ', buildrules)
+      self.assertIn('build lto.main/main.o.stamp : codegen ', buildrules)
+      self.assertIn('build lto.main/foo.o.stamp : codegen ', buildrules)
+      # Check that main does not call foo.
+      disasm = subprocess.check_output(['llvm-objdump', '-d', 'main'])
+      main_idx = disasm.index(b'<main>:\n')
+      after_main_idx = disasm.index(b'\n\n', main_idx)
+      main_disasm = disasm[main_idx:after_main_idx]
+      self.assertNotIn(b'foo', main_disasm)
+
+  def test_generate_no_codegen(self):
+    with named_directory() as d, working_directory(d):
+      with open('main.o', 'wb') as f:
+        f.write(b'\x7fELF')
+      with mock.patch('sys.stderr', new_callable=StringIO) as stderr:
+        rc = RemoteLinkUnixAllowMain().main([
+            'remote_ld.py', '--ar-path',
+            self.llvmar(), '--generate', '--',
+            self.clangxx(), 'main.o', '-o', 'main'
+        ])
+      self.assertEqual(rc, 5)
+      self.assertIn('no ninja file generated.\n', stderr.getvalue())
+
+  def test_generate(self):
+    with named_directory() as d, working_directory(d):
+      with open('main.o', 'wb') as f:
+        f.write(b'BC\xc0\xde')
+      with mock.patch('sys.stderr', new_callable=StringIO) as stderr:
+        rc = RemoteLinkUnixAllowMain().main([
+            'remote_ld.py', '--ar-path',
+            self.llvmar(), '--generate', '--',
+            self.clangxx(), 'main.o', '-o', 'main'
+        ])
+      self.assertEqual(rc, 0)
+      m = re.search('ninja file (.*)', stderr.getvalue())
+      self.assertIsNotNone(m)
+      path = shlex.split(m.group(1))[0]
+      self.assertTrue(os.path.exists(path))
+      content = open(path).read()
+      self.assertRegex(
+          content,
+          re.compile('^build [^:]+/main\\.o\\.stamp : codegen ',
+                     re.MULTILINE))
+
+  def test_override_allowlist(self):
+    with named_directory() as d, working_directory(d):
+      _create_inputs(d)
+      subprocess.check_call([
+          self.clangxx(), '-c', '-Os', '-flto=thin', 'main.cpp', '-o', 'main.o'
+      ])
+      subprocess.check_call(
+          [self.clangxx(), '-c', '-Os', '-flto=thin', 'foo.cpp', '-o', 'foo.o'])
+      rc = remote_ld.RemoteLinkUnix().main([
+          'remote_ld.py', '--ar-path',
+          self.llvmar(), '--generate', '--allowlist', '--',
+          self.clangxx(), '-fuse-ld=lld', '-flto=thin', 'main.o', 'foo.o', '-o',
+          'main'
+      ])
+      # Should succeed.
+      self.assertEqual(rc, 0)
+      # build.ninja file should have rules for main and foo.
+      ninjafile = os.path.join(d, 'lto.main', 'build.ninja')
+      self.assertTrue(os.path.exists(ninjafile))
+      with open(ninjafile) as f:
+        buildrules = f.read()
+      self.assertIn('build lto.main/main.o.stamp : codegen ', buildrules)
+      self.assertIn('build lto.main/foo.o.stamp : codegen ', buildrules)
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/clang/scripts/remote_link_test_utils.py b/clang/scripts/remote_link_test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e320d49b13ce82271406911ae83af763feac1c9
--- /dev/null
+++ b/clang/scripts/remote_link_test_utils.py
@@ -0,0 +1,37 @@
+# Copyright 2020 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Utility classes for testing remote_link.
+
+import contextlib
+import os
+import shutil
+import tempfile
+
+
+# A minimal stand-in for tempfile.TemporaryDirectory, kept for compatibility
+# with older Python versions.
+@contextlib.contextmanager
+def named_directory(*args, **kwargs):
+  name = tempfile.mkdtemp(*args, **kwargs)
+  try:
+    yield name
+  finally:
+    shutil.rmtree(name)
+
+
+@contextlib.contextmanager
+def working_directory(newcwd):
+  """
+  Changes working directory to the specified directory, runs enclosed code,
+  and changes back to the previous directory.
+  """
+  oldcwd = os.getcwd()
+  os.chdir(newcwd)
+  try:
+    # Use os.getcwd() instead of newcwd so that we have a path that works
+    # inside the block.
+ yield os.getcwd() + finally: + os.chdir(oldcwd) diff --git a/clang/scripts/remote_link_unit_tests.py b/clang/scripts/remote_link_unit_tests.py new file mode 100755 index 0000000000000000000000000000000000000000..aac44f0a5d3893081f04206ddae24a7458017cbd --- /dev/null +++ b/clang/scripts/remote_link_unit_tests.py @@ -0,0 +1,244 @@ +#! /usr/bin/env python3 +# Copyright 2020 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# Unit tests for remote_link. +# +# Usage: +# +# tools/clang/scripts/remote_link_unit_tests.py +# +# A coverage report combining these tests with the integration tests +# in remote_link_integration_tests.py can be generated by running: +# +# env COVERAGE_FILE=.coverage.unit python3 third_party/pycoverage run \ +# tools/clang/scripts/remote_link_unit_tests.py +# env COVERAGE_FILE=.coverage.integration python3 third_party/pycoverage \ +# run tools/clang/scripts/remote_link_integration_tests.py +# python3 third_party/pycoverage combine +# python3 third_party/pycoverage html +# +# The report will be available as htmlcov/index.html + +import remote_ld +import remote_link + +import os +import unittest +from unittest import mock + +from remote_link_test_utils import named_directory, working_directory + + +class FakeFs(object): + """ + Context manager that mocks the functions through which remote_link + interacts with the filesystem. + """ + + def __init__(self, bitcode_files=None, other_files=None): + self.bitcode_files = set(bitcode_files or []) + self.other_files = set(other_files or []) + + def ensure_file(path): + self.other_files.add(path) + + def exists(path): + return path in self.bitcode_files or path in self.other_files + + def is_bitcode_file(path): + return path in self.bitcode_files + + self.mock_ensure_file = mock.patch('remote_link.ensure_file', ensure_file) + self.mock_exists = mock.patch('os.path.exists', exists) + self.mock_is_bitcode_file = mock.patch('remote_link.is_bitcode_file', + is_bitcode_file) + + def __enter__(self): + self.mock_ensure_file.start() + self.mock_exists.start() + self.mock_is_bitcode_file.start() + return self + + def __exit__(self, exnty, *args, **kwargs): + self.mock_is_bitcode_file.stop() + self.mock_exists.stop() + self.mock_ensure_file.stop() + return exnty is None + + +class RemoteLinkUnitTest(unittest.TestCase): + """ + Unit tests for remote_link. + """ + + def test_analyze_expanded_args_nocodegen(self): + with FakeFs(other_files=['foo.o', 'bar.o']): + self.assertIsNone(remote_ld.RemoteLinkUnix().analyze_expanded_args( + ['clang', 'foo.o', 'bar.o', '-o', 'foo'], 'foo', 'clang', 'lto.foo', + 'common', False)) + + def test_analyze_expanded_args_one_codegen(self): + with FakeFs(bitcode_files=['foo.o'], other_files=['bar.o']): + result = remote_ld.RemoteLinkUnix().analyze_expanded_args( + ['clang', 'foo.o', 'bar.o', '-o', 'foo'], 'foo', 'clang', 'lto.foo', + 'common', False) + self.assertIsNotNone(result) + self.assertNotEqual(len(result.codegen), 0) + self.assertEqual(result.codegen[0][1], 'foo.o') + self.assertEqual(len(result.codegen), 1) + self.assertIn('foo.o', result.index_params) + self.assertIn('bar.o', result.index_params) + self.assertIn('bar.o', result.final_params) + # foo.o should not be in final_params because it will be added via + # the used object file. 
+ self.assertNotIn('foo.o', result.final_params) + + def test_analyze_expanded_args_params(self): + with FakeFs(bitcode_files=['foo.o']): + result = remote_ld.RemoteLinkUnix().analyze_expanded_args([ + 'clang', '-O2', '--target=arm-none-eabi', '-march=armv7-a', + '-flto=thin', '-fsplit-lto-unit', '-fwhole-program-vtables', + '-fsanitize=cfi', '-g', '-gsplit-dwarf', '-mllvm', + '-generate-type-units', 'foo.o', '-o', 'foo' + ], 'foo', 'clang', 'lto.foo', 'common', False) + self.assertIsNotNone(result) + self.assertIn('-Wl,-plugin-opt=obj-path=lto.foo/foo.split.o', + result.index_params) + self.assertIn('-O2', result.index_params) + self.assertIn('--target=arm-none-eabi', result.codegen_params) + self.assertIn('-march=armv7-a', result.codegen_params) + self.assertIn('-g', result.index_params) + self.assertIn('-gsplit-dwarf', result.index_params) + self.assertIn('-mllvm -generate-type-units', + ' '.join(result.index_params)) + self.assertIn('-flto=thin', result.index_params) + self.assertIn('-fwhole-program-vtables', result.index_params) + self.assertIn('-fsanitize=cfi', result.index_params) + + self.assertIn('-O2', result.codegen_params) + self.assertIn('--target=arm-none-eabi', result.codegen_params) + self.assertIn('-march=armv7-a', result.codegen_params) + self.assertIn('-gsplit-dwarf', result.codegen_params) + self.assertIn('-mllvm -generate-type-units', + ' '.join(result.codegen_params)) + self.assertNotIn('-flto=thin', result.codegen_params) + self.assertNotIn('-fwhole-program-vtables', result.codegen_params) + self.assertNotIn('-fsanitize=cfi', result.codegen_params) + + self.assertIn('-flto=thin', result.final_params) + + def test_codegen_params_default(self): + with FakeFs(bitcode_files=['foo.o'], other_files=['bar.o']): + result = remote_ld.RemoteLinkUnix().analyze_expanded_args( + ['clang', 'foo.o', 'bar.o', '-o', 'foo'], 'foo', 'clang', 'lto.foo', + 'common', False) + # Codegen optimization level should default to 2. + self.assertIn('-O2', result.codegen_params) + # -fdata-sections and -ffunction-sections default to on to match the + # behavior of local linking. + self.assertIn('-fdata-sections', result.codegen_params) + self.assertIn('-ffunction-sections', result.codegen_params) + + def test_codegen_params_default_cl(self): + with FakeFs(bitcode_files=['foo.obj'], other_files=['bar.obj']): + result = remote_link.RemoteLinkWindows().analyze_expanded_args( + ['clang-cl', 'foo.obj', 'bar.obj', '-Fefoo.exe'], 'foo.exe', + 'clang-cl', 'lto.foo', 'common', False) + # Codegen optimization level should default to 2. + self.assertIn('-O2', result.codegen_params) + # -Gw and -Gy default to on to match the behavior of local linking. 
+      self.assertIn('-Gw', result.codegen_params)
+      self.assertIn('-Gy', result.codegen_params)
+
+  def test_codegen_params_no_data_sections(self):
+    with FakeFs(bitcode_files=['foo.o'], other_files=['bar.o']):
+      result = remote_ld.RemoteLinkUnix().analyze_expanded_args(
+          ['clang', '-fno-data-sections', 'foo.o', 'bar.o', '-o', 'foo'], 'foo',
+          'clang', 'lto.foo', 'common', False)
+      self.assertNotIn('-fdata-sections', result.codegen_params)
+      self.assertIn('-ffunction-sections', result.codegen_params)
+
+  def test_codegen_params_no_function_sections(self):
+    with FakeFs(bitcode_files=['foo.o'], other_files=['bar.o']):
+      result = remote_ld.RemoteLinkUnix().analyze_expanded_args(
+          ['clang', '-fno-function-sections', 'foo.o', 'bar.o', '-o', 'foo'],
+          'foo', 'clang', 'lto.foo', 'common', False)
+      self.assertIn('-fdata-sections', result.codegen_params)
+      self.assertNotIn('-ffunction-sections', result.codegen_params)
+
+  def test_codegen_params_no_data_sections_cl(self):
+    with FakeFs(bitcode_files=['foo.obj'], other_files=['bar.obj']):
+      result = remote_link.RemoteLinkWindows().analyze_expanded_args(
+          ['clang-cl', '/Gw-', 'foo.obj', 'bar.obj', '/Fefoo.exe'], 'foo.exe',
+          'clang-cl', 'lto.foo', 'common', False)
+      self.assertNotIn('-fdata-sections', result.codegen_params)
+      self.assertNotIn('-Gw', result.codegen_params)
+      self.assertNotIn('/Gw', result.codegen_params)
+      self.assertIn('-Gy', result.codegen_params)
+
+  def test_codegen_params_no_function_sections_cl(self):
+    with FakeFs(bitcode_files=['foo.obj'], other_files=['bar.obj']):
+      result = remote_link.RemoteLinkWindows().analyze_expanded_args(
+          ['clang-cl', '/Gy-', 'foo.obj', 'bar.obj', '/Fefoo.exe'], 'foo.exe',
+          'clang-cl', 'lto.foo', 'common', False)
+      self.assertIn('-Gw', result.codegen_params)
+      self.assertNotIn('-ffunction-sections', result.codegen_params)
+      self.assertNotIn('-Gy', result.codegen_params)
+      self.assertNotIn('/Gy', result.codegen_params)
+
+  def test_codegen_params_explicit_data_and_function_sections(self):
+    with FakeFs(bitcode_files=['foo.o'], other_files=['bar.o']):
+      result = remote_ld.RemoteLinkUnix().analyze_expanded_args([
+          'clang', '-ffunction-sections', '-fdata-sections', 'foo.o', 'bar.o',
+          '-o', 'foo'
+      ], 'foo', 'clang', 'lto.foo', 'common', False)
+      self.assertIn('-fdata-sections', result.codegen_params)
+      self.assertIn('-ffunction-sections', result.codegen_params)
+
+  def test_codegen_params_explicit_data_and_function_sections_cl(self):
+    with FakeFs(bitcode_files=['foo.obj'], other_files=['bar.obj']):
+      result = remote_link.RemoteLinkWindows().analyze_expanded_args(
+          ['clang-cl', '/Gy', '-Gw', 'foo.obj', 'bar.obj', '/Fefoo.exe'],
+          'foo.exe', 'clang-cl', 'lto.foo', 'common', False)
+      self.assertIn('-Gw', result.codegen_params)
+      self.assertIn('/Gy', result.codegen_params)
+      self.assertNotIn('-fdata-sections', result.codegen_params)
+      self.assertNotIn('-ffunction-sections', result.codegen_params)
+
+  def test_ensure_file_no_dir(self):
+    with named_directory() as d, working_directory(d):
+      self.assertFalse(os.path.exists('test'))
+      remote_link.ensure_file('test')
+      self.assertTrue(os.path.exists('test'))
+
+  def test_ensure_file_existing(self):
+    with named_directory() as d, working_directory(d):
+      self.assertFalse(os.path.exists('foo/test'))
+      remote_link.ensure_file('foo/test')
+      self.assertTrue(os.path.exists('foo/test'))
+      os.utime('foo/test', (0, 0))
+      statresult = os.stat('foo/test')
+      remote_link.ensure_file('foo/test')
+      self.assertTrue(os.path.exists('foo/test'))
+      newstatresult = os.stat('foo/test')
+      self.assertEqual(newstatresult.st_mtime, statresult.st_mtime)
+
+  def test_ensure_file_error(self):
+    with named_directory() as d, working_directory(d):
+      self.assertFalse(os.path.exists('test'))
+      remote_link.ensure_file('test')
+      self.assertTrue(os.path.exists('test'))
+      self.assertRaises(OSError, remote_link.ensure_file, 'test/impossible')
+
+  def test_transform_codegen_param_on_mllvm(self):
+    # Regression test for crbug.com/1135234
+    link = remote_ld.RemoteLinkUnix()
+    self.assertEqual(
+        link.transform_codegen_param_common('-mllvm,-import-instr-limit=20'),
+        ['-mllvm', '-import-instr-limit=20'])
+
+
+if __name__ == '__main__':
+  unittest.main()
diff --git a/clang/scripts/run_tool.py b/clang/scripts/run_tool.py
new file mode 100755
index 0000000000000000000000000000000000000000..cff63849528cf1473a0255547c407278e3900fd5
--- /dev/null
+++ b/clang/scripts/run_tool.py
@@ -0,0 +1,418 @@
+#!/usr/bin/env vpython3
+# Copyright 2013 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Wrapper script to help run clang tools across Chromium code.
+
+How to use run_tool.py:
+If you want to run a clang tool across all Chromium code:
+run_tool.py <tool> <path/to/compiledb>
+
+If you want to include all files mentioned in the compilation database
+(this will also include generated files, unlike the previous command):
+run_tool.py <tool> <path/to/compiledb> --all
+
+If you want to run the clang tool across only chrome/browser and
+content/browser:
+run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
+
+Please see docs/clang_tool_refactoring.md for more information, which documents
+the entire automated refactoring flow in Chromium.
+
+Why use run_tool.py (instead of running a clang tool directly):
+The clang tool implementation doesn't take advantage of multiple cores, and if
+it fails mysteriously in the middle, all the generated replacements will be
+lost. Additionally, if the work is simply sharded across multiple cores by
+running multiple RefactoringTools, problems arise when they attempt to rewrite
+a file at the same time.
+
+run_tool.py will
+1) run multiple instances of the clang tool in parallel
+2) gather stdout from the clang tool invocations
+3) "atomically" forward #2 to stdout
+
+Output of run_tool.py can be piped into extract_edits.py and then into
+apply_edits.py. These tools will extract individual edits and apply them to
+the source files. These tools assume the clang tool emits the edits in the
+following format:
+    ...
+    ==== BEGIN EDITS ====
+    r:::<file path>:::<offset>:::<length>:::<replacement text>
+    r:::<file path>:::<offset>:::<length>:::<replacement text>
+    ...etc...
+    ==== END EDITS ====
+    ...
+
+extract_edits.py extracts only lines between BEGIN/END EDITS markers
+apply_edits.py reads edit lines from stdin and applies the edits
+"""
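+# For illustration, one edit line in the format above could be unpacked like
+# this (a sketch of the format, not code that appears in this script):
+#
+#   edit_type, path, offset, length, replacement = line.split(':::', 4)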
+
+from __future__ import print_function
+
+import argparse
+from collections import namedtuple
+import functools
+import json
+import multiprocessing
+import os
+import os.path
+import re
+import subprocess
+import shlex
+import sys
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
+sys.path.insert(0, tool_dir)
+
+from clang import compile_db
+
+
+CompDBEntry = namedtuple('CompDBEntry', ['directory', 'filename', 'command'])
+
+
+def _PruneGitFiles(git_files, paths):
+  """Prunes the list of files from git to include only those that are either
+  in |paths| or start with one item in |paths|.
+
+  Args:
+    git_files: List of all repository files.
+    paths: Prefix filter for the returned paths. May contain multiple entries,
+        and the contents should be absolute paths.
+
+  Returns:
+    Pruned list of files.
+  """
+  if not git_files:
+    return []
+  git_files.sort()
+  pruned_list = []
+  git_index = 0
+  for path in sorted(paths):
+    # Binary search for the first git file that could match |path|.
+    least = git_index
+    most = len(git_files) - 1
+    while least <= most:
+      middle = (least + most) // 2
+      if git_files[middle] == path:
+        least = middle
+        break
+      elif git_files[middle] > path:
+        most = middle - 1
+      else:
+        least = middle + 1
+    # Collect every file at or after the insertion point that shares the
+    # |path| prefix.
+    while least < len(git_files) and git_files[least].startswith(path):
+      pruned_list.append(git_files[least])
+      least += 1
+    git_index = least
+
+  return pruned_list
+
+
+def _GetFilesFromGit(paths=None):
+  """Gets the list of files in the git repository if |paths| includes prefix
+  path filters or is empty. All complete filenames in |paths| are also included
+  in the output.
+
+  Args:
+    paths: Prefix filter for the returned paths. May contain multiple entries.
+  """
+  partial_paths = []
+  files = []
+  for p in paths:
+    real_path = os.path.realpath(p)
+    if os.path.isfile(real_path):
+      files.append(real_path)
+    else:
+      partial_paths.append(real_path)
+  if partial_paths or not files:
+    args = []
+    if sys.platform == 'win32':
+      args.append('git.bat')
+    else:
+      args.append('git')
+    args.append('ls-files')
+    command = subprocess.Popen(args, stdout=subprocess.PIPE)
+    output, _ = command.communicate()
+    output = output.decode('utf-8')
+    git_files = [os.path.realpath(p) for p in output.splitlines()]
+    if partial_paths:
+      git_files = _PruneGitFiles(git_files, partial_paths)
+    files.extend(git_files)
+  return files
+
+
+def _GetEntriesFromCompileDB(build_directory, source_filenames):
+  """Gets the list of files and args mentioned in the compilation database.
+
+  Args:
+    build_directory: Directory that contains the compile database.
+    source_filenames: If not None, only include entries for the given list of
+      filenames.
+  """
+  filenames_set = None if source_filenames is None else set(source_filenames)
+  entries = compile_db.Read(build_directory)
+  return [
+      CompDBEntry(entry['directory'], entry['file'], entry['command'])
+      for entry in entries if filenames_set is None or os.path.realpath(
+          os.path.join(entry['directory'], entry['file'])) in filenames_set
+  ]
+
+
+def _UpdateCompileCommandsIfNeeded(compile_commands, files_list,
+                                   target_os=None):
+  """Filters the compile database to only include required files, and makes it
+  more clang-tool friendly on Windows.
+
+  Args:
+    compile_commands: List of the contents of the compile database.
+    files_list: List of required files for processing. Can be None to specify
+      no filtering.
+    target_os: Optional target OS, passed through to the compile_db
+      processing (see `gn help target_os`).
+
+  Returns:
+    List of the contents of the compile database after processing.
+  """
+  if sys.platform == 'win32' and files_list:
+    relative_paths = set([os.path.relpath(f) for f in files_list])
+    filtered_compile_commands = []
+    for entry in compile_commands:
+      file_path = os.path.relpath(
+          os.path.join(entry['directory'], entry['file']))
+      if file_path in relative_paths:
+        filtered_compile_commands.append(entry)
+  else:
+    filtered_compile_commands = compile_commands
+
+  return compile_db.ProcessCompileDatabase(filtered_compile_commands, [],
+                                           target_os)
+
+
+def _ExecuteTool(toolname, tool_args, build_directory, compdb_entry):
+  """Executes the clang tool.
+
+  This is defined outside the class so it can be pickled for the
+  multiprocessing module.
+
+  Args:
+    toolname: Name of the clang tool to execute.
+    tool_args: Arguments to be passed to the clang tool. Can be None.
+    build_directory: Directory that contains the compile database.
+    compdb_entry: The file and args to run the clang tool over.
+
+  Returns:
+    A dictionary that must contain the key "status" and a boolean value
+    associated with it.
+
+    If status is True, then the generated output is stored with the key
+    "stdout_text" in the dictionary.
+
+    Otherwise, the filename and the output from stderr are associated with the
+    keys "filename" and "stderr_text" respectively.
+  """
+  args = [toolname, compdb_entry.filename]
+  if tool_args:
+    args.extend(tool_args)
+
+  args.append('--')
+  args.extend([
+      a for a in shlex.split(compdb_entry.command,
+                             posix=(sys.platform != 'win32'))
+      # 'command' contains the full command line, including the input
+      # source file itself. We need to filter it out otherwise it's
+      # passed to the tool twice - once directly and once via
+      # the compile args.
+      if a != compdb_entry.filename
+      # /showIncludes is used by Ninja to track header file dependencies on
+      # Windows. We don't need to do this here, and it results in lots of spam
+      # and a massive log file, so we strip it.
+      and a != '/showIncludes' and a != '/showIncludes:user'
+      # -MMD has the same purpose on non-Windows. It may have a corresponding
+      # '-MF <filename>', which we strip below.
+      and a != '-MMD'
+  ])
+
+  for i, arg in enumerate(args):
+    if arg == '-MF':
+      del args[i:i + 2]
+      break
+
+  # shlex.split escapes double quotes in non-Posix mode, so we need to strip
+  # them back.
+  if sys.platform == 'win32':
+    args = [a.replace('\\"', '"') for a in args]
+  command = subprocess.Popen(
+      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+      cwd=build_directory)
+  stdout_text, stderr_text = command.communicate()
+  stdout_text = stdout_text.decode('utf-8')
+  stderr_text = stderr_text.decode('utf-8')
+  stderr_text = re.sub(
+      r"^warning: .*'linker' input unused \[-Wunused-command-line-argument\]\n",
+      "", stderr_text, flags=re.MULTILINE)
+
+  if command.returncode != 0:
+    return {
+        'status': False,
+        'filename': compdb_entry.filename,
+        'stderr_text': stderr_text,
+    }
+  else:
+    return {
+        'status': True,
+        'filename': compdb_entry.filename,
+        'stdout_text': stdout_text,
+        'stderr_text': stderr_text,
+    }
+ """ + if result['status']: + self.__success_count += 1 + sys.stdout.write(result['stdout_text']) + sys.stderr.write(result['stderr_text']) + else: + self.__failed_count += 1 + sys.stderr.write('\nFailed to process %s\n' % result['filename']) + sys.stderr.write(result['stderr_text']) + sys.stderr.write('\n') + done_count = self.__success_count + self.__failed_count + percentage = (float(done_count) / len(self.__compdb_entries)) * 100 + # Only output progress for every 100th entry, to make log files easier to + # inspect. + if done_count % 100 == 0 or done_count == len(self.__compdb_entries): + sys.stderr.write( + 'Processed %d files with %s tool (%d failures) [%.2f%%]\r' % + (done_count, self.__toolname, self.__failed_count, percentage)) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--options-file', + help='optional file to read options from') + args, argv = parser.parse_known_args() + if args.options_file: + argv = open(args.options_file).read().split() + + parser.add_argument('--tool', required=True, help='clang tool to run') + parser.add_argument('--all', action='store_true') + parser.add_argument( + '--generate-compdb', + action='store_true', + help='regenerate the compile database before running the tool') + parser.add_argument( + '--shard', + metavar='-of-') + parser.add_argument( + '-p', + required=True, + help='path to the directory that contains the compile database') + parser.add_argument( + '--target_os', + choices=['android', 'chromeos', 'ios', 'linux', 'nacl', 'mac', 'win'], + help='Target OS - see `gn help target_os`. Set to "win" when ' + + 'cross-compiling Windows from Linux or another host') + parser.add_argument( + 'path_filter', + nargs='*', + help='optional paths to filter what files the tool is run on') + parser.add_argument( + '--tool-arg', nargs='?', action='append', + help='optional arguments passed to the tool') + parser.add_argument( + '--tool-path', nargs='?', + help='optional path to the tool directory') + args = parser.parse_args(argv) + + if args.tool_path: + tool_path = os.path.abspath(args.tool_path) + else: + tool_path = os.path.abspath(os.path.join( + os.path.dirname(__file__), + '../../../third_party/llvm-build/Release+Asserts/bin')) + if not os.path.exists(tool_path): + sys.stderr.write('tool not found: %s\n' % tool_path) + return -1 + + if args.all: + # Reading source files is postponed to after possible regeneration of + # compile_commands.json. + source_filenames = None + else: + git_filenames = set(_GetFilesFromGit(args.path_filter)) + # Filter out files that aren't C/C++/Obj-C/Obj-C++. + extensions = frozenset(('.c', '.cc', '.cpp', '.m', '.mm')) + source_filenames = [ + f for f in git_filenames if os.path.splitext(f)[1] in extensions + ] + + if args.generate_compdb: + compile_commands = compile_db.GenerateWithNinja(args.p) + compile_commands = _UpdateCompileCommandsIfNeeded(compile_commands, + source_filenames, + args.target_os) + with open(os.path.join(args.p, 'compile_commands.json'), 'w') as f: + f.write(json.dumps(compile_commands, indent=2)) + + compdb_entries = set(_GetEntriesFromCompileDB(args.p, source_filenames)) + + if args.shard: + total_length = len(compdb_entries) + match = re.match(r'(\d+)-of-(\d+)$', args.shard) + # Input is 1-based, but modular arithmetic is 0-based. 
+ shard_number = int(match.group(1)) - 1 + shard_count = int(match.group(2)) + compdb_entries = [ + f for i, f in enumerate(sorted(compdb_entries)) + if i % shard_count == shard_number + ] + print('Shard %d-of-%d will process %d entries out of %d' % + (shard_number, shard_count, len(compdb_entries), total_length)) + + dispatcher = _CompilerDispatcher(os.path.join(tool_path, args.tool), + args.tool_arg, + args.p, + compdb_entries) + dispatcher.Run() + return -dispatcher.failed_count + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/clang/scripts/sync_deps.py b/clang/scripts/sync_deps.py new file mode 100755 index 0000000000000000000000000000000000000000..9298018cae45ab31625cc3097b3cb3b1b2025db0 --- /dev/null +++ b/clang/scripts/sync_deps.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# Copyright 2024 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +"""This script syncs the Clang and Rust revisions defined in update.py +and update_rust.py with the deps entries in DEPS.""" + +import argparse +import hashlib +import re +import os +import subprocess +import sys +import tempfile + +from update import DownloadUrl, CDS_URL, CLANG_REVISION, CLANG_SUB_REVISION + +sys.path.append( + os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', + 'rust')) +from update_rust import RUST_REVISION, RUST_SUB_REVISION + +CLANG_PLATFORM_TO_PACKAGE_FILES = { + 'Linux_x64': [ + 'clang', + 'clang-tidy', + 'clangd', + 'llvm-code-coverage', + 'llvmobjdump', + ], + 'Mac': [ + 'clang', + 'clang-mac-runtime-library', + 'clang-tidy', + 'clangd', + 'llvm-code-coverage', + ], + 'Mac_arm64': [ + 'clang', + 'clang-tidy', + 'clangd', + 'llvm-code-coverage', + ], + 'Win': [ + 'clang', + 'clang-tidy', + 'clang-win-runtime-library', + 'clangd', + 'llvm-code-coverage', + 'llvmobjdump', + ], +} + + +def GetDepsObjectInfo(object_name: str) -> str: + url = f'{CDS_URL}/{object_name}' + describe_url = f'gs://chromium-browser-clang/{object_name}' + output = subprocess.check_output(['gsutil.py', 'stat', + describe_url]).decode("utf-8") + # Output looks like: + # `` + # gs://bucket/path: + # Creation time: Wed, 15 May 2024 13:36:30 GMT + # Update time: Wed, 15 May 2024 13:36:30 GMT + # Storage class: STANDARD + # Cache-Control: public, max-age=31536000,no-transform + # Content-Encoding: gzip + # Content-Length: 5766650 + # Content-Type: application/octet-stream + # Hash (crc32c): E8z0Sg== + # Hash (md5): E/XAhJhhpd5+08cdO17CFA== + # ETag: COvj8aXjj4YDEAE= + # Generation: 1715780189975019 + # Metageneration: 1 + generation = re.search('Generation:\s+([0-9]+)', output).group(1) + size_bytes = re.search('Content-Length:\s+([0-9]+)', output).group(1) + with tempfile.NamedTemporaryFile() as f: + DownloadUrl(url, f) + f.seek(0) + sha256sum = hashlib.file_digest(f, 'sha256').hexdigest() + + return f'{object_name},{sha256sum},{size_bytes},{generation}' + + +def GetRustObjectNames() -> list: + object_names = [] + for host_os in ['Linux_x64', 'Mac', 'Mac_arm64', 'Win']: + rust_version = (f'{RUST_REVISION}-{RUST_SUB_REVISION}') + clang_revision = CLANG_REVISION + object_name = f'{host_os}/rust-toolchain-{rust_version}-{clang_revision}' + object_names.append(f'{object_name}.tar.xz') + return object_names + + +def GetClangObjectNames() -> list: + object_names = [] + clang_version = f'{CLANG_REVISION}-{CLANG_SUB_REVISION}' + for platform, package_file_list in CLANG_PLATFORM_TO_PACKAGE_FILES.items(): + for package_file in package_file_list: + 
+      object_names.append(f'{platform}/{package_file}-{clang_version}.tar.xz')
+
+  return object_names
+
+
+def main():
+  setdep_revisions = []
+
+  rust_object_infos = [
+      GetDepsObjectInfo(o) for o in sorted(GetRustObjectNames())
+  ]
+  rust_object_infos_string = '?'.join(rust_object_infos)
+  rust_deps_entry_path = 'src/third_party/rust-toolchain'
+  setdep_revisions.append(
+      f'--revision={rust_deps_entry_path}@{rust_object_infos_string}')
+
+  clang_object_infos = [
+      GetDepsObjectInfo(o) for o in sorted(GetClangObjectNames())
+  ]
+  clang_object_infos_string = '?'.join(clang_object_infos)
+  clang_deps_entry_path = 'src/third_party/llvm-build/Release+Asserts'
+  setdep_revisions.append(
+      f'--revision={clang_deps_entry_path}@{clang_object_infos_string}')
+
+  subprocess.run(['gclient', 'setdep'] + setdep_revisions)
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/clang/scripts/test_tool.py b/clang/scripts/test_tool.py
new file mode 100755
index 0000000000000000000000000000000000000000..76e34ba2e222c74809a711436ebf4cbdc45b3c8a
--- /dev/null
+++ b/clang/scripts/test_tool.py
@@ -0,0 +1,292 @@
+#!/usr/bin/env vpython3
+# Copyright 2013 The Chromium Authors
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Test harness for chromium clang tools."""
+
+from __future__ import print_function
+
+import argparse
+import difflib
+import glob
+import json
+import os
+import os.path
+import re
+import shutil
+import subprocess
+import sys
+
+
+def _RunGit(args):
+  if sys.platform == 'win32':
+    args = ['git.bat'] + args
+  else:
+    args = ['git'] + args
+  subprocess.check_call(args)
+
+
+def _GenerateCompileCommands(files, include_paths):
+  """Returns a JSON string containing a compilation database for the input."""
+  # Note: in theory, backslashes in the compile DB should work but the tools
+  # that write compile DBs and the tools that read them don't agree on the
+  # escaping convention: https://llvm.org/bugs/show_bug.cgi?id=19687
+  files = [f.replace('\\', '/') for f in files]
+  include_path_flags = ' '.join('-I %s' % include_path.replace('\\', '/')
+                                for include_path in include_paths)
+  return json.dumps([{
+      'directory': os.path.dirname(f),
+      'command': 'clang++ -std=c++20 -fsyntax-only %s %s' %
+                 (include_path_flags, os.path.basename(f)),
+      'file': os.path.basename(f)
+  } for f in files],
+                    indent=2)
+
+
+def _NumberOfTestsToString(tests):
+  """Returns an English phrase describing the number of tests."""
+  return '%d test%s' % (tests, 's' if tests != 1 else '')
+
+
+def _ApplyTool(tools_clang_scripts_directory, tool_to_test, tool_path,
+               tool_args, test_directory_for_tool, actual_files, apply_edits,
+               extract_edits_path):
+  try:
+    # Stage the test files in the git index. If they aren't staged, then
+    # run_tool.py will skip them when applying replacements.
+    args = ['add']
+    args.extend(actual_files)
+    _RunGit(args)
+
+    # Launch the following pipeline if |apply_edits| is True:
+    #     run_tool.py ... | extract_edits.py | apply_edits.py ...
+    # Otherwise just the first step is done and the result is written to
+    # actual_files[0].
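+    # Roughly, with illustrative placeholders (the real invocations are
+    # constructed below):
+    #
+    #   run_tool.py --tool <tool> -p <tests dir> <files...> \
+    #       | extract_edits.py | apply_edits.py -p <tests dir> <files...>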
+ processes = [] + args = ['python', + os.path.join(tools_clang_scripts_directory, 'run_tool.py')] + extra_run_tool_args_path = os.path.join(test_directory_for_tool, + 'run_tool.args') + if os.path.exists(extra_run_tool_args_path): + with open(extra_run_tool_args_path, 'r') as extra_run_tool_args_file: + extra_run_tool_args = extra_run_tool_args_file.readlines() + args.extend([arg.strip() for arg in extra_run_tool_args]) + args.extend(['--tool', tool_to_test, '-p', test_directory_for_tool]) + + if tool_path: + args.extend(['--tool-path', tool_path]) + if tool_args: + for arg in tool_args: + args.append('--tool-arg=%s' % arg) + + args.extend(actual_files) + processes.append(subprocess.Popen(args, stdout=subprocess.PIPE)) + + if apply_edits: + if not extract_edits_path: + args = [ + 'python', + os.path.join(tools_clang_scripts_directory, 'extract_edits.py') + ] + processes.append( + subprocess.Popen(args, + stdin=processes[-1].stdout, + stdout=subprocess.PIPE)) + else: + args = ['python', os.path.join(extract_edits_path, 'extract_edits.py')] + processes.append( + subprocess.Popen(args, + stdin=processes[-1].stdout, + stdout=subprocess.PIPE)) + + args = [ + 'python', + os.path.join(tools_clang_scripts_directory, 'apply_edits.py'), '-p', + test_directory_for_tool + ] + args.extend(actual_files) # Limit edits to the test files. + processes.append(subprocess.Popen( + args, stdin=processes[-1].stdout, stdout=subprocess.PIPE)) + + # Wait for the pipeline to finish running + check exit codes. + stdout, _ = processes[-1].communicate() + for process in processes: + process.wait() + if process.returncode != 0: + print('Failure while running the tool.') + return process.returncode + + if apply_edits: + # Reformat the resulting edits via: git cl format. + args = ['cl', 'format'] + args.extend(actual_files) + _RunGit(args) + else: + with open(actual_files[0], 'w') as output_file: + output_file.write(stdout.decode('utf-8')) + + return 0 + + finally: + # No matter what, unstage the git changes we made earlier to avoid polluting + # the index. 
+ args = ['reset', '--quiet', 'HEAD'] + args.extend(actual_files) + _RunGit(args) + + +def _NormalizePathInRawOutput(path, test_dir): + if not os.path.isabs(path): + path = os.path.join(test_dir, path) + + return os.path.relpath(path, test_dir) + + +def _NormalizeSingleRawOutputLine(output_line, test_dir): + if not re.match('^[^:]+(:::.*){4,4}$', output_line): + return output_line + + edit_type, path, offset, length, replacement = output_line.split(':::', 4) + path = _NormalizePathInRawOutput(path, test_dir) + return "%s:::%s:::%s:::%s:::%s" % (edit_type, path, offset, length, + replacement) + + +def _NormalizeRawOutput(output_lines, test_dir): + return list( + map(lambda line: _NormalizeSingleRawOutputLine(line, test_dir), + output_lines)) + + +def main(argv): + parser = argparse.ArgumentParser() + parser.add_argument( + '--apply-edits', + action='store_true', + help='Applies the edits to the original test files and compares the ' + 'reformatted new files with the expected files.') + parser.add_argument( + '--tool-arg', nargs='?', action='append', + help='optional arguments passed to the tool') + parser.add_argument( + '--tool-path', nargs='?', + help='optional path to the tool directory') + parser.add_argument('tool_name', + nargs=1, + help='Clang tool to be tested.') + parser.add_argument( + '--test-filter', default='*', help='optional glob filter for test names') + parser.add_argument('--extract-edits-path', + nargs='?', + help='optional path to the extract_edits script\ + [e.g. if custom filtering or post-processing of edits is needed]') + args = parser.parse_args(argv) + tool_to_test = args.tool_name[0] + print('\nTesting %s\n' % tool_to_test) + tools_clang_scripts_directory = os.path.dirname(os.path.realpath(__file__)) + tools_clang_directory = os.path.dirname(tools_clang_scripts_directory) + test_directory_for_tool = os.path.join( + tools_clang_directory, tool_to_test, 'tests') + compile_database = os.path.join(test_directory_for_tool, + 'compile_commands.json') + source_files = glob.glob( + os.path.join(test_directory_for_tool, + '%s-original.cc' % args.test_filter)) + ext = 'cc' if args.apply_edits else 'txt' + actual_files = ['-'.join([source_file.rsplit('-', 1)[0], 'actual.cc']) + for source_file in source_files] + expected_files = ['-'.join([source_file.rsplit('-', 1)[0], 'expected.' + ext]) + for source_file in source_files] + if not args.apply_edits and len(actual_files) != 1: + print('Only one test file is expected for testing without apply-edits.') + return 1 + + include_paths = [] + include_paths.append( + os.path.realpath(os.path.join(tools_clang_directory, '../..'))) + # Many gtest and gmock headers expect to have testing/gtest/include and/or + # testing/gmock/include in the include search path. + include_paths.append( + os.path.realpath(os.path.join(tools_clang_directory, + '../..', + 'testing/gtest/include'))) + include_paths.append( + os.path.realpath(os.path.join(tools_clang_directory, + '../..', + 'testing/gmock/include'))) + + include_paths.append( + os.path.realpath( + os.path.join(tools_clang_directory, '../..', + 'third_party/googletest/src/googletest/include'))) + + include_paths.append( + os.path.realpath( + os.path.join(tools_clang_directory, '../..', + 'third_party/googletest/src/googlemock/include'))) + + if len(actual_files) == 0: + print('Tool "%s" does not have compatible test files.' % tool_to_test) + return 1 + + # Set up the test environment. 
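+  # Test inputs follow a naming convention: each <name>-original.cc under the
+  # tool's tests/ directory is copied to <name>-actual.cc, run through the
+  # tool, and compared against <name>-expected.cc (or .txt when edits are not
+  # applied).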
+ for source, actual in zip(source_files, actual_files): + shutil.copyfile(source, actual) + # Generate a temporary compilation database to run the tool over. + with open(compile_database, 'w') as f: + f.write(_GenerateCompileCommands(actual_files, include_paths)) + + # Run the tool. + os.chdir(test_directory_for_tool) + exitcode = _ApplyTool(tools_clang_scripts_directory, tool_to_test, + args.tool_path, args.tool_arg, test_directory_for_tool, + actual_files, args.apply_edits, args.extract_edits_path) + if (exitcode != 0): + return exitcode + + # Compare actual-vs-expected results. + passed = 0 + failed = 0 + for expected, actual in zip(expected_files, actual_files): + print('[ RUN ] %s' % os.path.relpath(actual)) + expected_output = actual_output = None + with open(expected, 'r') as f: + expected_output = f.readlines() + with open(actual, 'r') as f: + actual_output = f.readlines() + if not args.apply_edits: + actual_output = _NormalizeRawOutput(actual_output, + test_directory_for_tool) + expected_output = _NormalizeRawOutput(expected_output, + test_directory_for_tool) + if actual_output != expected_output: + failed += 1 + lines = difflib.unified_diff(expected_output, actual_output, + fromfile=os.path.relpath(expected), + tofile=os.path.relpath(actual)) + sys.stdout.writelines(lines) + print('[ FAILED ] %s' % os.path.relpath(actual)) + # Don't clean up the file on failure, so the results can be referenced + # more easily. + continue + print('[ OK ] %s' % os.path.relpath(actual)) + passed += 1 + os.remove(actual) + + if failed == 0: + os.remove(compile_database) + + print('[==========] %s ran.' % _NumberOfTestsToString(len(source_files))) + if passed > 0: + print('[ PASSED ] %s.' % _NumberOfTestsToString(passed)) + if failed > 0: + print('[ FAILED ] %s.' % _NumberOfTestsToString(failed)) + return 1 + + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) diff --git a/clang/scripts/update.py b/clang/scripts/update.py new file mode 100755 index 0000000000000000000000000000000000000000..f7e967557a15b0986dda0096a28470d81f2246aa --- /dev/null +++ b/clang/scripts/update.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +# Copyright 2012 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"""This script is used to download prebuilt clang binaries. It runs as a +"gclient hook" in Chromium checkouts. + +It can also be run stand-alone as a convenient way of installing a well-tested +near-tip-of-tree clang version: + + $ curl -s https://raw.githubusercontent.com/chromium/chromium/main/tools/clang/scripts/update.py | python3 - --output-dir=/tmp/clang + +(Note that the output dir may be deleted and re-created if it exists.) +""" + +import sys +assert sys.version_info >= (3, 0), 'This script requires Python 3.' + +import argparse +import glob +import os +import platform +import shutil +import stat +import tarfile +import tempfile +import time +import urllib.request +import urllib.error +import zipfile +import zlib + + +# Do NOT CHANGE this if you don't know what you're doing -- see +# https://chromium.googlesource.com/chromium/src/+/main/docs/updating_clang.md +# Reverting problematic clang rolls is safe, though. +# This is the output of `git describe` and is usable as a commit-ish. 
+CLANG_REVISION = 'llvmorg-20-init-9764-gb81d8e90' +CLANG_SUB_REVISION = 6 + +PACKAGE_VERSION = '%s-%s' % (CLANG_REVISION, CLANG_SUB_REVISION) +RELEASE_VERSION = '20' + +CDS_URL = os.environ.get('CDS_CLANG_BUCKET_OVERRIDE', + 'https://commondatastorage.googleapis.com/chromium-browser-clang') + +# Path constants. (All of these should be absolute paths.) +THIS_DIR = os.path.abspath(os.path.dirname(__file__)) +CHROMIUM_DIR = os.path.abspath(os.path.join(THIS_DIR, '..', '..', '..')) +LLVM_BUILD_DIR = os.path.join(CHROMIUM_DIR, 'third_party', 'llvm-build', + 'Release+Asserts') + +STAMP_FILENAME = 'cr_build_revision' +STAMP_FILE = os.path.normpath(os.path.join(LLVM_BUILD_DIR, STAMP_FILENAME)) +OLD_STAMP_FILE = os.path.normpath( + os.path.join(LLVM_BUILD_DIR, '..', STAMP_FILENAME)) +FORCE_HEAD_REVISION_FILE = os.path.normpath( + os.path.join(LLVM_BUILD_DIR, '..', 'force_head_revision')) + + +def RmTree(dir): + """Delete dir.""" + if sys.platform == 'win32': + # Avoid problems with paths longer than MAX_PATH + # https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation + dir = f'\\\\?\\{dir}' + + def ChmodAndRetry(func, path, _): + # Subversion can leave read-only files around. + if not os.access(path, os.W_OK): + os.chmod(path, stat.S_IWUSR) + return func(path) + raise + shutil.rmtree(dir, onerror=ChmodAndRetry) + + +def ReadStampFile(path): + """Return the contents of the stamp file, or '' if it doesn't exist.""" + try: + with open(path, 'r') as f: + return f.read().rstrip() + except IOError: + return '' + + +def WriteStampFile(s, path): + """Write s to the stamp file.""" + EnsureDirExists(os.path.dirname(path)) + with open(path, 'w') as f: + f.write(s) + f.write('\n') + + +def DownloadUrl(url, output_file): + """Download url into output_file.""" + CHUNK_SIZE = 4096 + TOTAL_DOTS = 10 + num_retries = 3 + retry_wait_s = 5 # Doubled at each retry. + + while True: + try: + sys.stdout.write(f'Downloading {url} ') + sys.stdout.flush() + request = urllib.request.Request(url) + request.add_header('Accept-Encoding', 'gzip') + response = urllib.request.urlopen(request) + total_size = None + if 'Content-Length' in response.headers: + total_size = int(response.headers['Content-Length'].strip()) + + is_gzipped = response.headers.get('Content-Encoding', + '').strip() == 'gzip' + if is_gzipped: + gzip_decode = zlib.decompressobj(zlib.MAX_WBITS + 16) + + bytes_done = 0 + dots_printed = 0 + while True: + chunk = response.read(CHUNK_SIZE) + if not chunk: + break + bytes_done += len(chunk) + + if is_gzipped: + chunk = gzip_decode.decompress(chunk) + output_file.write(chunk) + + if total_size is not None: + num_dots = TOTAL_DOTS * bytes_done // total_size + sys.stdout.write('.' 
* (num_dots - dots_printed)) + sys.stdout.flush() + dots_printed = num_dots + if total_size is not None and bytes_done != total_size: + raise urllib.error.URLError( + f'only got {bytes_done} of {total_size} bytes') + if is_gzipped: + output_file.write(gzip_decode.flush()) + print(' Done.') + return + except (ConnectionError, urllib.error.URLError) as e: + sys.stdout.write('\n') + print(e) + if num_retries == 0 or isinstance( + e, urllib.error.HTTPError) and e.code == 404: + raise e + num_retries -= 1 + output_file.seek(0) + output_file.truncate() + print(f'Retrying in {retry_wait_s} s ...') + sys.stdout.flush() + time.sleep(retry_wait_s) + retry_wait_s *= 2 + + +def EnsureDirExists(path): + if not os.path.exists(path): + os.makedirs(path) + + +def DownloadAndUnpack(url, output_dir, path_prefixes=None, is_known_zip=False): + """Download an archive from url and extract into output_dir. If path_prefixes + is not None, only extract files whose paths within the archive start with + any prefix in path_prefixes.""" + with tempfile.TemporaryFile() as f: + DownloadUrl(url, f) + f.seek(0) + EnsureDirExists(output_dir) + if url.endswith('.zip') or is_known_zip: + assert path_prefixes is None + zipfile.ZipFile(f).extractall(path=output_dir) + else: + t = tarfile.open(mode='r:*', fileobj=f) + members = None + if path_prefixes is not None: + members = [m for m in t.getmembers() + if any(m.name.startswith(p) for p in path_prefixes)] + t.extractall(path=output_dir, members=members) + + +def GetPlatformUrlPrefix(host_os): + _HOST_OS_URL_MAP = { + 'linux': 'Linux_x64', + 'mac': 'Mac', + 'mac-arm64': 'Mac_arm64', + 'win': 'Win', + } + return CDS_URL + '/' + _HOST_OS_URL_MAP[host_os] + '/' + + +def DownloadAndUnpackPackage(package_file, + output_dir, + host_os, + version=PACKAGE_VERSION): + cds_file = "%s-%s.tar.xz" % (package_file, version) + cds_full_url = GetPlatformUrlPrefix(host_os) + cds_file + try: + DownloadAndUnpack(cds_full_url, output_dir) + except urllib.error.URLError: + print('Failed to download prebuilt clang package %s' % cds_file) + print('Use build.py if you want to build locally.') + print('Exiting.') + sys.exit(1) + + +def DownloadAndUnpackClangMacRuntime(output_dir): + cds_file = "clang-mac-runtime-library-%s.tar.xz" % PACKAGE_VERSION + # We run this only for the runtime libraries, and 'mac' and 'mac-arm64' both + # have the same (universal) runtime libraries. It doesn't matter which one + # we download here. 
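+ # With the default bucket, the URL below resolves to something of the shape
+ # https://commondatastorage.googleapis.com/chromium-browser-clang/Mac/
+ # clang-mac-runtime-library-<PACKAGE_VERSION>.tar.xz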
+ cds_full_url = GetPlatformUrlPrefix('mac') + cds_file + try: + DownloadAndUnpack(cds_full_url, output_dir) + except urllib.error.URLError: + print('Failed to download prebuilt clang %s' % cds_file) + print('Use build.py if you want to build locally.') + print('Exiting.') + sys.exit(1) + + +def DownloadAndUnpackClangWinRuntime(output_dir): + cds_file = "clang-win-runtime-library-%s.tar.xz" % PACKAGE_VERSION + cds_full_url = GetPlatformUrlPrefix('win') + cds_file + try: + DownloadAndUnpack(cds_full_url, output_dir) + except urllib.error.URLError: + print('Failed to download prebuilt clang %s' % cds_file) + print('Use build.py if you want to build locally.') + print('Exiting.') + sys.exit(1) + + +def UpdatePackage(package_name, host_os, dir=LLVM_BUILD_DIR): + stamp_file = None + package_file = None + + stamp_file = os.path.join(dir, package_name + '_revision') + if package_name == 'clang': + stamp_file = STAMP_FILE + package_file = 'clang' + elif package_name == 'coverage_tools': + stamp_file = os.path.join(dir, 'cr_coverage_revision') + package_file = 'llvm-code-coverage' + elif package_name == 'objdump': + package_file = 'llvmobjdump' + elif package_name in ['clang-tidy', 'clangd', 'libclang', 'translation_unit']: + package_file = package_name + else: + print('Unknown package: "%s".' % package_name) + return 1 + + assert stamp_file is not None + assert package_file is not None + + # TODO(hans): Create a clang-win-runtime package and use separate DEPS hook. + target_os = [] + if package_name == 'clang': + try: + GCLIENT_CONFIG = os.path.join(os.path.dirname(CHROMIUM_DIR), '.gclient') + env = {} + exec (open(GCLIENT_CONFIG).read(), env, env) + target_os = env.get('target_os', target_os) + except: + pass + + if os.path.exists(OLD_STAMP_FILE): + # Delete the old stamp file so it doesn't look like an old version of clang + # is available in case the user rolls back to an old version of this script + # during a bisect for example (crbug.com/988933). + os.remove(OLD_STAMP_FILE) + + expected_stamp = ','.join([PACKAGE_VERSION] + target_os) + # This file is created by first class GCS deps. If this file exists, + # clear the entire directory and download with this script instead. + if glob.glob(os.path.join(dir, '.*_is_first_class_gcs')): + RmTree(dir) + elif ReadStampFile(stamp_file) == expected_stamp: + return 0 + + # Updating the main clang package nukes the output dir. Any other packages + # need to be updated *after* the clang package. + if package_name == 'clang' and os.path.exists(dir): + RmTree(dir) + + DownloadAndUnpackPackage(package_file, dir, host_os) + + if package_name == 'clang' and 'mac' in target_os: + DownloadAndUnpackClangMacRuntime(dir) + if package_name == 'clang' and 'win' in target_os: + # When doing win/cross builds on other hosts, get the Windows runtime + # libraries, and llvm-symbolizer.exe (needed in asan builds). 
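+ # (These come from the Win prefix of the bucket regardless of host OS,
+ # i.e. <CDS_URL>/Win/clang-win-runtime-library-<PACKAGE_VERSION>.tar.xz.)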
+ DownloadAndUnpackClangWinRuntime(dir) + + WriteStampFile(expected_stamp, stamp_file) + return 0 + + +def GetDefaultHostOs(): + _PLATFORM_HOST_OS_MAP = { + 'darwin': 'mac', + 'cygwin': 'win', + 'linux2': 'linux', + 'win32': 'win', + } + default_host_os = _PLATFORM_HOST_OS_MAP.get(sys.platform, sys.platform) + if default_host_os == 'mac' and platform.machine() == 'arm64': + default_host_os = 'mac-arm64' + return default_host_os + + +def main(): + parser = argparse.ArgumentParser(description='Update clang.') + parser.add_argument('--output-dir', + help='Where to extract the package.') + parser.add_argument('--package', + help='What package to update (default: clang)', + default='clang') + parser.add_argument('--host-os', + help=('Which host OS to download for ' + '(default: %(default)s)'), + default=GetDefaultHostOs(), + choices=('linux', 'mac', 'mac-arm64', 'win')) + parser.add_argument('--print-revision', action='store_true', + help='Print current clang revision and exit.') + parser.add_argument('--llvm-force-head-revision', action='store_true', + help='Print locally built revision with --print-revision') + parser.add_argument('--print-clang-version', action='store_true', + help=('Print current clang release version (e.g. 9.0.0) ' + 'and exit.')) + parser.add_argument('--verify-version', + help='Verify that clang has the passed-in version.') + args = parser.parse_args() + + if args.verify_version and args.verify_version != RELEASE_VERSION: + print('RELEASE_VERSION is %s but --verify-version argument was %s.' % ( + RELEASE_VERSION, args.verify_version)) + print('clang_version in build/toolchain/toolchain.gni is likely outdated.') + return 1 + + if args.print_clang_version: + print(RELEASE_VERSION) + return 0 + + output_dir = LLVM_BUILD_DIR + if args.output_dir: + global STAMP_FILE + output_dir = os.path.abspath(args.output_dir) + STAMP_FILE = os.path.join(output_dir, STAMP_FILENAME) + + if args.print_revision: + if args.llvm_force_head_revision: + force_head_revision = ReadStampFile(FORCE_HEAD_REVISION_FILE) + if force_head_revision == '': + print('No locally built version found!') + return 1 + print(force_head_revision) + return 0 + + stamp_version = ReadStampFile(STAMP_FILE).partition(',')[0] + if PACKAGE_VERSION != stamp_version: + print('The expected clang version is %s but the actual version is %s' % + (PACKAGE_VERSION, stamp_version)) + print('Did you run "gclient sync"?') + return 1 + + print(PACKAGE_VERSION) + return 0 + + if args.llvm_force_head_revision: + print('--llvm-force-head-revision can only be used for --print-revision') + return 1 + + return UpdatePackage(args.package, args.host_os, output_dir) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/clang/scripts/upload_revision.py b/clang/scripts/upload_revision.py new file mode 100755 index 0000000000000000000000000000000000000000..589bdd1a9ff702ca1718cf83b3313eec3a513280 --- /dev/null +++ b/clang/scripts/upload_revision.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 +# Copyright 2016 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+
+"""This script takes a Clang git revision as an argument. It then
+creates a feature branch, puts this revision into update.py, uploads
+a CL, triggers the Clang Upload try bots, and tells you what to do
+next."""
+
+from __future__ import print_function
+
+import argparse
+import itertools
+import os
+import re
+import subprocess
+import sys
+import urllib.request
+
+from build import (CheckoutGitRepo, GetCommitDescription, GetLatestLLVMCommit,
+ LLVM_DIR, LLVM_GIT_URL, RunCommand)
+from update import CHROMIUM_DIR, DownloadAndUnpack
+
+# Access to //tools/rust
+sys.path.append(
+ os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..',
+ 'rust'))
+
+from build_rust import RUST_GIT_URL, RUST_SRC_DIR, GetLatestRustCommit
+
+# Path constants.
+THIS_DIR = os.path.dirname(__file__)
+CHROMIUM_DIR = os.path.abspath(os.path.join(THIS_DIR, '..', '..', '..'))
+CLANG_UPDATE_PY_PATH = os.path.join(THIS_DIR, 'update.py')
+RUST_UPDATE_PY_PATH = os.path.join(THIS_DIR, '..', '..', 'rust',
+ 'update_rust.py')
+BUILD_RUST_PY_PATH = os.path.join(THIS_DIR, '..', '..', 'rust', 'build_rust.py')
+
+# Bots where we build Clang + Rust.
+BUILD_CLANG_BOTS = [
+ 'linux_upload_clang',
+ 'mac_upload_clang',
+ 'mac_upload_clang_arm',
+ 'win_upload_clang',
+]
+BUILD_RUST_BOTS = [
+ 'linux_upload_rust',
+ 'mac_upload_rust',
+ 'mac_upload_rust_arm',
+ 'win_upload_rust',
+]
+
+# Keep lines in here at <= 72 columns, else they wrap in Gerrit.
+# There can be no whitespace line between or below these Gerrit footers.
+COMMIT_FOOTER = \
+'''
+Bug: TODO. Remove the Tricium: line below when filling this in.
+Tricium: skip
+Disable-Rts: True
+Cq-Include-Trybots: chromium/try:chromeos-amd64-generic-cfi-thin-lto-rel
+Cq-Include-Trybots: chromium/try:dawn-win10-x86-deps-rel
+Cq-Include-Trybots: chromium/try:linux-chromeos-dbg
+Cq-Include-Trybots: chromium/try:linux_chromium_cfi_rel_ng
+Cq-Include-Trybots: chromium/try:linux_chromium_chromeos_msan_rel_ng
+Cq-Include-Trybots: chromium/try:linux_chromium_msan_rel_ng
+Cq-Include-Trybots: chromium/try:mac11-arm64-rel,mac_chromium_asan_rel_ng
+Cq-Include-Trybots: chromium/try:ios-catalyst,win-asan,android-official
+Cq-Include-Trybots: chromium/try:fuchsia-arm64-cast-receiver-rel
+Cq-Include-Trybots: chromium/try:mac-official,linux-official
+Cq-Include-Trybots: chromium/try:win-official,win32-official
+Cq-Include-Trybots: chromium/try:win-arm64-rel
+Cq-Include-Trybots: chromium/try:linux-swangle-try-x64,win-swangle-try-x86
+Cq-Include-Trybots: chromium/try:android-cronet-mainline-clang-arm64-dbg
+Cq-Include-Trybots: chromium/try:android-cronet-mainline-clang-arm64-rel
+Cq-Include-Trybots: chromium/try:android-cronet-mainline-clang-riscv64-dbg
+Cq-Include-Trybots: chromium/try:android-cronet-mainline-clang-riscv64-rel
+Cq-Include-Trybots: chromium/try:android-cronet-mainline-clang-x86-dbg
+Cq-Include-Trybots: chromium/try:android-cronet-mainline-clang-x86-rel
+Cq-Include-Trybots: chromium/try:android-cronet-riscv64-dbg
+Cq-Include-Trybots: chromium/try:android-cronet-riscv64-rel
+Cq-Include-Trybots: chrome/try:iphone-device,ipad-device
+Cq-Include-Trybots: chrome/try:linux-chromeos-chrome
+Cq-Include-Trybots: chrome/try:win-chrome,win64-chrome,linux-chrome,mac-chrome
+Cq-Include-Trybots: chrome/try:linux-pgo,mac-pgo,win32-pgo,win64-pgo'''
+
+RUST_BOTS = \
+'''Cq-Include-Trybots: chromium/try:android-rust-arm32-rel
+Cq-Include-Trybots: chromium/try:android-rust-arm64-dbg
+Cq-Include-Trybots: chromium/try:android-rust-arm64-rel
+Cq-Include-Trybots: chromium/try:linux-rust-x64-dbg
+Cq-Include-Trybots: chromium/try:linux-rust-x64-rel
+Cq-Include-Trybots: chromium/try:mac-rust-x64-dbg
+Cq-Include-Trybots: chromium/try:win-rust-x64-dbg
+Cq-Include-Trybots: chromium/try:win-rust-x64-rel'''
+
+is_win = sys.platform == 'win32'
+
+
+class RustVersion:
+ """Holds the nightly Rust version in an explicit format."""
+
+ def __init__(self, git_hash: str, sub_revision: int):
+ self.git_hash = git_hash
+ self.short_git_hash = git_hash[:12]
+ self.sub_revision = sub_revision
+
+ def __str__(self) -> str:
+ """A string containing the Rust version and sub-revision.
+
+ The string is human-readable: it contains all the info needed to identify
+ the Rust version being built, and it is unique to a given Rust version
+ and sub-revision.
+ """
+ return f'{self.git_hash}-{self.sub_revision}'
+
+ def __eq__(self, o) -> bool:
+ return (self.git_hash == o.git_hash and self.sub_revision == o.sub_revision)
+
+
+class ClangVersion:
+ """Holds the Clang version in an explicit format."""
+
+ def __init__(self, git_describe: str, sub_revision: str):
+ self.git_describe = git_describe
+ self.short_git_hash = re.search('-g([0-9a-f]+)', git_describe).group(1)
+ self.sub_revision = int(sub_revision)
+
+ def __str__(self) -> str:
+ """A string containing the Clang version and sub-revision.
+
+ The string is human-readable: it contains all the info needed to identify
+ the Clang version being built, and it is unique to a given Clang version
+ and sub-revision.
+ """
+ return f'{self.git_describe}-{self.sub_revision}'
+
+ def __eq__(self, o) -> bool:
+ return (self.git_describe == o.git_describe
+ and self.sub_revision == o.sub_revision)
+
+
+def PatchClangRevision(new_version: ClangVersion) -> ClangVersion:
+ with open(CLANG_UPDATE_PY_PATH) as f:
+ content = f.read()
+
+ REV = '\'([0-9a-z-]+)\''
+ SUB_REV = '([0-9]+)'
+
+ git_describe = re.search(f'CLANG_REVISION = {REV}', content).group(1)
+ sub_revision = re.search(f'CLANG_SUB_REVISION = {SUB_REV}', content).group(1)
+ old_version = ClangVersion(git_describe, sub_revision)
+
+ content = re.sub(f'CLANG_REVISION = {REV}',
+ f'CLANG_REVISION = \'{new_version.git_describe}\'',
+ content,
+ count=1)
+ content = re.sub(f'CLANG_SUB_REVISION = {SUB_REV}',
+ f'CLANG_SUB_REVISION = {new_version.sub_revision}',
+ content,
+ count=1)
+
+ with open(CLANG_UPDATE_PY_PATH, 'w') as f:
+ f.write(content)
+
+ return old_version
+
+
+def PatchRustRevision(new_version: RustVersion) -> RustVersion:
+ with open(RUST_UPDATE_PY_PATH) as f:
+ content = f.read()
+
+ REV = '\'([0-9a-z-]+)\''
+ SUB_REV = '([0-9]+)'
+
+ git_hash = re.search(f'RUST_REVISION = {REV}', content).group(1)
+ sub_revision = re.search(f'RUST_SUB_REVISION = {SUB_REV}', content).group(1)
+ old_version = RustVersion(git_hash, sub_revision)
+
+ content = re.sub(f'RUST_REVISION = {REV}',
+ f'RUST_REVISION = \'{new_version.git_hash}\'',
+ content,
+ count=1)
+ content = re.sub(f'RUST_SUB_REVISION = {SUB_REV}',
+ f'RUST_SUB_REVISION = {new_version.sub_revision}',
+ content,
+ count=1)
+
+ with open(RUST_UPDATE_PY_PATH, 'w') as f:
+ f.write(content)
+
+ return old_version
+
+
+def PatchRustStage0():
+ verify_stage0 = subprocess.run(
+ [sys.executable, BUILD_RUST_PY_PATH, '--verify-stage0-hash'],
+ capture_output=True,
+ text=True)
+ if verify_stage0.returncode == 0:
+ return
+
+ # TODO(crbug.com/40252478): We're printing a warning that the hash has
+ # changed, but we could require a verification step of some sort here. We
+ # should do the same for both Rust and Clang if we do so.
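+ # On a mismatch, build_rust.py is expected to end its output with a line of
+ # the form 'Actual hash: <sha256 hex digest>'; the regex below extracts
+ # that digest and patches it into update_rust.py.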
+ print(verify_stage0.stdout)
+ lines = verify_stage0.stdout.splitlines()
+ m = re.match('Actual hash: +([0-9a-z]+)', lines[-1])
+ new_stage0_hash = m.group(1)
+
+ with open(RUST_UPDATE_PY_PATH) as f:
+ content = f.read()
+
+ STAGE0_HASH = '\'([0-9a-z]+)\''
+ content = re.sub(f'STAGE0_JSON_SHA256 = {STAGE0_HASH}',
+ f'STAGE0_JSON_SHA256 = \'{new_stage0_hash}\'',
+ content,
+ count=1)
+ with open(RUST_UPDATE_PY_PATH, 'w') as f:
+ f.write(content)
+
+
+def PatchRustRemoveOverride():
+ with open(RUST_UPDATE_PY_PATH) as f:
+ content = f.read()
+
+ REV = '([0-9a-z-]+)'
+ content = re.sub(f'OVERRIDE_CLANG_REVISION = \'{REV}\'',
+ 'OVERRIDE_CLANG_REVISION = None',
+ content,
+ count=1)
+ with open(RUST_UPDATE_PY_PATH, 'w') as f:
+ f.write(content)
+
+
+def Git(*args, no_run: bool):
+ """Runs a git command, or just prints it out if `no_run` is True."""
+ if no_run:
+ print('\033[91m', end='') # Color red
+ print('Skipped running: ', end='')
+ print('\033[0m', end='') # No color
+ print(*['git'] + [f'\'{i}\'' for i in list(args)], end='')
+ print()
+ else:
+ # Needs shell=True on Windows due to git.bat in depot_tools.
+ subprocess.check_call(['git'] + list(args), shell=is_win)
+
+
+def main():
+ parser = argparse.ArgumentParser(description='upload new clang revision')
+ # TODO(crbug.com/40250560): Remove this when the cron job doesn't pass a SHA.
+ parser.add_argument(
+ 'ignored',
+ nargs='?',
+ help='Ignored argument to handle the cron job passing a clang SHA')
+ parser.add_argument('--clang-git-hash',
+ type=str,
+ metavar='SHA1',
+ help='Clang git hash to build the toolchain for.')
+ parser.add_argument(
+ '--clang-sub-revision',
+ type=int,
+ default=1,
+ metavar='NUM',
+ help='Clang sub-revision to build the toolchain for. Defaults to 1.')
+ parser.add_argument('--rust-git-hash',
+ type=str,
+ metavar='SHA1',
+ help='Rust git hash to build the toolchain for.')
+ parser.add_argument(
+ '--rust-sub-revision',
+ type=int,
+ default=1,
+ metavar='NUM',
+ help='Rust sub-revision to build the toolchain for. Defaults to 1.')
+ parser.add_argument(
+ '--no-git',
+ action='store_true',
+ default=False,
+ help=('Print out `git` commands instead of running them. Still generates '
+ 'a local diff for debugging purposes.'))
+ parser.add_argument('--skip-rust',
+ action='store_true',
+ default=False,
+ help='Skip updating the Rust revision.')
+ parser.add_argument('--skip-clang',
+ action='store_true',
+ default=False,
+ help='Skip updating the Clang revision.')
+
+ args = parser.parse_args()
+
+ if args.skip_clang and args.skip_rust:
+ print('Cannot set both --skip-clang and --skip-rust.')
+ sys.exit(1)
+
+ if args.skip_clang:
+ clang_version = '-skipped-'
+ else:
+ if args.clang_git_hash:
+ clang_git_hash = args.clang_git_hash
+ else:
+ clang_git_hash = GetLatestLLVMCommit()
+ # GetCommitDescription() needs a checkout. On success, CheckoutGitRepo()
+ # makes `LLVM_DIR` the current working directory, so that we can call
+ # GetCommitDescription() without changing directory.
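+ # GetCommitDescription() returns `git describe` output of the same shape
+ # as CLANG_REVISION in update.py (e.g. 'llvmorg-NN-init-NNNN-g<hash>'),
+ # from which ClangVersion extracts the short git hash.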
+ CheckoutGitRepo("LLVM", LLVM_GIT_URL, clang_git_hash, LLVM_DIR) + clang_version = ClangVersion(GetCommitDescription(clang_git_hash), + args.clang_sub_revision) + os.chdir(CHROMIUM_DIR) + + if args.skip_rust: + rust_version = '-skipped-' + else: + if args.rust_git_hash: + rust_git_hash = args.rust_git_hash + else: + rust_git_hash = GetLatestRustCommit() + CheckoutGitRepo("Rust", RUST_GIT_URL, rust_git_hash, RUST_SRC_DIR) + rust_version = RustVersion(rust_git_hash, args.rust_sub_revision) + os.chdir(CHROMIUM_DIR) + + print(f'Making a patch for Clang {clang_version} and Rust {rust_version}') + + branch_name = f'clang-{clang_version}_rust-{rust_version}' + Git('checkout', 'origin/main', '-b', branch_name, no_run=args.no_git) + + old_clang_version = clang_version + if not args.skip_clang: + old_clang_version = PatchClangRevision(clang_version) + if args.skip_rust: + assert (clang_version != + old_clang_version), ('Change the sub-revision of Clang if there is ' + 'no major version change.') + else: + old_rust_version = PatchRustRevision(rust_version) + assert (clang_version != old_clang_version + or rust_version != old_rust_version), ( + 'Change the sub-revision of Clang or Rust if there is ' + 'no major version change.') + PatchRustStage0() + if not args.skip_clang: + PatchRustRemoveOverride() + + if args.skip_clang: + clang_change = '[skipping Clang]' + clang_change_log = '' + else: + clang_change = f'{old_clang_version} : {clang_version}' + clang_change_log = ( + f'{LLVM_GIT_URL}/+log/' + f'{old_clang_version.short_git_hash}..{clang_version.short_git_hash}' + f'\n\n') + + if args.skip_rust: + rust_change = '[skipping Rust]' + rust_change_log = '' + else: + rust_change = f'{old_rust_version} : {rust_version}' + rust_change_log = (f'{RUST_GIT_URL}/+log/' + f'{old_rust_version.short_git_hash}..' + f'{rust_version.short_git_hash}' + f'\n\n') + + title = f'Roll clang+rust {clang_change} / {rust_change}' + + cmd = ' '.join(sys.argv) + body = f'{clang_change_log}{rust_change_log}Ran: {cmd}' + + commit_message = f'{title}\n\n{body}\n{COMMIT_FOOTER}' + if not args.skip_rust: + commit_message += f'\n{RUST_BOTS}' + + Git('add', + CLANG_UPDATE_PY_PATH, + RUST_UPDATE_PY_PATH, + no_run=args.no_git) + Git('commit', '-m', commit_message, no_run=args.no_git) + Git('cl', 'upload', '-f', '--bypass-hooks', '--squash', no_run=args.no_git) + if not args.skip_clang: + Git('cl', + 'try', + '-B', + "chromium/try", + *itertools.chain(*[['-b', bot] for bot in BUILD_CLANG_BOTS]), + no_run=args.no_git) + + Git('cl', + 'try', + '-B', + "chromium/try", + *itertools.chain(*[['-b', bot] for bot in BUILD_RUST_BOTS]), + no_run=args.no_git) + + print('Please, wait until the try bots succeeded ' + 'and then push the binaries to RBE.') + print() + print('To update the Clang/Rust DEPS entries, run:\n ' + 'tools/clang/scripts/sync_deps.py') + print() + print('To regenerate BUILD.gn rules for Rust stdlib (needed if dep versions ' + 'in the stdlib change for example), run:\n tools/rust/gnrt_stdlib.py.') + print() + print('To update Abseil .def files, run:\n ' + 'third_party/abseil-cpp/generate_def_files.py') + + +if __name__ == '__main__': + sys.exit(main())