reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
#!/usr/bin/env python3
'''A utility to update LLVM IR CHECK lines in C/C++ FileCheck test files.

Example RUN lines in .c/.cc test files:

// RUN: %clang -emit-llvm -S %s -o - -O2 | FileCheck %s
// RUN: %clangxx -emit-llvm -S %s -o - -O2 | FileCheck -check-prefix=CHECK-A %s

Usage:

% utils/update_cc_test_checks.py --llvm-bin=release/bin test/a.cc
% utils/update_cc_test_checks.py --c-index-test=release/bin/c-index-test \
  --clang=release/bin/clang /tmp/c/a.cc
'''

import argparse
import collections
import distutils.spawn
import os
import shlex
import string
import subprocess
import sys
import re
import tempfile

from UpdateTestChecks import asm, common

# Prefix of the banner comment placed on the first line of regenerated tests;
# main() appends 'utils/<script name>' to it.
ADVERT = '// NOTE: Assertions have been autogenerated by '

# Matches a `// <PREFIX>[-NEXT|-NOT|-DAG|-LABEL]:` comment line.  Group 1 is the
# check prefix with the directive suffix stripped.
CHECK_RE = re.compile(r'^\s*//\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
# Matches a `// RUN:` line that starts in column 0.  Group 1 is the command.
# Must be a raw string: `\s` is not a valid str escape sequence.
RUN_LINE_RE = re.compile(r'^//\s*RUN:\s*(.*)$')

# Maps the leading substitution of a RUN line to the arguments that replace it
# when the command is re-run through the clang executable.
SUBST = {
    '%clang': [],
    '%clang_cc1': ['-cc1'],
    '%clangxx': ['--driver-mode=g++'],
}

def get_line2spell_and_mangled(args, clang_args):
  """Map source lines to the functions defined on them.

  Runs `args.c_index_test` twice: first with `-write-pch` (forwarding
  `clang_args`) to produce a precompiled header in a temporary file, then
  with `-test-print-mangle` on that file.  The second run's output is scanned
  for `FunctionDecl=... (Definition)` entries and their `[mangled=...]` names.

  Returns a dict keyed by 0-based line number whose values are
  `(spelling, mangled_name)` tuples.  If the `-write-pch` step fails, its
  combined output is echoed to stderr and the process exits with status 2.
  """
  decl_re = re.compile(r'^FunctionDecl=(\w+):(\d+):\d+ \(Definition\)')
  mangle_re = re.compile(r'.*\[mangled=([^]]+)\]')

  with tempfile.NamedTemporaryFile() as pch:
    # TODO Make c-index-test print mangled names without circumventing through precompiled headers
    write_pch = subprocess.run(
        [args.c_index_test, '-write-pch', pch.name] + list(clang_args),
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    if write_pch.returncode != 0:
      sys.stderr.write(write_pch.stdout.decode())
      sys.exit(2)
    listing = subprocess.check_output(
        [args.c_index_test, '-test-print-mangle', pch.name])
    if sys.version_info[0] > 2:
      listing = listing.decode()

  line2func = {}
  # (spelling, line number) of the most recent definition still waiting for
  # its mangled name, or None when nothing is pending.
  pending = None
  for text in listing.splitlines():
    # The mangled name is sometimes printed on a later line than the
    # declaration itself (this can happen with block comments in front of
    # functions), so remember the declaration until a mangled name shows up.
    decl = decl_re.match(text)
    if decl:
      pending = decl.groups()
    mangle = mangle_re.match(text)
    if pending is None or not mangle:
      continue

    spell, lineno = pending
    pending = None
    mangled = mangle.group(1)
    if mangled == '_' + spell:
      # HACK for MacOS (where the mangled name includes an _ for C but the IR won't):
      mangled = spell
    # Note -test-print-mangle does not print file names so if #include is used,
    # the line number may come from an included file.
    line2func[int(lineno) - 1] = (spell, mangled)

  if args.verbose:
    for index in sorted(line2func):
      print('line {}: found function {}'.format(index + 1, line2func[index]),
            file=sys.stderr)
  return line2func


def _default_tool_path(explicit, llvm_bin, tool):
  """Return `explicit` if given, otherwise `tool` located under `llvm_bin`.

  When neither an explicit path nor `llvm_bin` is supplied the bare tool name
  is returned, so the caller ends up searching PATH for it.
  """
  if explicit is not None:
    return explicit
  if llvm_bin is None:
    return tool
  return os.path.join(llvm_bin, tool)


def config():
  """Parse the command line and resolve the external tool paths.

  Returns the argparse namespace with these fields normalised:
    clang_args    list of extra clang arguments (shlex-split, possibly empty)
    clang         clang executable; required
    c_index_test  c-index-test executable; required
    opt           opt executable, or None when it cannot be found

  Exits with status 1 when clang or c-index-test cannot be located.
  """
  # distutils (previously used for find_executable) was removed in
  # Python 3.12; shutil.which is the standard-library replacement.  It is
  # imported here so this function does not depend on the file's import list.
  import shutil

  parser = argparse.ArgumentParser(
      description=__doc__,
      formatter_class=argparse.RawTextHelpFormatter)
  parser.add_argument('-v', '--verbose', action='store_true')
  parser.add_argument('--llvm-bin', help='llvm $prefix/bin path')
  parser.add_argument('--clang',
                      help='"clang" executable, defaults to $llvm_bin/clang')
  parser.add_argument('--clang-args',
                      help='Space-separated extra args to clang, e.g. --clang-args=-v')
  parser.add_argument('--c-index-test',
                      help='"c-index-test" executable, defaults to $llvm_bin/c-index-test')
  parser.add_argument('--opt',
                      help='"opt" executable, defaults to $llvm_bin/opt')
  parser.add_argument(
      '--functions', nargs='+', help='A list of function name regexes. '
      'If specified, update CHECK lines for functions matching at least one regex')
  parser.add_argument(
      '--x86_extra_scrub', action='store_true',
      help='Use more regex for x86 matching to reduce diffs between various subtargets')
  parser.add_argument('-u', '--update-only', action='store_true',
                      help='Only update test if it was already autogened')
  parser.add_argument('tests', nargs='+')
  args = parser.parse_args()
  args.clang_args = shlex.split(args.clang_args or '')

  args.clang = _default_tool_path(args.clang, args.llvm_bin, 'clang')
  if not shutil.which(args.clang):
    print('Please specify --llvm-bin or --clang', file=sys.stderr)
    sys.exit(1)

  args.opt = _default_tool_path(args.opt, args.llvm_bin, 'opt')
  if not shutil.which(args.opt):
    # Many uses of this tool will not need an opt binary, because it's only
    # needed for updating a test that runs clang | opt | FileCheck. So we
    # defer this error message until we find that opt is actually needed.
    args.opt = None

  args.c_index_test = _default_tool_path(
      args.c_index_test, args.llvm_bin, 'c-index-test')
  if not shutil.which(args.c_index_test):
    print('Please specify --llvm-bin or --c-index-test', file=sys.stderr)
    sys.exit(1)

  return args


def get_function_body(args, filename, clang_args, extra_commands, prefixes, triple_in_cmd, func_dict):
  """Run one RUN-line pipeline and record the resulting function bodies.

  Invokes `args.clang` with `clang_args` on `filename`, then feeds the output
  through each command in `extra_commands` (via a temporary file), and finally
  hands the result to common.build_function_body_dictionary together with
  `prefixes`, `func_dict` and `args.verbose`.

  A leading `opt` in an extra command is replaced by `args.opt`.
  `triple_in_cmd` is accepted for interface compatibility but is not used.

  Exits with status 1 if `clang_args` lacks `-emit-llvm`, or if an extra
  command needs `opt` and `args.opt` is None.
  """
  # TODO Clean up duplication of asm/common build_function_body_dictionary
  # Reject unsupported command lines before spawning any tool, rather than
  # after clang and every extra command have already been run.
  if '-emit-llvm' not in clang_args:
    print('The clang command line should include -emit-llvm as asm tests '
          'are discouraged in Clang testsuite.', file=sys.stderr)
    sys.exit(1)

  # Invoke external tool and extract function bodies.
  raw_tool_output = common.invoke_tool(args.clang, clang_args, filename)
  for extra_command in extra_commands:
    extra_args = shlex.split(extra_command)
    if extra_args[0] == 'opt':
      if args.opt is None:
        print(filename, 'needs to run opt. '
              'Please specify --llvm-bin or --opt', file=sys.stderr)
        sys.exit(1)
      extra_args[0] = args.opt
    with tempfile.NamedTemporaryFile() as f:
      # The previous stage's output becomes this stage's input file.
      f.write(raw_tool_output.encode())
      f.flush()
      raw_tool_output = common.invoke_tool(extra_args[0],
                                           extra_args[1:], f.name)

  common.build_function_body_dictionary(
          common.OPT_FUNCTION_RE, common.scrub_body, [],
          raw_tool_output, prefixes, func_dict, args.verbose)


def _join_run_lines(input_lines):
  """Return the RUN commands found in `input_lines`, merging `\\` continuations."""
  run_lines = []
  for text in input_lines:
    m = RUN_LINE_RE.match(text)
    if not m:
      continue
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + m.group(1)
    else:
      run_lines.append(m.group(1))
  return run_lines


def main():
  """Regenerate the CHECK lines of every test file named on the command line.

  For each file:
    1. Skip it if its first line says it was autogenerated by another script,
       or if --update-only is set and it was not autogenerated at all.
    2. Collect the RUN lines and keep those of the form
       `<%clang...> | FileCheck ...` or `<%clang...> | opt ... | FileCheck ...`.
    3. Rewrite the file without the existing CHECK lines for the collected
       prefixes.  This happens before the tools run, so the line numbers
       reported by c-index-test refer to the stripped file.
    4. Run each pipeline, gather the function bodies and the line -> function
       mapping, and write the file back with fresh checks inserted in front of
       each function's line.

  Returns 0.
  """
  args = config()
  script_name = os.path.basename(__file__)
  autogenerated_note = (ADVERT + 'utils/' + script_name)

  for filename in args.tests:
    with open(filename) as f:
      input_lines = [l.rstrip() for l in f]

    first_line = input_lines[0] if input_lines else ""
    if 'autogenerated' in first_line and script_name not in first_line:
      common.warn("Skipping test which wasn't autogenerated by " + script_name, filename)
      continue

    if args.update_only and 'autogenerated' not in first_line:
      common.warn("Skipping test which isn't autogenerated: " + filename)
      continue

    # Extract RUN lines.
    run_lines = _join_run_lines(input_lines)

    if args.verbose:
      print('Found {} RUN lines:'.format(len(run_lines)), file=sys.stderr)
      for l in run_lines:
        print('  RUN: ' + l, file=sys.stderr)

    # Build a list of clang command lines and check prefixes from RUN lines.
    run_list = []
    line2spell_and_mangled_list = collections.defaultdict(list)
    for l in run_lines:
      commands = [cmd.strip() for cmd in l.split('|')]

      triple_in_cmd = None
      m = common.TRIPLE_ARG_RE.search(commands[0])
      if m:
        triple_in_cmd = m.groups()[0]

      # Apply %clang substitution rule, replace %s by `filename`, and append args.clang_args
      clang_args = shlex.split(commands[0])
      # An empty first command (e.g. a bare `// RUN:`) has no argv[0] to test.
      if not clang_args or clang_args[0] not in SUBST:
        print('WARNING: Skipping non-clang RUN line: ' + l, file=sys.stderr)
        continue
      clang_args[0:1] = SUBST[clang_args[0]]
      clang_args = [filename if i == '%s' else i for i in clang_args] + args.clang_args

      # Permit piping the output through opt
      if not (len(commands) == 2 or
              (len(commands) == 3 and commands[1].startswith('opt'))):
        print('WARNING: Skipping non-clang RUN line: ' + l, file=sys.stderr)
        # Actually skip the line, as the warning says.
        continue

      # Extract -check-prefix in FileCheck args
      filecheck_cmd = commands[-1]
      common.verify_filecheck_prefixes(filecheck_cmd)
      if not filecheck_cmd.startswith('FileCheck '):
        print('WARNING: Skipping non-FileChecked RUN line: ' + l, file=sys.stderr)
        continue
      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                               for item in m.group(1).split(',')]
      if not check_prefixes:
        check_prefixes = ['CHECK']
      run_list.append((check_prefixes, clang_args, commands[1:-1], triple_in_cmd))

    # Strip CHECK lines which are in `prefix_set`, update test file.
    prefix_set = {prefix for p in run_list for prefix in p[0]}
    input_lines = []
    with open(filename, 'r+') as f:
      for line in f:
        m = CHECK_RE.match(line)
        # Bare `//` lines are dropped too; they are re-emitted as separators
        # between the checks of functions sharing a line.
        if not (m and m.group(1) in prefix_set) and line != '//\n':
          input_lines.append(line)
      f.seek(0)
      f.writelines(input_lines)
      f.truncate()

    # Execute clang, generate LLVM IR, and extract functions.
    func_dict = {prefix: {} for p in run_list for prefix in p[0]}
    for prefixes, clang_args, extra_commands, triple_in_cmd in run_list:
      if args.verbose:
        print('Extracted clang cmd: clang {}'.format(clang_args), file=sys.stderr)
        print('Extracted FileCheck prefixes: {}'.format(prefixes), file=sys.stderr)

      get_function_body(args, filename, clang_args, extra_commands, prefixes, triple_in_cmd, func_dict)

      # Invoke c-index-test to get mapping from start lines to mangled names.
      # Forward all clang args for now.
      for k, v in get_line2spell_and_mangled(args, clang_args).items():
        line2spell_and_mangled_list[k].append(v)

    output_lines = [autogenerated_note]
    for idx, line in enumerate(input_lines):
      # Discard any previous script advertising.
      if line.startswith(ADVERT):
        continue
      if idx in line2spell_and_mangled_list:
        added = set()
        for spell, mangled in line2spell_and_mangled_list[idx]:
          # One line may contain multiple function declarations.
          # Skip if the mangled name has been added before.
          # The line number may come from an included file,
          # we simply require the spelling name to appear on the line
          # to exclude functions from other files.
          if mangled in added or spell not in line:
            continue
          if args.functions is None or any(re.search(regex, spell) for regex in args.functions):
            if added:
              output_lines.append('//')
            added.add(mangled)
            common.add_ir_checks(output_lines, '//', run_list, func_dict, mangled, False)
      output_lines.append(line.rstrip('\n'))

    # Update the test file.
    with open(filename, 'w') as f:
      for line in output_lines:
        f.write(line + '\n')

  return 0


if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  raise SystemExit(main())