from __future__ import print_function

import copy
import glob
import os
import re
import subprocess
import sys

if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string

##### Common utilities for update_*test_checks.py


_verbose = False
_prefix_filecheck_ir_name = ''

def parse_commandline_args(parser):
  parser.add_argument('--include-generated-funcs', action='store_true',
                      help='Output checks for functions not in source')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('-u', '--update-only', action='store_true',
                      help='Only update test if it was already autogened')
  parser.add_argument('--force-update', action='store_true',
                      help='Update test even if it was autogened by a different script')
  parser.add_argument('--enable', action='store_true', dest='enabled', default=True,
                      help='Activate CHECK line generation from this point forward')
  parser.add_argument('--disable', action='store_false', dest='enabled',
                      help='Deactivate CHECK line generation from this point forward')
  parser.add_argument('--replace-value-regex', nargs='+', default=[],
                      help='List of regular expressions to replace matching value names')
  parser.add_argument('--prefix-filecheck-ir-name', default='',
                      help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')
  parser.add_argument('--global-value-regex', nargs='+', default=[],
                      help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)')
  parser.add_argument('--global-hex-value-regex', nargs='+', default=[],
                      help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives')
  args = parser.parse_args()
  global _verbose, _global_value_regex, _global_hex_value_regex
  _verbose = args.verbose
  _global_value_regex = args.global_value_regex
  _global_hex_value_regex = args.global_hex_value_regex
  return args


class InputLineInfo(object):
  def __init__(self, line, line_number, args, argv):
    self.line = line
    self.line_number = line_number
    self.args = args
    self.argv = argv


class TestInfo(object):
  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.path = test
    self.args = args
    if args.prefix_filecheck_ir_name:
      global _prefix_filecheck_ir_name
      _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    self.comment_prefix = comment_prefix
    if self.comment_prefix is None:
      if self.path.endswith('.mir'):
        self.comment_prefix = '#'
      else:
        self.comment_prefix = ';'
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    self.test_autogenerated_note = self.autogenerated_note_prefix + script_name
    self.test_autogenerated_note += get_autogennote_suffix(parser, self.args)

  def ro_iterlines(self):
    for line_num, input_line in enumerate(self.input_lines):
      args, argv = check_for_command(input_line, self.parser, self.args,
                                     self.argv, self.argparse_callback)
      yield InputLineInfo(input_line, line_num, args, argv)

  def iterlines(self, output_lines):
    output_lines.append(self.test_autogenerated_note)
    for line_info in self.ro_iterlines():
      input_line = line_info.line
      # Discard any previous script advertising.
      if input_line.startswith(self.autogenerated_note_prefix):
        continue
      self.args = line_info.args
      self.argv = line_info.argv
      if not self.args.enabled:
        output_lines.append(input_line)
        continue
      yield line_info


def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    tests_list = glob.glob(pattern)
    if not tests_list:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in tests_list:
      with open(test) as f:
        input_lines = [l.rstrip() for l in f]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT in first_line:
        if script_name not in first_line and not args.force_update:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      elif args.update_only:
        assert UTC_ADVERT not in first_line
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix, argparse_callback)


def should_add_line_to_output(input_line, prefix_set, skip_global_checks=False, comment_marker=';'):
  # Skip any blank comment lines in the IR.
  if not skip_global_checks and input_line.strip() == comment_marker:
    return False
  # Skip a special double comment line we use as a separator.
  if input_line.strip() == comment_marker + SEPARATOR:
    return False
  # Skip any blank lines in the IR.
  #if input_line.strip() == '':
  #  return False
  # And skip any CHECK lines. We're building our own.
  m = CHECK_RE.match(input_line)
  if m and m.group(1) in prefix_set:
    if skip_global_checks:
      global_ir_value_re = re.compile(r'\[\[', flags=(re.M))
      return not global_ir_value_re.search(input_line)
    return False
  return True


# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if preprocess_cmd:
      # Allow pre-processing the IR file (e.g. using sed):
      assert isinstance(preprocess_cmd, str)  # TODO: use a list instead of using shell
      preprocess_cmd = preprocess_cmd.replace('%s', ir).strip()
      if verbose:
        print('Pre-processing input file: ', ir, " with command '",
              preprocess_cmd, "'", sep="", file=sys.stderr)
      # Python 2.7 doesn't have subprocess.DEVNULL:
      with open(os.devnull, 'w') as devnull:
        pp = subprocess.Popen(preprocess_cmd, shell=True, stdin=devnull,
                              stdout=subprocess.PIPE)
        ir_file = pp.stdout
    if isinstance(cmd_args, list):
      stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
    else:
      stdout = subprocess.check_output(exe + ' ' + cmd_args,
                                       shell=True, stdin=ir_file)
    if sys.version_info[0] > 2:
      stdout = stdout.decode()
  # Fix line endings to unix LF style.
  return stdout.replace('\r\n', '\n')


##### LLVM IR parser
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '

OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

SEPARATOR = '.'

def error(msg, test_file=None):
  if test_file:
    msg = '{}: {}'.format(msg, test_file)
  print('ERROR: {}'.format(msg), file=sys.stderr)

def warn(msg, test_file=None):
  if test_file:
    msg = '{}: {}'.format(msg, test_file)
  print('WARNING: {}'.format(msg), file=sys.stderr)

def debug(*args, **kwargs):
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  if 'file' not in kwargs:
    kwargs['file'] = sys.stderr
  if _verbose:
    print(*args, **kwargs)

def find_run_lines(test, lines):
  debug('Scanning for RUN lines in test file:', test)
  raw_lines = [m.group(1)
               for m in [RUN_LINE_RE.match(l) for l in lines] if m]
  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
  for l in raw_lines[1:]:
    if run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for l in run_lines:
    debug('  RUN: {}'.format(l))
  return run_lines

def scrub_body(body):
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation.
  body = string.expandtabs(body, 2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
  return body

def do_scrub(body, scrubber, scrubber_args, extra):
  if scrubber_args:
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
  return scrubber(body, *scrubber_args)
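
# Illustrative example (not used by the scripts; the function name 'add' and its
# body are hypothetical). Given `opt -S` output such as
#
#   define i32 @add(i32 %a, i32 %b) #0 {
#   entry:
#     %sum = add i32 %a, %b
#     ret i32 %sum
#   }
#
# OPT_FUNCTION_RE is expected to capture 'add' in group 'func', the signature
# text through the opening brace in 'args_and_sig', and the lines up to the
# closing '}' in 'body', which FunctionTestBuilder below turns into check lines.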
# Build up a dictionary of all the function bodies.
class function_body(object):
  def __init__(self, string, extra, args_and_sig, attrs):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig
    self.attrs = attrs

  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs):
    arg_names = set()
    def drop_arg_names(match):
      arg_names.add(match.group(variable_group_in_ir_value_match))
      if match.group(attribute_group_in_ir_value_match):
        attr = match.group(attribute_group_in_ir_value_match)
      else:
        attr = ''
      return match.group(1) + attr + match.group(match.lastindex)
    def repl_arg_names(match):
      if match.group(variable_group_in_ir_value_match) is not None and match.group(variable_group_in_ir_value_match) in arg_names:
        return match.group(1) + match.group(match.lastindex)
      return match.group(1) + match.group(2) + match.group(match.lastindex)
    if self.attrs != attrs:
      return False
    ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if ans0 != ans1:
      return False
    es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
    es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
    es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
    es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
    return es0 == es1

  def __str__(self):
    return self.scrub

class FunctionTestBuilder:
  def __init__(self, run_list, flags, scrubber_args, path):
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    self._scrubber_args = scrubber_args
    self._path = path
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_value_regex = list(map(lambda x: x.strip('"'), flags.replace_value_regex))
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    for tuple in run_list:
      for prefix in tuple[0]:
        self._func_dict.update({prefix: dict()})
        self._func_order.update({prefix: []})
        self._global_var_dict.update({prefix: dict()})

  def finish_and_get_func_dict(self):
    for prefix in self._get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix, self._path,))
    return self._func_dict

  def func_order(self):
    return self._func_order

  def global_var_dict(self):
    return self._global_var_dict

  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    for m in function_re.finditer(raw_tool_output):
      if not m:
        continue
      func = m.group('func')
      body = m.group('body')
      attrs = m.group('attrs') if self._check_attributes else ''
      # Determine if we print arguments, the opening brace, or nothing after
      # the function name.
      if self._record_args and 'args_and_sig' in m.groupdict():
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif 'args_and_sig' in m.groupdict():
        args_and_sig = '('
      else:
        args_and_sig = ''
      scrubbed_body = do_scrub(body, scrubber, self._scrubber_args, extra=False)
      scrubbed_extra = do_scrub(body, scrubber, self._scrubber_args, extra=True)
      if 'analysis' in m.groupdict():
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      for prefix in prefixes:
        # Replace function names matching the regex.
        for regex in self._replace_value_regex:
          # Pattern that matches capture groups in the regex in leftmost order.
          group_regex = re.compile(r'\(.*?\)')
          # Replace function name with regex.
          match = re.match(regex, func)
          if match:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)
          # Replace all calls to regex matching functions.
          matches = re.finditer(regex, scrubbed_body)
          for match in matches:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}',
                                   scrubbed_body)

        if func in self._func_dict[prefix]:
          if (self._func_dict[prefix][func] is None or
              str(self._func_dict[prefix][func]) != scrubbed_body or
              self._func_dict[prefix][func].args_and_sig != args_and_sig or
              self._func_dict[prefix][func].attrs != attrs):
            if (self._func_dict[prefix][func] is not None and
                self._func_dict[prefix][func].is_same_except_arg_names(
                    scrubbed_extra, args_and_sig, attrs)):
              self._func_dict[prefix][func].scrub = scrubbed_extra
              self._func_dict[prefix][func].args_and_sig = args_and_sig
              continue
            else:
              # This means a previous RUN line produced a body for this function
              # that is different from the one produced by this current RUN line,
              # so the body can't be common across RUN lines. We use None to
              # indicate that.
              self._func_dict[prefix][func] = None
              continue

        self._func_dict[prefix][func] = function_body(
            scrubbed_body, scrubbed_extra, args_and_sig, attrs)
        self._func_order[prefix].append(func)

  def _get_failed_prefixes(self):
    # This returns the list of those prefixes that failed to match any function,
    # because there were conflicting bodies produced by different RUN lines, in
    # all instances of the prefix. Effectively, this prefix is unused and should
    # be removed.
    for prefix in self._func_dict:
      if (self._func_dict[prefix] and
          (not [fct for fct in self._func_dict[prefix]
                if self._func_dict[prefix][fct] is not None])):
        yield prefix


##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# TODO: We should also derive check lines for global, debug, loop declarations, etc..

class NamelessValue:
  def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix,
               global_ir_prefix_regexp, ir_regexp, global_ir_rhs_regexp,
               is_before_functions):
    self.check_prefix = check_prefix
    self.check_key = check_key
    self.ir_prefix = ir_prefix
    self.global_ir_prefix = global_ir_prefix
    self.global_ir_prefix_regexp = global_ir_prefix_regexp
    self.ir_regexp = ir_regexp
    self.global_ir_rhs_regexp = global_ir_rhs_regexp
    self.is_before_functions = is_before_functions

# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
nameless_values = [
    NamelessValue(r'TMP'  , '%' , r'%'           , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , r'#'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , None            , r'attributes #' , r'[0-9]+'             , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB' , '@' , r'@'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB' , '@' , None            , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'  , '!' , r'!dbg '       , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'PROF' , '!' , r'!prof '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA' , '!' , r'!tbaa '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'  , '!' , r'!range '     , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , r'metadata '   , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , None            , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
]

def createOrRegexp(old, new):
  if not old:
    return new
  if not new:
    return old
  return old + '|' + new

def createPrefixMatch(prefix_str, prefix_re):
  if prefix_str is None or prefix_re is None:
    return ''
  return '(?:' + prefix_str + '(' + prefix_re + '))'

# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
IR_VALUE_REGEXP_STRING = r''
for nameless_value in nameless_values:
  lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
  glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
  assert((lcl_match or glb_match) and not (lcl_match and glb_match))
  if lcl_match:
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
  elif glb_match:
    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
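
# Illustrative example (not executed by the scripts; the value name '%tmp1' is
# hypothetical). For an input fragment such as '  %tmp1,', IR_VALUE_RE is
# expected to produce a match whose group(1) holds the leading whitespace,
# group(2) the whole value text ('%tmp1'), and one of the later groups
# (starting at first_nameless_group_in_ir_value_match) the bare name 'tmp1'.
# get_idx_from_ir_value_match() below maps that later group back to the
# corresponding NamelessValue entry in the table above.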
# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
def get_idx_from_ir_value_match(match):
  for i in range(first_nameless_group_in_ir_value_match, match.lastindex):
    if match.group(i) is not None:
      return i - first_nameless_group_in_ir_value_match
  error("Unable to identify the kind of IR value from the match!")
  return 0

# See get_idx_from_ir_value_match
def get_name_from_ir_value_match(match):
  return match.group(get_idx_from_ir_value_match(match) +
                     first_nameless_group_in_ir_value_match)

# Return the nameless prefix we use for this kind of IR value, see also
# get_idx_from_ir_value_match
def get_nameless_check_prefix_from_ir_value_match(match):
  return nameless_values[get_idx_from_ir_value_match(match)].check_prefix

# Return the IR prefix and check prefix we use for this kind of IR value,
# e.g., (%, TMP) for locals, see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_match(match):
  idx = get_idx_from_ir_value_match(match)
  if nameless_values[idx].ir_prefix and match.group(0).strip().startswith(nameless_values[idx].ir_prefix):
    return nameless_values[idx].ir_prefix, nameless_values[idx].check_prefix
  return nameless_values[idx].global_ir_prefix, nameless_values[idx].check_prefix

def get_check_key_from_ir_value_match(match):
  idx = get_idx_from_ir_value_match(match)
  return nameless_values[idx].check_key

# Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for
# locals, see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_re_match(match):
  # for backwards compatibility we check locals with '.*'
  if is_local_def_ir_value_match(match):
    return '.*'
  idx = get_idx_from_ir_value_match(match)
  if nameless_values[idx].ir_prefix and match.group(0).strip().startswith(nameless_values[idx].ir_prefix):
    return nameless_values[idx].ir_regexp
  return nameless_values[idx].global_ir_prefix_regexp

# Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
def is_local_def_ir_value_match(match):
  return nameless_values[get_idx_from_ir_value_match(match)].ir_prefix == '%'

# Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
def is_global_scope_ir_value_match(match):
  return nameless_values[get_idx_from_ir_value_match(match)].global_ir_prefix is not None

# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
  return check_prefix and re.match(r'^' + check_prefix + r'[0-9]+?$', var, re.IGNORECASE)

# Create a FileCheck variable name based on an IR name.
def get_value_name(var, check_prefix):
  var = var.replace('!', '')
  # This is a nameless value, prepend check_prefix.
  if var.isdigit():
    var = check_prefix + var
  else:
    # This is a named value that clashes with the check_prefix, prepend with
    # _prefix_filecheck_ir_name, if it has been defined.
    if may_clash_with_default_check_prefix_name(check_prefix, var) and _prefix_filecheck_ir_name:
      var = _prefix_filecheck_ir_name + var
  var = var.replace('.', '_')
  var = var.replace('-', '_')
  return var.upper()
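
# Hypothetical examples, for illustration only (these calls are not made here):
#   get_value_name('5', 'TMP')           -> 'TMP5'        (nameless SSA value)
#   get_value_name('indvars.iv', 'TMP')  -> 'INDVARS_IV'  (named value, dots mapped to '_')
#   get_value_name('!42', 'META')        -> 'META42'      (metadata number)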
# Create a FileCheck variable from regex.
def get_value_definition(var, match):
  # for backwards compatibility we check locals with '.*'
  if is_local_def_ir_value_match(match):
    return '[[' + get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match)) + ':' + \
           get_ir_prefix_from_ir_value_match(match)[0] + get_ir_prefix_from_ir_value_re_match(match) + ']]'
  prefix = get_ir_prefix_from_ir_value_match(match)[0]
  return prefix + '[[' + get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match)) + ':' + get_ir_prefix_from_ir_value_re_match(match) + ']]'

# Use a FileCheck variable.
def get_value_use(var, match, check_prefix):
  if is_local_def_ir_value_match(match):
    return '[[' + get_value_name(var, check_prefix) + ']]'
  prefix = get_ir_prefix_from_ir_value_match(match)[0]
  return prefix + '[[' + get_value_name(var, check_prefix) + ']]'

# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    pre, check = get_ir_prefix_from_ir_value_match(match)
    var = get_name_from_ir_value_match(match)
    for nameless_value in nameless_values:
      if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
        warn("Change IR value name '%s' or use -prefix-ir-filecheck-name to prevent possible conflict"
             " with scripted FileCheck name." % (var,))
    key = (var, get_check_key_from_ir_value_match(match))
    is_local_def = is_local_def_ir_value_match(match)
    if is_local_def and key in vars_seen:
      rv = get_value_use(var, match, get_nameless_check_prefix_from_ir_value_match(match))
    elif not is_local_def and key in global_vars_seen:
      rv = get_value_use(var, match, global_vars_seen[key])
    else:
      if is_local_def:
        vars_seen.add(key)
      else:
        global_vars_seen[key] = get_nameless_check_prefix_from_ir_value_match(match)
      rv = get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(match.lastindex)

  lines_with_def = []

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    for regex in _global_hex_value_regex:
      if re.match('^@' + regex + ' = ', line):
        line = re.sub(r'\bi([0-9]+) ([0-9]+)',
            lambda m: 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]',
            line)
        break
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = scrubbed_line
    if not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      changed = True
      while changed:
        (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1)
  return lines

def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
               check_label_format, is_asm, is_analyze, global_vars_seen_dict):
  # prefix_exclusions are prefixes we cannot use to print the function because
  # it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot
    # check for it as it does not exist for this run line. A subset of the
    # check prefixes might know about the function but only because other run
    # lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
      prefix_exclusions |= set(checkprefixes)
      continue

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    global_vars_seen = {}
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      if checkprefix in global_vars_seen_dict:
        global_vars_seen.update(global_vars_seen_dict[checkprefix])
      else:
        global_vars_seen_dict[checkprefix] = {}
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      if checkprefix not in global_vars_seen_dict:
        global_vars_seen_dict[checkprefix] = {}

      global_vars_seen_before = [key for key in global_vars_seen.keys()]

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      attrs = str(func_dict[checkprefix][func_name].attrs)
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them.
        if is_blank_line:
          output_lines.append('{} {}: {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT: {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)

      # Remember new global variables we have not seen before.
      for key in global_vars_seen:
        if key not in global_vars_seen_before:
          global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
      break

def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig, global_vars_seen_dict):
  # Label format is based on IR string.
  function_def_regex = 'define {{[^@]+}}' if function_sig else ''
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, False, preserve_names, global_vars_seen_dict)

def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
  global_vars_seen_dict = {}
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, False, True, global_vars_seen_dict)

def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  for nameless_value in nameless_values:
    if nameless_value.global_ir_prefix is None:
      continue

    lhs_re_str = nameless_value.global_ir_prefix + nameless_value.global_ir_prefix_regexp
    rhs_re_str = nameless_value.global_ir_rhs_regexp

    global_ir_value_re_str = r'^' + lhs_re_str + r'\s=\s' + rhs_re_str + r'$'
    global_ir_value_re = re.compile(global_ir_value_re_str, flags=(re.M))
    lines = []
    for m in global_ir_value_re.finditer(raw_tool_output):
      lines.append(m.group(0))

    for prefix in prefixes:
      if glob_val_dict[prefix] is None:
        continue
      if nameless_value.check_prefix in glob_val_dict[prefix]:
        if lines == glob_val_dict[prefix][nameless_value.check_prefix]:
          continue
        if prefix == prefixes[-1]:
          warn('Found conflicting asm under the same prefix: %r!'
               % (prefix,))
        else:
          glob_val_dict[prefix][nameless_value.check_prefix] = None
          continue
      glob_val_dict[prefix][nameless_value.check_prefix] = lines

def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines,
                      global_vars_seen_dict, is_analyze, is_before_functions):
  printed_prefixes = set()
  for nameless_value in nameless_values:
    if nameless_value.global_ir_prefix is None:
      continue
    if nameless_value.is_before_functions != is_before_functions:
      continue
    for p in prefix_list:
      global_vars_seen = {}
      checkprefixes = p[0]
      if checkprefixes is None:
        continue
      for checkprefix in checkprefixes:
        if checkprefix in global_vars_seen_dict:
          global_vars_seen.update(global_vars_seen_dict[checkprefix])
        else:
          global_vars_seen_dict[checkprefix] = {}
        if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
          break
        if not glob_val_dict[checkprefix]:
          continue
        if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
          continue
        if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
          continue

        check_lines = []
        global_vars_seen_before = [key for key in global_vars_seen.keys()]
        for line in glob_val_dict[checkprefix][nameless_value.check_prefix]:
          if _global_value_regex:
            matched = False
            for regex in _global_value_regex:
              if re.match('^@' + regex + ' = ', line):
                matched = True
                break
            if not matched:
              continue
          tmp = generalize_check_lines([line], is_analyze, set(), global_vars_seen)
          check_line = '%s %s: %s' % (comment_marker, checkprefix, tmp[0])
          check_lines.append(check_line)
        if not check_lines:
          continue

        output_lines.append(comment_marker + SEPARATOR)
        for check_line in check_lines:
          output_lines.append(check_line)
        printed_prefixes.add((checkprefix, nameless_value.check_prefix))

        # Remember new global variables we have not seen before.
        for key in global_vars_seen:
          if key not in global_vars_seen_before:
            global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
        break

  if printed_prefixes:
    output_lines.append(comment_marker + SEPARATOR)


def check_prefix(prefix):
  if not PREFIX_RE.match(prefix):
    hint = ""
    if ',' in prefix:
      hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + hint) %
         (prefix))

def verify_filecheck_prefixes(fc_cmd):
  fc_cmd_parts = fc_cmd.split()
  for part in fc_cmd_parts:
    if "check-prefix=" in part:
      prefix = part.split('=', 1)[1]
      check_prefix(prefix)
    elif "check-prefixes=" in part:
      prefixes = part.split('=', 1)[1].split(',')
      for prefix in prefixes:
        check_prefix(prefix)
        if prefixes.count(prefix) > 1:
          warn("Supplied prefix '%s' is not unique in the prefix list."
               % (prefix,))

def get_autogennote_suffix(parser, args):
  autogenerated_note_args = ''
  for action in parser._actions:
    if not hasattr(args, action.dest):
      continue  # Ignore options such as --help that aren't included in args
    # Ignore parameters such as paths to the binary or the list of tests
    if action.dest in ('tests', 'update_only', 'opt_binary', 'llc_binary',
                       'clang', 'opt', 'llvm_bin', 'verbose'):
      continue
    value = getattr(args, action.dest)
    if action.const is not None:  # action stores a constant (usually True/False)
      # Skip actions with different constant values (this happens with boolean
      # --foo/--no-foo options)
      if value != action.const:
        continue
    if parser.get_default(action.dest) == value:
      continue  # Don't add default values
    autogenerated_note_args += action.option_strings[0] + ' '
    if action.const is None:  # action takes a parameter
      if action.nargs == '+':
        value = ' '.join(map(lambda v: '"' + v.strip('"') + '"', value))
      autogenerated_note_args += '%s ' % value
  if autogenerated_note_args:
    autogenerated_note_args = ' %s %s' % (UTC_ARGS_KEY, autogenerated_note_args[:-1])
  return autogenerated_note_args

def check_for_command(line, parser, args, argv, argparse_callback):
  cmd_m = UTC_ARGS_CMD.match(line)
  if cmd_m:
    for option in cmd_m.group('cmd').strip().split(' '):
      if option:
        argv.append(option)
    args = parser.parse_args(filter(lambda arg: arg not in args.tests, argv))
    if argparse_callback is not None:
      argparse_callback(args)
  return args, argv

def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  result = get_arg_to_check(test_info.args)
  if not result and is_global:
    # See if this has been specified via UTC_ARGS. This is a "global" option
    # that affects the entire generation of test checks. If it exists anywhere
    # in the test, apply it to everything.
    saw_line = False
    for line_info in test_info.ro_iterlines():
      line = line_info.line
      if not line.startswith(';') and line.strip() != '':
        saw_line = True
      result = get_arg_to_check(line_info.args)
      if result:
        if warn and saw_line:
          # We saw the option after already reading some test input lines.
          # Warn about it.
          print('WARNING: Found {} in line following test start: '.format(arg_string)
                + line, file=sys.stderr)
          print('WARNING: Consider moving {} to top of file'.format(arg_string),
                file=sys.stderr)
        break
  return result

def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  for input_line_info in test_info.iterlines(output_lines):
    line = input_line_info.line
    args = input_line_info.args
    if line.strip() == comment_string:
      continue
    if line.strip() == comment_string + SEPARATOR:
      continue
    if line.lstrip().startswith(comment_string):
      m = CHECK_RE.match(line)
      if m and m.group(1) in prefix_set:
        continue
    output_lines.append(line.rstrip('\n'))

def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  added = set()
  for prefix in prefix_list:
    prefixes = prefix[0]
    tool_args = prefix[1]
    for prefix in prefixes:
      for func in func_order[prefix]:
        if added:
          output_lines.append(comment_string)
        added.add(func)

        # The add_*_checks routines expect a run list whose items are
        # tuples that have a list of prefixes as their first element and
        # tool command args string as their second element. They output
        # checks for each prefix in the list of prefixes. By doing so, it
        # implicitly assumes that for each function every run line will
        # generate something for that function. That is not the case for
        # generated functions as some run lines might not generate them
        # (e.g. -fopenmp vs. no -fopenmp).
        #
        # Therefore, pass just the prefix we're interested in. This has
        # the effect of generating all of the checks for functions of a
        # single prefix before moving on to the next prefix. So checks
        # are ordered by prefix instead of by function as in "normal"
        # mode.
        check_generator(output_lines,
                        [([prefix], tool_args)],
                        func)
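
# Illustrative sketch of how a driver script might compose the helpers above.
# This is a simplified assumption, not copied from any particular update
# script; the 'run_list' construction, the 'tests'/'--opt-binary' arguments,
# and the output rewriting are elided or hypothetical:
#
#   parser = argparse.ArgumentParser(...)
#   args = common.parse_commandline_args(parser)
#   for ti in common.itertests(args.tests, parser, 'utils/update_test_checks.py'):
#     builder = common.FunctionTestBuilder(run_list, ti.args, [], ti.path)
#     for prefixes, tool_args in run_list:
#       raw_output = common.invoke_tool(args.opt_binary, tool_args, ti.path)
#       builder.process_run_line(common.OPT_FUNCTION_RE, common.scrub_body,
#                                raw_output, prefixes)
#     func_dict = builder.finish_and_get_func_dict()
#     # ...then emit CHECK lines via add_ir_checks()/add_global_checks() and
#     # filter stale ones with should_add_line_to_output() while rewriting
#     # ti.path.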