[lit] Add the ability to parse regexes in Lit boolean expressions

This patch augments Lit with the ability to parse regular expressions in boolean expressions. This includes REQUIRES:, XFAIL:, UNSUPPORTED:, and all other special Lit markup that evaluates to a boolean expression. Regular expressions can be specified by enclosing them in {{...}}, similarly to how FileCheck handles such regular expressions. The regular expression can either be on its own, or it can be part of an identifier. For example, a match expression like {{.+}}-apple-darwin{{.+}} would match the following variables: x86_64-apple-darwin20.0, arm64-apple-darwin20.0, arm64-apple-darwin22.0, etc. In the long term, this could be used to remove the need to handle the target triple specially when parsing boolean expressions. Differential Revision: https://reviews.llvm.org/D104572
2025-01-31 12:41:49 +01:00 · 2021-06-18 13:33:14 -04:00 · 2021-06-18 13:33:14 -04:00 · 6766b6d75d
commit 6766b6d75d
parent 3ab40c9e92
5 changed files with 80 additions and 25 deletions
--- a/docs/TestingGuide.rst
+++ b/docs/TestingGuide.rst
@ -459,8 +459,12 @@ will be a failure if its execution succeeds.
 ``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated
 list of boolean expressions. The values in each expression may be:

- Features added to ``config.available_features`` by
-  configuration files such as ``lit.cfg``.
+- Features added to ``config.available_features`` by configuration files such as ``lit.cfg``.
+  String comparison of features is case-sensitive. Furthermore, a boolean expression can
+  contain any Python regular expression enclosed in ``{{ }}``, in which case the boolean
+  expression is satisfied if any feature matches the regular expression in full. Regular
+  expressions can appear inside an identifier, so for example ``he{{l+}}o`` would match
+  ``helo``, ``hello``, ``helllo``, and so on.
 - Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only).

 | ``REQUIRES`` enables the test if all expressions are true.
--- a/utils/lit/lit/BooleanExpression.py
+++ b/utils/lit/lit/BooleanExpression.py
@ -4,18 +4,24 @@ class BooleanExpression:
    # A simple evaluator of boolean expressions.
    #
    # Grammar:
-    #   expr       :: or_expr
-    #   or_expr    :: and_expr ('||' and_expr)*
-    #   and_expr   :: not_expr ('&&' not_expr)*
-    #   not_expr   :: '!' not_expr
-    #                 '(' or_expr ')'
-    #                 identifier
-    #   identifier :: [-+=._a-zA-Z0-9]+
+    #   expr         :: or_expr
+    #   or_expr      :: and_expr ('||' and_expr)*
+    #   and_expr     :: not_expr ('&&' not_expr)*
+    #   not_expr     :: '!' not_expr
+    #                   '(' or_expr ')'
+    #                   match_expr
+    #   match_expr   :: braced_regex
+    #                   identifier
+    #                   braced_regex match_expr
+    #                   identifier match_expr
+    #   identifier   :: [-+=._a-zA-Z0-9]+
+    #   braced_regex :: '{{' python_regex '}}'

    # Evaluates `string` as a boolean expression.
    # Returns True or False. Throws a ValueError on syntax error.
    #
    # Variables in `variables` are true.
+    # Regexes that match any variable in `variables` are true.
    # Substrings of `triple` are true.
    # 'true' is true.
    # All other identifiers are false.
@ -41,7 +47,7 @@ class BooleanExpression:
    END = object()

    # Tokenization pattern.
-    Pattern = re.compile(r'\A\s*([()]|[-+=._a-zA-Z0-9]+|&&|\|\||!)\s*(.*)\Z')
+    Pattern = re.compile(r'\A\s*([()]|&&|\|\||!|(?:[-+=._a-zA-Z0-9]+|\{\{.+?\}\})+)\s*(.*)\Z')

    @staticmethod
    def tokenize(string):
@ -80,12 +86,24 @@ class BooleanExpression:
                             (self.quote(t), self.quote(self.token)))

    @staticmethod
-    def isIdentifier(token):
+    def isMatchExpression(token):
        if (token is BooleanExpression.END or token == '&&' or token == '||' or
            token == '!' or token == '(' or token == ')'):
            return False
        return True

+    def parseMATCH(self):
+        regex = ''
+        for part in filter(None, re.split(r'(\{\{.+?\}\})', self.token)):
+            if part.startswith('{{'):
+                assert part.endswith('}}')
+                regex += '(?:{})'.format(part[2:-2])
+            else:
+                regex += re.escape(part)
+        regex = re.compile(regex)
+        self.value = self.token in self.triple or any(regex.fullmatch(var) for var in self.variables)
+        self.token = next(self.tokens)
+
    def parseNOT(self):
        if self.accept('!'):
            self.parseNOT()
@ -93,13 +111,11 @@ class BooleanExpression:
        elif self.accept('('):
            self.parseOR()
            self.expect(')')
-        elif not BooleanExpression.isIdentifier(self.token):
-            raise ValueError("expected: '!' or '(' or identifier\nhave: %s" %
+        elif not BooleanExpression.isMatchExpression(self.token):
+            raise ValueError("expected: '!', '(', '{{', or identifier\nhave: %s" %
                             self.quote(self.token))
        else:
-            self.value = (self.token in self.variables or
-                          self.token in self.triple)
-            self.token = next(self.tokens)
+            self.parseMATCH()

    def parseAND(self):
        self.parseNOT()
@ -143,12 +159,20 @@ class TestBooleanExpression(unittest.TestCase):
        self.assertTrue(BooleanExpression.evaluate('under_score', variables))
        self.assertTrue(BooleanExpression.evaluate('e=quals', variables))
        self.assertTrue(BooleanExpression.evaluate('d1g1ts', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{its.+}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{false-[lo]+-true}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{(true|false)-lol-(true|false)}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}ts', variables))
+        self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}t{{[a-z]}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{d}}1g{{[0-9]}}t{{[a-z]}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('d1{{(g|1)+}}ts', variables))

        self.assertFalse(BooleanExpression.evaluate('false', variables))
        self.assertFalse(BooleanExpression.evaluate('True', variables))
        self.assertFalse(BooleanExpression.evaluate('true-ish', variables))
        self.assertFalse(BooleanExpression.evaluate('not_true', variables))
        self.assertFalse(BooleanExpression.evaluate('tru', variables))
+        self.assertFalse(BooleanExpression.evaluate('{{its-true.+}}', variables))

    def test_triple(self):
        triple = 'arch-vendor-os'
@ -159,6 +183,21 @@ class TestBooleanExpression(unittest.TestCase):
        self.assertTrue(BooleanExpression.evaluate('-os', {}, triple))
        self.assertFalse(BooleanExpression.evaluate('arch-os', {}, triple))

+        # When matching against the triple, a regex is treated as an identifier and checked
+        # for a literal match. This preserves existing behavior before regexes were introduced.
+        self.assertFalse(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, triple))
+        self.assertTrue(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, 'arch-{{vendor}}-os'))
+
+    def test_matching(self):
+        expr1 = 'linux && (target={{aarch64-.+}} || target={{x86_64-.+}})'
+        self.assertTrue(BooleanExpression.evaluate(expr1, {'linux', 'target=x86_64-unknown-linux-gnu'}))
+        self.assertFalse(BooleanExpression.evaluate(expr1, {'linux', 'target=i386-unknown-linux-gnu'}))
+
+        expr2 = 'use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions'
+        self.assertTrue(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12'}))
+        self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12', 'no-exceptions'}))
+        self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.15'}))
+
    def test_operators(self):
        self.assertTrue(BooleanExpression.evaluate('true || true', {}))
        self.assertTrue(BooleanExpression.evaluate('true || false', {}))
@ -206,17 +245,17 @@ class TestBooleanExpression(unittest.TestCase):
                            "in expression: 'true and true'")

        self.checkException("|| true",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: '||'\n" +
                            "in expression: '|| true'")

        self.checkException("true &&",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: <end of expression>\n" +
                            "in expression: 'true &&'")

        self.checkException("",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: <end of expression>\n" +
                            "in expression: ''")

@ -244,9 +283,18 @@ class TestBooleanExpression(unittest.TestCase):
                            "in expression: 'true (true)'")

        self.checkException("( )",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                            "have: ')'\n" +
                            "in expression: '( )'")

+        self.checkException("abc{{def",
+                            "couldn't parse text: '{{def'\n" +
+                            "in expression: 'abc{{def'")
+
+        self.checkException("{{}}",
+                            "couldn't parse text: '{{}}'\n" +
+                            "in expression: '{{}}'")
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/utils/lit/lit/Test.py
+++ b/utils/lit/lit/Test.py
@ -408,5 +408,5 @@ class Test:
            BooleanExpression.tokenize(expr) for expr in
                boolean_expressions if expr != '*'
        )
-        identifiers = set(filter(BooleanExpression.isIdentifier, tokens))
-        return identifiers
+        matchExpressions = set(filter(BooleanExpression.isMatchExpression, tokens))
+        return matchExpressions
--- a/utils/lit/tests/Inputs/show-used-features/mixed.txt
+++ b/utils/lit/tests/Inputs/show-used-features/mixed.txt
@ -1,4 +1,4 @@

-// REQUIRES: my-require-feature-2 || my-require-feature-3
-// UNSUPPORTED: my-unsupported-feature-2, my-unsupported-feature-3
-// XFAIL: my-xfail-feature-2, my-xfail-feature-3
+// REQUIRES: my-require-feature-2 || my-require-feature-3, my-{{[require]*}}-feature-4
+// UNSUPPORTED: my-unsupported-feature-2, my-unsupported-feature-3 && !my-{{[unsupported]*}}-feature-4
+// XFAIL: my-xfail-feature-2, my-xfail-feature-3, my-{{[xfail]*}}-feature-4
--- a/utils/lit/tests/show-used-features.py
+++ b/utils/lit/tests/show-used-features.py
@ -4,3 +4,6 @@
 # CHECK: my-require-feature-1 my-require-feature-2 my-require-feature-3
 # CHECK: my-unsupported-feature-1 my-unsupported-feature-2 my-unsupported-feature-3
 # CHECK: my-xfail-feature-1 my-xfail-feature-2 my-xfail-feature-3
+# CHECK: {{my-[{][{]\[require\]\*[}][}]-feature-4}}
+# CHECK: {{my-[{][{]\[unsupported\]\*[}][}]-feature-4}}
+# CHECK: {{my-[{][{]\[xfail\]\*[}][}]-feature-4}}