parser: use non-capturing groups (#40373)

This commit is contained in:
Harmen Stoppels 2023-10-09 07:18:27 +02:00 committed by GitHub
parent 3a18fe04cc
commit 08da9a854a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -73,10 +73,10 @@
#: Valid name for specs and variants. Here we are not using #: Valid name for specs and variants. Here we are not using
#: the previous "\w[\w.-]*" since that would match most #: the previous "\w[\w.-]*" since that would match most
#: characters that can be part of a word in any language #: characters that can be part of a word in any language
IDENTIFIER = r"([a-zA-Z_0-9][a-zA-Z_0-9\-]*)" IDENTIFIER = r"(?:[a-zA-Z_0-9][a-zA-Z_0-9\-]*)"
DOTTED_IDENTIFIER = rf"({IDENTIFIER}(\.{IDENTIFIER})+)" DOTTED_IDENTIFIER = rf"(?:{IDENTIFIER}(?:\.{IDENTIFIER})+)"
GIT_HASH = r"([A-Fa-f0-9]{40})" GIT_HASH = r"(?:[A-Fa-f0-9]{40})"
GIT_VERSION = rf"((git\.({DOTTED_IDENTIFIER}|{IDENTIFIER}))|({GIT_HASH}))" GIT_VERSION = rf"(?:(?:git\.(?:{DOTTED_IDENTIFIER}|{IDENTIFIER}))|(?:{GIT_HASH}))"
NAME = r"[a-zA-Z_0-9][a-zA-Z_0-9\-.]*" NAME = r"[a-zA-Z_0-9][a-zA-Z_0-9\-.]*"
@ -85,15 +85,15 @@
#: A filename starts either with a "." or a "/" or a "{name}/", #: A filename starts either with a "." or a "/" or a "{name}/",
# or, on Windows, with a drive letter followed by a colon and "\", # or, on Windows, with a drive letter followed by a colon and "\",
# or with a "." or a "{name}\" # or with a "." or a "{name}\"
WINDOWS_FILENAME = r"(\.|[a-zA-Z0-9-_]*\\|[a-zA-Z]:\\)([a-zA-Z0-9-_\.\\]*)(\.json|\.yaml)" WINDOWS_FILENAME = r"(?:\.|[a-zA-Z0-9-_]*\\|[a-zA-Z]:\\)(?:[a-zA-Z0-9-_\.\\]*)(?:\.json|\.yaml)"
UNIX_FILENAME = r"(\.|\/|[a-zA-Z0-9-_]*\/)([a-zA-Z0-9-_\.\/]*)(\.json|\.yaml)" UNIX_FILENAME = r"(?:\.|\/|[a-zA-Z0-9-_]*\/)(?:[a-zA-Z0-9-_\.\/]*)(?:\.json|\.yaml)"
if not IS_WINDOWS: if not IS_WINDOWS:
FILENAME = UNIX_FILENAME FILENAME = UNIX_FILENAME
else: else:
FILENAME = WINDOWS_FILENAME FILENAME = WINDOWS_FILENAME
VALUE = r"([a-zA-Z_0-9\-+\*.,:=\~\/\\]+)" VALUE = r"(?:[a-zA-Z_0-9\-+\*.,:=\~\/\\]+)"
QUOTED_VALUE = r"[\"']+([a-zA-Z_0-9\-+\*.,:=\~\/\\\s]+)[\"']+" QUOTED_VALUE = r"[\"']+(?:[a-zA-Z_0-9\-+\*.,:=\~\/\\\s]+)[\"']+"
VERSION = r"=?([a-zA-Z0-9_][a-zA-Z_0-9\-\.]*\b)" VERSION = r"=?([a-zA-Z0-9_][a-zA-Z_0-9\-\.]*\b)"
VERSION_RANGE = rf"({VERSION}\s*:\s*{VERSION}(?!\s*=)|:\s*{VERSION}(?!\s*=)|{VERSION}\s*:|:)" VERSION_RANGE = rf"({VERSION}\s*:\s*{VERSION}(?!\s*=)|:\s*{VERSION}(?!\s*=)|{VERSION}\s*:|:)"
@ -125,34 +125,34 @@ class TokenType(TokenBase):
""" """
# Dependency # Dependency
DEPENDENCY = r"(\^)" DEPENDENCY = r"(?:\^)"
# Version # Version
VERSION_HASH_PAIR = rf"(@({GIT_VERSION})=({VERSION}))" VERSION_HASH_PAIR = rf"(?:@(?:{GIT_VERSION})=(?:{VERSION}))"
VERSION = rf"(@\s*({VERSION_LIST}))" VERSION = rf"(?:@\s*(?:{VERSION_LIST}))"
# Variants # Variants
PROPAGATED_BOOL_VARIANT = rf"((\+\+|~~|--)\s*{NAME})" PROPAGATED_BOOL_VARIANT = rf"(?:(?:\+\+|~~|--)\s*{NAME})"
BOOL_VARIANT = rf"([~+-]\s*{NAME})" BOOL_VARIANT = rf"(?:[~+-]\s*{NAME})"
PROPAGATED_KEY_VALUE_PAIR = rf"({NAME}\s*==\s*({VALUE}|{QUOTED_VALUE}))" PROPAGATED_KEY_VALUE_PAIR = rf"(?:{NAME}\s*==\s*(?:{VALUE}|{QUOTED_VALUE}))"
KEY_VALUE_PAIR = rf"({NAME}\s*=\s*({VALUE}|{QUOTED_VALUE}))" KEY_VALUE_PAIR = rf"(?:{NAME}\s*=\s*(?:{VALUE}|{QUOTED_VALUE}))"
# Compilers # Compilers
COMPILER_AND_VERSION = rf"(%\s*({NAME})([\s]*)@\s*({VERSION_LIST}))" COMPILER_AND_VERSION = rf"(?:%\s*(?:{NAME})(?:[\s]*)@\s*(?:{VERSION_LIST}))"
COMPILER = rf"(%\s*({NAME}))" COMPILER = rf"(?:%\s*(?:{NAME}))"
# FILENAME # FILENAME
FILENAME = rf"({FILENAME})" FILENAME = rf"(?:{FILENAME})"
# Package name # Package name
FULLY_QUALIFIED_PACKAGE_NAME = rf"({DOTTED_IDENTIFIER})" FULLY_QUALIFIED_PACKAGE_NAME = rf"(?:{DOTTED_IDENTIFIER})"
UNQUALIFIED_PACKAGE_NAME = rf"({IDENTIFIER})" UNQUALIFIED_PACKAGE_NAME = rf"(?:{IDENTIFIER})"
# DAG hash # DAG hash
DAG_HASH = rf"(/({HASH}))" DAG_HASH = rf"(?:/(?:{HASH}))"
# White spaces # White spaces
WS = r"(\s+)" WS = r"(?:\s+)"
class ErrorTokenType(TokenBase): class ErrorTokenType(TokenBase):
"""Enum with regexes for error analysis""" """Enum with regexes for error analysis"""
# Unexpected character # Unexpected character
UNEXPECTED = r"(.[\s]*)" UNEXPECTED = r"(?:.[\s]*)"
class Token: class Token: