diff --git a/CHANGELOG.md b/CHANGELOG.md index fdc2634..7f0a360 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ This project uses [Semantic Versioning](https://semver.org). ### Added +- **`rules:if:` expression evaluator improvements** — four correctness fixes to the GitLab CI expression parser: + - **Multi-line expressions** — newlines (`\n`, `\r`) are now treated as whitespace between tokens, so block-scalar `if:` values (e.g. `if: | ...`) and folded YAML scalars with `||`/`&&` on a continuation line are parsed correctly instead of falling back to permissive `true`. + - **`${VAR}` curly-brace variable syntax** — `${CI_COMMIT_BRANCH}` is now equivalent to `$CI_COMMIT_BRANCH` everywhere a value is expected. + - **Regex flags** — `/pattern/i`, `/pattern/m`, `/pattern/s` are now honoured; the `i` flag (case-insensitive) is translated to Go's `(?i)` prefix before compiling. Unknown flags are silently ignored. + - **Variable as regex RHS** — `$BRANCH =~ $PATTERN` where `$PATTERN` holds a `/regex/[flags]` string is now evaluated by extracting and compiling the pattern from the variable's value; if the value is empty or does not look like a regex literal the expression falls back to permissive `true`. + - **File and line numbers on findings** — every finding now includes the source file and line where the job is defined, e.g. `[ERROR] job "deploy" (src/deploy.yml:14): …`. For jobs that come from local or fetched includes the file reflects the include source. Pipeline-level findings (workflow rules, missing stages) reference the root pipeline file. - **`glint graph includes` shows jobs per file** — each node in the Mermaid include dependency graph now shows the jobs defined directly in that file. Jobs are rendered as rounded nodes (`(name)`) in a distinct light-purple style, connected with dashed arrows (`-.->`) to distinguish ownership from the include hierarchy (solid `-->` arrows). The root pipeline file always shows its direct jobs; local and fetched project/component nodes show theirs when the file can be read. diff --git a/Taskfile.yml b/Taskfile.yml index fb0253c..d07f431 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -59,6 +59,12 @@ tasks: ignore_error: false - cmd: ./{{.BINARY}} check --tag v1.0.0 testdata/context_rules.yml ignore_error: false + - cmd: ./{{.BINARY}} check testdata/rules_if_expr.yml + ignore_error: false + - cmd: ./{{.BINARY}} check --branch main testdata/rules_if_expr.yml + ignore_error: false + - cmd: ./{{.BINARY}} check --branch feat/x testdata/rules_if_expr.yml + ignore_error: false - cmd: ./{{.BINARY}} check samba-testdata/.gitlab-ci.yml ignore_error: false - cmd: ./{{.BINARY}} check samba-testdata/.gitlab-ci-coverage.yml diff --git a/internal/cicontext/eval.go b/internal/cicontext/eval.go index 44d5523..0e1914d 100644 --- a/internal/cicontext/eval.go +++ b/internal/cicontext/eval.go @@ -9,12 +9,15 @@ import ( // variable resolver. // // Supported: -// - Variable references: $VAR_NAME +// - Variable references: $VAR_NAME or ${VAR_NAME} // - String literals: "value" or 'value' // - Null keyword: null // - Comparison: == != =~ !~ // - Boolean: && || ! // - Grouping: ( ) +// - Regex flags: /pattern/i (case-insensitive), /pattern/m, /pattern/s +// - Multi-line: newlines between tokens are treated as whitespace +// - Variable regex RHS: $VAR =~ $PATTERN when $PATTERN holds a /regex/ string // // Regex patterns use Go's regexp syntax, which covers the common RE2 subset // used by GitLab CI. Unsupported or unparseable expressions fall back to true @@ -56,8 +59,13 @@ func (p *exprParser) consume(tok string) bool { } func (p *exprParser) skipWS() { - for p.pos < len(p.s) && (p.s[p.pos] == ' ' || p.s[p.pos] == '\t') { - p.pos++ + for p.pos < len(p.s) { + b := p.s[p.pos] + if b == ' ' || b == '\t' || b == '\n' || b == '\r' { + p.pos++ + continue + } + break } } @@ -67,11 +75,11 @@ func (p *exprParser) skipWS() { // and_expr → not_expr ( '&&' not_expr )* // not_expr → '!' not_expr | primary // primary → '(' or_expr ')' | comparison -// comparison → value ( op value | regex_op regex )? | value -// value → '$' ident | '"' … '"' | "'" … "'" | 'null' +// comparison → value ( op value | regex_op regex_rhs )? +// value → '$' '{' ident '}' | '$' ident | '"' … '"' | "'" … "'" | 'null' // op → '==' | '!=' // regex_op → '=~' | '!~' -// regex → '/' … '/' +// regex_rhs → '/' … '/' flags? | '$' ident (where ident value is '/…/flags') func (p *exprParser) parseOr() (bool, bool) { left, ok := p.parseAnd() @@ -165,8 +173,11 @@ func (p *exprParser) parseComparison() (bool, bool) { case p.consume("=~"): p.skipWS() - pat, ok := p.parseRegexLiteral() - if !ok { + pat, patOk, permissive := p.parseRegexRHS() + if permissive { + return true, true + } + if !patOk { return false, false } re, err := regexp.Compile(pat) @@ -177,8 +188,11 @@ func (p *exprParser) parseComparison() (bool, bool) { case p.consume("!~"): p.skipWS() - pat, ok := p.parseRegexLiteral() - if !ok { + pat, patOk, permissive := p.parseRegexRHS() + if permissive { + return true, true + } + if !patOk { return false, false } re, err := regexp.Compile(pat) @@ -192,13 +206,47 @@ func (p *exprParser) parseComparison() (bool, bool) { return leftStr != "", true } -// parseValue reads $VAR, "string", 'string', or null. +// parseRegexRHS parses the right-hand side of =~ / !~ operators. +// Returns (pattern, ok, permissive): +// - /regex/flags literal → (pattern, true, false) +// - $VAR whose value is /regex/flags → (pattern, true, false) +// - $VAR whose value is empty or not a /regex/ → ("", false, true) — caller uses permissive true +// - parse error → ("", false, false) +func (p *exprParser) parseRegexRHS() (pat string, ok bool, permissive bool) { + if p.peek() == '/' { + pat, ok = p.parseRegexLiteral() + return pat, ok, false + } + if p.peek() == '$' { + varVal, varOk := p.parseValue() + if !varOk { + return "", false, false + } + pat, ok = extractRegexFromString(varVal) + if !ok { + return "", false, true // variable is not a /regex/ value → permissive + } + return pat, true, false + } + return "", false, false +} + +// parseValue reads $VAR, ${VAR}, "string", 'string', or null. // null and undefined variables both produce an empty string. func (p *exprParser) parseValue() (string, bool) { p.skipWS() if p.peek() == '$' { p.pos++ // consume '$' + if p.peek() == '{' { + p.pos++ // consume '{' + name := p.parseIdent() + if name == "" || p.peek() != '}' { + return "", false + } + p.pos++ // consume '}' + return p.vars(name), true + } name := p.parseIdent() if name == "" { return "", false @@ -261,7 +309,8 @@ func (p *exprParser) parseRegexLiteral() (string, bool) { b := p.s[p.pos] if b == '/' { p.pos++ // consume closing '/' - return sb.String(), true + flags := p.parseRegexFlags() + return applyRegexFlags(flags, sb.String()), true } if b == '\\' && p.pos+1 < len(p.s) { p.pos++ @@ -275,6 +324,68 @@ func (p *exprParser) parseRegexLiteral() (string, bool) { return "", false // unterminated regex } +// parseRegexFlags reads zero or more regex flag letters (i, m, s) after the +// closing '/'. Unknown letters are consumed but ignored. +func (p *exprParser) parseRegexFlags() string { + start := p.pos + for p.pos < len(p.s) && isIdentByte(p.s[p.pos]) { + p.pos++ + } + return p.s[start:p.pos] +} + +// applyRegexFlags prepends Go regexp flag groups to pattern (e.g. (?i) for 'i'). +// Unknown flags are silently ignored. +func applyRegexFlags(flags, pattern string) string { + if flags == "" { + return pattern + } + var prefix strings.Builder + for _, f := range flags { + switch f { + case 'i': + prefix.WriteString("(?i)") + case 'm': + prefix.WriteString("(?m)") + case 's': + prefix.WriteString("(?s)") + } + } + return prefix.String() + pattern +} + +// extractRegexFromString parses a /pattern/flags string (typically from a CI +// variable) and returns a Go regexp pattern with flags applied. +func extractRegexFromString(s string) (string, bool) { + s = strings.TrimSpace(s) + if len(s) == 0 || s[0] != '/' { + return "", false + } + var sb strings.Builder + i := 1 + for i < len(s) { + b := s[i] + if b == '/' { + i++ // past closing '/' + var flags strings.Builder + for i < len(s) && isIdentByte(s[i]) { + flags.WriteByte(s[i]) + i++ + } + return applyRegexFlags(flags.String(), sb.String()), true + } + if b == '\\' && i+1 < len(s) { + i++ + sb.WriteByte('\\') + sb.WriteByte(s[i]) + } else { + sb.WriteByte(b) + } + i++ + } + return "", false // unterminated +} + func isIdentByte(b byte) bool { return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || (b >= '0' && b <= '9') || b == '_' } diff --git a/internal/cicontext/eval_test.go b/internal/cicontext/eval_test.go index 81f40d8..f6e7d1d 100644 --- a/internal/cicontext/eval_test.go +++ b/internal/cicontext/eval_test.go @@ -5,10 +5,14 @@ import "testing" func TestEvalIf(t *testing.T) { vars := func(key string) string { m := map[string]string{ - "CI_COMMIT_BRANCH": "develop", - "CI_COMMIT_TAG": "", + "CI_COMMIT_BRANCH": "develop", + "CI_COMMIT_TAG": "", "CI_PIPELINE_SOURCE": "push", - "DEPLOY_ENV": "staging", + "DEPLOY_ENV": "staging", + "BRANCH_PATTERN": "/^dev/", + "BRANCH_PATTERN_CI": "/^DEV/i", + "EMPTY_PATTERN": "", + "PLAIN_PATTERN": "develop", } return m[key] } @@ -69,6 +73,36 @@ func TestEvalIf(t *testing.T) { {"extra spaces", ` $CI_COMMIT_BRANCH == "develop" `, true}, {"tabs", "$CI_COMMIT_BRANCH\t==\t\"develop\"", true}, + // ── Multi-line expressions (newlines between tokens) ────────────────── + {"multiline or true", "$CI_COMMIT_BRANCH == \"develop\" ||\n$CI_COMMIT_TAG != null", true}, + {"multiline or false", "$CI_COMMIT_BRANCH == \"main\" ||\n$CI_COMMIT_TAG != null", false}, + {"multiline and true", "$CI_COMMIT_BRANCH == \"develop\" &&\n$CI_PIPELINE_SOURCE == \"push\"", true}, + {"multiline and false", "$CI_COMMIT_BRANCH == \"main\" &&\n$CI_PIPELINE_SOURCE == \"push\"", false}, + {"multiline with crlf", "$CI_COMMIT_BRANCH == \"develop\" ||\r\n$CI_COMMIT_TAG != null", true}, + + // ── ${VAR} curly-brace syntax ───────────────────────────────────────── + {"curly var eq match", `${CI_COMMIT_BRANCH} == "develop"`, true}, + {"curly var eq no match", `${CI_COMMIT_BRANCH} == "main"`, false}, + {"curly var truthiness", `${CI_COMMIT_BRANCH}`, true}, + {"curly var falsy", `${CI_COMMIT_TAG}`, false}, + {"curly var neq null", `${CI_COMMIT_BRANCH} != null`, true}, + {"curly mixed", `${CI_COMMIT_BRANCH} == "develop" && $CI_PIPELINE_SOURCE == "push"`, true}, + + // ── Regex flags (/pattern/i etc.) ───────────────────────────────────── + {"regex flag i match", `$CI_COMMIT_BRANCH =~ /^DEV/i`, true}, + {"regex flag i no match", `$CI_COMMIT_BRANCH =~ /^MAIN/i`, false}, + {"regex flag i not match", `$CI_COMMIT_BRANCH !~ /^MAIN/i`, true}, + {"regex no flag case sensitive", `$CI_COMMIT_BRANCH =~ /^DEV/`, false}, + {"regex flag i version tag", `$CI_PIPELINE_SOURCE =~ /^PUSH$/i`, true}, + + // ── Variable on right side of =~ ────────────────────────────────────── + {"var regex rhs match", `$CI_COMMIT_BRANCH =~ $BRANCH_PATTERN`, true}, + {"var regex rhs no match", `$CI_PIPELINE_SOURCE =~ $BRANCH_PATTERN`, false}, + {"var regex rhs ci flag match", `$CI_COMMIT_BRANCH =~ $BRANCH_PATTERN_CI`, true}, + {"var regex rhs empty permissive", `$CI_COMMIT_BRANCH =~ $EMPTY_PATTERN`, true}, + {"var regex rhs plain permissive", `$CI_COMMIT_BRANCH =~ $PLAIN_PATTERN`, true}, + {"var regex rhs not match", `$CI_COMMIT_BRANCH !~ $BRANCH_PATTERN`, false}, + // ── Permissive fallback ─────────────────────────────────────────────── {"unparseable returns true", `this is not valid syntax %%%`, true}, {"empty expr returns true", ``, true}, diff --git a/testdata/rules_if_expr.yml b/testdata/rules_if_expr.yml new file mode 100644 index 0000000..661d4c1 --- /dev/null +++ b/testdata/rules_if_expr.yml @@ -0,0 +1,51 @@ +--- +# rules_if_expr.yml +# Exercises the rules:if: expression evaluator for: +# - Multi-line block-scalar expressions (|| on next line) +# - ${VAR} curly-brace variable syntax +# - Regex flags (/pattern/i case-insensitive) +# - Parenthesised compound expressions +# Expected: exits 0 (lints clean with no context). + +stages: + - build + - deploy + +variables: + DEPLOY_ENVIRONMENTS: + value: "staging" + description: "Target deployment environment" + +build: + stage: build + script: make build + rules: + # Multi-line expression: || on next line + - if: | + $CI_COMMIT_BRANCH == "main" || + $CI_COMMIT_BRANCH == "develop" + when: on_success + - when: never + +deploy-feature: + stage: deploy + script: make deploy + rules: + # ${VAR} curly-brace syntax + - if: '${CI_COMMIT_BRANCH} != null && ${CI_COMMIT_TAG} == null' + when: manual + - when: never + +release: + stage: deploy + script: make release + rules: + # Case-insensitive regex flag + - if: '$CI_COMMIT_BRANCH =~ /^(main|master)$/i' + when: on_success + # Parenthesised compound with multi-line + - if: >- + ($CI_COMMIT_TAG != null) && + ($CI_PIPELINE_SOURCE == "push" || $CI_PIPELINE_SOURCE == "web") + when: on_success + - when: never