diff --git a/actions/ql/lib/change-notes/2025-06-09-bash-parsing-performance.md b/actions/ql/lib/change-notes/2025-06-09-bash-parsing-performance.md new file mode 100644 index 000000000000..5ee29557c85e --- /dev/null +++ b/actions/ql/lib/change-notes/2025-06-09-bash-parsing-performance.md @@ -0,0 +1,6 @@ +--- +category: minorAnalysis +--- +* Fixed performance issues in the parsing of Bash scripts in workflow files, + which led to out-of-disk errors when analysing certain workflow files with + complex interpolations of shell commands or quoted strings. \ No newline at end of file diff --git a/actions/ql/lib/codeql/actions/Bash.qll b/actions/ql/lib/codeql/actions/Bash.qll index 4519a8949d7c..4975ce6f4cc5 100644 --- a/actions/ql/lib/codeql/actions/Bash.qll +++ b/actions/ql/lib/codeql/actions/Bash.qll @@ -8,35 +8,64 @@ class BashShellScript extends ShellScript { ) } - private string lineProducer(int i) { - result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", i) - } - - private predicate cmdSubstitutionReplacement(string cmdSubs, string id, int k) { - exists(string line | line = this.lineProducer(k) | - exists(int i, int j | - cmdSubs = - // $() cmd substitution - line.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", i, j) - .regexpReplaceAll("^\\$\\(", "") - .regexpReplaceAll("\\)$", "") and - id = "cmdsubs:" + k + ":" + i + ":" + j - ) - or - exists(int i, int j | - // `...` cmd substitution - cmdSubs = - line.regexpFind("\\`[^\\`]+\\`", i, j) - .regexpReplaceAll("^\\`", "") - .regexpReplaceAll("\\`$", "") and - id = "cmd:" + k + ":" + i + ":" + j - ) + /** + * Gets the line at 0-based index `lineIndex` within this shell script, + * assuming newlines as separators. + */ + private string lineProducer(int lineIndex) { + result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", lineIndex) + } + + private predicate cmdSubstitutionReplacement(string command, string id, int lineIndex) { + this.commandInSubstitution(lineIndex, command, id) + or + this.commandInBackticks(lineIndex, command, id) + } + + /** + * Holds if there is a command substitution `$(command)` in + * the line at `lineIndex` in the shell script, + * and `id` is a unique identifier for this command. + */ + private predicate commandInSubstitution(int lineIndex, string command, string id) { + exists(int occurrenceIndex, int occurrenceOffset | + command = + // Look for the command inside a $(...) command substitution + this.lineProducer(lineIndex) + .regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", occurrenceIndex, + occurrenceOffset) + // trim starting $( - TODO do this in first regex + .regexpReplaceAll("^\\$\\(", "") + // trim ending ) - TODO do this in first regex + .regexpReplaceAll("\\)$", "") and + id = "cmdsubs:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset ) } - private predicate rankedCmdSubstitutionReplacements(int i, string old, string new) { - old = rank[i](string old2 | this.cmdSubstitutionReplacement(old2, _, _) | old2) and - this.cmdSubstitutionReplacement(old, new, _) + /** + * Holds if `command` is a command in backticks `` `...` `` in + * the line at `lineIndex` in the shell script, + * and `id` is a unique identifier for this command. + */ + private predicate commandInBackticks(int lineIndex, string command, string id) { + exists(int occurrenceIndex, int occurrenceOffset | + command = + this.lineProducer(lineIndex) + .regexpFind("\\`[^\\`]+\\`", occurrenceIndex, occurrenceOffset) + // trim leading backtick - TODO do this in first regex + .regexpReplaceAll("^\\`", "") + // trim trailing backtick - TODO do this in first regex + .regexpReplaceAll("\\`$", "") and + id = "cmd:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ) + } + + private predicate rankedCmdSubstitutionReplacements(int i, string command, string commandId) { + // rank commands by their unique IDs + commandId = rank[i](string c, string id | this.cmdSubstitutionReplacement(c, id, _) | id) and + // since we cannot output (command, ID) tuples from the rank operation, + // we need to work out the specific command associated with the resulting ID + this.cmdSubstitutionReplacement(command, commandId, _) } private predicate doReplaceCmdSubstitutions(int line, int round, string old, string new) { @@ -64,31 +93,56 @@ class BashShellScript extends ShellScript { this.cmdSubstitutionReplacement(result, _, i) } + /** + * Holds if `quotedStr` is a string in double quotes in + * the line at `lineIndex` in the shell script, + * and `id` is a unique identifier for this quoted string. + */ + private predicate doubleQuotedString(int lineIndex, string quotedStr, string id) { + exists(int occurrenceIndex, int occurrenceOffset | + // double quoted string + quotedStr = + this.cmdSubstitutedLineProducer(lineIndex) + .regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", occurrenceIndex, occurrenceOffset) and + id = + "qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" + + quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "") + ) + } + + /** + * Holds if `quotedStr` is a string in single quotes in + * the line at `lineIndex` in the shell script, + * and `id` is a unique identifier for this quoted string. + */ + private predicate singleQuotedString(int lineIndex, string quotedStr, string id) { + exists(int occurrenceIndex, int occurrenceOffset | + // single quoted string + quotedStr = + this.cmdSubstitutedLineProducer(lineIndex) + .regexpFind("'((?:\\\\.|[^'\\\\])*)'", occurrenceIndex, occurrenceOffset) and + id = + "qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" + + quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "") + ) + } + private predicate quotedStringReplacement(string quotedStr, string id) { - exists(string line, int k | line = this.cmdSubstitutedLineProducer(k) | - exists(int i, int j | - // double quoted string - quotedStr = line.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", i, j) and - id = - "qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" + - quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "") - ) + exists(int lineIndex | + this.doubleQuotedString(lineIndex, quotedStr, id) or - exists(int i, int j | - // single quoted string - quotedStr = line.regexpFind("'((?:\\\\.|[^'\\\\])*)'", i, j) and - id = - "qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" + - quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "") - ) + this.singleQuotedString(lineIndex, quotedStr, id) ) and // Only do this for strings that might otherwise disrupt subsequent parsing quotedStr.regexpMatch("[\"'].*[$\n\r'\"" + Bash::separator() + "].*[\"']") } - private predicate rankedQuotedStringReplacements(int i, string old, string new) { - old = rank[i](string old2 | this.quotedStringReplacement(old2, _) | old2) and - this.quotedStringReplacement(old, new) + private predicate rankedQuotedStringReplacements(int i, string quotedString, string quotedStringId) { + // rank quoted strings by their nearly-unique IDs + quotedStringId = rank[i](string s, string id | this.quotedStringReplacement(s, id) | id) and + // since we cannot output (string, ID) tuples from the rank operation, + // we need to work out the specific string associated with the resulting ID + this.quotedStringReplacement(quotedString, quotedStringId) } private predicate doReplaceQuotedStrings(int line, int round, string old, string new) { diff --git a/actions/ql/test/query-tests/Security/CWE-094/.github/workflows/interpolation.yml b/actions/ql/test/query-tests/Security/CWE-094/.github/workflows/interpolation.yml new file mode 100644 index 000000000000..2b719a3a38ab --- /dev/null +++ b/actions/ql/test/query-tests/Security/CWE-094/.github/workflows/interpolation.yml @@ -0,0 +1,81 @@ +name: Workflow with complex interpolation +on: + workflow_dispatch: + inputs: + choice-a: + required: true + type: choice + description: choice-a + default: a1 + options: + - a1 + - a2 + - a3 + string-b: + required: false + type: string + description: string-b + string-c: + required: false + type: string + description: string-c + list-d: + required: true + type: string + default: d1 d2 + description: list-d whitespace separated + list-e: + required: false + type: string + description: list-e whitespace separated + choice-f: + required: true + type: choice + description: choice-f + options: + - false + - true + +env: + DRY_TEST: false + B: ${{ github.event.inputs.string-b }} + +jobs: + job: + runs-on: ubuntu-latest + steps: + - name: Produce values + id: produce-values + run: | + echo "region=region" >> $GITHUB_OUTPUT + echo "zone=zone" >> $GITHUB_OUTPUT + + - name: Step with complex interpolation + id: complex + env: + CHOICE_A: ${{ github.event.inputs.choice-a }} + STRING_B: ${{ github.event.inputs.string-b }} + STRING_C: ${{ github.event.inputs.string-c }} + LIST_D: ${{ github.event.inputs.list-d }} + LIST_E: ${{ github.event.inputs.list-e }} + CHOICE_F: ${{ github.event.inputs.choice-f }} + REGION: ${{ steps.produce-values.outputs.region }} + ZONE: ${{ steps.produce-values.outputs.zone }} + DRY_TEST_JSON: ${{ fromJSON(env.DRY_TEST) }} + FUNCTION_NAME: my-function + USER_EMAIL: 'example@example.com' + TYPE: type + RANGE: '0-100' + + run: | + comma_separated_list_d=$(echo "${LIST_D}" | sed "s/ /\",\"/g") + comma_separated_list_e=$(echo "${LIST_E}" | sed "s/ /\",\"/g") + c1=$(echo "${STRING_C}" | cut -d "-" -f 1) + c2=$(echo "${STRING_C}" | cut -d "-" -f 2) + # Similar commands that use JSON payloads with string interpolation. + response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail) + response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail) + response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail) + response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail) + response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":["'"${comma_separated_list_e}"'"],"dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail) + shell: bash