From 88e351ad4fa692baff61fe7ad1731f728ebed841 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 10 Jan 2025 09:42:22 +0000 Subject: [PATCH 1/5] Rust: Expand sensitive data test cases. --- .../test/library-tests/sensitivedata/test.rs | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index 858526642c61..aa688a07fb7c 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -21,20 +21,28 @@ impl MyStruct { fn get_password() -> String { get_string() } fn test_passwords( - password: &str, passwd: &str, my_password: &str, password_str: &str, pass_phrase: &str, - auth_key: &str, authenticationkey: &str, authKey: &str, + password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, + pass_phrase: &str, passphrase: &str, passPhrase: &str, + auth_key: &str, authkey: &str, authKey: &str, authentication_key: &str, authenticationkey: &str, authenticationKey: &str, harmless: &str, encrypted_password: &str, password_hash: &str, ms: &MyStruct ) { // passwords sink(password); // $ sensitive=password + sink(pass_word); // $ MISSING: sensitive=password sink(passwd); // $ sensitive=password sink(my_password); // $ sensitive=password sink(password_str); // $ sensitive=password sink(pass_phrase); // $ MISSING: sensitive=password + sink(passphrase); // $ sensitive=password + sink(passPhrase); // $ sensitive=password + sink(auth_key); // $ MISSING: sensitive=password - sink(authenticationkey); // $ sensitive=password + sink(authkey); // $ sensitive=password sink(authKey); // $ sensitive=password + sink(authentication_key); // $ MISSING: sensitive=password + sink(authenticationkey); // $ sensitive=password + sink(authenticationKey); // $ sensitive=password sink(ms); // $ MISSING: sensitive=password sink(ms.password.as_str()); // $ MISSING: sensitive=password @@ -65,7 +73,9 @@ fn get_secret_token() -> String { get_string() } fn get_next_token() -> String { get_string() } fn test_credentials( - account_key: &str, accnt_key: &str, license_key: &str, secret_key: &str, is_secret: bool, num_accounts: i64, uid: i64, + account_key: &str, accnt_key: &str, license_key: &str, secret_key: &str, is_secret: bool, num_accounts: i64, + username: String, user_name: String, userid: i64, user_id: i64, my_user_id_64: i64, unique_id: i64, uid: i64, + sessionkey: &[u64; 4], session_key: &[u64; 4], hashkey: &[u64; 4], hash_key: &[u64; 4], ms: &MyStruct ) { // credentials @@ -74,6 +84,15 @@ fn test_credentials( sink(license_key); // $ MISSING: sensitive=secret sink(secret_key); // $ sensitive=secret + sink(username); // $ sensitive=id + sink(user_name); // $ MISSING: sensitive=id + sink(userid); // $ sensitive=id + sink(user_id); // $ MISSING: sensitive=id + sink(my_user_id_64); // $ MISSING: sensitive=id + + sink(sessionkey); // $ sensitive=id + sink(session_key); // $ MISSING: sensitive=id + sink(ms.get_certificate()); // $ sensitive=certificate sink(generate_secret_key()); // $ sensitive=secret @@ -81,10 +100,13 @@ fn test_credentials( sink(get_private_key()); // $ MISSING: sensitive=secret sink(get_secret_token()); // $ sensitive=secret - // not credentials + // not (necessarily) credentials sink(is_secret); sink(num_accounts); // $ SPURIOUS: sensitive=id + sink(unique_id); sink(uid); // $ SPURIOUS: sensitive=id + sink(hashkey); + sink(hash_key); sink(ms.get_certificate_url()); // $ SPURIOUS: sensitive=certificate sink(ms.get_certificate_file()); // $ SPURIOUS: sensitive=certificate From 9a8a852277cac3129a15cc478cb95a96780c72a8 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 10 Jan 2025 09:44:51 +0000 Subject: [PATCH 2/5] Rust: Support snake case more widely in SensitiveDataHeuristics.qll. --- .../security/internal/SensitiveDataHeuristics.qll | 6 +++--- rust/ql/test/library-tests/sensitivedata/test.rs | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index aa688a07fb7c..ff5496fb7361 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -33,14 +33,14 @@ fn test_passwords( sink(passwd); // $ sensitive=password sink(my_password); // $ sensitive=password sink(password_str); // $ sensitive=password - sink(pass_phrase); // $ MISSING: sensitive=password + sink(pass_phrase); // $ sensitive=password sink(passphrase); // $ sensitive=password sink(passPhrase); // $ sensitive=password - sink(auth_key); // $ MISSING: sensitive=password + sink(auth_key); // $ sensitive=password sink(authkey); // $ sensitive=password sink(authKey); // $ sensitive=password - sink(authentication_key); // $ MISSING: sensitive=password + sink(authentication_key); // $ sensitive=password sink(authenticationkey); // $ sensitive=password sink(authenticationKey); // $ sensitive=password @@ -85,13 +85,13 @@ fn test_credentials( sink(secret_key); // $ sensitive=secret sink(username); // $ sensitive=id - sink(user_name); // $ MISSING: sensitive=id + sink(user_name); // $ sensitive=id sink(userid); // $ sensitive=id - sink(user_id); // $ MISSING: sensitive=id - sink(my_user_id_64); // $ MISSING: sensitive=id + sink(user_id); // $ sensitive=id + sink(my_user_id_64); // $ sensitive=id sink(sessionkey); // $ sensitive=id - sink(session_key); // $ MISSING: sensitive=id + sink(session_key); // $ sensitive=id sink(ms.get_certificate()); // $ sensitive=certificate From f8659c0a4eb319b84bcd0269b64d35f0375814e0 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:26:13 +0000 Subject: [PATCH 3/5] Sync identical files. --- .../security/internal/SensitiveDataHeuristics.qll | 6 +++--- .../python/security/internal/SensitiveDataHeuristics.qll | 6 +++--- .../ruby/security/internal/SensitiveDataHeuristics.qll | 6 +++--- .../swift/security/internal/SensitiveDataHeuristics.qll | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll +++ b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** From 488738a8b80e1f115946e40f5791aa547e160255 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 10 Jan 2025 10:31:55 +0000 Subject: [PATCH 4/5] Swift: Remove special case that is now redundant. --- swift/ql/lib/codeql/swift/security/SensitiveExprs.qll | 2 -- 1 file changed, 2 deletions(-) diff --git a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll index b1cdd64d245d..044b2a054d7f 100644 --- a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll +++ b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll @@ -34,8 +34,6 @@ class SensitivePassword extends SensitiveDataType, TPassword { override string getRegexp() { result = HeuristicNames::maybeSensitiveRegexp(SensitiveDataClassification::password()) - or - result = "(?is).*pass.?phrase.*" } } From 5ef5b04aac85dd4d3f3eeea4adb54ea4ed5603f0 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 10 Jan 2025 11:16:53 +0000 Subject: [PATCH 5/5] Add change notes. --- javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md | 4 ++++ python/ql/lib/change-notes/2025-01-10-sensitive-data.md | 4 ++++ ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md | 4 ++++ swift/ql/lib/change-notes/2025-01-10-sensitive-data.md | 4 ++++ 4 files changed, 16 insertions(+) create mode 100644 javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md create mode 100644 python/ql/lib/change-notes/2025-01-10-sensitive-data.md create mode 100644 ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md create mode 100644 swift/ql/lib/change-notes/2025-01-10-sensitive-data.md diff --git a/javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md b/javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/python/ql/lib/change-notes/2025-01-10-sensitive-data.md b/python/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/python/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md b/ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/swift/ql/lib/change-notes/2025-01-10-sensitive-data.md b/swift/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/swift/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library.