diff --git a/javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md b/javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/javascript/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll +++ b/javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/python/ql/lib/change-notes/2025-01-10-sensitive-data.md b/python/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/python/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll +++ b/python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md b/ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/ruby/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll +++ b/ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /** diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index 858526642c61..ff5496fb7361 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -21,20 +21,28 @@ impl MyStruct { fn get_password() -> String { get_string() } fn test_passwords( - password: &str, passwd: &str, my_password: &str, password_str: &str, pass_phrase: &str, - auth_key: &str, authenticationkey: &str, authKey: &str, + password: &str, pass_word: &str, passwd: &str, my_password: &str, password_str: &str, + pass_phrase: &str, passphrase: &str, passPhrase: &str, + auth_key: &str, authkey: &str, authKey: &str, authentication_key: &str, authenticationkey: &str, authenticationKey: &str, harmless: &str, encrypted_password: &str, password_hash: &str, ms: &MyStruct ) { // passwords sink(password); // $ sensitive=password + sink(pass_word); // $ MISSING: sensitive=password sink(passwd); // $ sensitive=password sink(my_password); // $ sensitive=password sink(password_str); // $ sensitive=password - sink(pass_phrase); // $ MISSING: sensitive=password - sink(auth_key); // $ MISSING: sensitive=password - sink(authenticationkey); // $ sensitive=password + sink(pass_phrase); // $ sensitive=password + sink(passphrase); // $ sensitive=password + sink(passPhrase); // $ sensitive=password + + sink(auth_key); // $ sensitive=password + sink(authkey); // $ sensitive=password sink(authKey); // $ sensitive=password + sink(authentication_key); // $ sensitive=password + sink(authenticationkey); // $ sensitive=password + sink(authenticationKey); // $ sensitive=password sink(ms); // $ MISSING: sensitive=password sink(ms.password.as_str()); // $ MISSING: sensitive=password @@ -65,7 +73,9 @@ fn get_secret_token() -> String { get_string() } fn get_next_token() -> String { get_string() } fn test_credentials( - account_key: &str, accnt_key: &str, license_key: &str, secret_key: &str, is_secret: bool, num_accounts: i64, uid: i64, + account_key: &str, accnt_key: &str, license_key: &str, secret_key: &str, is_secret: bool, num_accounts: i64, + username: String, user_name: String, userid: i64, user_id: i64, my_user_id_64: i64, unique_id: i64, uid: i64, + sessionkey: &[u64; 4], session_key: &[u64; 4], hashkey: &[u64; 4], hash_key: &[u64; 4], ms: &MyStruct ) { // credentials @@ -74,6 +84,15 @@ fn test_credentials( sink(license_key); // $ MISSING: sensitive=secret sink(secret_key); // $ sensitive=secret + sink(username); // $ sensitive=id + sink(user_name); // $ sensitive=id + sink(userid); // $ sensitive=id + sink(user_id); // $ sensitive=id + sink(my_user_id_64); // $ sensitive=id + + sink(sessionkey); // $ sensitive=id + sink(session_key); // $ sensitive=id + sink(ms.get_certificate()); // $ sensitive=certificate sink(generate_secret_key()); // $ sensitive=secret @@ -81,10 +100,13 @@ fn test_credentials( sink(get_private_key()); // $ MISSING: sensitive=secret sink(get_secret_token()); // $ sensitive=secret - // not credentials + // not (necessarily) credentials sink(is_secret); sink(num_accounts); // $ SPURIOUS: sensitive=id + sink(unique_id); sink(uid); // $ SPURIOUS: sensitive=id + sink(hashkey); + sink(hash_key); sink(ms.get_certificate_url()); // $ SPURIOUS: sensitive=certificate sink(ms.get_certificate_file()); // $ SPURIOUS: sensitive=certificate diff --git a/swift/ql/lib/change-notes/2025-01-10-sensitive-data.md b/swift/ql/lib/change-notes/2025-01-10-sensitive-data.md new file mode 100644 index 000000000000..5eac93cb1a6b --- /dev/null +++ b/swift/ql/lib/change-notes/2025-01-10-sensitive-data.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The sensitive data library has been improved so that `snake_case` style variable names are recognized more reliably. This may result in more sensitive data being identified, and more results from queries that use the sensitive data library. diff --git a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll index b1cdd64d245d..044b2a054d7f 100644 --- a/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll +++ b/swift/ql/lib/codeql/swift/security/SensitiveExprs.qll @@ -34,8 +34,6 @@ class SensitivePassword extends SensitiveDataType, TPassword { override string getRegexp() { result = HeuristicNames::maybeSensitiveRegexp(SensitiveDataClassification::password()) - or - result = "(?is).*pass.?phrase.*" } } diff --git a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll index eb8a0c1fe756..ede88ebf8149 100644 --- a/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll +++ b/swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll @@ -62,7 +62,7 @@ module HeuristicNames { */ string maybeAccountInfo() { result = "(?is).*acc(ou)?nt.*" or - result = "(?is).*(puid|username|userid|session(id|key)).*" or + result = "(?is).*(puid|user.?name|user.?id|session.?(id|key)).*" or result = "(?s).*([uU]|^|_|[a-z](?=U))([uU][iI][dD]).*" } @@ -71,8 +71,8 @@ module HeuristicNames { * a password or an authorization key. */ string maybePassword() { - result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or - result = "(?is).*(auth(entication|ori[sz]ation)?)key.*" + result = "(?is).*pass(wd|word|code|.?phrase)(?!.*question).*" or + result = "(?is).*(auth(entication|ori[sz]ation)?).?key.*" } /**