@@ -4,11 +4,19 @@ float highConfidence() { result = 0.9 }
44
/** Gets the confidence value (0.6) used for medium-confidence characteristics. */
55float mediumConfidence ( ) { result = 0.6 }
66
7+ /**
8+ * A specification of how to instantiate the shared characteristics for a given candidate class.
9+ *
10+ * The `CandidateSig` implementation specifies a type to use for Endpoints (e.g., `ParameterNode`), as well as a type
11+ * to label endpoint classes (the `EndpointType`). One of the endpoint classes needs to be a 'negative' class, meaning
12+ * "not any of the other known endpoint types".
13+ */
714signature module CandidateSig {
815 class Endpoint ;
916
1017 class EndpointType ;
1118
19+ /** The string representing the file+range of the endpoint. */
1220 string getLocationString ( Endpoint e ) ;
1321
1422 /**
@@ -22,6 +30,11 @@ signature module CandidateSig {
2230 */
2331 predicate isNegative ( EndpointType t ) ;
2432
33+ /**
34+ * Should hold for any endpoint that is a flow sanitizer.
35+ */
36+ predicate isSanitizer ( Endpoint e , EndpointType t ) ;
37+
2538 /**
2639 * Should hold for any endpoint that is a sink of the given (known or unknown) label.
2740 */
@@ -37,11 +50,23 @@ signature module CandidateSig {
3750 *
3851 * This is a helper function to extract and export needed information about each endpoint in the sink candidate query
3952 * as well as the queries that extract positive and negative examples for the prompt / training set. The metadata is
40- * extracted as a string in the format of a Python dictionary.
53+ * extracted as a string in the format of a Python dictionary, e.g.:
54+ *
55+ * `{'Package': 'com.foo.util', 'Type': 'HelperClass', ... }`.
56+ *
57+ * The metadata will be passed on to the machine learning code by the extraction queries.
4158 */
4259 predicate hasMetadata ( Endpoint e , string metadata ) ;
4360}
4461
62+ /**
63+ * A set of shared characteristics for a given candidate class.
64+ *
65+ * This module is language-agnostic, although the `CandidateSig` module will be language-specific.
66+ *
67+ * The language-specific implementation can also further extend the behaviour of this module by adding additional
68+ * implementations of endpoint characteristics exported by this module.
69+ */
4570module SharedCharacteristics< CandidateSig Candidate> {
4671 predicate isNegative ( Candidate:: EndpointType e ) { Candidate:: isNegative ( e ) }
4772
@@ -159,20 +184,6 @@ module SharedCharacteristics<CandidateSig Candidate> {
159184 }
160185 }
161186
162- /**
163- * Endpoints identified as sinks by the MaD modeling are sinks with maximal confidence.
164- */
165- private class KnownSinkCharacteristic extends SinkCharacteristic {
166- string madLabel ;
167- Candidate:: EndpointType endpointType ;
168-
169- KnownSinkCharacteristic ( ) { Candidate:: isKnownLabel ( madLabel , this , endpointType ) }
170-
171- override predicate appliesToEndpoint ( Candidate:: Endpoint e ) { Candidate:: isSink ( e , madLabel ) }
172-
173- override Candidate:: EndpointType getSinkType ( ) { result = endpointType }
174- }
175-
176187 /**
177188 * A high-confidence characteristic that indicates that an endpoint is not a sink of any type. These endpoints can be
178189 * used as negative samples for training or for a few-shot prompt.
@@ -190,33 +201,6 @@ module SharedCharacteristics<CandidateSig Candidate> {
190201 }
191202 }
192203
193- /**
194- * A negative characteristic that indicates that an endpoint is not part of the source code for the project being
195- * analyzed.
196- *
197- * WARNING: These endpoints should not be used as negative samples for training, because they are not necessarily
198- * non-sinks. They are merely not interesting sinks to run through the ML model.
199- */
200- private class IsExternalCharacteristic extends LikelyNotASinkCharacteristic {
201- IsExternalCharacteristic ( ) { this = "external" }
202-
203- override predicate appliesToEndpoint ( Candidate:: Endpoint e ) {
204- not exists ( Candidate:: getLocationString ( e ) )
205- }
206- }
207-
208- /**
209- * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
210- *
211- * TODO: It may be necessary to turn this into a LikelyNotASinkCharacteristic, pending answers to the definition of a
212- * neutral model (https://github.com/github/codeql-java-team/issues/254#issuecomment-1435309148).
213- */
214- private class NeutralModelCharacteristic extends NotASinkCharacteristic {
215- NeutralModelCharacteristic ( ) { this = "known non-sink" }
216-
217- override predicate appliesToEndpoint ( Candidate:: Endpoint e ) { Candidate:: isNeutral ( e ) }
218- }
219-
220204 /**
221205 * A medium-confidence characteristic that indicates that an endpoint is unlikely to be a sink of any type. These
222206 * endpoints can be excluded from scoring at inference time, both to save time and to avoid false positives. They should
@@ -256,4 +240,42 @@ module SharedCharacteristics<CandidateSig Candidate> {
256240 confidence = mediumConfidence ( )
257241 }
258242 }
243+
244+ /**
245+ * Contains default implementations that are derived solely from the `CandidateSig` implementation.
246+ */
247+ private module DefaultCharacteristicImplementations {
248+ /**
249+ * Endpoints identified as sinks by the `CandidateSig` implementation are sinks with maximal confidence.
250+ */
251+ private class KnownSinkCharacteristic extends SinkCharacteristic {
252+ string madLabel ;
253+ Candidate:: EndpointType endpointType ;
254+
255+ KnownSinkCharacteristic ( ) { Candidate:: isKnownLabel ( madLabel , this , endpointType ) }
256+
257+ override predicate appliesToEndpoint ( Candidate:: Endpoint e ) { Candidate:: isSink ( e , madLabel ) }
258+
259+ override Candidate:: EndpointType getSinkType ( ) { result = endpointType }
260+ }
261+
262+ /**
263+ * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model.
264+ */
265+ private class NeutralModelCharacteristic extends NotASinkCharacteristic {
266+ NeutralModelCharacteristic ( ) { this = "known non-sink" }
267+
268+ override predicate appliesToEndpoint ( Candidate:: Endpoint e ) { Candidate:: isNeutral ( e ) }
269+ }
270+
271+ /**
272+ * A negative characteristic that indicates that an endpoint is a flow sanitizer (for any endpoint type), and
273+ * is therefore not a sink.
274+ */
275+ private class IsSanitizerCharacteristic extends NotASinkCharacteristic {
276+ IsSanitizerCharacteristic ( ) { this = "known sanitizer" }
277+
278+ override predicate appliesToEndpoint ( Candidate:: Endpoint e ) { Candidate:: isSanitizer ( e , _) }
279+ }
280+ }
259281}
0 commit comments