Skip to content

Commit 09f3296

Browse files
author
Stephan Brandauer
committed
export related locations using `$@` notation
1 parent f1644ad commit 09f3296

File tree

5 files changed

+62
-30
lines changed

5 files changed

+62
-30
lines changed

java/ql/src/Telemetry/AutomodelEndpointCharacteristics.qll

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44

55
private import java
6+
private import semmle.code.Location as Location
67
private import semmle.code.java.dataflow.DataFlow
78
private import semmle.code.java.dataflow.TaintTracking
89
private import semmle.code.java.security.PathCreation
@@ -23,10 +24,12 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
2324

2425
class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
2526

27+
/** The type used for related locations: an alias of `Location::Top` from `semmle.code.Location`. */
class RelatedLocation = Location::Top;
28+
2629
// Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
2730
predicate isSanitizer(Endpoint e, EndpointType t) { none() }
2831

29-
string getLocationString(Endpoint e) { result = e.getLocation().toString() }
32+
/** Gets the related location that stands for endpoint `e` itself, namely the result of `e.asParameter()`. */
RelatedLocation toRelatedLocation(Endpoint e) { result = e.asParameter() }
3033

3134
predicate isKnownLabel(string label, string humanReadableLabel, EndpointType type) {
3235
label = "read-file" and
@@ -87,11 +90,9 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
8790
predicate hasMetadata(Endpoint e, string metadata) {
// Builds `metadata` for endpoint `e` as a brace-delimited list of `'key': value` pairs.
// String values are wrapped in single quotes; the integer argument index is emitted bare.
8891
exists(
8992
string package, string type, boolean subtypes, string name, string signature, string ext,
90-
int input, string provenance, boolean isPublic, boolean isFinal, boolean isStatic,
91-
string callableJavaDoc
93+
int input, boolean isPublic, boolean isFinal, boolean isStatic
9294
|
93-
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic,
94-
callableJavaDoc) and
95+
hasMetadata(e, package, type, name, signature, input, isFinal, isStatic, isPublic) and
9596
(if isFinal = true or isStatic = true then subtypes = false else subtypes = true) and
9697
ext = "" and
9798
/*
@@ -100,7 +101,6 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
100101
* a certain annotation.
101102
*/
102103

103-
provenance = "ai-generated" and
104104
metadata =
105105
"{" //
106106
+ "'Package': '" + package //
@@ -109,14 +109,18 @@ module CandidatesImpl implements SharedCharacteristics::CandidateSig {
109109
+ ", 'Name': '" + name //
110110
// Close the quote opened before `name` so that the 'Name' value is properly delimited.
+ "', 'ParamName': '" + e.toString() //
111111
+ "', 'Signature': '" + signature //
112-
+ "', 'Ext': '" + ext //
113112
+ "', 'Argument index': " + input //
114-
+ ", 'Provenance': '" + provenance //
115-
+ "', 'Is public': " + isPublic //
116-
+ "', 'Callable JavaDoc': '" + callableJavaDoc.replaceAll("'", "\"") //
117113
// 'Argument index' is an unquoted integer, so only the closing brace follows it (no stray quote).
+ "}" // TODO: Why are the curly braces added twice?
118114
)
119115
}
116+
117+
/**
 * Gets the related location of endpoint `e` that is registered under `name`:
 * - "Callable-JavaDoc": the Javadoc of the callable enclosing `e`;
 * - "Class-JavaDoc": the Javadoc of the type that declares that callable.
 * Has no result for any other `name`, or when the requested Javadoc does not exist.
 */
RelatedLocation getRelatedLocation(Endpoint e, string name) {
118+
name = "Callable-JavaDoc" and
119+
result = e.getEnclosingCallable().(Documentable).getJavadoc()
120+
or
121+
name = "Class-JavaDoc" and
122+
result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc()
123+
}
120124
}
121125

122126
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<CandidatesImpl>;
@@ -136,7 +140,7 @@ class Endpoint = CandidatesImpl::Endpoint;
136140
*/
137141
predicate hasMetadata(
138142
Endpoint n, string package, string type, string name, string signature, int input,
139-
boolean isFinal, boolean isStatic, boolean isPublic, string callableJavaDoc
143+
boolean isFinal, boolean isStatic, boolean isPublic
140144
) {
141145
exists(Callable callable |
142146
n.asParameter() = callable.getParameter(input) and
@@ -154,10 +158,7 @@ predicate hasMetadata(
154158
) and
155159
name = callable.getSourceDeclaration().getName() and
156160
signature = ExternalFlow::paramsString(callable) and // TODO: Why are brackets being escaped (`\[\]` vs `[]`)?
157-
(if callable.isPublic() then isPublic = true else isPublic = false) and
158-
if exists(callable.(Documentable).getJavadoc())
159-
then callableJavaDoc = callable.(Documentable).getJavadoc().toString()
160-
else callableJavaDoc = ""
161+
(if callable.isPublic() then isPublic = true else isPublic = false)
161162
)
162163
}
163164

java/ql/src/Telemetry/AutomodelExtractCandidates.ql

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,29 @@
1414

1515
import AutomodelEndpointCharacteristics
1616

17-
from Endpoint sinkCandidate, string message
17+
from Endpoint endpoint, string message
1818
where
1919
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
20-
u.appliesToEndpoint(sinkCandidate)
20+
u.appliesToEndpoint(endpoint)
2121
) and
2222
// If a node is already a known sink for any of our existing ATM queries and is already modeled as a MaD sink, we
2323
// don't include it as a candidate. Otherwise, we might include it as a candidate for query A, but the model will
2424
// label it as a sink for one of the sink types of query B, for which it's already a known sink. This would result in
2525
// overlap between our detected sinks and the pre-existing modeling. We assume that, if a sink has already been
2626
// modeled in a MaD model, then it doesn't belong to any additional sink types, and we don't need to reexamine it.
27-
not CharacteristicsImpl::isSink(sinkCandidate, _) and
27+
not CharacteristicsImpl::isSink(endpoint, _) and
2828
// The message is the concatenation of all sink types for which this endpoint is known neither to be a sink nor to be
2929
// a non-sink, and we surface only endpoints that have at least one such sink type.
3030
message =
3131
strictconcat(AutomodelEndpointTypes::SinkType sinkType |
32-
not CharacteristicsImpl::isKnownSink(sinkCandidate, sinkType) and
33-
CharacteristicsImpl::isSinkCandidate(sinkCandidate, sinkType)
32+
not CharacteristicsImpl::isKnownSink(endpoint, sinkType) and
33+
CharacteristicsImpl::isSinkCandidate(endpoint, sinkType)
3434
|
3535
sinkType + ", "
3636
) + "\n" +
3737
// Extract the needed metadata for this endpoint.
38-
any(string metadata | CharacteristicsImpl::hasMetadata(sinkCandidate, metadata))
39-
select sinkCandidate, message
38+
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
39+
// Each `$@` in the message is a placeholder for one of the (location, label) pairs that follow the message:
// first the "Callable-JavaDoc" location, then the "Class-JavaDoc" location. When a Javadoc is missing,
// `getRelatedLocationOrCandidate` falls back to the endpoint itself.
select endpoint, message + "\nrelated locations: $@, $@", //
40+
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
41+
"Callable-JavaDoc", //
42+
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //

java/ql/src/Telemetry/AutomodelExtractNegativeExamples.ql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,7 @@ where
3333
characteristic + "\n" +
3434
// Extract the needed metadata for this endpoint.
3535
any(string metadata | CharacteristicsImpl::hasMetadata(endpoint, metadata))
36-
select endpoint, message
36+
select endpoint, message + "\nrelated locations: $@, $@",
37+
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Callable-JavaDoc"),
38+
"Callable-JavaDoc", //
39+
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, "Class-JavaDoc"), "Class-JavaDoc" //

java/ql/src/Telemetry/AutomodelExtractPositiveExamples.ql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,7 @@ where
3333
message =
3434
"Error: There are erroneous endpoints! Please check whether there's a codex-generated data extension file in `java/ql/lib/ext`."
3535
)
36-
select sink, message
36+
select sink, message + "\nrelated locations: $@, $@",
37+
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Callable-JavaDoc"),
38+
"Callable-JavaDoc", //
39+
CharacteristicsImpl::getRelatedLocationOrCandidate(sink, "Class-JavaDoc"), "Class-JavaDoc" //

java/ql/src/Telemetry/AutomodelSharedCharacteristics.qll

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,28 @@ float mediumConfidence() { result = 0.6 }
1212
* "not any of the other known endpoint types".
1313
*/
1414
signature module CandidateSig {
15+
/**
16+
* An endpoint is a potential candidate for modelling. This will typically be bound to the language's
17+
* DataFlow node class, or a subtype thereof.
18+
*/
1519
class Endpoint;
1620

21+
/**
22+
* A related location for an endpoint. This will typically be bound to the supertype of all AST nodes.
23+
*/
24+
class RelatedLocation;
25+
26+
/**
27+
* A class label for an endpoint.
28+
*/
1729
class EndpointType;
1830

1931
/**
2032
* An EndpointType that denotes the absence of any sink.
2133
*/
2234
class NegativeEndpointType extends EndpointType;
2335

24-
/** Gets the string representing the file+range of the endpoint. */
25-
string getLocationString(Endpoint e);
36+
/**
 * Gets the related location that represents the endpoint `e` itself.
 */
RelatedLocation toRelatedLocation(Endpoint e);
2637

2738
/**
2839
* Defines what labels are known, and what endpoint type they correspond to.
@@ -56,6 +67,8 @@ signature module CandidateSig {
5667
* The meta data will be passed on to the machine learning code by the extraction queries.
5768
*/
5869
predicate hasMetadata(Endpoint e, string metadata);
70+
71+
/**
 * Gets the related location of endpoint `e` that is identified by `name`, if any.
 */
RelatedLocation getRelatedLocation(Endpoint e, string name);
5972
}
6073

6174
/**
@@ -67,9 +80,9 @@ signature module CandidateSig {
6780
* implementations of endpoint characteristics exported by this module.
6881
*/
6982
module SharedCharacteristics<CandidateSig Candidate> {
70-
predicate isSink(Candidate::Endpoint e, string label) { Candidate::isSink(e, label) }
83+
/** Alias of `Candidate::isSink/2`, exposed through this module. */
predicate isSink = Candidate::isSink/2;
7184

72-
predicate isNeutral(Candidate::Endpoint e) { Candidate::isNeutral(e) }
85+
/** Alias of `Candidate::isNeutral/1`, exposed through this module. */
predicate isNeutral = Candidate::isNeutral/1;
7386

7487
/**
7588
* Holds if `sink` is a known sink of type `endpointType`.
@@ -94,8 +107,17 @@ module SharedCharacteristics<CandidateSig Candidate> {
94107
not exists(getAReasonSinkExcluded(candidateSink, sinkType))
95108
}
96109

97-
predicate hasMetadata(Candidate::Endpoint n, string metadata) {
98-
Candidate::hasMetadata(n, metadata)
110+
/** Alias of `Candidate::hasMetadata/2`, exposed through this module. */
predicate hasMetadata = Candidate::hasMetadata/2;
111+
112+
/**
113+
* Gets the related location of endpoint `e` registered under `name`, if one exists.
114+
* Otherwise gets `e` itself (converted with `toRelatedLocation`), which serves as a 'null' value.
115+
*/
116+
bindingset[name]
117+
Candidate::RelatedLocation getRelatedLocationOrCandidate(Candidate::Endpoint e, string name) {
118+
if exists(Candidate::getRelatedLocation(e, name))
119+
then result = Candidate::getRelatedLocation(e, name)
120+
else result = Candidate::toRelatedLocation(e)
99121
}
100122

101123
/**

0 commit comments

Comments
 (0)