6969 * sources "remote" indicates a default remote flow source, and for summaries
7070 * "taint" indicates a default additional taint step and "value" indicates a
7171 * globally applicable value-preserving step.
72+ * 9. The `provenance` column is a tag to indicate the origin of the model.
73+ * There are two supported values: "generated" and "manual". "generated" means that
74+ * the model has been emitted by the model generator tool and "manual" means
75+ * that the model has been written by hand. This information is used in a heuristic
76+ * for dataflow analysis to determine whether a model or source code should be used for
77+ * determining flow.
7278 */
7379
7480import csharp
@@ -163,17 +169,10 @@ private predicate sinkModel(string row) { any(SinkModelCsv s).row(row) }
163169
/** Holds if `row` is a row of some `SummaryModelCsv` instance. */
164170private predicate summaryModel ( string row ) { any ( SummaryModelCsv s ) .row ( row ) }
165171
166- bindingset [ input]
167- private predicate getKind ( string input , string kind , boolean generated ) {
168- input .splitAt ( ":" , 0 ) = "generated" and kind = input .splitAt ( ":" , 1 ) and generated = true
169- or
170- not input .matches ( "%:%" ) and kind = input and generated = false
171- }
172-
173172/** Holds if a source model exists for the given parameters. */
174173predicate sourceModel (
175174 string namespace , string type , boolean subtypes , string name , string signature , string ext ,
176- string output , string kind , boolean generated
175+ string output , string kind , string provenance
177176) {
178177 exists ( string row |
179178 sourceModel ( row ) and
@@ -185,14 +184,15 @@ predicate sourceModel(
185184 row .splitAt ( ";" , 4 ) = signature and
186185 row .splitAt ( ";" , 5 ) = ext and
187186 row .splitAt ( ";" , 6 ) = output and
188- exists ( string k | row .splitAt ( ";" , 7 ) = k and getKind ( k , kind , generated ) )
187+ row .splitAt ( ";" , 7 ) = kind and
188+ row .splitAt ( ";" , 8 ) = provenance
189189 )
190190}
191191
192192/** Holds if a sink model exists for the given parameters. */
193193predicate sinkModel (
194194 string namespace , string type , boolean subtypes , string name , string signature , string ext ,
195- string input , string kind , boolean generated
195+ string input , string kind , string provenance
196196) {
197197 exists ( string row |
198198 sinkModel ( row ) and
@@ -204,14 +204,15 @@ predicate sinkModel(
204204 row .splitAt ( ";" , 4 ) = signature and
205205 row .splitAt ( ";" , 5 ) = ext and
206206 row .splitAt ( ";" , 6 ) = input and
207- exists ( string k | row .splitAt ( ";" , 7 ) = k and getKind ( k , kind , generated ) )
207+ row .splitAt ( ";" , 7 ) = kind and
208+ row .splitAt ( ";" , 8 ) = provenance
208209 )
209210}
210211
211212/** Holds if a summary model exists for the given parameters. */
212213predicate summaryModel (
213214 string namespace , string type , boolean subtypes , string name , string signature , string ext ,
214- string input , string output , string kind , boolean generated
215+ string input , string output , string kind , string provenance
215216) {
216217 exists ( string row |
217218 summaryModel ( row ) and
@@ -224,7 +225,8 @@ predicate summaryModel(
224225 row .splitAt ( ";" , 5 ) = ext and
225226 row .splitAt ( ";" , 6 ) = input and
226227 row .splitAt ( ";" , 7 ) = output and
227- exists ( string k | row .splitAt ( ";" , 8 ) = k and getKind ( k , kind , generated ) )
228+ row .splitAt ( ";" , 8 ) = kind and
229+ row .splitAt ( ";" , 9 ) = provenance
228230 )
229231}
230232
@@ -259,25 +261,25 @@ predicate modelCoverage(string namespace, int namespaces, string kind, string pa
259261 part = "source" and
260262 n =
261263 strictcount ( string subns , string type , boolean subtypes , string name , string signature ,
262- string ext , string output , boolean generated |
264+ string ext , string output , string provenance |
263265 canonicalNamespaceLink ( namespace , subns ) and
264- sourceModel ( subns , type , subtypes , name , signature , ext , output , kind , generated )
266+ sourceModel ( subns , type , subtypes , name , signature , ext , output , kind , provenance )
265267 )
266268 or
267269 part = "sink" and
268270 n =
269271 strictcount ( string subns , string type , boolean subtypes , string name , string signature ,
270- string ext , string input , boolean generated |
272+ string ext , string input , string provenance |
271273 canonicalNamespaceLink ( namespace , subns ) and
272- sinkModel ( subns , type , subtypes , name , signature , ext , input , kind , generated )
274+ sinkModel ( subns , type , subtypes , name , signature , ext , input , kind , provenance )
273275 )
274276 or
275277 part = "summary" and
276278 n =
277279 strictcount ( string subns , string type , boolean subtypes , string name , string signature ,
278- string ext , string input , string output , boolean generated |
280+ string ext , string input , string output , string provenance |
279281 canonicalNamespaceLink ( namespace , subns ) and
280- summaryModel ( subns , type , subtypes , name , signature , ext , input , output , kind , generated )
282+ summaryModel ( subns , type , subtypes , name , signature , ext , input , output , kind , provenance )
281283 )
282284 )
283285}
@@ -286,12 +288,16 @@ predicate modelCoverage(string namespace, int namespaces, string kind, string pa
286288module CsvValidation {
287289 /** Holds if some row in a CSV-based flow model appears to contain typos. */
288290 query predicate invalidModelRow ( string msg ) {
289- exists ( string pred , string namespace , string type , string name , string signature , string ext |
290- sourceModel ( namespace , type , _, name , signature , ext , _, _, _) and pred = "source"
291+ exists (
292+ string pred , string namespace , string type , string name , string signature , string ext ,
293+ string provenance
294+ |
295+ sourceModel ( namespace , type , _, name , signature , ext , _, _, provenance ) and pred = "source"
291296 or
292- sinkModel ( namespace , type , _, name , signature , ext , _, _, _ ) and pred = "sink"
297+ sinkModel ( namespace , type , _, name , signature , ext , _, _, provenance ) and pred = "sink"
293298 or
294- summaryModel ( namespace , type , _, name , signature , ext , _, _, _, _) and pred = "summary"
299+ summaryModel ( namespace , type , _, name , signature , ext , _, _, _, provenance ) and
300+ pred = "summary"
295301 |
296302 not namespace .regexpMatch ( "[a-zA-Z0-9_\\.]+" ) and
297303 msg = "Dubious namespace \"" + namespace + "\" in " + pred + " model."
@@ -307,6 +313,9 @@ module CsvValidation {
307313 or
308314 not ext .regexpMatch ( "|Attribute" ) and
309315 msg = "Unrecognized extra API graph element \"" + ext + "\" in " + pred + " model."
316+ or
317+ not provenance = [ "manual" , "generated" ] and
318+ msg = "Unrecognized provenance description \"" + provenance + "\" in " + pred + " model."
310319 )
311320 or
312321 exists ( string pred , AccessPath input , string part |
@@ -338,18 +347,18 @@ module CsvValidation {
338347 )
339348 or
340349 exists ( string pred , string row , int expect |
341- sourceModel ( row ) and expect = 8 and pred = "source"
350+ sourceModel ( row ) and expect = 9 and pred = "source"
342351 or
343- sinkModel ( row ) and expect = 8 and pred = "sink"
352+ sinkModel ( row ) and expect = 9 and pred = "sink"
344353 or
345- summaryModel ( row ) and expect = 9 and pred = "summary"
354+ summaryModel ( row ) and expect = 10 and pred = "summary"
346355 |
347356 exists ( int cols |
348357 cols = 1 + max ( int n | exists ( row .splitAt ( ";" , n ) ) ) and
349358 cols != expect and
350359 msg =
351360 "Wrong number of columns in " + pred + " model row, expected " + expect + ", got " + cols +
352- "."
361+ " in " + row + " ."
353362 )
354363 or
355364 exists ( string b |
@@ -359,23 +368,20 @@ module CsvValidation {
359368 )
360369 )
361370 or
362- exists ( string row , string k , string kind | summaryModel ( row ) |
363- k = row .splitAt ( ";" , 8 ) and
364- getKind ( k , kind , _) and
371+ exists ( string row , string kind | summaryModel ( row ) |
372+ kind = row .splitAt ( ";" , 8 ) and
365373 not kind = [ "taint" , "value" ] and
366374 msg = "Invalid kind \"" + kind + "\" in summary model."
367375 )
368376 or
369- exists ( string row , string k , string kind | sinkModel ( row ) |
370- k = row .splitAt ( ";" , 7 ) and
371- getKind ( k , kind , _) and
377+ exists ( string row , string kind | sinkModel ( row ) |
378+ kind = row .splitAt ( ";" , 7 ) and
372379 not kind = [ "code" , "sql" , "xss" , "remote" , "html" ] and
373380 msg = "Invalid kind \"" + kind + "\" in sink model."
374381 )
375382 or
376- exists ( string row , string k , string kind | sourceModel ( row ) |
377- k = row .splitAt ( ";" , 7 ) and
378- getKind ( k , kind , _) and
383+ exists ( string row , string kind | sourceModel ( row ) |
384+ kind = row .splitAt ( ";" , 7 ) and
379385 not kind = "local" and
380386 msg = "Invalid kind \"" + kind + "\" in source model."
381387 )
0 commit comments