11#!/usr/bin/python3
22
3+ import helpers
34import json
45import os
56import os .path
89import sys
910import tempfile
1011
def quote_if_needed(row):
    """Render a single column value as a YAML flow-sequence scalar.

    The literals ``"true"`` and ``"false"`` are returned unchanged so they
    stay unquoted; every other value is wrapped in double quotes.

    Args:
        row: The column value as a string (despite the name, this is one
            field, not a whole row).

    Returns:
        The text to place inside the ``[...]`` sequence for this column.
    """
    # subtypes column
    if row in ("true", "false"):
        return row
    # Backslash and double quote are the two characters that would otherwise
    # terminate or corrupt a double-quoted YAML scalar.
    escaped = row.replace("\\", "\\\\").replace('"', '\\"')
    return '"' + escaped + '"'
17+
def parseData(data):
    """Convert query result tuples into YAML data rows.

    Args:
        data: Iterable of tuples; the first element of each tuple is a
            ``;``-separated model row.

    Returns:
        One ``- [col, col, ...]`` line per tuple, each newline-terminated,
        concatenated into a single string. Empty input gives ``""``.
    """
    # NOTE(review): the leading indent of the row literal could not be read
    # from the collapsed rendering of this file; six spaces is assumed so rows
    # nest under the template's `data:` key. Confirm against
    # helpers.addsToTemplate.
    row_indent = "      "
    return "".join(
        row_indent + "- [" + ", ".join(map(quote_if_needed, row[0].split(";"))) + "]\n"
        for row in data
    )
26+
1127class Generator :
1228 def __init__ (self , language ):
1329 self .language = language
@@ -17,55 +33,52 @@ def __init__ (self, language):
1733 self .generateNegativeSummaries = False
1834 self .generateTypeBasedSummaries = False
1935 self .dryRun = False
36+ self .dirname = "model-generator"
2037
2138
def printHelp(self):
    """Print the command-line usage text for the generator to stdout.

    Reads ``self.language`` to show the output directory for that language.
    """
    # The text lists every flag in the usage line, gives each example the
    # mandatory output name, and names both external tools the script runs.
    print(f"""Usage:
    python3 GenerateFlowModelExtensions.py <library-database> <outputYml> [<friendlyFrameworkName>] [--with-sinks] [--with-sources] [--with-summaries] [--with-negative-summaries] [--with-typebased-summaries] [--dry-run]

This generates summary, source and sink models for the code in the database.
The files will be placed in `{self.language}/ql/lib/ext/generated/<outputYml>.model.yml` where
outputYml is the name of the output YAML file (any directory part is ignored). Usually, models are
grouped by their respective frameworks.

Which models are generated is controlled by the flags:
    --with-sinks
    --with-sources
    --with-summaries
    --with-negative-summaries
    --with-typebased-summaries (Experimental - only for C#)
If none of these flags are specified, all models are generated except for the type based models.

    --dry-run: Only run the queries, but don't write to file.

Example invocations:
$ python3 GenerateFlowModelExtensions.py /tmp/dbs/my_library_db mylibrary
$ python3 GenerateFlowModelExtensions.py /tmp/dbs/my_library_db mylibrary "Friendly Name of Framework"
$ python3 GenerateFlowModelExtensions.py /tmp/dbs/my_library_db mylibrary --with-sinks

Requirements: `codeql` and `git` should both appear on your path.
""")
4965
5066
def setenvironment(self, target, database, friendlyName):
    """Resolve the output locations and names for one generator run.

    Sets ``codeQlRoot``, ``friendlyname``, ``shortname``, ``database``,
    ``generatedFrameworks``, ``frameworkTarget``, ``typeBasedFrameworkTarget``
    and ``workDir`` on ``self``, and creates the output directory.

    Args:
        target: Output name; ``.model.yml`` is appended when missing. Only
            the base name is used, any directory part is dropped.
        database: Path of the CodeQL database to query.
        friendlyName: Display name for the framework, or ``None`` to fall
            back to the output name without its suffix.

    Raises:
        subprocess.CalledProcessError: If ``git rev-parse`` fails, e.g. when
            not run from inside a git checkout.
    """
    suffix = ".model.yml"
    self.codeQlRoot = subprocess.check_output(
        ["git", "rev-parse", "--show-toplevel"]).decode("utf-8").strip()
    if not target.endswith(suffix):
        target += suffix
    filename = os.path.basename(target)
    # Derive the stem from the suffix itself so the two cannot drift apart.
    self.shortname = filename[:-len(suffix)]
    self.friendlyname = friendlyName if friendlyName is not None else self.shortname
    self.database = database
    self.generatedFrameworks = os.path.join(
        self.codeQlRoot, f"{self.language}/ql/lib/ext/generated/")
    self.frameworkTarget = os.path.join(self.generatedFrameworks, filename)
    self.typeBasedFrameworkTarget = os.path.join(self.generatedFrameworks, "TypeBased" + filename)
    self.workDir = tempfile.mkdtemp()
    os.makedirs(self.generatedFrameworks, exist_ok=True)
7184
@@ -114,166 +127,93 @@ def make(language):
114127
115128 generator .setenvironment (sys .argv [2 ], sys .argv [1 ], friendlyName )
116129 return generator
130+
117131
118-
def runQuery(self, query):
    """Run one model-generator query against the database and return its rows.

    Args:
        query: File name of the query, resolved under
            ``<codeQlRoot>/<language>/ql/src/utils/<dirname>``.

    Returns:
        Whatever ``helpers.readData`` returns for the produced BQRS file.
    """
    print("########## Querying " + query + "...")
    queryFile = os.path.join(
        self.codeQlRoot, f"{self.language}/ql/src/utils/{self.dirname}", query)
    # Every query writes to the same file in the per-run work directory.
    resultBqrs = os.path.join(self.workDir, "out.bqrs")

    # NOTE(review): helpers.run_cmd presumably reports the message and aborts
    # on a non-zero exit, as the inlined code it replaced did - confirm in helpers.
    helpers.run_cmd(
        ['codeql', 'query', 'run', queryFile, '--database', self.database,
         '--output', resultBqrs, '--threads', '8'],
        "Failed to generate " + query)

    return helpers.readData(self.workDir, resultBqrs)
157-
158-
def asAddsTo(self, rows, predicate):
    """Wrap data rows in an ``addsTo`` extension entry.

    Args:
        rows: Pre-rendered YAML data rows as one string.
        predicate: Name of the extensible predicate the rows extend.

    Returns:
        ``helpers.addsToTemplate`` formatted with the pack
        ``codeql/<language>-all``, the predicate and the rows, or ``""``
        when ``rows`` is empty or only whitespace.
    """
    if not rows.strip():
        return ""
    return helpers.addsToTemplate.format(f"codeql/{self.language}-all", predicate, rows)
147+
148+
def getAddsTo(self, query, predicate):
    """Run ``query`` and return its results as an ``addsTo`` extension entry.

    Args:
        query: File name of the capture query to run.
        predicate: Name of the extensible predicate the results extend.

    Returns:
        The formatted entry, or ``""`` when the query yields no rows.
    """
    rows = parseData(self.runQuery(query))
    return self.asAddsTo(rows, predicate)
173153
174154
def makeContent(self):
    """Build the YAML text of the main models-as-data extension file.

    Runs the summary, sink, source and negative-summary capture queries, in
    that order, for each kind whose ``generate*`` flag is set. A disabled
    kind contributes an empty section.

    Returns:
        The file content: a generated-file header followed by an
        ``extensions:`` list with the sink, source, summary and
        negative-summary sections.
    """
    # The predicate names are looked up only for enabled kinds.
    summaryAddsTo = (
        self.getAddsTo("CaptureSummaryModels.ql", helpers.summaryModelPredicate)
        if self.generateSummaries else "")
    sinkAddsTo = (
        self.getAddsTo("CaptureSinkModels.ql", helpers.sinkModelPredicate)
        if self.generateSinks else "")
    sourceAddsTo = (
        self.getAddsTo("CaptureSourceModels.ql", helpers.sourceModelPredicate)
        if self.generateSources else "")
    negativeSummaryAddsTo = (
        self.getAddsTo("CaptureNegativeSummaryModels.ql", "extNegativeSummaryModel")
        if self.generateNegativeSummaries else "")

    # The template lines start at column 0 because YAML nesting is
    # indentation-sensitive.
    return f"""
# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT.
# Definitions of taint steps in the {self.friendlyname} framework.

extensions:
{sinkAddsTo}
{sourceAddsTo}
{summaryAddsTo}
{negativeSummaryAddsTo}
"""
228186
def makeTypeBasedContent(self):
    """Build the YAML text of the type-based summaries extension file.

    Runs ``CaptureTypeBasedSummaryModels.ql`` only when
    ``self.generateTypeBasedSummaries`` is set; otherwise the section is empty.

    Returns:
        The file content: a generated-file header followed by an
        ``extensions:`` list holding the type-based summary section.
    """
    typeBasedSummaryAddsTo = (
        self.getAddsTo("CaptureTypeBasedSummaryModels.ql", "extSummaryModel")
        if self.generateTypeBasedSummaries else "")

    # The template lines start at column 0 because YAML nesting is
    # indentation-sensitive.
    return f"""
# THIS FILE IS AN AUTO-GENERATED MODELS AS DATA FILE. DO NOT EDIT.
# Definitions of type based summaries in the {self.friendlyname} framework.

extensions:
{typeBasedSummaryAddsTo}
"""
248200
def save(self, content, target):
    """Write ``content`` to ``target`` and report the path on stdout.

    Args:
        content: Full text of the extension file.
        target: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # An explicit encoding keeps the output independent of the platform's
    # default locale.
    with open(target, "w", encoding="utf-8") as targetYml:
        targetYml.write(content)
    print("Models as data extensions written to " + target)
261205
262206
def run(self):
    """Generate all requested models and write them to their target files.

    Both content builders always run, so the queries execute even in dry-run
    mode. In dry-run mode the process then exits with status 0 without
    writing anything.

    Raises:
        SystemExit: With code 0 when ``self.dryRun`` is set.
    """
    content = self.makeContent()
    typeBasedContent = self.makeTypeBasedContent()

    if self.dryRun:
        print("Models as data extensions generated, but not written to file.")
        sys.exit(0)

    # Every kind rendered into the main file must trigger the write. The
    # earlier condition tested generateSinks twice, never looked at
    # generateSources, and ignored negative summaries.
    if (self.generateSinks or self.generateSources or self.generateSummaries
            or self.generateNegativeSummaries):
        self.save(content, self.frameworkTarget)

    if self.generateTypeBasedSummaries:
        self.save(typeBasedContent, self.typeBasedFrameworkTarget)
0 commit comments