55private import ruby
66private import codeql.ruby.Concepts
77private import codeql.ruby.ApiGraphs
8+ private import codeql.ruby.DataFlow
9+ private import codeql.ruby.frameworks.StandardLibrary
10+
/**
 * Gets a node at which an `IO` object is created: either an instantiation of
 * the top-level `IO` class, or a call to one of the `IO` class methods
 * `for_fd`, `open` or `try_convert`.
 */
private DataFlow::Node ioInstanceInstantiation() {
  exists(API::Node ioClass | ioClass = API::getTopLevelMember("IO") |
    result = ioClass.getAnInstantiation()
    or
    result = ioClass.getAMethodCall(["for_fd", "open", "try_convert"])
  )
}
15+
/**
 * Gets a node that refers to an `IO` object: either a creation site from
 * `ioInstanceInstantiation`, or any node that an already-known `IO` instance
 * (viewed as a local source node) flows to.
 */
private DataFlow::Node ioInstance() {
  result = ioInstanceInstantiation()
  or
  // Recursive step: follow local flow from a known instance.
  ioInstance().(DataFlow::LocalSourceNode).flowsTo(result)
}
24+
/**
 * Holds if `arg` is a string-like literal whose text starts with `|`.
 *
 * This matches some simple cases where a path argument specifies a shell
 * command to be executed. For example, the `"|date"` argument in
 * `IO.read("|date")` executes a shell command and reads its output rather than
 * reading from the filesystem.
 */
private predicate pathArgSpawnsSubprocess(Expr arg) {
  exists(StringlikeLiteral literal | literal = arg | literal.getValueText().charAt(0) = "|")
}
32+
/**
 * Gets a node at which a `File` object is created: an instantiation of the
 * top-level `File` class, a call to `File.open`, or a call to `Kernel.open`
 * whose first argument is not recognised as spawning a subprocess.
 */
private DataFlow::Node fileInstanceInstantiation() {
  exists(API::Node fileClass | fileClass = API::getTopLevelMember("File") |
    result = fileClass.getAnInstantiation()
    or
    result = fileClass.getAMethodCall("open")
  )
  or
  // Calls to `Kernel.open` can yield `File` instances
  exists(KernelMethodCall openCall |
    openCall.getMethodName() = "open" and
    openCall = result.asExpr().getExpr() and
    // Assume that a call which does not invoke a shell command opens a file
    // instead.
    not pathArgSpawnsSubprocess(openCall.getArgument(0))
  )
}
47+
/**
 * Gets a node that refers to a `File` object: either a creation site from
 * `fileInstanceInstantiation`, or any node that an already-known `File`
 * instance (viewed as a local source node) flows to.
 */
private DataFlow::Node fileInstance() {
  result = fileInstanceInstantiation()
  or
  // Recursive step: follow local flow from a known instance.
  fileInstance().(DataFlow::LocalSourceNode).flowsTo(result)
}
56+
/**
 * Gets the name of a class method on `IO` (and therefore `File`) that reads
 * data, taking the path to read from as its first argument.
 *
 * `try_convert` is deliberately not listed: it converts its argument to an
 * `IO` object via `to_io` and reads no data, so treating it as a reader would
 * report the converted object as a file path.
 */
private string ioFileReaderClassMethodName() {
  result = ["binread", "foreach", "read", "readlines"]
}
60+
/**
 * Gets the name of an instance method that this library treats as reading
 * data from its receiver.
 */
private string ioFileReaderInstanceMethodName() {
  result =
    [
      "getbyte", "getc", "gets", "pread", "read", "read_nonblock", "readbyte", "readchar",
      "readline", "readlines", "readpartial", "sysread"
    ]
}
68+
/**
 * Gets the name of a reader method. When `classMethodCall` is `true` the
 * result is a class-method name; when it is `false` the result is an
 * instance-method name.
 */
private string ioFileReaderMethodName(boolean classMethodCall) {
  result = ioFileReaderInstanceMethodName() and classMethodCall = false
  or
  result = ioFileReaderClassMethodName() and classMethodCall = true
}
874
975/**
10- * Classes and predicates for modelling the `File` module from the standard
11- * library.
76+ * Classes and predicates for modelling the core `IO` module.
1277 */
13- private module File {
14- private class FileModuleReader extends FileSystemReadAccess:: Range , DataFlow:: CallNode {
15- FileModuleReader ( ) { this = API:: getTopLevelMember ( "File" ) .getAMethodCall ( [ "new" , "open" ] ) }
78+ module IO {
/**
 * A data-flow node that refers to an `IO` object, including objects of the
 * more specific `File` type. For example, in
 *
 * ```rb
 * rand = IO.new(IO.sysopen("/dev/random", "r"), "r")
 * rand_data = rand.read(32)
 * ```
 *
 * there are 3 `IOInstance`s: the call to `IO.new`, the assignment
 * `rand = ...`, and the read access to `rand` on the second line.
 */
class IOInstance extends DataFlow::Node {
  IOInstance() { this = [ioInstance(), fileInstance()] }
}
96+
/**
 * A "direct" `IO` instance, that is, one found by tracking `IO` creation
 * sites rather than those of a more specific subtype such as `File`.
 */
private class IOInstanceStrict extends IOInstance {
  IOInstanceStrict() { ioInstance() = this }
}
16102
17- override DataFlow:: Node getAPathArgument ( ) { result = this .getArgument ( 0 ) }
/**
 * A `DataFlow::CallNode` that reads data using the `IO` class. For example,
 * the `IO.read` call in:
 *
 * ```rb
 * IO.read("|date")
 * ```
 *
 * returns the output of the `date` shell command, invoked as a subprocess.
 *
 * This class covers both reads from shell commands and reads from the
 * filesystem. For filesystem accesses specifically, see `IOFileReader` or the
 * `FileSystemReadAccess` concept.
 */
class IOReader extends DataFlow::CallNode {
  private boolean classMethodCall;
  private string api;

  IOReader() {
    // Class methods, e.g. `IO.read(...)` or `File.read(...)`
    classMethodCall = true and
    api = ["File", "IO"] and
    this = API::getTopLevelMember(api).getAMethodCall(ioFileReaderMethodName(true))
    or
    // Instance methods: the reader name is shared, the receiver decides `api`
    classMethodCall = false and
    this.asExpr().getExpr().(MethodCall).getMethodName() = ioFileReaderMethodName(false) and
    (
      // IO instance methods
      api = "IO" and this.getReceiver() instanceof IOInstanceStrict
      or
      // File instance methods
      api = "File" and this.getReceiver() instanceof File::FileInstance
    )
    // TODO: enumeration style methods such as `each`, `foreach`, etc.
  }

  /**
   * Gets the most specific core class used for this read: `IO` or `File`.
   */
  string getAPI() { result = api }

  /**
   * Holds if this read is a call to a class method rather than to a method on
   * an instance.
   */
  predicate isClassMethodCall() { classMethodCall = true }
}
154+
/**
 * A `DataFlow::CallNode` that reads data from the filesystem using the `IO`
 * class. For example, the `IO.read` call in:
 *
 * ```rb
 * IO.read("foo.txt")
 * ```
 *
 * reads the file `foo.txt` and returns its contents as a string.
 */
class IOFileReader extends IOReader, FileSystemReadAccess::Range {
  IOFileReader() {
    // Class-method reads are assumed to read from a file unless the first
    // argument is recognised as invoking a shell command.
    this.isClassMethodCall() and
    not exists(Expr pathExpr | pathExpr = this.getArgument(0).asExpr().getExpr() |
      pathArgSpawnsSubprocess(pathExpr)
    )
    or
    this.getAPI() = "File"
  }

  // TODO: can we infer a path argument for instance method calls?
  // e.g. by tracing back to the instantiation of that instance
  override DataFlow::Node getAPathArgument() {
    this.isClassMethodCall() and result = this.getArgument(0)
  }

  // The call itself is the data node, since these calls return the data read.
  override DataFlow::Node getADataNode() { result = this }
}
184+ }
185+
186+ /**
187+ * Classes and predicates for modelling the core `File` module.
188+ *
189+ * Because `File` is a subclass of `IO`, all `FileInstance`s and
190+ * `FileModuleReader`s are also `IOInstance`s and `IOFileReader`s
191+ * respectively.
192+ */
193+ module File {
/**
 * A data-flow node that refers to a `File` object. For example, in
 *
 * ```rb
 * f = File.new("foo.txt")
 * puts f.read()
 * ```
 *
 * there are 3 `FileInstance`s: the call to `File.new`, the assignment
 * `f = ...`, and the read access to `f` on the second line.
 */
class FileInstance extends IO::IOInstance {
  FileInstance() { fileInstance() = this }
}
208+
/**
 * A filesystem read whose most specific core class is `File`, e.g. the
 * `f.read` call in
 *
 * ```rb
 * f = File.new("foo.txt")
 * puts f.read()
 * ```
 */
class FileModuleReader extends IO::IOFileReader {
  FileModuleReader() { "File" = this.getAPI() }
}
21220
22- private class FileModuleFilenameSource extends FileNameSource {
221+ /**
222+ * A call to a File method that may return one or more filenames.
223+ */
224+ class FileModuleFilenameSource extends FileNameSource , DataFlow:: CallNode {
23225 FileModuleFilenameSource ( ) {
24226 // Class methods
25227 this =
@@ -28,6 +230,12 @@ private module File {
28230 "absolute_path" , "basename" , "expand_path" , "join" , "path" , "readlink" ,
29231 "realdirpath" , "realpath"
30232 ] )
233+ or
234+ // Instance methods
235+ exists ( FileInstance fi |
236+ this .getReceiver ( ) = fi and
237+ this .asExpr ( ) .getExpr ( ) .( MethodCall ) .getMethodName ( ) = [ "path" , "to_path" ]
238+ )
31239 }
32240 }
33241
@@ -50,12 +258,19 @@ private module File {
50258 }
51259}
52260
53- private module FileUtils {
54- private class FileUtilsFilenameSource extends FileNameSource {
261+ /**
262+ * Classes and predicates for modelling the `FileUtils` module from the standard
263+ * library.
264+ */
265+ module FileUtils {
266+ /**
267+ * A call to a FileUtils method that may return one or more filenames.
268+ */
269+ class FileUtilsFilenameSource extends FileNameSource {
55270 FileUtilsFilenameSource ( ) {
56271 // Note that many methods in FileUtils accept a `noop` option that will
57272 // perform a dry run of the command. This means that, for instance, `rm`
58- // and similar methods may not actually delete/unlink a file.
273+ // and similar methods may not actually delete/unlink a file when called .
59274 this =
60275 API:: getTopLevelMember ( "FileUtils" )
61276 .getAMethodCall ( [
@@ -85,5 +300,3 @@ private module FileUtils {
85300 override DataFlow:: Node getAPermissionNode ( ) { result = permissionArg }
86301 }
87302}
88-
89- private module IO { }
0 commit comments