1+ /** Provides classes for detecting duplicate or similar code. */
2+
13import cpp
24
/** Gets the relative path of `file`, with every backslash replaced by a forward slash. */
private string relativePath(File file) {
  exists(string rawPath |
    rawPath = file.getRelativePath() and
    result = rawPath.replaceAll("\\", "/")
  )
}
@@ -8,41 +10,59 @@ private predicate tokenLocation(string path, int sl, int sc, int ec, int el, Cop
810 tokens ( copy , index , sl , sc , ec , el )
911}
1012
13+ /** A token block used for detection of duplicate and similar code. */
1114class Copy extends @duplication_or_similarity {
/** Gets the largest token index recorded in `tokens` for this block. */
private int lastToken() {
  // With a single aggregation variable, the aggregated expression defaults to that variable.
  result = max(int index | tokens(this, index, _, _, _, _))
}
1317
/**
 * Gets the index of a token in this block whose file, start line and start
 * column coincide with the start of `loc`, if there is one.
 */
int tokenStartingAt(Location loc) {
  exists(string path, int line, int col |
    tokenLocation(path, line, col, _, _, this, result) and
    loc.hasLocationInfo(path, line, col, _, _)
  )
}
2025
/**
 * Gets the index of a token in this block whose file, end line and end
 * column coincide with the end of `loc`, if there is one.
 */
int tokenEndingAt(Location loc) {
  exists(string path, int line, int col |
    tokenLocation(path, _, _, line, col, this, result) and
    loc.hasLocationInfo(path, _, _, line, col)
  )
}
2733
/** Gets the start line of the token at index 0 of this block. */
int sourceStartLine() {
  exists(int line | tokens(this, 0, line, _, _, _) | result = line)
}
2936
/** Gets the start column of the token at index 0 of this block. */
int sourceStartColumn() {
  exists(int column | tokens(this, 0, _, column, _, _) | result = column)
}
3139
/** Gets the end line of the token with the highest index in this block. */
int sourceEndLine() {
  exists(int last | last = this.lastToken() | tokens(this, last, _, _, result, _))
}
3342
/** Gets the end column of the token with the highest index in this block. */
int sourceEndColumn() {
  exists(int last | last = this.lastToken() | tokens(this, last, _, _, _, result))
}
3545
/**
 * Gets the number of source lines spanned by this block, counting both the
 * line of its first token and the line of its last token.
 */
int sourceLines() {
  exists(int firstLine, int lastLine |
    firstLine = this.sourceStartLine() and
    lastLine = this.sourceEndLine() and
    result = lastLine + 1 - firstLine
  )
}
3748
/**
 * Gets the equivalence-class identifier recorded for this block in either
 * `similarCode` or `duplicateCode`.
 */
int getEquivalenceClass() {
  similarCode(this, _, result)
  or
  duplicateCode(this, _, result)
}
3951
/**
 * Gets the file containing this block. The file name recorded in
 * `duplicateCode` or `similarCode` is matched against the file's relative
 * path after backslashes on both sides are replaced by forward slashes.
 */
File sourceFile() {
  exists(string recorded |
    (
      similarCode(this, recorded, _)
      or
      duplicateCode(this, recorded, _)
    ) and
    relativePath(result) = recorded.replaceAll("\\", "/")
  )
}
4558
59+ /**
60+ * Holds if this element is at the specified location.
61+ * The location spans column `startcolumn` of line `startline` to
62+ * column `endcolumn` of line `endline` in file `filepath`.
63+ * For more information, see
64+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
65+ */
4666 predicate hasLocationInfo (
4767 string filepath , int startline , int startcolumn , int endline , int endcolumn
4868 ) {
@@ -53,25 +73,30 @@ class Copy extends @duplication_or_similarity {
5373 endcolumn = sourceEndColumn ( )
5474 }
5575
/**
 * Gets a textual representation of this element.
 *
 * This base definition has no result; `DuplicateBlock` and `SimilarBlock`
 * override it with a descriptive message.
 */
string toString() { none() }
5778}
5879
/** A token block of the `@duplication` kind, that is, a block of duplicated code. */
class DuplicateBlock extends Copy, @duplication {
  /** Gets a message stating how many lines this duplicated block spans. */
  override string toString() {
    exists(int lineCount | lineCount = this.sourceLines() |
      result = "Duplicate code: " + lineCount + " duplicated lines."
    )
  }
}
6284
/** A token block of the `@similarity` kind, that is, a block of similar code. */
class SimilarBlock extends Copy, @similarity {
  /** Gets a message stating how many lines this similar block spans. */
  override string toString() {
    exists(int lineCount | lineCount = this.sourceLines() |
      result = "Similar code: " + lineCount + " almost duplicated lines."
    )
  }
}
6891
/**
 * Gets a function declaration entry that is a definition, has a location,
 * and whose function has an entry in the `numlines` relation.
 */
FunctionDeclarationEntry sourceMethod() {
  exists(Function f |
    f = result.getFunction() and
    numlines(unresolveElement(f), _, _, _)
  ) and
  exists(Location loc | loc = result.getLocation()) and
  result.isDefinition()
}
7498
99+ /** Gets the number of member functions in `c` with a body and a location. */
75100int numberOfSourceMethods ( Class c ) {
76101 result =
77102 count ( FunctionDeclarationEntry m |
@@ -108,20 +133,27 @@ private predicate duplicateStatement(
108133 )
109134}
110135
/**
 * Holds if `m1` contains `total` statements, of which `duplicate` are
 * duplicated in `m2`. Both counts are strict, so the predicate does not
 * hold when either count would be zero.
 */
predicate duplicateStatements(
  FunctionDeclarationEntry m1, FunctionDeclarationEntry m2, int duplicate, int total
) {
  total = strictcount(statementInMethod(m1)) and
  duplicate = strictcount(Stmt dup | duplicateStatement(m1, m2, dup, _))
}
117146
118- /**
119- * Find pairs of methods are identical
120- */
/**
 * Holds if `m` and `other` are identical functions, meaning that the number
 * of statements of `m` duplicated in `other` equals the total number of
 * statements in `m`.
 */
predicate duplicateMethod(FunctionDeclarationEntry m, FunctionDeclarationEntry other) {
  exists(int duplicated, int all |
    duplicateStatements(m, other, duplicated, all) and
    duplicated = all
  )
}
124151
/**
 * INTERNAL: do not use.
 *
 * Holds if `line` of `f` lies between the first and last line (inclusive)
 * of some `SimilarBlock` located in `f`.
 */
predicate similarLines(File f, int line) {
  exists(SimilarBlock block, int first, int last |
    f = block.sourceFile() and
    first = block.sourceStartLine() and
    last = block.sourceEndLine() and
    line in [first .. last]
  )
}
@@ -152,6 +184,7 @@ private predicate similarLinesCoveredFiles(File f, File otherFile) {
152184 )
153185}
154186
187+ /** Holds if `coveredLines` lines of `f` are similar to lines in `otherFile`. */
155188predicate similarLinesCovered ( File f , int coveredLines , File otherFile ) {
156189 exists ( int numLines | numLines = f .getMetrics ( ) .getNumberOfLines ( ) |
157190 similarLinesCoveredFiles ( f , otherFile ) and
@@ -166,6 +199,11 @@ predicate similarLinesCovered(File f, int coveredLines, File otherFile) {
166199 )
167200}
168201
202+ /**
203+ * INTERNAL: do not use.
204+ *
205+ * Holds if `line` in `f` is duplicated by a line somewhere else.
206+ */
169207predicate duplicateLines ( File f , int line ) {
170208 exists ( DuplicateBlock b |
171209 b .sourceFile ( ) = f and line in [ b .sourceStartLine ( ) .. b .sourceEndLine ( ) ]
@@ -182,6 +220,7 @@ private predicate duplicateLinesPerEquivalenceClass(int equivClass, int lines, F
182220 )
183221}
184222
223+ /** Holds if `coveredLines` lines of `f` are duplicates of lines in `otherFile`. */
185224predicate duplicateLinesCovered ( File f , int coveredLines , File otherFile ) {
186225 exists ( int numLines | numLines = f .getMetrics ( ) .getNumberOfLines ( ) |
187226 exists ( int coveredApprox |
@@ -206,6 +245,7 @@ predicate duplicateLinesCovered(File f, int coveredLines, File otherFile) {
206245 )
207246}
208247
248+ /** Holds if most of `f` (`percent`%) is similar to `other`. */
209249predicate similarFiles ( File f , File other , int percent ) {
210250 exists ( int covered , int total |
211251 similarLinesCovered ( f , covered , other ) and
@@ -216,6 +256,7 @@ predicate similarFiles(File f, File other, int percent) {
216256 not duplicateFiles ( f , other , _)
217257}
218258
259+ /** Holds if most of `f` (`percent`%) is duplicated by `other`. */
219260predicate duplicateFiles ( File f , File other , int percent ) {
220261 exists ( int covered , int total |
221262 duplicateLinesCovered ( f , covered , other ) and
@@ -225,6 +266,10 @@ predicate duplicateFiles(File f, File other, int percent) {
225266 )
226267}
227268
269+ /**
270+ * Holds if most member functions of `c` (`numDup` out of `total`) are
271+ * duplicates of member functions in `other`.
272+ */
228273predicate mostlyDuplicateClassBase ( Class c , Class other , int numDup , int total ) {
229274 numDup =
230275 strictcount ( FunctionDeclarationEntry m1 |
@@ -240,6 +285,11 @@ predicate mostlyDuplicateClassBase(Class c, Class other, int numDup, int total)
240285 ( numDup * 100 ) / total > 80
241286}
242287
288+ /**
289+ * Holds if most member functions of `c` are duplicates of member functions in
290+ * `other`. Provides the human-readable `message` to describe the amount of
291+ * duplication.
292+ */
243293predicate mostlyDuplicateClass ( Class c , Class other , string message ) {
244294 exists ( int numDup , int total |
245295 mostlyDuplicateClassBase ( c , other , numDup , total ) and
@@ -264,12 +314,21 @@ predicate mostlyDuplicateClass(Class c, Class other, string message) {
264314 )
265315}
266316
/**
 * Holds if `f` is reported as either a duplicate of `other` or as similar
 * to `other`, regardless of the percentage.
 */
predicate fileLevelDuplication(File f, File other) {
  duplicateFiles(f, other, _)
  or
  similarFiles(f, other, _)
}
270321
/**
 * Holds if `mostlyDuplicateClass` holds for `c` and `other` with some
 * message, that is, most member functions of `c` are duplicated in `other`.
 */
predicate classLevelDuplication(Class c, Class other) {
  exists(string message | mostlyDuplicateClass(c, other, message))
}
272327
/**
 * Holds if duplication of `line` in `f` should be tolerated. This is the
 * case when an `#include` directive in `f` starts on that line.
 */
predicate whitelistedLineForDuplication(File f, int line) {
  exists(Include directive, Location loc |
    loc = directive.getLocation() and
    f = directive.getFile() and
    line = loc.getStartLine()
  )
}
0 commit comments