|
| 1 | +#! /usr/bin/env python3 |
| 2 | + |
| 3 | +"""The Tab Nanny despises ambiguous indentation. She knows no mercy. |
| 4 | +
|
| 5 | +tabnanny -- Detection of ambiguous indentation |
| 6 | +
|
| 7 | +For the time being this module is intended to be called as a script. |
| 8 | +However it is possible to import it into an IDE and use the function |
| 9 | +check() described below. |
| 10 | +
|
| 11 | +Warning: The API provided by this module is likely to change in future |
| 12 | +releases; such changes may not be backward compatible. |
| 13 | +""" |
| 14 | + |
| 15 | +# Released to the public domain, by Tim Peters, 15 April 1998. |
| 16 | + |
| 17 | +# XXX Note: this is now a standard library module. |
| 18 | +# XXX The API needs to undergo changes however; the current code is too |
| 19 | +# XXX script-like. This will be addressed later. |
| 20 | + |
| 21 | +__version__ = "6" |
| 22 | + |
| 23 | +import os |
| 24 | +import sys |
| 25 | +import tokenize |
# Refuse to load against a tokenize module that lacks the NL token type;
# process_tokens() relies on tokenize.NL to recognise ignorable tokens.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

# Names exported by "from tabnanny import *".
__all__ = ["check", "NannyNag", "process_tokens"]

# Module-level switches, incremented by main() for each -v / -q option and
# read by check() to decide how much to print.
verbose = 0
filename_only = 0
| 33 | + |
def errprint(*args):
    """Write the arguments to standard error on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and the line is terminated with a newline.
    """
    text = " ".join(str(piece) for piece in args)
    sys.stderr.write(text + "\n")
| 40 | + |
def main():
    """Command-line entry point: parse the options, then check each argument.

    Options (each may be repeated):
        -q  increment the module-level filename_only counter; when it is
            non-zero, check()'s non-verbose report prints just the file name
        -v  increment the module-level verbose counter

    Every remaining argument is handed to check().  A getopt error, or the
    absence of any file/directory argument, is reported through errprint()
    and main() returns without checking anything.
    """
    import getopt

    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for opt, _value in opts:
        if opt == '-q':
            filename_only += 1
        elif opt == '-v':
            verbose += 1
    if not args:
        # The usage line lists -q too: the option is accepted above, so
        # advertising only -v was incomplete.
        errprint("Usage:", sys.argv[0], "[-q] [-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
| 60 | + |
class NannyNag(Exception):
    """Exception signalling an ambiguous indent.

    process_tokens() raises it; check() catches it and reports the
    problem.  An instance records the line number, an explanatory message
    and the text of the offending line.
    """

    def __init__(self, lineno, msg, line):
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number supplied at construction."""
        return self.lineno

    def get_msg(self):
        """Return the message supplied at construction."""
        return self.msg

    def get_line(self):
        """Return the source line supplied at construction."""
        return self.line
| 74 | + |
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print() function; failures to
    open, decode or tokenize a file are written to standard error through
    errprint() and the file is skipped.

    The amount of output is governed by the module-level `verbose` and
    `filename_only` settings.  Nothing is returned.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) subdirectories and into
            # anything whose name ends in ".py".
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return
    except SyntaxError as msg:
        # tokenize.open() detects the source encoding before returning and
        # raises SyntaxError for an invalid coding cookie or a BOM/cookie
        # mismatch.  Report it like the other per-file errors instead of
        # letting it abort a whole directory scan.
        errprint("%r: Encoding Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    # The with-block closes the file on every exit path below.
    with f:
        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Quote names containing a space so the terse output stays
                # unambiguous.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))
| 135 | + |
class Whitespace:
    # the characters used for space and tab
    S, T = ' \t'

    # Instance attributes:
    #   raw        the original string
    #   n          the number of leading whitespace characters in raw
    #   nt         the number of tabs in raw[:n]
    #   norm       the normal form, a pair (count, trailing) where
    #                count     is a tuple such that raw[:n] contains
    #                          count[i] instances of S * i + T
    #                trailing  is the number of trailing spaces in raw[:n]
    #              It's A Theorem that m.indent_level(t) ==
    #              n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple  true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Analyse the leading run of spaces and tabs in the string ws."""
        self.raw = ws
        space, tab = Whitespace.S, Whitespace.T
        tabs_by_run = []    # tabs_by_run[i]: tabs preceded by exactly i spaces
        run = 0             # spaces seen since the last tab
        prefix_len = 0
        tab_total = 0
        for ch in ws:
            if ch == space:
                run += 1
            elif ch == tab:
                tab_total += 1
                shortfall = run + 1 - len(tabs_by_run)
                if shortfall > 0:
                    # grow the table so that index `run` exists
                    tabs_by_run.extend([0] * shortfall)
                tabs_by_run[run] += 1
                run = 0
            else:
                # first non-whitespace character ends the prefix
                break
            prefix_len += 1
        self.n = prefix_len
        self.nt = tab_total
        self.norm = (tuple(tabs_by_run), run)
        self.is_simple = len(tabs_by_run) <= 1

    def _max_witness_tabsize(self, other):
        # Largest tab size worth testing when comparing self with other:
        # one more than the longest run of spaces in either of them.
        longest = max(self.longest_run_of_spaces(),
                      other.longest_run_of_spaces())
        return longest + 1

    def longest_run_of_spaces(self):
        """Return the length of the longest contiguous run of spaces
        (whether or not preceding a tab)."""
        tab_counts, trailing = self.norm
        return max(len(tab_counts) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the indentation width of the prefix for the given tab size."""
        # Straightforward definition:
        #   il = trailing + sum (i//ts + 1)*ts*count[i]
        # which rearranges to
        #   il = trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and i//ts*count[i] is 0 when i < ts, so the sum can start at ts.
        tab_counts, trailing = self.norm
        extra = sum(i // tabsize * tab_counts[i]
                    for i in range(tabsize, len(tab_counts)))
        return trailing + tabsize * (extra + self.nt)

    def equal(self, other):
        """Return true iff self.indent_level(t) == other.indent_level(t)
        for all t >= 1."""
        return self.norm == other.norm

    def not_equal_witness(self, other):
        """Return a list of tuples (ts, i1, i2) such that
        i1 == self.indent_level(ts) != other.indent_level(ts) == i2.

        Intended to be used after not self.equal(other) is known, in which
        case it will return at least one witnessing tab size.
        """
        witnesses = []
        for ts in range(1, self._max_witness_tabsize(other) + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

    def less(self, other):
        """Return True iff self.indent_level(t) < other.indent_level(t)
        for all t >= 1."""
        # The algorithm is due to Vincent Broman.
        # The bound on the tab sizes tried is sharp (consider T vs ST).
        # For the special (but common!) case where M and N are both of the
        # form (T*)(S*), M.less(N) iff M.len() < N.len() and
        # M.num_tabs() <= N.num_tabs().
        # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        # the self.n >= other.n test above already covered ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, self._max_witness_tabsize(other) + 1))

    def not_less_witness(self, other):
        """Return a list of tuples (ts, i1, i2) such that
        i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.

        Intended to be used after not self.less(other) is known, in which
        case it will return at least one witnessing tab size.
        """
        witnesses = []
        for ts in range(1, self._max_witness_tabsize(other) + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses
| 269 | + |
def format_witnesses(w):
    """Describe the tab sizes named by a list of witness tuples.

    Only the first element of each tuple in w is used.  The result reads
    "at tab size N" for at most one witness and "at tab sizes N, M, ..."
    for several.
    """
    plural = "s" if len(w) > 1 else ""
    sizes = ', '.join(str(witness[0]) for witness in w)
    return "at tab size" + plural + " " + sizes
| 276 | + |
def process_tokens(tokens):
    """Check the indentation of a token stream for ambiguity.

    tokens is an iterable of 5-tuples (type, string, start, end, line) as
    produced by tokenize.generate_tokens().  Raises NannyNag when an
    INDENT is not greater than the enclosing indentation, or when a
    statement's leading whitespace is not equal to the current
    indentation, under the comparisons implemented by Whitespace.
    """
    ignorable = (tokenize.COMMENT, tokenize.NL)
    indent_stack = [Whitespace("")]
    pending_check = 0

    for tok_type, tok_string, start, _end, line in tokens:
        if tok_type == tokenize.NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting pending_check is wrong, and
            # will be undone when we see the INDENT.
            pending_check = 1

        elif tok_type == tokenize.INDENT:
            pending_check = 0
            current = Whitespace(tok_string)
            enclosing = indent_stack[-1]
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indent_stack.append(current)

        elif tok_type == tokenize.DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indent stack

            # No assertion that a check is already pending: if the last
            # line of the source is indented *and* lacks a newline, the
            # DEDENTs pop out of thin air.
            pending_check = 1

            del indent_stack[-1]

        elif pending_check and tok_type not in ignorable:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which
            # the indent stack was seeded
            pending_check = 0
            current = Whitespace(line)
            expected = indent_stack[-1]
            if not expected.equal(current):
                witness = expected.not_equal_witness(current)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
| 330 | + |
| 331 | + |
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
0 commit comments