-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathflake8_indent_in_def.py
319 lines (267 loc) · 11 KB
/
flake8_indent_in_def.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
import ast
import tokenize
from enum import Enum
from typing import Generator, Tuple, Type, Any, List, Union, Dict, Optional
import importlib.metadata as importlib_metadata
IND101 = 'IND101 hanging indentation in function definition must be 8 spaces'
IND102 = (
'IND102 if the 1st argument is on the same line as the function '
+ 'name, all other arguments must be on the same line'
)
IND201 = 'IND201 hanging indentation in class definition must be 8 spaces'
IND202 = (
'IND202 if the 1st base class is on the same line as the class '
+ 'name, all other base classes must be on the same line'
)
EXPECTED_INDENT = 8 # https://peps.python.org/pep-0008/#indentation
OP_TOKEN_CODE = 54 # "OP" means operator token
NL_TOKEN_CODE = 61 # "NL" means new line
NEWLINE_TOKEN_CODE = 4
class ArgType(Enum):
REGULAR = 1
POS_ONLY = 2
KW_ONLY = 3
VARARG = 4
KWARG = 5
class Visitor(ast.NodeVisitor):
def __init__(self, tokens: List[tokenize.TokenInfo]) -> None:
self._tokens: List[tokenize.TokenInfo] = tokens
self.violations: List[Tuple[int, int, str]] = []
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
sorted_args, arg_type_lookup, has_star = self._collect_func_args(node)
if has_star:
self._visit_node_with_star_in_arg_list(node)
else:
self._visit_func_args_or_class_bases(
node=node,
args_or_bases=sorted_args,
is_func=True,
arg_type_lookup=arg_type_lookup,
)
def visit_ClassDef(self, node: ast.ClassDef) -> None:
self._visit_func_args_or_class_bases(node, node.bases, is_func=False)
@classmethod
def _collect_func_args(
cls,
node: ast.FunctionDef,
) -> Tuple[List, Dict[ast.arg, ArgType], bool]:
"""Collect all args from function def; detect presence of * argument"""
all_args: List[ast.arg] = []
arg_type_lookup: Dict[ast.arg, ArgType] = {}
has_star = False # it means there's a '*,' in the argument list
if node.args.args: # List[ast.arg]
all_args.extend(node.args.args)
for arg_ in node.args.args:
arg_type_lookup[arg_] = ArgType.REGULAR
if node.args.posonlyargs: # List[ast.arg]
has_star = True
all_args.extend(node.args.posonlyargs)
for arg_ in node.args.posonlyargs:
arg_type_lookup[arg_] = ArgType.POS_ONLY
if node.args.kwonlyargs: # List[ast.arg]
has_star = True
all_args.extend(node.args.kwonlyargs)
for arg_ in node.args.kwonlyargs:
arg_type_lookup[arg_] = ArgType.KW_ONLY
if node.args.vararg is not None:
all_args.append(node.args.vararg)
arg_type_lookup[node.args.vararg] = ArgType.VARARG
if node.args.kwarg is not None:
all_args.append(node.args.kwarg)
arg_type_lookup[node.args.kwarg] = ArgType.KWARG
sorted_args = sorted(all_args, key=lambda x: x.lineno, reverse=False)
return sorted_args, arg_type_lookup, has_star
def _visit_func_args_or_class_bases(
self,
node: Union[ast.FunctionDef, ast.ClassDef],
args_or_bases: Union[List[ast.arg], List[ast.Name]],
is_func: bool,
arg_type_lookup: Optional[Dict[ast.arg, ArgType]] = None,
) -> None:
if is_func:
code01 = IND101
code02 = IND102
else:
code01 = IND201
code02 = IND202
if len(args_or_bases) > 0:
function_def_line_num = node.lineno
def_col_offset = node.col_offset
if args_or_bases[0].lineno == function_def_line_num:
for item in args_or_bases[1:]:
if item.lineno != function_def_line_num:
arg_type = arg_type_lookup[item] if is_func else None
col_offset = self._calc_col_offset(item, arg_type)
self.violations.append((item.lineno, col_offset, code02))
for i, item in enumerate(args_or_bases):
if i == 0:
prev_item_line_num = function_def_line_num
else:
prev_item_line_num = args_or_bases[i - 1].lineno
# Only enforce indentation when this arg is on a new line
if item.lineno > prev_item_line_num:
arg_type = arg_type_lookup[item] if is_func else None
if self._not_expected_indent(item, def_col_offset, arg_type):
col_offset = self._calc_col_offset(item, arg_type)
self.violations.append((item.lineno, col_offset, code01))
# Place this line OUTSIDE `if len(args_or_bases) > 0`, otherwise
# nested functions/classes will not be detected if the parent function
# has an empty input argument list
self.generic_visit(node)
@classmethod
def _calc_col_offset(
cls,
item: Union[ast.arg, ast.Name],
arg_type: Optional[ArgType] = None,
) -> int:
if isinstance(item, ast.Name): # this means base class
return item.col_offset
arg_type = ArgType.REGULAR if arg_type is None else arg_type
return cls._calc_col_offset_for_func_args(item, arg_type)
@classmethod
def _calc_col_offset_for_func_args(
cls,
arg_: ast.arg,
arg_type: ArgType,
) -> int:
if arg_type in {ArgType.REGULAR, ArgType.POS_ONLY, ArgType.KW_ONLY}:
return arg_.col_offset
if arg_type == ArgType.VARARG:
return arg_.col_offset - 1 # '-1' because of '*' before vararg
if arg_type == ArgType.KWARG:
return arg_.col_offset - 2 # '-2' because of '**' before kwarg
@classmethod
def _not_expected_indent(
cls,
item: Union[ast.arg, ast.Name],
def_col_offset: int,
arg_type: Optional[ArgType] = None,
) -> bool:
if isinstance(item, ast.Name): # this means base class
return item.col_offset - def_col_offset != EXPECTED_INDENT
arg_type = ArgType.REGULAR if arg_type is None else arg_type
if arg_type in {ArgType.REGULAR, ArgType.POS_ONLY, ArgType.KW_ONLY}:
expected_indent_ = EXPECTED_INDENT
elif arg_type == ArgType.VARARG:
expected_indent_ = EXPECTED_INDENT + 1 # because '*vararg'
elif arg_type == ArgType.KWARG:
expected_indent_ = EXPECTED_INDENT + 2 # because '**kwarg'
else:
# this branch can't be reached in theory
expected_indent_ = EXPECTED_INDENT
return item.col_offset - def_col_offset != expected_indent_
def _visit_node_with_star_in_arg_list(self, node: ast.FunctionDef) -> None:
"""
Within this method, we add '*' as an argument into the node's AST
structure. Somehow, Python stdlib `ast` omits the standalone '*' when
parsing Python code. Therefore we have to find '*' by tokens and
manually add it to the structure, as if it were a regular argument.
"""
func_def_lineno = node.lineno
func_end_lineno = node.end_lineno
# We skip the last 2 tokens because the last token from the tokenizer
# is always ENDMARKER (type 0) and '*' cannot be the 2nd to last token.
# And then we also skip the 0th token because that is always
# the ENCODING token (type 62).
for i in range(1, len(self._tokens) - 2):
this_token = self._tokens[i]
next_token = self._tokens[i + 1]
next_next_token = self._tokens[i + 2]
this_lineno = this_token.start[0]
if func_def_lineno <= this_lineno <= func_end_lineno:
if self._is_qualifying_star(
this=this_token,
next=next_token,
next_next=next_next_token,
):
self._replace_args_field(node, this_token)
self.visit_FunctionDef(node)
return # because there can only be one '*' in the arg list
@classmethod
def _replace_args_field(
cls,
node: ast.FunctionDef,
token: tokenize.TokenInfo,
) -> None:
star_arg = cls._build_arg_obj(token)
new_args_unsorted = (
node.args.args # List[ast.arg]
+ [star_arg]
+ node.args.posonlyargs # List[ast.arg]
+ node.args.kwonlyargs # List[ast.arg]
)
new_args = sorted(
new_args_unsorted,
key=lambda x: (x.lineno, x.col_offset), # first line, then col
)
node.args.posonlyargs = []
node.args.kwonlyargs = []
node.args.args = new_args
@classmethod
def _build_arg_obj(cls, token: tokenize.TokenInfo) -> ast.arg:
return ast.arg(
lineno=token.start[0],
col_offset=token.start[1],
end_lineno=token.end[0],
end_col_offset=token.end[1],
arg='*',
annotation=None,
type_comment=None,
)
@classmethod
def _is_qualifying_star(
cls,
this: tokenize.TokenInfo,
next: tokenize.TokenInfo,
next_next: tokenize.TokenInfo,
):
return (
cls._is_star_newline_comma(this=this, next=next, next_next=next_next)
or cls._is_star_comma(this=this, next=next)
)
@classmethod
def _is_star_comma(
cls,
this: tokenize.TokenInfo,
next: tokenize.TokenInfo,
) -> bool:
return cls._is_star(this) and cls._is_comma(next)
@classmethod
def _is_star_newline_comma(
cls,
this: tokenize.TokenInfo,
next: tokenize.TokenInfo,
next_next: tokenize.TokenInfo,
) -> bool:
return (
cls._is_star(this)
and cls._is_newline(next)
and cls._is_comma(next_next)
)
@classmethod
def _is_star(cls, token: tokenize.TokenInfo) -> bool:
return token.type == OP_TOKEN_CODE and token.string == '*'
@classmethod
def _is_comma(cls, token: tokenize.TokenInfo) -> bool:
return token.type == OP_TOKEN_CODE and token.string == ','
@classmethod
def _is_newline(cls, token: tokenize.TokenInfo) -> bool:
return (
token.type in {NL_TOKEN_CODE, NEWLINE_TOKEN_CODE}
and token.string == '\n'
)
class Plugin:
name = __name__
version = importlib_metadata.version(__name__)
def __init__(
self,
tree: ast.AST,
file_tokens: List[tokenize.TokenInfo] = None,
) -> None:
self._tree = tree
self._file_tokens = file_tokens
def run(self) -> Generator[Tuple[int, int, str, Type[Any]], None, None]:
visitor = Visitor(self._file_tokens)
visitor.visit(self._tree)
for line, col, msg in visitor.violations:
yield line, col, msg, type(self)