codemodder-python/src/codemodder/codemods/libcst_transformer.py at fd8da5a602fe9d713574902804ee47305d5fafb9 · pixee/codemodder-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
from collections import namedtuple

import libcst as cst
from libcst import matchers
from libcst._position import CodeRange
from libcst.codemod import CodemodContext
from libcst.codemod.visitors import AddImportsVisitor, RemoveImportsVisitor

from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.base_visitor import BaseTransformer
from codemodder.codemods.utils import get_call_name
from codemodder.codetf import Change, ChangeSet, Finding
from codemodder.context import CodemodExecutionContext
from codemodder.dependency import Dependency
from codemodder.diff import create_diff_from_tree
from codemodder.file_context import FileContext
from codemodder.logging import logger
from codemodder.result import Result

# One argument replacement consumed by `replace_args`:
#   name           - keyword of the argument to match in the call
#   value          - replacement source text (handed to `cst.parse_expression`)
#   add_if_missing - append the argument when no existing argument matched
NewArg = namedtuple("NewArg", "name value add_if_missing")


def update_code(file_path, new_code):
    """
    Overwrite `file_path` with `new_code` encoded as UTF-8.

    :param file_path: path-like object exposing `write_bytes`
    :param new_code: full text to store in the file
    """
    # The text is written as bytes, so the file receives exactly the UTF-8
    # encoding of `new_code`.
    encoded = new_code.encode("utf-8")
    file_path.write_bytes(encoded)


class LibcstResultTransformer(BaseTransformer):
    """
    Transformer class that performs libcst-based transformations on a given file

    :param context: libcst CodemodContext
    :param results: list of `Result` generated by the detector phase (may be empty)
    :param file_context: `FileContext` for the file to be transformed
    """

    # Description recorded on a `Change` when the caller does not supply one.
    change_description: str = ""

    def __init__(
        self,
        context: CodemodContext,
        results: list[Result] | None,
        file_context: FileContext,
        _transformer: bool = False,
    ):
        # `_transformer` is accepted (it is passed by `transform`) but is not
        # stored or otherwise used by this class.
        del _transformer

        self.file_context = file_context
        super().__init__(
            context,
            results,
            line_include=file_context.line_include,
            line_exclude=file_context.line_exclude,
        )

    @classmethod
    def transform(
        cls, module: cst.Module, results: list[Result] | None, file_context: FileContext
    ) -> cst.Module:
        """
        Instantiate this transformer for `module` and run it.

        :param module: parsed libcst module to transform
        :param results: detector results forwarded to the transformer
        :param file_context: `FileContext` for the file being transformed
        :return: the module returned by `transform_module`
        """
        wrapper = cst.MetadataWrapper(module)
        codemod = cls(
            CodemodContext(wrapper=wrapper),
            results,
            file_context,
            _transformer=True,
        )

        return codemod.transform_module(module)

    def _new_or_updated_node(self, original_node, updated_node):
        """
        Dispatch a selected node to `on_result_found`, if that hook exists.

        When `original_node` is selected and the instance has a non-None
        `on_result_found` attribute, the hook is called, a change is reported
        for `original_node`, and the hook's return value is returned.
        Otherwise `updated_node` is returned untouched.
        """
        if not self.node_is_selected(original_node):
            return updated_node

        handler = getattr(self, "on_result_found", None)
        if handler is None:
            return updated_node

        new_node = handler(original_node, updated_node)
        self.report_change(original_node)
        return new_node

    # TODO: there needs to be a way to generalize this so that it applies
    # more broadly than to just a specific kind of node. There's probably a
    # decent way to do this with metaprogramming. We could either apply it
    # broadly to every known method (which would probably have a big
    # performance impact). Or we could allow users to register the handler
    # for a specific node or nodes by means of a decorator or something
    # similar when they define their `on_result_found` method.
    # Right now this is just to demonstrate a particular use case.
    def leave_Call(self, original_node: cst.Call, updated_node: cst.Call):
        """Route `Call` nodes through `_new_or_updated_node`."""
        return self._new_or_updated_node(original_node, updated_node)

    def leave_Assign(self, original_node, updated_node):
        """Route `Assign` nodes through `_new_or_updated_node`."""
        return self._new_or_updated_node(original_node, updated_node)

    def leave_ClassDef(
        self, original_node: cst.ClassDef, updated_node: cst.ClassDef
    ) -> cst.ClassDef:
        """Route `ClassDef` nodes through `_new_or_updated_node`."""
        return self._new_or_updated_node(original_node, updated_node)

    def add_change(self, node, description: str, start: bool = True):
        """
        Report a change located at `node`.

        :param node: node whose position determines the reported line
        :param description: text stored on the change
        :param start: use the node's first line when True, its last line otherwise
        """
        position = self.node_position(node)
        self.add_change_from_position(position, description, start)

    def add_change_from_position(
        self, position: CodeRange, description: str, start: bool = True
    ):
        """
        Report a change for the start or end line of `position`.

        :param position: code range to take the line number from
        :param description: text stored on the change
        :param start: use `position.start.line` when True, `position.end.line` otherwise
        """
        line_number = position.start.line if start else position.end.line
        self.report_change_for_line(line_number, description)

    def lineno_for_node(self, node):
        """Return the line on which `node` starts."""
        return self.node_position(node).start.line

    def add_dependency(self, dependency: Dependency):
        """Register `dependency` on the file context."""
        self.file_context.add_dependency(dependency)

    def report_change(self, original_node, description: str | None = None):
        """
        Report a change at the first line of `original_node`.

        :param original_node: node whose starting line is reported
        :param description: optional text; `change_description` is used when falsy
        """
        line_number = self.lineno_for_node(original_node)
        self.report_change_for_line(line_number, description)

    def report_change_for_line(
        self,
        line_number,
        description: str | None = None,
        findings: list[Finding] | None = None,
    ):
        """
        Append a `Change` for `line_number` to the file context.

        :param line_number: line the change applies to
        :param description: optional text; `change_description` is used when falsy
        :param findings: findings fixed by the change; when falsy (including an
            empty list) they are looked up from the file context by line
        """
        self.file_context.codemod_changes.append(
            Change(
                lineNumber=line_number,
                description=description or self.change_description,
                fixedFindings=findings
                or self.file_context.get_findings_for_location(line_number),
            )
        )

    def report_unfixed(self, original_node: cst.CSTNode, reason: str):
        """
        Record the findings at `original_node`'s first line as unfixed.

        :param original_node: node whose starting line is used for the lookup
        :param reason: explanation passed on to the file context
        """
        line_number = self.lineno_for_node(original_node)
        findings = self.file_context.get_findings_for_location(line_number)
        self.file_context.add_unfixed_findings(findings, reason, line_number)

    def remove_unused_import(self, original_node):
        """Schedule removal of the import at `original_node` via `RemoveImportsVisitor`."""
        RemoveImportsVisitor.remove_unused_import_by_node(self.context, original_node)

    def add_needed_import(self, module, obj=None):
        """Schedule an import of `module` (or `obj` from `module`) via `AddImportsVisitor`."""
        # TODO: do we need to check if this import already exists?
        AddImportsVisitor.add_needed_import(self.context, module, obj)

    def update_call_target(
        self,
        original_node,
        new_target,
        new_func: str | None = None,
        replacement_args=None,
    ):
        """
        Build a new call of the form `<new_target>.<func>(...)`.

        :param original_node: the `cst.Call` being replaced
        :param new_target: source text of the expression the function is accessed on
        :param new_func: attribute name to call; when falsy, the name is taken
            from `original_node` via `get_call_name`
        :param replacement_args: arguments for the new call; when falsy, the
            arguments of `original_node` are reused
        :return: a freshly constructed `cst.Call`
        :raises AssertionError: if `original_node` is not a `cst.Call`
        """
        # TODO: is an assertion the best way to handle this?
        # Or should we just return the original node if it's not a Call?
        assert isinstance(original_node, cst.Call)

        func_name = new_func if new_func else get_call_name(original_node)
        return cst.Call(
            func=cst.Attribute(
                value=cst.parse_expression(new_target),
                attr=cst.Name(value=func_name),
            ),
            args=replacement_args if replacement_args else original_node.args,
        )

    def update_arg_target(self, updated_node, new_args: list):
        """
        Return `updated_node` with its `args` replaced by `new_args`.

        Entries that are not already `cst.Arg` instances are wrapped in one.
        """
        return updated_node.with_changes(
            args=[new if isinstance(new, cst.Arg) else cst.Arg(new) for new in new_args]
        )

    def update_assign_rhs(self, updated_node: cst.Assign, rhs: str):
        """Return `updated_node` with its value replaced by the parsed `rhs` expression."""
        value = cst.parse_expression(rhs)
        return updated_node.with_changes(value=value)

    def parse_expression(self, expression: str):
        """Parse `expression` source text with `cst.parse_expression`."""
        return cst.parse_expression(expression)

    def replace_args(self, original_node, args_info):
        """
        Iterate over the args in original_node and replace each arg
        with any matching arg in `args_info`.

        Each entry of `args_info` is applied to at most one existing argument.
        Entries that matched nothing are appended at the end when their
        `add_if_missing` flag is set. `args_info` itself is left unmodified.

        :param original_node: libcst node with args attribute.
        :param list args_info: List of NewArg
        :return: list of arguments for the rewritten node
        :raises AssertionError: if `original_node` has no `args` attribute or
            `args_info` contains something other than `NewArg`
        """
        assert hasattr(original_node, "args")
        # Work on a private copy: matched entries are removed below, and the
        # caller's list must not change as a side effect of this call.
        pending = list(args_info)
        assert all(
            isinstance(arg, NewArg) for arg in pending
        ), "`args_info` must contain `NewArg` types."
        new_args = []

        for arg in original_node.args:
            arg_name, replacement_val, idx = _match_with_existing_arg(arg, pending)
            if arg_name is not None:
                new_args.append(self.make_new_arg(replacement_val, arg_name, arg))
                # A consumed entry must not match again nor be appended later.
                del pending[idx]
            else:
                new_args.append(arg)

        new_args.extend(
            self.make_new_arg(replacement_val, arg_name)
            for arg_name, replacement_val, add_if_missing in pending
            if add_if_missing
        )

        return new_args

    def make_new_arg(self, value, name=None, existing_arg=None):
        """
        Build a `cst.Arg` from source text.

        :param value: source text of the argument value, parsed with
            `cst.parse_expression`
        :param name: keyword name; when None a positional argument is built
        :param existing_arg: optional argument being replaced; when given, its
            `equal` token is reused, otherwise `=` is emitted with no
            surrounding whitespace
        :return: the new `cst.Arg`
        """
        if name is None:
            # Make a positional argument
            return cst.Arg(
                value=cst.parse_expression(value),
            )

        # make a keyword argument
        equal = (
            existing_arg.equal
            if existing_arg
            else cst.AssignEqual(
                whitespace_before=cst.SimpleWhitespace(""),
                whitespace_after=cst.SimpleWhitespace(""),
            )
        )
        return cst.Arg(
            keyword=cst.Name(value=name),
            value=cst.parse_expression(value),
            equal=equal,
        )

    def add_arg_to_call(self, node: cst.Call, name: str, value):
        """
        Add a new arg to the end of the args list.

        :param node: call to extend
        :param name: keyword name of the new argument
        :param value: argument value; converted with `str()` and parsed as an
            expression
        :return: copy of `node` with the keyword argument appended
        """
        # Delegate to `make_new_arg` so keyword arguments are built in one place.
        new_args = [*node.args, self.make_new_arg(str(value), name)]
        return node.with_changes(args=new_args)


class LibcstTransformerPipeline(BaseTransformerPipeline):
    """
    Pipeline that runs one or more `LibcstResultTransformer` classes over a single file

    Every transformer is handed the libcst `Module` produced by the previous
    one and is expected to return a libcst `Module` in turn.
    """

    transformers: list[type[LibcstResultTransformer]]

    def apply(
        self,
        context: CodemodExecutionContext,
        file_context: FileContext,
        results: list[Result] | None,
    ) -> ChangeSet | None:
        """
        Parse, transform and (unless in dry-run mode) rewrite one file.

        :param context: execution context supplying `directory` and `dry_run`
        :param file_context: per-file state (path, timer, recorded changes)
        :param results: detector results forwarded to every transformer
        :return: a `ChangeSet` describing the modification, or None when
            parsing or transforming raised, when no change was recorded, or
            when the resulting diff is empty
        """
        file_path = file_context.file_path

        # Phase 1: read and parse the file.
        try:
            with file_context.timer.measure("parse"):
                raw_source = file_path.read_bytes()
                original_tree = cst.parse_module(raw_source.decode("utf-8"))
        except Exception:
            failure = "Failed to parse file"
            file_context.add_failure(file_path, failure)
            logger.exception("%s %s", failure, file_path)
            return None

        # Phase 2: feed the tree through each transformer in order.
        current_tree = original_tree
        try:
            with file_context.timer.measure("transform"):
                for transformer_cls in self.transformers:
                    current_tree = transformer_cls.transform(
                        current_tree, results, file_context
                    )
        except Exception:
            failure = "Failed to transform file"
            file_context.add_failure(file_path, failure)
            logger.exception("%s %s", failure, file_path)
            return None

        # Phase 3: bail out when nothing was reported or nothing differs.
        if not file_context.codemod_changes:
            logger.debug("No changes produced for %s", file_path)
            return None

        diff = create_diff_from_tree(original_tree, current_tree)
        if not diff:
            logger.debug("No code diff produced for %s", file_path)
            return None

        relative_path = file_context.file_path.relative_to(context.directory)
        change_set = ChangeSet(
            path=str(relative_path),
            diff=diff,
            changes=file_context.codemod_changes,
        )

        # Phase 4: persist the new code, skipped entirely for dry runs.
        if context.dry_run:
            return change_set

        with file_context.timer.measure("write"):
            update_code(file_context.file_path, current_tree.code)

        return change_set


def _match_with_existing_arg(arg, args_info):
    """
    Given an `arg` and a list of arg info, determine if any of the names in arg_info match the arg.
    """
    for idx, (arg_name, replacement_val, _) in enumerate(args_info):
        if matchers.matches(arg.keyword, matchers.Name(arg_name)):
            return arg_name, replacement_val, idx
    return None, None, None