Fix and add more tests of futurize with encoding comments (issues #97 and #10)

edschofield · edschofield · commit abb191fb2903 · 2014-10-03T10:03:18.000+10:00
diff --git a/src/libfuturize/fixer_util.py b/src/libfuturize/fixer_util.py
@@ -196,13 +196,13 @@ def future_import(feature, node):
     if does_tree_import(u"__future__", feature, node):
         return
 
-    # Look for a shebang line
-    shebang_idx = None
+    # Look for a shebang or encoding line
+    shebang_encoding_idx = None
 
     for idx, node in enumerate(root.children):
-        # If it's a shebang line, attach the prefix to
-        if is_shebang_comment(node):
-            shebang_idx = idx
+        # Remember the index of the node whose prefix is a shebang / encoding line
+        if is_shebang_comment(node) or is_encoding_comment(node):
+            shebang_encoding_idx = idx
         if node.type == syms.simple_stmt and \
            len(node.children) > 0 and node.children[0].type == token.STRING:
             # skip over docstring
@@ -216,9 +216,9 @@ def future_import(feature, node):
             return
 
     import_ = FromImport(u'__future__', [Leaf(token.NAME, feature, prefix=" ")])
-    if shebang_idx == 0 and idx == 0:
+    if shebang_encoding_idx == 0 and idx == 0:
         # If this __future__ import would go on the first line,
-        # detach the shebang prefix from the current first line
+        # detach the shebang / encoding prefix from the current first line
         # and attach it to our new __future__ import node.
         import_.prefix = root.children[0].prefix
         root.children[0].prefix = u''
@@ -424,16 +424,34 @@ def check_future_import(node):
         assert False, "strange import: %s" % savenode
 
 
-SHEBANG_REGEX = r'^#!\s*.*python'
+SHEBANG_REGEX = r'^#!.*python'
+ENCODING_REGEX = r"^#.*coding[:=]\s*([-\w.]+)"
+
 
 def is_shebang_comment(node):
     """
     Comments are prefixes for Leaf nodes. Returns whether the given node has a
-    prefix that looks like a shebang line.
+    prefix that looks like a shebang line:
+
+        #!/usr/bin/env python
+        #!/usr/bin/python3
     """
     return bool(re.match(SHEBANG_REGEX, node.prefix))
 
 
+def is_encoding_comment(node):
+    """
+    Comments live in node prefixes. Returns True when the prefix of ``node``
+    starts with an encoding declaration matched by ENCODING_REGEX, such as:
+
+        # coding: utf-8
+        # encoding: utf-8
+        # -*- coding: <encoding name> -*-
+        # vim: set fileencoding=<encoding name> :
+    """
+    return re.match(ENCODING_REGEX, node.prefix) is not None
+
+
 def wrap_in_fn_call(fn_name, args, prefix=None):
     """
     Example:
diff --git a/tests/test_future/test_futurize.py b/tests/test_future/test_futurize.py
@@ -6,7 +6,7 @@
 from subprocess import Popen, PIPE
 import os
 
-from libfuturize.fixer_util import is_shebang_comment
+from libfuturize.fixer_util import is_shebang_comment, is_encoding_comment
 from lib2to3.fixer_util import FromImport
 from lib2to3.pytree import Leaf, Node
 from lib2to3.pygram import token
@@ -19,11 +19,48 @@
 class TestLibFuturize(unittest.TestCase):
     def test_is_shebang_comment(self):
         """
-        Tests whether the libfuturize.fixer_util.is_shebang_comment() function is working
-        """
-        node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")])
-        node.prefix = u'#!/usr/bin/env python\n'
-        self.assertTrue(is_shebang_comment(node))
+        Tests whether the fixer_util.is_shebang_comment() function is working.
+        """
+        shebang_comments = [u'#!/usr/bin/env python\n',
+                            u"#!/usr/bin/python2\n",
+                            u"#! /usr/bin/python3\n",
+                            ]
+        not_shebang_comments = [u"# I saw a giant python\n",
+                                u"# I have never seen a python2\n",
+                                ]
+        for comment in shebang_comments:
+            node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")])
+            node.prefix = comment
+            self.assertTrue(is_shebang_comment(node))
+
+        for comment in not_shebang_comments:
+            node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")])
+            node.prefix = comment
+            self.assertFalse(is_shebang_comment(node))
+
+
+    def test_is_encoding_comment(self):
+        """
+        Tests whether the fixer_util.is_encoding_comment() function is working.
+        """
+        encoding_comments = [u"# coding: utf-8",
+                             u"# encoding: utf-8",
+                             u"# -*- coding: latin-1 -*-",
+                             u"# vim: set fileencoding=iso-8859-15 :",
+                            ]
+        not_encoding_comments = [u"# We use the file encoding utf-8",
+                                 u"coding = 'utf-8'",
+                                 u"encoding = 'utf-8'",
+                                ]
+        for comment in encoding_comments:
+            node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")])
+            node.prefix = comment
+            self.assertTrue(is_encoding_comment(node))
+
+        for comment in not_encoding_comments:
+            node = FromImport(u'math', [Leaf(token.NAME, u'cos', prefix=" ")])
+            node.prefix = comment
+            self.assertFalse(is_encoding_comment(node))
 
 
 class TestFuturizeSimple(CodeHandler):