Improvements to past.builtins: map, filter, etc.

edschofield · edschofield · commit 45da4e03dd0b · 2014-03-18T11:54:51.000+11:00
These now pass more of the Py2.7 unit tests
diff --git a/past/builtins/__init__.py b/past/builtins/__init__.py
@@ -1,8 +1,32 @@
-from past.builtins.noniterators import (filter, map, range, zip)
-from past.builtins.types import basestring, dict, str   #, unicode
-# from past.builtins.misc import (ascii, chr, hex, input, oct, open, raw_input, unichr)
-from past.builtins.misc import cmp, execfile, raw_input, unichr, unicode
+"""
+A resurrection of some old functions from Python 2. These should be used
+sparingly, to help with porting efforts, since code using them is no
+longer standard Python 3 code.
+
+We provide these builtin functions which have no equivalent on Py3:
+
+- cmp()
+- execfile()
+
+These aliases are also provided:
+
+- raw_input() <- input()
+- unicode() <- str()
+- unichr() <- chr()
 
+For reference, the following Py2 builtin functions are available from
+these standard locations on both Py2.6+ and Py3:
+
+- reduce() <- functools.reduce()
+- reload() <- imp.reload()
+
+"""
+
+from past.builtins.noniterators import (filter, map, range, reduce, zip)
+from past.builtins.types import basestring, dict, str, long, unicode
+# from past.builtins.misc import (ascii, chr, hex, input, oct, open, raw_input, unichr)
+from past.builtins.misc import (apply, cmp, execfile, intern, raw_input,
+                                reload, unichr, unicode, xrange)
 from past import utils
 
 
@@ -13,7 +37,8 @@
     # Only shadow builtins on Py3; no new names
     __all__ = ['filter', 'map', 'range', 'zip', 
                'basestring', 'dict', 'str',
-               'cmp', 'execfile', 'raw_input', 'unichr', 'unicode'
+               'cmp', 'execfile', 'raw_input', 'reduce', 'reload',
+               'unichr', 'unicode', 'xrange'
     #            'ascii', 'chr', 'hex', 'input', 'oct', 'open', 'unichr',
     #            'bytes', 'dict', 'int', 'range', 'round', 'str', 'super',
               ]
diff --git a/past/builtins/misc.py b/past/builtins/misc.py
@@ -1,45 +1,29 @@
-"""
-A resurrection of some old functions from Python 2. These should be used
-sparingly, to help with porting efforts, since code using them is no
-longer standard Python 3 code.
-
-We provide these builtin functions which have no equivalent on Py3:
-
-- cmp()
-- execfile()
-
-These aliases are also provided:
-
-- raw_input() <- input()
-- unicode() <- str()
-- unichr() <- chr()
-
-For reference, the following Py2 builtin functions are available from
-these standard locations on both Py2.6+ and Py3:
-
-- reduce() <- functools.reduce()
-- reload() <- imp.reload()
-
-"""
-
 from __future__ import unicode_literals
 import sys
 
-from future.utils import PY3
+from future.utils import PY3, exec_
 
 
 if PY3:
-    # Bring back the cmp function
+    def apply(f, *args, **kw):
+        return f(*args, **kw)
     cmp = lambda a, b: (a > b) - (a < b)
+    from sys import intern
     raw_input = input
+    from imp import reload
     unicode = str
     unichr = chr
+    xrange = range
 else:
     import __builtin__
+    apply = __builtin__.apply
     cmp = __builtin__.cmp
+    intern = __builtin__.intern
     raw_input = __builtin__.raw_input
+    reload = __builtin__.reload
     unicode = __builtin__.unicode
     unichr = __builtin__.unichr
+    xrange = __builtin__.xrange
 
 
 if PY3:
@@ -91,5 +75,9 @@ def execfile(filename, myglobals=None, mylocals=None):
                 else:
                     __builtin__.execfile(filename)
 
+if PY3:
+    __all__ = ['apply', 'cmp', 'execfile', 'intern', 'raw_input',
+               'reload', 'unichr', 'unicode', 'xrange']
+else:
+    __all__ = []
 
-__all__ = ['cmp', 'raw_input', 'unichr', 'unicode', 'execfile']
diff --git a/past/builtins/noniterators.py b/past/builtins/noniterators.py
@@ -19,41 +19,255 @@
 
 from __future__ import division, absolute_import, print_function
 
-import itertools
+from itertools import chain, starmap    
+import itertools       # since zip_longest doesn't exist on Py2
+from past.builtins.types import basestring
 from past.utils import PY3
 
+
+def flatmap(f, items):
+    return chain.from_iterable(map(f, items))
+
+
 if PY3:
     import builtins
 
     # list-producing versions of the major Python iterating functions
-    def oldfilter(*args, **kwargs):
-        return list(builtins.filter(*args, **kwargs))
+    def oldfilter(*args):
+        """
+        filter(function or None, sequence) -> list, tuple, or string
+        
+        Return those items of sequence for which function(item) is true.
+        If function is None, return the items that are true.  If sequence
+        is a tuple or string, return the same type, else return a list.
+        """
+        mytype = type(args[1])
+        if isinstance(args[1], basestring):
+            return mytype().join(builtins.filter(*args))
+        elif isinstance(args[1], (tuple, list)):
+            return mytype(builtins.filter(*args))
+        else:
+            # Fall back to list. Is this the right thing to do?
+            return list(builtins.filter(*args))
+
+    # This is surprisingly difficult to get right. For example, the
+    # solutions posted at the following URL fail with the test cases in
+    # the docstring below: http://stackoverflow.com/questions/8072755/
+    def oldmap(func, *iterables):
+        """
+        map(function, sequence[, sequence, ...]) -> list
+        
+        Return a list of the results of applying the function to the
+        items of the argument sequence(s).  If more than one sequence is
+        given, the function is called with an argument list consisting of
+        the corresponding item of each sequence, substituting None for
+        missing values when not all sequences have the same length.  If
+        the function is None, return a list of the items of the sequence
+        (or a list of tuples if more than one sequence).
+        
+        Test cases:
+        >>> oldmap(None, 'hello world')
+        ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd']
 
-    def oldmap(*args, **kwargs):
-        return list(builtins.map(*args, **kwargs))
+        >>> oldmap(None, range(4))
+        [0, 1, 2, 3]
+
+        More test cases are in past.tests.test_builtins.
+        """
+        zipped = itertools.zip_longest(*iterables)
+        l = list(zipped)
+        if len(l) == 0:
+            return []
+        if func is None:
+            result = l
+        else:
+            result = list(starmap(func, l))
+
+        # Flatten if the longest item has length 1 (e.g. 1-tuples from one sequence)
+        try:
+            if max([len(item) for item in result]) == 1:
+                return list(chain.from_iterable(result))
+            # return list(flatmap(func, result))
+        except TypeError as e:
+            # Simple objects like ints have no len()
+            pass
+        return result
+
+        ############################
+        ### For reference, the source code for Py2.7 map function:
+        # static PyObject *
+        # builtin_map(PyObject *self, PyObject *args)
+        # {
+        #     typedef struct {
+        #         PyObject *it;           /* the iterator object */
+        #         int saw_StopIteration;  /* bool:  did the iterator end? */
+        #     } sequence;
+        # 
+        #     PyObject *func, *result;
+        #     sequence *seqs = NULL, *sqp;
+        #     Py_ssize_t n, len;
+        #     register int i, j;
+        # 
+        #     n = PyTuple_Size(args);
+        #     if (n < 2) {
+        #         PyErr_SetString(PyExc_TypeError,
+        #                         "map() requires at least two args");
+        #         return NULL;
+        #     }
+        # 
+        #     func = PyTuple_GetItem(args, 0);
+        #     n--;
+        # 
+        #     if (func == Py_None) {
+        #         if (PyErr_WarnPy3k("map(None, ...) not supported in 3.x; "
+        #                            "use list(...)", 1) < 0)
+        #             return NULL;
+        #         if (n == 1) {
+        #             /* map(None, S) is the same as list(S). */
+        #             return PySequence_List(PyTuple_GetItem(args, 1));
+        #         }
+        #     }
+        # 
+        #     /* Get space for sequence descriptors.  Must NULL out the iterator
+        #      * pointers so that jumping to Fail_2 later doesn't see trash.
+        #      */
+        #     if ((seqs = PyMem_NEW(sequence, n)) == NULL) {
+        #         PyErr_NoMemory();
+        #         return NULL;
+        #     }
+        #     for (i = 0; i < n; ++i) {
+        #         seqs[i].it = (PyObject*)NULL;
+        #         seqs[i].saw_StopIteration = 0;
+        #     }
+        # 
+        #     /* Do a first pass to obtain iterators for the arguments, and set len
+        #      * to the largest of their lengths.
+        #      */
+        #     len = 0;
+        #     for (i = 0, sqp = seqs; i < n; ++i, ++sqp) {
+        #         PyObject *curseq;
+        #         Py_ssize_t curlen;
+        # 
+        #         /* Get iterator. */
+        #         curseq = PyTuple_GetItem(args, i+1);
+        #         sqp->it = PyObject_GetIter(curseq);
+        #         if (sqp->it == NULL) {
+        #             static char errmsg[] =
+        #                 "argument %d to map() must support iteration";
+        #             char errbuf[sizeof(errmsg) + 25];
+        #             PyOS_snprintf(errbuf, sizeof(errbuf), errmsg, i+2);
+        #             PyErr_SetString(PyExc_TypeError, errbuf);
+        #             goto Fail_2;
+        #         }
+        # 
+        #         /* Update len. */
+        #         curlen = _PyObject_LengthHint(curseq, 8);
+        #         if (curlen > len)
+        #             len = curlen;
+        #     }
+        # 
+        #     /* Get space for the result list. */
+        #     if ((result = (PyObject *) PyList_New(len)) == NULL)
+        #         goto Fail_2;
+        # 
+        #     /* Iterate over the sequences until all have stopped. */
+        #     for (i = 0; ; ++i) {
+        #         PyObject *alist, *item=NULL, *value;
+        #         int numactive = 0;
+        # 
+        #         if (func == Py_None && n == 1)
+        #             alist = NULL;
+        #         else if ((alist = PyTuple_New(n)) == NULL)
+        #             goto Fail_1;
+        # 
+        #         for (j = 0, sqp = seqs; j < n; ++j, ++sqp) {
+        #             if (sqp->saw_StopIteration) {
+        #                 Py_INCREF(Py_None);
+        #                 item = Py_None;
+        #             }
+        #             else {
+        #                 item = PyIter_Next(sqp->it);
+        #                 if (item)
+        #                     ++numactive;
+        #                 else {
+        #                     if (PyErr_Occurred()) {
+        #                         Py_XDECREF(alist);
+        #                         goto Fail_1;
+        #                     }
+        #                     Py_INCREF(Py_None);
+        #                     item = Py_None;
+        #                     sqp->saw_StopIteration = 1;
+        #                 }
+        #             }
+        #             if (alist)
+        #                 PyTuple_SET_ITEM(alist, j, item);
+        #             else
+        #                 break;
+        #         }
+        # 
+        #         if (!alist)
+        #             alist = item;
+        # 
+        #         if (numactive == 0) {
+        #             Py_DECREF(alist);
+        #             break;
+        #         }
+        # 
+        #         if (func == Py_None)
+        #             value = alist;
+        #         else {
+        #             value = PyEval_CallObject(func, alist);
+        #             Py_DECREF(alist);
+        #             if (value == NULL)
+        #                 goto Fail_1;
+        #         }
+        #         if (i >= len) {
+        #             int status = PyList_Append(result, value);
+        #             Py_DECREF(value);
+        #             if (status < 0)
+        #                 goto Fail_1;
+        #         }
+        #         else if (PyList_SetItem(result, i, value) < 0)
+        #             goto Fail_1;
+        #     }
+        # 
+        #     if (i < len && PyList_SetSlice(result, i, len, NULL) < 0)
+        #         goto Fail_1;
+        # 
+        #     goto Succeed;
+        # 
+        # Fail_1:
+        #     Py_DECREF(result);
+        # Fail_2:
+        #     result = NULL;
+        # Succeed:
+        #     assert(seqs);
+        #     for (i = 0; i < n; ++i)
+        #         Py_XDECREF(seqs[i].it);
+        #     PyMem_DEL(seqs);
+        #     return result;
+        # }
 
     def oldrange(*args, **kwargs):
         return list(builtins.range(*args, **kwargs))
 
-    # def reduce(*args, **kwargs):
-    #     return list(reduce(*args, **kwargs))
-
     def oldzip(*args, **kwargs):
         return list(builtins.zip(*args, **kwargs))
 
     filter = oldfilter
     map = oldmap
     range = oldrange
+    from functools import reduce
     zip = oldzip
-    __all__ = ['filter', 'map', 'range', 'zip']
+    __all__ = ['filter', 'map', 'range', 'reduce', 'zip']
 
 else:
     import __builtin__
     # Python 2-builtin ranges produce lists
     filter = __builtin__.filter
     map = __builtin__.map
-    # reduce = __builtin__.reduce
     range = __builtin__.range
+    reduce = __builtin__.reduce
     zip = __builtin__.zip
     __all__ = []
 
diff --git a/past/builtins/types/__init__.py b/past/builtins/types/__init__.py
@@ -5,11 +5,15 @@
     basestring = __builtin__.basestring
     dict = __builtin__.dict
     str = __builtin__.str
+    long = __builtin__.long
+    unicode = __builtin__.unicode
     __all__ = []
 else:
     from .basestring import basestring
     from .olddict import olddict as dict
     from .oldstr import oldstr as str
+    long = int
+    unicode = str
     # from .unicode import unicode
-    __all__ = ['basestring', 'dict', 'str']
+    __all__ = ['basestring', 'dict', 'str', 'long', 'unicode']
 
diff --git a/past/tests/test_builtins.py b/past/tests/test_builtins.py