diff --git a/README.rst b/README.rst
index 207d3a9..2ed8cf0 100644
--- a/README.rst
+++ b/README.rst
@@ -1,34 +1,94 @@
 twitter-text-python
 ===================
 
-**twitter-text-python** is a Tweet parser and formatter for Python.
+**twitter-text-python** is a Tweet parser and formatter for Python. Extract users, hashtags, URLs and format as HTML for display.
 
-It is based on twitter-text-java_ and passes all the unittests of 
-twitter-text-conformance_ plus some additional ones.
+
+**UPDATE**: this project is now maintained by Ed Burnett; please go here for the active version: https://github.com/edburnett/twitter-text-python
+
+
+It is based on twitter-text-java_ and did pass all the unittests of 
+twitter-text-conformance_ plus some additional ones. Note that the conformance tests have since fallen behind the upstream suite (an easy PR for someone to work on: https://github.com/ianozsvald/twitter-text-python/issues/5 ).
 
 .. _twitter-text-java: http://github.com/mzsanford/twitter-text-java
 .. _twitter-text-conformance: http://github.com/mzsanford/twitter-text-conformance
 
+This version was forked by Ian Ozsvald in January 2013 and released to PyPI; some bugs were fixed and a few minor changes to functionality were added:
+https://github.com/ianozsvald/twitter-text-python
+
+PyPI release:
+http://pypi.python.org/pypi/twitter-text-python/
+
+The original ttp comes from Ivo Wetzel (Ivo's version no longer supported):
+https://github.com/BonsaiDen/twitter-text-python
+
 Usage::
 
-    >>> import ttp
+    >>> from ttp import ttp
     >>> p = ttp.Parser()
-    >>> result = p.parse("@BonsaiDen Hey that's a great Tweet parser! #twp")
+    >>> result = p.parse("@ianozsvald, you now support #IvoWertzel's tweet parser! https://github.com/ianozsvald/")
     >>> result.reply
-    'BonsaiDen'
+    'ianozsvald'
     >>> result.users
-    ['BonsaiDen']
+    ['ianozsvald']
     >>> result.tags
-    ['twp']
+    ['IvoWertzel']
     >>> result.urls
-    []
+    ['https://github.com/ianozsvald/']
     >>> result.html
-    u'<a href="http://twitter.com/BonsaiDen">@BonsaiDen</a> Hey that\'s a great Tweet Parser! 
-    <a href="http://search.twitter.com/search?q=%23twp">#twp</a>'
-
+    u'<a href="http://twitter.com/ianozsvald">@ianozsvald</a>, you now support <a href="http://search.twitter.com/search?q=%23IvoWertzel">#IvoWertzel</a>\'s tweet parser! <a href="https://github.com/ianozsvald/">https://github.com/ianozsvald/</a>'
 
 If you need different HTML output just subclass and override the ``format_*`` methods.
 
+You can also ask for the span tags to be returned for each entity::
+
+    >>> p = ttp.Parser(include_spans=True)
+    >>> result = p.parse("@ianozsvald, you now support #IvoWertzel's tweet parser! https://github.com/ianozsvald/")
+    >>> result.urls
+    [('https://github.com/ianozsvald/', (57, 87))]
+
+
+To use the shortlink follower::
+
+    >>> from ttp import utils
+    >>> # assume that result.urls == ['http://t.co/8o0z9BbEMu', u'http://bbc.in/16dClPF']
+    >>> print utils.follow_shortlinks(result.urls)  # pass in list of shortlink URLs
+    {'http://t.co/8o0z9BbEMu': [u'http://t.co/8o0z9BbEMu', u'http://bbc.in/16dClPF', u'http://www.bbc.co.uk/sport/0/21711199#TWEET650562'], u'http://bbc.in/16dClPF': [u'http://bbc.in/16dClPF', u'http://www.bbc.co.uk/sport/0/21711199#TWEET650562']}
+    >>> # note that bad shortlink URLs map to an empty list (lost/forgotten shortlink URLs don't generate any error)
+
+
+Installation
+------------
+
+**NOTE** this version (Ian's) is no longer maintained, see Ed's active version instead: https://github.com/edburnett/twitter-text-python
+
+pip and easy_install will do the job::
+
+    # via: http://pypi.python.org/pypi/twitter-text-python
+    $ pip install twitter-text-python  
+    $ python
+    >>> from ttp import ttp
+    >>> ttp.__version__
+    '1.0.0.2'
+
+Changelog
+---------
+
+ * 2013/2/11 1.0.0.2 released to PyPI
+ * 2013/6/1 1.0.1 new working version, adding comma parse fix (thanks https://github.com/muckrack), used autopep8 to clean the src, added a shortlink expander
+
+
+Tests
+-----
+
+Check out the code via GitHub https://github.com/ianozsvald/twitter-text-python and run tests locally::
+
+    $ python ttp/tests.py 
+    ....................................................................................................
+    ----------------------------------------------------------------------
+    Ran 100 tests in 0.009s
+    OK
+
 
 Contributing
 ------------
@@ -37,23 +97,53 @@ The source is available on GitHub_, to
 contribute to the project, fork it on GitHub and send a pull request.
 Everyone is welcome to make improvements to **twp**!
 
-.. _GitHub: http://github.com/BonsaiDen/twitter-text-python
+.. _GitHub: https://github.com/ianozsvald/twitter-text-python
+
+
+Todo
+----
+
+  * Consider adding capitalised phrase identification
+  * Consider adding a repeated-char remover (e.g. grrrrrrr->grr)
+  * Make it 1 line to parse and get a results dict via __init__.py
+  * Tag the next release
+
+Doing a release
+---------------
+
+In parent directory on Ian's machine see USE_THIS_FOR_PYPI_RELEASE.txt. The short form::
+
+    $ # edit setup.py to bump the version number
+    $ git tag -a v1.0.1 -m 'v1.0.1 release'
+    $ git push origin --tags
+    $ ianozsvald-twitter-text-python $ python setup.py sdist register  upload -r http://pypi.python.org/pypi
+    $ # this uses ~/.pypirc with cached login details
+
 
 License
-=======
+-------
+
+*MIT*
+
+Copyright (c) 2012 Ivo Wetzel.
 
-Copyright (c) 2010 Ivo Wetzel
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
 
-**twitter-text-python** is free software: you can redistribute it and/or 
-modify it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
 
-**twitter-text-python** is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
 
-You should have received a copy of the GNU General Public License along with
-**twitter-text-python**. If not, see <http://www.gnu.org/licenses/>.
+Copyright (c) 2010-2013 Ivo Wetzel
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..6a99645
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests==1.1.0
diff --git a/setup.py b/setup.py
index 9de7d83..2a9dd84 100644
--- a/setup.py
+++ b/setup.py
@@ -2,24 +2,25 @@
 
 setup(
     name='twitter-text-python',
-    version='1.0',
-    description='Tweet parser and formatter',
-    long_description=open('README.rst').read(),
-    author='Ivo Wetzel',
-    author_email='',
-    url='http://github.com/BonsaiDen/twitter-text-python',
-    license='GPL',
-    py_modules=['ttp'],
+    version='1.0.1',
+    description='Twitter Tweet parser and formatter',
+    long_description="Extract @users, #hashtags and URLs (and unwind shortened links) from tweets including entity locations, also generate HTML for output. Visit https://github.com/ianozsvald/twitter-text-python for examples.",
+    #open('README.rst').read(),
+    author='Maintained by Ian Ozsvald (originally by Ivo Wetzel)',
+    author_email='ian@ianozsvald.com',
+    url='https://github.com/ianozsvald/twitter-text-python',
+    license='MIT',
+    packages=['ttp'],
     include_package_data=True,
     zip_safe=False,
     install_requires=[],
     classifiers=[
-        'Environment :: Web Environment',
-        # I don't know what exactly this means, but why not?
+        'Environment :: Console',
         'Intended Audience :: Developers',
-        'License :: OSI Approved :: BSD License',
+        'License :: OSI Approved :: MIT License',
         'Operating System :: OS Independent',
         'Programming Language :: Python',
         'Topic :: Software Development :: Libraries :: Python Modules',
+        'Topic :: Text Processing :: Linguistic',
     ]
 )
diff --git a/ttp/__init__.py b/ttp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests.py b/ttp/tests.py
similarity index 85%
rename from tests.py
rename to ttp/tests.py
index e084abc..39aa5ab 100644
--- a/tests.py
+++ b/ttp/tests.py
@@ -22,534 +22,593 @@
 
 
 class TWPTests(unittest.TestCase):
+
     def setUp(self):
         self.parser = ttp.Parser()
-    
-    
+
     # General Tests ------------------------------------------------------------
     # --------------------------------------------------------------------------
+    def test_urls(self):
+        """Confirm that # in a URL works along with ,"""
+        result = self.parser.parse(u'big url: http://blah.com:8080/path/to/here?p=1&q=abc,def#posn2 #ahashtag')
+        self.assertEqual(result.urls, [u'http://blah.com:8080/path/to/here?p=1&q=abc,def#posn2'])
+        self.assertEqual(result.tags, [u'ahashtag'])
+
     def test_all_not_allow_amp_without_question(self):
         result = self.parser.parse(u'Check out: http://www.github.com/test&@username')
         self.assertEqual(result.html, u'Check out: <a href="http://www.github.com/test">http://www.github.com/test</a>&<a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
         self.assertEqual(result.urls, [u'http://www.github.com/test'])
-    
+
     def test_all_not_break_url_at(self):
         result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/4382024406')
         self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/4382024406">http://www.flickr.com/photo...</a>')
         self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/4382024406'])
-    
-    
+
     # URL tests ----------------------------------------------------------------
     # --------------------------------------------------------------------------
     def test_url_mid(self):
         result = self.parser.parse(u'text http://example.com more text')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a> more text')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_unicode(self):
         result = self.parser.parse(u'I enjoy Macintosh Brand computers: http://✪df.ws/ejp')
         self.assertEqual(result.html, u'I enjoy Macintosh Brand computers: <a href="http://✪df.ws/ejp">http://✪df.ws/ejp</a>')
         self.assertEqual(result.urls, [u'http://\u272adf.ws/ejp'])
-    
+
     def test_url_parentheses(self):
         result = self.parser.parse(u'text (http://example.com)')
         self.assertEqual(result.html, u'text (<a href="http://example.com">http://example.com</a>)')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_underscore(self):
         result = self.parser.parse(u'text http://example.com/test/foo_123.jpg')
         self.assertEqual(result.html, u'text <a href="http://example.com/test/foo_123.jpg">http://example.com/test/foo...</a>')
         self.assertEqual(result.urls, [u'http://example.com/test/foo_123.jpg'])
-    
+
     def test_url_underscore_dot(self):
         result = self.parser.parse(u'text http://example.com/test/bla.net_foo_123.jpg')
         self.assertEqual(result.html, u'text <a href="http://example.com/test/bla.net_foo_123.jpg">http://example.com/test/bla...</a>')
         self.assertEqual(result.urls, [u'http://example.com/test/bla.net_foo_123.jpg'])
-    
+
     def test_url_amp_lang_equals(self):
         result = self.parser.parse(u'Check out http://search.twitter.com/search?q=avro&lang=en')
         self.assertEqual(result.html, u'Check out <a href="http://search.twitter.com/search?q=avro&amp;lang=en">http://search.twitter.com/s...</a>')
         self.assertEqual(result.urls, [u'http://search.twitter.com/search?q=avro&lang=en'])
-    
+
     def test_url_amp_break(self):
         result = self.parser.parse(u'Check out http://twitter.com/te?foo&invalid=True')
         self.assertEqual(result.html, u'Check out <a href="http://twitter.com/te?foo&amp;invalid=True">http://twitter.com/te?foo...</a>')
         self.assertEqual(result.urls, [u'http://twitter.com/te?foo&invalid=True'])
-    
+
     def test_url_dash(self):
         result = self.parser.parse(u'Is www.foo-bar.com a valid URL?')
         self.assertEqual(result.html, u'Is <a href="http://www.foo-bar.com">www.foo-bar.com</a> a valid URL?')
         self.assertEqual(result.urls, [u'www.foo-bar.com'])
-    
+
     def test_url_multiple(self):
         result = self.parser.parse(u'http://example.com https://sslexample.com http://sub.example.com')
-        self.assertEqual(result.html, u'<a href="http://example.com">http://example.com</a> <a href="https://sslexample.com">https://sslexample.com</a> <a href="http://sub.example.com">http://sub.example.com</a>')
+        self.assertEqual(
+            result.html, u'<a href="http://example.com">http://example.com</a> <a href="https://sslexample.com">https://sslexample.com</a> <a href="http://sub.example.com">http://sub.example.com</a>')
         self.assertEqual(result.urls, [u'http://example.com', u'https://sslexample.com', u'http://sub.example.com'])
-    
+
     def test_url_raw_domain(self):
         result = self.parser.parse(u'See http://example.com example.com')
         self.assertEqual(result.html, u'See <a href="http://example.com">http://example.com</a> example.com')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_embed_link(self):
         result = self.parser.parse(u'<link rel=\'true\'>http://example.com</link>')
         self.assertEqual(result.html, u'<link rel=\'true\'><a href="http://example.com">http://example.com</a></link>')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_trailing(self):
         result = self.parser.parse(u'text http://example.com')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_japanese(self):
         result = self.parser.parse(u'いまなにしてるhttp://example.comいまなにしてる')
         self.assertEqual(result.html, u'いまなにしてる<a href="http://example.com">http://example.com</a>いまなにしてる')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_lots_of_punctuation(self):
         result = self.parser.parse(u'text http://xo.com/~matthew+%-,.;x')
         self.assertEqual(result.html, u'text <a href="http://xo.com/~matthew+%-,.;x">http://xo.com/~matthew+%-,.;x</a>')
         self.assertEqual(result.urls, [u'http://xo.com/~matthew+%-,.;x'])
-    
+
     def test_url_question_numbers(self):
         result = self.parser.parse(u'text http://example.com/?77e8fd')
         self.assertEqual(result.html, u'text <a href="http://example.com/?77e8fd">http://example.com/?77e8fd</a>')
         self.assertEqual(result.urls, [u'http://example.com/?77e8fd'])
-    
+
     def test_url_one_letter_other(self):
         result = self.parser.parse(u'text http://u.nu/')
         self.assertEqual(result.html, u'text <a href="http://u.nu/">http://u.nu/</a>')
         self.assertEqual(result.urls, [u'http://u.nu/'])
-        
+
         result = self.parser.parse(u'text http://u.tv/')
         self.assertEqual(result.html, u'text <a href="http://u.tv/">http://u.tv/</a>')
         self.assertEqual(result.urls, [u'http://u.tv/'])
-    
+
     def test_url_one_letter_iana(self):
         result = self.parser.parse(u'text http://x.com/')
         self.assertEqual(result.html, u'text <a href="http://x.com/">http://x.com/</a>')
         self.assertEqual(result.urls, [u'http://x.com/'])
-        
+
         result = self.parser.parse(u'text http://Q.com/')
         self.assertEqual(result.html, u'text <a href="http://Q.com/">http://Q.com/</a>')
         self.assertEqual(result.urls, [u'http://Q.com/'])
-        
+
         result = self.parser.parse(u'text http://z.com/')
         self.assertEqual(result.html, u'text <a href="http://z.com/">http://z.com/</a>')
         self.assertEqual(result.urls, [u'http://z.com/'])
-        
+
         result = self.parser.parse(u'text http://i.net/')
         self.assertEqual(result.html, u'text <a href="http://i.net/">http://i.net/</a>')
         self.assertEqual(result.urls, [u'http://i.net/'])
-        
+
         result = self.parser.parse(u'text http://q.net/')
         self.assertEqual(result.html, u'text <a href="http://q.net/">http://q.net/</a>')
         self.assertEqual(result.urls, [u'http://q.net/'])
-        
+
         result = self.parser.parse(u'text http://X.org/')
         self.assertEqual(result.html, u'text <a href="http://X.org/">http://X.org/</a>')
         self.assertEqual(result.urls, [u'http://X.org/'])
-    
+
     def test_url_long_hypens(self):
         result = self.parser.parse(u'text http://word-and-a-number-8-ftw.domain.tld/')
         self.assertEqual(result.html, u'text <a href="http://word-and-a-number-8-ftw.domain.tld/">http://word-and-a-number-8-...</a>')
         self.assertEqual(result.urls, [u'http://word-and-a-number-8-ftw.domain.tld/'])
-    
-    
+
     # URL not tests ------------------------------------------------------------
     def test_not_url_dotdotdot(self):
         result = self.parser.parse(u'Is www...foo a valid URL?')
         self.assertEqual(result.html, u'Is www...foo a valid URL?')
         self.assertEqual(result.urls, [])
-    
+
     def test_not_url_dash(self):
         result = self.parser.parse(u'Is www.-foo.com a valid URL?')
         self.assertEqual(result.html, u'Is www.-foo.com a valid URL?')
         self.assertEqual(result.urls, [])
-    
+
     def test_not_url_no_tld(self):
         result = self.parser.parse(u'Is http://no-tld a valid URL?')
         self.assertEqual(result.html, u'Is http://no-tld a valid URL?')
         self.assertEqual(result.urls, [])
-    
+
     def test_not_url_tld_too_short(self):
         result = self.parser.parse(u'Is http://tld-too-short.x a valid URL?')
         self.assertEqual(result.html, u'Is http://tld-too-short.x a valid URL?')
         self.assertEqual(result.urls, [])
-    
-    def test_all_not_break_url_at(self):
+
+    def test_all_not_break_url_at2(self):
         result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/4382024406')
         self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/4382024406">http://www.flickr.com/photo...</a>')
         self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/4382024406'])
-    
+
     def test_not_url_one_letter_iana(self):
         result = self.parser.parse(u'text http://a.com/ http://a.net/ http://a.org/')
         self.assertEqual(result.html, u'text http://a.com/ http://a.net/ http://a.org/')
         self.assertEqual(result.urls, [])
-    
-    
+
     # URL followed Tests -------------------------------------------------------
     def test_url_followed_question(self):
         result = self.parser.parse(u'text http://example.com?')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>?')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_colon(self):
         result = self.parser.parse(u'text http://example.com:')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>:')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_curly_brace(self):
         result = self.parser.parse(u'text http://example.com}')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>}')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_single_quote(self):
         result = self.parser.parse(u'text http://example.com')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_dot(self):
         result = self.parser.parse(u'text http://example.com.')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>.')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_exclamation(self):
         result = self.parser.parse(u'text http://example.com!')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>!')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_comma(self):
         result = self.parser.parse(u'text http://example.com,')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>,')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
+    def test_url_with_path_preceeded_by_comma(self):
+        result = self.parser.parse(u'text ,http://example.com/abcde, more')
+        self.assertEqual(result.html, u'text ,<a href="http://example.com/abcde">http://example.com/abcde</a>, more')
+        self.assertEqual(result.urls, [u'http://example.com/abcde'])
+
+    def test_url_with_path_followed_comma(self):
+        result = self.parser.parse(u'text http://example.com/abcde, more')
+        self.assertEqual(result.html, u'text <a href="http://example.com/abcde">http://example.com/abcde</a>, more')
+        self.assertEqual(result.urls, [u'http://example.com/abcde'])
+
+    def test_url_with_path_followed_commas(self):
+        result = self.parser.parse(u'text http://example.com/abcde,, more')
+        self.assertEqual(result.html, u'text <a href="http://example.com/abcde">http://example.com/abcde</a>,, more')
+        self.assertEqual(result.urls, [u'http://example.com/abcde'])
+
     def test_url_followed_brace(self):
         result = self.parser.parse(u'text http://example.com)')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>)')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_big_brace(self):
         result = self.parser.parse(u'text http://example.com]')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>]')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_equals(self):
         result = self.parser.parse(u'text http://example.com=')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>=')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_semicolon(self):
         result = self.parser.parse(u'text http://example.com;')
         self.assertEqual(result.html, u'text <a href="http://example.com">http://example.com</a>;')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_url_followed_hypen(self):
         result = self.parser.parse(u'text http://domain.tld-that-you-should-have-put-a-space-after')
         self.assertEqual(result.html, u'text <a href="http://domain.tld">http://domain.tld</a>-that-you-should-have-put-a-space-after')
         self.assertEqual(result.urls, [u'http://domain.tld'])
-    
-    
+
     # URL preceeded Tests -------------------------------------------------------
     def test_url_preceeded_colon(self):
         result = self.parser.parse(u'text:http://example.com')
         self.assertEqual(result.html, u'text:<a href="http://example.com">http://example.com</a>')
         self.assertEqual(result.urls, [u'http://example.com'])
-    
+
     def test_not_url_preceeded_equals(self):
         result = self.parser.parse(u'text =http://example.com')
         self.assertEqual(result.html, u'text =http://example.com')
         self.assertEqual(result.urls, [])
-    
+
     # NOT
     def test_not_url_preceeded_forwardslash(self):
         result = self.parser.parse(u'text /http://example.com')
         self.assertEqual(result.html, u'text /http://example.com')
         self.assertEqual(result.urls, [])
-    
+
     def test_not_url_preceeded_exclamation(self):
         result = self.parser.parse(u'text !http://example.com')
         self.assertEqual(result.html, u'text !http://example.com')
         self.assertEqual(result.urls, [])
-    
-    
+
     # URL numeric tests --------------------------------------------------------
     def test_url_at_numeric(self):
         result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/4382024406')
         self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/4382024406">http://www.flickr.com/photo...</a>')
         self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/4382024406'])
-    
+
     def test_url_at_non_numeric(self):
         result = self.parser.parse(u'http://www.flickr.com/photos/29674651@N00/foobar')
         self.assertEqual(result.html, u'<a href="http://www.flickr.com/photos/29674651@N00/foobar">http://www.flickr.com/photo...</a>')
         self.assertEqual(result.urls, [u'http://www.flickr.com/photos/29674651@N00/foobar'])
-    
-    
+
     # URL domain tests ---------------------------------------------------------
     def test_url_WWW(self):
         result = self.parser.parse(u'WWW.EXAMPLE.COM')
         self.assertEqual(result.html, u'<a href="http://WWW.EXAMPLE.COM">WWW.EXAMPLE.COM</a>')
         self.assertEqual(result.urls, [u'WWW.EXAMPLE.COM'])
-    
+
     def test_url_www(self):
         result = self.parser.parse(u'www.example.com')
         self.assertEqual(result.html, u'<a href="http://www.example.com">www.example.com</a>')
         self.assertEqual(result.urls, [u'www.example.com'])
-    
+
     def test_url_only_domain_query_followed_period(self):
         result = self.parser.parse(u'I think it\'s proper to end sentences with a period http://tell.me/why?=because.i.want.it. Even when they contain a URL.')
-        self.assertEqual(result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me/why?=because.i.want.it">http://tell.me/why?=because...</a>. Even when they contain a URL.')
+        self.assertEqual(
+            result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me/why?=because.i.want.it">http://tell.me/why?=because...</a>. Even when they contain a URL.')
         self.assertEqual(result.urls, [u'http://tell.me/why?=because.i.want.it'])
-    
+
     def test_url_only_domain_followed_period(self):
         result = self.parser.parse(u'I think it\'s proper to end sentences with a period http://tell.me. Even when they contain a URL.')
         self.assertEqual(result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me">http://tell.me</a>. Even when they contain a URL.')
         self.assertEqual(result.urls, [u'http://tell.me'])
-    
+
     def test_url_only_domain_path_followed_period(self):
         result = self.parser.parse(u'I think it\'s proper to end sentences with a period http://tell.me/why. Even when they contain a URL.')
         self.assertEqual(result.html, u'I think it\'s proper to end sentences with a period <a href="http://tell.me/why">http://tell.me/why</a>. Even when they contain a URL.')
         self.assertEqual(result.urls, [u'http://tell.me/why'])
-    
+
     def test_url_long_tld(self):
         result = self.parser.parse(u'http://example.mobi/path')
         self.assertEqual(result.html, u'<a href="http://example.mobi/path">http://example.mobi/path</a>')
         self.assertEqual(result.urls, [u'http://example.mobi/path'])
-    
+
     def test_url_multiple_protocols(self):
         result = self.parser.parse(u'http://foo.com AND https://bar.com AND www.foobar.com')
         self.assertEqual(result.html, u'<a href="http://foo.com">http://foo.com</a> AND <a href="https://bar.com">https://bar.com</a> AND <a href="http://www.foobar.com">www.foobar.com</a>')
         self.assertEqual(result.urls, [u'http://foo.com', u'https://bar.com', u'www.foobar.com'])
-    
+
     # NOT
     def test_not_url_exclamation_domain(self):
         result = self.parser.parse(u'badly formatted http://foo!bar.com')
         self.assertEqual(result.html, u'badly formatted http://foo!bar.com')
         self.assertEqual(result.urls, [])
-    
+
     def test_not_url_under_domain(self):
         result = self.parser.parse(u'badly formatted http://foo_bar.com')
         self.assertEqual(result.html, u'badly formatted http://foo_bar.com')
         self.assertEqual(result.urls, [])
-    
-    
+
     # Hashtag tests ------------------------------------------------------------
     # --------------------------------------------------------------------------
     def test_hashtag_followed_full_whitespace(self):
         result = self.parser.parse(u'#hashtag　text')
         self.assertEqual(result.html, u'<a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>　text')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_hashtag_followed_full_hash(self):
         result = self.parser.parse(u'＃hashtag')
         self.assertEqual(result.html, u'<a href="http://search.twitter.com/search?q=%23hashtag">＃hashtag</a>')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_hashtag_preceeded_full_whitespace(self):
         result = self.parser.parse(u'text　#hashtag')
         self.assertEqual(result.html, u'text　<a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_hashtag_number(self):
         result = self.parser.parse(u'text #1tag')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%231tag">#1tag</a>')
         self.assertEqual(result.tags, [u'1tag'])
-    
+
     def test_not_hashtag_escape(self):
         result = self.parser.parse(u'&#nbsp;')
         self.assertEqual(result.html, u'&#nbsp;')
         self.assertEqual(result.tags, [])
-    
+
     def test_hashtag_japanese(self):
         result = self.parser.parse(u'text #hashtagの')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>の')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_hashtag_period(self):
         result = self.parser.parse(u'text.#hashtag')
         self.assertEqual(result.html, u'text.<a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_hashtag_trailing(self):
         result = self.parser.parse(u'text #hashtag')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_not_hashtag_exclamation(self):
         result = self.parser.parse(u'text #hashtag!')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag">#hashtag</a>!')
         self.assertEqual(result.tags, [u'hashtag'])
-    
+
     def test_hashtag_multiple(self):
         result = self.parser.parse(u'text #hashtag1 #hashtag2')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hashtag1">#hashtag1</a> <a href="http://search.twitter.com/search?q=%23hashtag2">#hashtag2</a>')
         self.assertEqual(result.tags, [u'hashtag1', u'hashtag2'])
-    
+
     def test_not_hashtag_number(self):
         result = self.parser.parse(u'text #1234')
         self.assertEqual(result.html, u'text #1234')
         self.assertEqual(result.tags, [])
-    
+
     def test_not_hashtag_text(self):
         result = self.parser.parse(u'text#hashtag')
         self.assertEqual(result.html, u'text#hashtag')
         self.assertEqual(result.tags, [])
-    
+
     def test_hashtag_umlaut(self):
         result = self.parser.parse(u'text #hash_tagüäö')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hash_tag%C3%BC%C3%A4%C3%B6">#hash_tagüäö</a>')
         self.assertEqual(result.tags, [u'hash_tag\xfc\xe4\xf6'])
-    
+
     def test_hashtag_alpha(self):
         result = self.parser.parse(u'text #hash0tag')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hash0tag">#hash0tag</a>')
         self.assertEqual(result.tags, [u'hash0tag'])
-    
+
     def test_hashtag_under(self):
         result = self.parser.parse(u'text #hash_tag')
         self.assertEqual(result.html, u'text <a href="http://search.twitter.com/search?q=%23hash_tag">#hash_tag</a>')
         self.assertEqual(result.tags, [u'hash_tag'])
-    
-    
+
     # Username tests -----------------------------------------------------------
     # --------------------------------------------------------------------------
     def test_not_username_preceded_letter(self):
         result = self.parser.parse(u'meet@the beach')
         self.assertEqual(result.html, u'meet@the beach')
         self.assertEqual(result.users, [])
-    
+
     def test_username_preceded_punctuation(self):
         result = self.parser.parse(u'.@username')
         self.assertEqual(result.html, u'.<a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_username_preceded_japanese(self):
         result = self.parser.parse(u'あ@username')
         self.assertEqual(result.html, u'あ<a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_username_followed_japanese(self):
         result = self.parser.parse(u'@usernameの')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>の')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_username_surrounded_japanese(self):
         result = self.parser.parse(u'あ@usernameの')
         self.assertEqual(result.html, u'あ<a href="http://twitter.com/username">@username</a>の')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_username_followed_punctuation(self):
         result = self.parser.parse(u'@username&^$%^')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>&^$%^')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_not_username_spaced(self):
         result = self.parser.parse(u'@ username')
         self.assertEqual(result.html, u'@ username')
         self.assertEqual(result.users, [])
-    
+
     def test_username_beginning(self):
         result = self.parser.parse(u'@username text')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a> text')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_username_to_long(self):
         result = self.parser.parse(u'@username9012345678901')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username901234567890">@username901234567890</a>1')
         self.assertEqual(result.users, [u'username901234567890'])
-    
+
     def test_username_full_at_sign(self):
         result = self.parser.parse(u'＠username')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username">＠username</a>')
         self.assertEqual(result.users, [u'username'])
-    
+
     def test_username_trailing(self):
         result = self.parser.parse(u'text @username')
         self.assertEqual(result.html, u'text <a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
-    
+
     # Replies
     def test_username_reply_simple(self):
         result = self.parser.parse(u'@username')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
         self.assertEqual(result.reply, u'username')
-    
+
     def test_username_reply_whitespace(self):
         result = self.parser.parse(u'   @username')
         self.assertEqual(result.html, u'   <a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
         self.assertEqual(result.reply, u'username')
-    
+
     def test_username_reply_full(self):
         result = self.parser.parse(u'　@username')
         self.assertEqual(result.html, u'　<a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
         self.assertEqual(result.reply, u'username')
-    
+
     def test_username_non_reply(self):
         result = self.parser.parse(u'test @username')
         self.assertEqual(result.html, u'test <a href="http://twitter.com/username">@username</a>')
         self.assertEqual(result.users, [u'username'])
         self.assertEqual(result.reply, None)
-    
-    
+
     # List tests ---------------------------------------------------------------
     # --------------------------------------------------------------------------
     def test_list_preceeded(self):
         result = self.parser.parse(u'text @username/list')
         self.assertEqual(result.html, u'text <a href="http://twitter.com/username/list">@username/list</a>')
         self.assertEqual(result.lists, [(u'username', u'list')])
-    
+
     def test_list_beginning(self):
         result = self.parser.parse(u'@username/list')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username/list">@username/list</a>')
         self.assertEqual(result.lists, [(u'username', u'list')])
-    
+
     def test_list_preceeded_punctuation(self):
         result = self.parser.parse(u'.@username/list')
         self.assertEqual(result.html, u'.<a href="http://twitter.com/username/list">@username/list</a>')
         self.assertEqual(result.lists, [(u'username', u'list')])
-    
+
     def test_list_followed_punctuation(self):
         result = self.parser.parse(u'@username/list&^$%^')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username/list">@username/list</a>&^$%^')
         self.assertEqual(result.lists, [(u'username', u'list')])
-    
+
     def test_list_not_slash_space(self):
         result = self.parser.parse(u'@username/ list')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username">@username</a>/ list')
         self.assertEqual(result.users, [u'username'])
         self.assertEqual(result.lists, [])
-    
-    def test_list_beginning(self):
+
+    def test_list_beginning2(self):
         result = self.parser.parse(u'@username/list')
         self.assertEqual(result.html, u'<a href="http://twitter.com/username/list">@username/list</a>')
         self.assertEqual(result.lists, [(u'username', u'list')])
-    
+
     def test_list_not_empty_username(self):
         result = self.parser.parse(u'text @/list')
         self.assertEqual(result.html, u'text @/list')
         self.assertEqual(result.lists, [])
-    
+
     def test_list_not_preceeded_letter(self):
         result = self.parser.parse(u'meet@the/beach')
         self.assertEqual(result.html, u'meet@the/beach')
         self.assertEqual(result.lists, [])
-    
+
     def test_list_long_truncate(self):
         result = self.parser.parse(u'@username/list5678901234567890123456789012345678901234567890123456789012345678901234567890A')
-        self.assertEqual(result.html, u'<a href="http://twitter.com/username/list5678901234567890123456789012345678901234567890123456789012345678901234567890">@username/list5678901234567890123456789012345678901234567890123456789012345678901234567890</a>A')
+        self.assertEqual(
+            result.html, u'<a href="http://twitter.com/username/list5678901234567890123456789012345678901234567890123456789012345678901234567890">@username/list5678901234567890123456789012345678901234567890123456789012345678901234567890</a>A')
         self.assertEqual(result.lists, [(u'username', u'list5678901234567890123456789012345678901234567890123456789012345678901234567890')])
-    
+
     def test_list_with_dash(self):
         result = self.parser.parse(u'text @username/list-foo')
         self.assertEqual(result.html, u'text <a href="http://twitter.com/username/list-foo">@username/list-foo</a>')
         self.assertEqual(result.lists, [(u'username', u'list-foo')])
 
 
+class TWPTestsWithSpans(unittest.TestCase):
+
+    """Test ttp with re spans to extract character co-ords of matches"""
+    def setUp(self):
+        self.parser = ttp.Parser(include_spans=True)
+
+    def test_spans_in_tweets(self):
+        """Test some coca-cola tweets taken from twitter with spans"""
+        result = self.parser.parse(u'Coca-Cola Hits 50 Million Facebook Likes http://bit.ly/QlKOc7')
+        self.assertEqual(result.urls, [('http://bit.ly/QlKOc7', (41, 61))])
+
+        result = self.parser.parse(u' #ABillionReasonsToBelieveInAfrica ARISE MAG.FASHION WEEK NY! Tsemaye B,Maki Oh,Tiffany Amber, Ozwald.Showin NY reasons2beliv @CocaCola_NG', html=False)
+        self.assertEqual(result.urls, [])
+        self.assertEqual(result.tags, [(u'ABillionReasonsToBelieveInAfrica', (1, 34))])
+        self.assertEqual(result.users, [(u'CocaCola_NG', (126, 138))])
+
+        result = self.parser.parse(u'Follow @CokeZero & Retweet for a chance to win @EASPORTS @EANCAAFootball 13 #GameOn #ad Rules: http://bit.ly/EANCAA', html=False)
+        self.assertEqual(result.urls, [(u'http://bit.ly/EANCAA', (95, 115))])
+        self.assertEqual(result.users, [(u'CokeZero', (7, 16)), (u'EASPORTS', (47, 56)), (u'EANCAAFootball', (57, 72))])
+        self.assertEqual(result.tags, [(u'GameOn', (76, 83)), (u'ad', (84, 87))])
+
+    def test_users_in_tweets(self):
+        result = self.parser.parse(u'Follow @CokeZero & Retweet for a chance to win @EASPORTS @EANCAAFootball 13 #GameOn #ad Rules: http://bit.ly/EANCAA @someone', html=False)
+        self.assertEqual(result.users, [(u'CokeZero', (7, 16)), (u'EASPORTS', (47, 56)), (u'EANCAAFootball', (57, 72)), (u'someone', (116, 124))])
+
+    def test_edge_cases(self):
+        """Some edge cases that upset the original version of ttp"""
+        result = self.parser.parse(u' @user', html=False)
+        self.assertEqual(result.users, [(u'user', (1, 6))])
+
+        result = self.parser.parse(u' #hash ', html=False)
+        self.assertEqual(result.tags, [(u'hash', (1, 6))])
+
+        result = self.parser.parse(u' http://some.com ', html=False)
+        self.assertEqual(result.urls, [(u'http://some.com', (1, 16))])
+
+
 # Test it!
 if __name__ == '__main__':
     unittest.main()
 
+    # verbosity = 0 # set to 2 for verbose output
+    # suite = unittest.TestLoader().loadTestsFromTestCase(TWPTestsWithSpansEdgeCases)
+    # unittest.TextTestRunner(verbosity=verbosity).run(suite)
+    # suite = unittest.TestLoader().loadTestsFromTestCase(TWPTestsWithSpans)
+    # unittest.TextTestRunner(verbosity=verbosity).run(suite)
+    # suite = unittest.TestLoader().loadTestsFromTestCase(TWPTests)
+    # unittest.TextTestRunner(verbosity=verbosity).run(suite)
diff --git a/ttp.py b/ttp/ttp.py
similarity index 82%
rename from ttp.py
rename to ttp/ttp.py
index 27102a9..ac7c79e 100644
--- a/ttp.py
+++ b/ttp/ttp.py
@@ -13,14 +13,18 @@
 #  You should have received a copy of the GNU General Public License along with
 #  twitter-text-python. If not, see <http://www.gnu.org/licenses/>.
 
-# TODO create a setup.py
-
+# Forked by Ian Ozsvald:
+# https://github.com/ianozsvald/twitter-text-python
+# from:
+# https://github.com/BonsaiDen/twitter-text-python
 
 # Tweet Parser and Formatter ---------------------------------------------------
 # ------------------------------------------------------------------------------
 import re
 import urllib
 
+__version__ = "1.0.1.0"
+
 # Some of this code has been translated from the twitter-text-java library:
 # <http://github.com/mzsanford/twitter-text-java>
 AT_SIGNS = ur'[@\uff20]'
@@ -35,8 +39,8 @@
 
 # Users
 USERNAME_REGEX = re.compile(ur'\B' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)
-REPLY_REGEX = re.compile(ur'^(?:' + SPACES + ur')*' + AT_SIGNS \
-              + ur'([a-z0-9_]{1,20}).*', re.IGNORECASE)
+REPLY_REGEX = re.compile(ur'^(?:' + SPACES + ur')*' + AT_SIGNS
+                         + ur'([a-z0-9_]{1,20}).*', re.IGNORECASE)
 
 # Hashtags
 HASHTAG_EXP = ur'(^|[^0-9A-Z&/]+)(#|\uff03)([0-9A-Z_]*[A-Z_]+[%s]*)' % UTF_CHARS
@@ -55,76 +59,78 @@
 PATH_ENDING_CHARS = r'[%s\)=#/]' % UTF_CHARS
 QUERY_ENDING_CHARS = '[a-z0-9_&=#]'
 
-URL_REGEX = re.compile('((%s)((https?://|www\\.)(%s)(\/%s*%s?)?(\?%s*%s)?))'
+URL_REGEX = re.compile('((%s)((https?://|www\\.)(%s)(\/(%s*%s)?)?(\?%s*%s)?))'
                        % (PRE_CHARS, DOMAIN_CHARS, PATH_CHARS,
                           PATH_ENDING_CHARS, QUERY_CHARS, QUERY_ENDING_CHARS),
-                          re.IGNORECASE)
-
+                       re.IGNORECASE)
 
 # Registered IANA one letter domains
 IANA_ONE_LETTER_DOMAINS = ('x.com', 'x.org', 'z.com', 'q.net', 'q.com', 'i.net')
 
 
 class ParseResult(object):
+
     '''A class containing the results of a parsed Tweet.
-    
+
     Attributes:
     - urls:
         A list containing all the valid urls in the Tweet.
-    
+
     - users
         A list containing all the valid usernames in the Tweet.
-    
+
     - reply
         A string containing the username this tweet was a reply to.
         This only matches a username at the beginning of the Tweet,
         it may however be preceeded by whitespace.
         Note: It's generally better to rely on the Tweet JSON/XML in order to
         find out if it's a reply or not.
-        
+
     - lists
         A list containing all the valid lists in the Tweet.
         Each list item is a tuple in the format (username, listname).
-        
+
     - tags
         A list containing all the valid tags in theTweet.
-    
+
     - html
         A string containg formatted HTML.
         To change the formatting sublcass twp.Parser and override the format_*
         methods.
-    
+
     '''
-    
+
     def __init__(self, urls, users, reply, lists, tags, html):
-        self.urls = list(set(urls)) if urls else []  #fixes dups
-        self.users = list(set(users)) if users else []
-        self.lists = list(set(lists)) if lists else []
-        self.reply = list(set(reply)) if reply else []
-        self.tags = list(set(tags)) if tags else []
+        self.urls = urls if urls else []
+        self.users = users if users else []
+        self.lists = lists if lists else []
+        self.reply = reply if reply else None
+        self.tags = tags if tags else []
         self.html = html
 
 
 class Parser(object):
+
     '''A Tweet Parser'''
-    
-    def __init__(self, max_url_length=30):
+
+    def __init__(self, max_url_length=30, include_spans=False):
         self._max_url_length = max_url_length
-    
+        self._include_spans = include_spans
+
     def parse(self, text, html=True):
         '''Parse the text and return a ParseResult instance.'''
         self._urls = []
         self._users = []
         self._lists = []
         self._tags = []
-        
+
         reply = REPLY_REGEX.match(text)
         reply = reply.groups(0)[0] if reply is not None else None
-        
+
         parsed_html = self._html(text) if html else self._text(text)
         return ParseResult(self._urls, self._users, reply,
                            self._lists, self._tags, parsed_html)
-    
+
     def _text(self, text):
         '''Parse a Tweet without generating HTML.'''
         URL_REGEX.sub(self._parse_urls, text)
@@ -132,84 +138,95 @@ def _text(self, text):
         LIST_REGEX.sub(self._parse_lists, text)
         HASHTAG_REGEX.sub(self._parse_tags, text)
         return None
-    
+
     def _html(self, text):
         '''Parse a Tweet and generate HTML.'''
         html = URL_REGEX.sub(self._parse_urls, text)
         html = USERNAME_REGEX.sub(self._parse_users, html)
         html = LIST_REGEX.sub(self._parse_lists, html)
         return HASHTAG_REGEX.sub(self._parse_tags, html)
-    
-    
+
     # Internal parser stuff ----------------------------------------------------
     def _parse_urls(self, match):
         '''Parse URLs.'''
-        
+
         mat = match.group(0)
-        
+
         # Fix a bug in the regex concerning www...com and www.-foo.com domains
         # TODO fix this in the regex instead of working around it here
         domain = match.group(5)
         if domain[0] in '.-':
             return mat
-        
+
         # Only allow IANA one letter domains that are actually registered
         if len(domain) == 5 \
            and domain[-4:].lower() in ('.com', '.org', '.net') \
            and not domain.lower() in IANA_ONE_LETTER_DOMAINS:
-            
+
             return mat
-        
+
         # Check for urls without http(s)
         pos = mat.find('http')
         if pos != -1:
             pre, url = mat[:pos], mat[pos:]
             full_url = url
-        
+
         # Find the www and force http://
         else:
             pos = mat.lower().find('www')
             pre, url = mat[:pos], mat[pos:]
             full_url = 'http://%s' % url
-        
-        self._urls.append(url)
-        
+
+        if self._include_spans:
+            span = match.span(0)
+            # skip the leading prefix so the span starts at the URL itself
+            span = (span[0] + len(pre), span[1])
+            self._urls.append((url, span))
+        else:
+            self._urls.append(url)
+
         if self._html:
             return '%s%s' % (pre, self.format_url(full_url,
-                                       self._shorten_url(escape(url))))
-    
+                                                  self._shorten_url(escape(url))))
+
     def _parse_users(self, match):
         '''Parse usernames.'''
-        
+
         # Don't parse lists here
         if match.group(2) is not None:
             return match.group(0)
-        
+
         mat = match.group(0)
-        self._users.append(mat[1:])
-        
+        if self._include_spans:
+            self._users.append((mat[1:], match.span(0)))
+        else:
+            self._users.append(mat[1:])
+
         if self._html:
             return self.format_username(mat[0:1], mat[1:])
-    
+
     def _parse_lists(self, match):
         '''Parse lists.'''
-        
+
         # Don't parse usernames here
         if match.group(4) is None:
             return match.group(0)
-        
+
         pre, at_char, user, list_name = match.groups()
         list_name = list_name[1:]
-        self._lists.append((user, list_name))
-        
+        if self._include_spans:
+            self._lists.append((user, list_name, match.span(0)))
+        else:
+            self._lists.append((user, list_name))
+
         if self._html:
             return '%s%s' % (pre, self.format_list(at_char, user, list_name))
-    
+
     def _parse_tags(self, match):
         '''Parse hashtags.'''
-        
+
         mat = match.group(0)
-        
+
         # Fix problems with the regex capturing stuff infront of the #
         tag = None
         for i in u'#\uff03':
@@ -217,45 +234,50 @@ def _parse_tags(self, match):
             if pos != -1:
                 tag = i
                 break
-        
+
         pre, text = mat[:pos], mat[pos + 1:]
-        self._tags.append(text)
-        
+        if self._include_spans:
+            span = match.span(0)
+            # skip the leading prefix so the span starts at the hash sign
+            span = (span[0] + len(pre), span[1])
+            self._tags.append((text, span))
+        else:
+            self._tags.append(text)
+
         if self._html:
             return '%s%s' % (pre, self.format_tag(tag, text))
-    
+
     def _shorten_url(self, text):
         '''Shorten a URL and make sure to not cut of html entities.'''
-        
+
         if len(text) > self._max_url_length and self._max_url_length != -1:
             text = text[0:self._max_url_length - 3]
             amp = text.rfind('&')
             close = text.rfind(';')
             if amp != -1 and (close == -1 or close < amp):
                 text = text[0:amp]
-            
+
             return text + '...'
-        
+
         else:
             return text
-    
-    
+
     # User defined formatters --------------------------------------------------
     def format_tag(self, tag, text):
         '''Return formatted HTML for a hashtag.'''
         return '<a href="http://search.twitter.com/search?q=%s">%s%s</a>' \
-                % (urllib.quote('#' + text.encode('utf-8')), tag, text)
-    
+            % (urllib.quote('#' + text.encode('utf-8')), tag, text)
+
     def format_username(self, at_char, user):
         '''Return formatted HTML for a username.'''
         return '<a href="http://twitter.com/%s">%s%s</a>' \
                % (user, at_char, user)
-    
+
     def format_list(self, at_char, user, list_name):
         '''Return formatted HTML for a list.'''
         return '<a href="http://twitter.com/%s/%s">%s%s/%s</a>' \
                % (user, list_name, at_char, user, list_name)
-    
+
     def format_url(self, url, text):
         '''Return formatted HTML for a url.'''
         return '<a href="%s">%s</a>' % (escape(url), text)
@@ -267,4 +289,3 @@ def escape(text):
     return ''.join({'&': '&amp;', '"': '&quot;',
                     '\'': '&apos;', '>': '&gt;',
                     '<': '&lt;'}.get(c, c) for c in text)
-
diff --git a/ttp/utils.py b/ttp/utils.py
new file mode 100644
index 0000000..2c3d822
--- /dev/null
+++ b/ttp/utils.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Unwind short-links e.g. bit.ly, t.co etc to their canonical links"""
+import requests
+
+
+def follow_shortlinks(shortlinks, timeout=10):
+    """Follow redirects for each shortlink and return the URL chains.
+
+    shortlinks -- iterable of URL strings to resolve
+    timeout    -- seconds passed to requests.get so that an unresponsive
+                  host cannot block forever (requests has no default limit)
+
+    Returns a dict mapping each shortlink to the list of URLs visited, in
+    order: every redirect hop followed by the final URL.
+    """
+    links_followed = {}
+    for shortlink in shortlinks:
+        response = requests.get(shortlink, timeout=timeout)
+        # response.history holds one Response per redirect hop, oldest first
+        hops = [redirect.url for redirect in response.history]
+        links_followed[shortlink] = hops + [response.url]
+    return links_followed
+
+
+if __name__ == "__main__":
+    shortlinks = ['http://t.co/8o0z9BbEMu', u'http://bbc.in/16dClPF']
+    print follow_shortlinks(shortlinks)