diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f2f205f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +# ignore .git and .cache folders +.git +.cache +.idea +data diff --git a/.gitignore b/.gitignore index e272905..bdca348 100644 --- a/.gitignore +++ b/.gitignore @@ -1,109 +1,110 @@ -*# -*~ -.idea/ - -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -env/ -ve/ -venv/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# dotenv -.env - -# virtualenv -.venv -venv/ -ENV/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ - -benchmarks +*# +*~ +.idea/ +data/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +ve/ +venv/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+benchmarks
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..fbca14b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,38 @@
+# CC Specific Dockerfile implementing steps at https://github.com/LexPredict/openedgar/blob/master/INSTALL.md
+# Allows the use of OpenEDGAR in AKS
+FROM ubuntu:18.04
+MAINTAINER Michael Seddon (michael.seddon@cliffordchance.com)
+
+# Environment variables
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Package installation
+RUN apt update
+RUN apt upgrade -y
+RUN apt install -y software-properties-common build-essential python3-dev python3-pip virtualenv git-all
+# to be removed when rabbit is in its own container
+RUN apt install -y rabbitmq-server
+RUN apt-get install -y openjdk-8-jdk
+
+# Create the OpenEDGAR directory
+WORKDIR /opt
+RUN mkdir /opt/openedgar
+
+# Set up Python venv
+WORKDIR /opt/openedgar/
+RUN virtualenv -p /usr/bin/python3 env
+COPY lexpredict_openedgar/requirements/full.txt lexpredict_openedgar/requirements/full.txt
+RUN ./env/bin/pip install -r lexpredict_openedgar/requirements/full.txt
+RUN ./env/bin/pip install azure-mgmt-resource azure-mgmt-datalake-store azure-datalake-store azure-storage-blob
+COPY tika/tika-server-1.21.jar /opt/openedgar/tika/tika-server-1.21.jar
+COPY lexpredict_openedgar/ /opt/openedgar/lexpredict_openedgar/
+
+COPY docker/default.env /opt/openedgar/
+RUN cp lexpredict_openedgar/sample.env lexpredict_openedgar/.env
+#COPY docker/erlang-solutions_1.0_all.deb lexpredict_openedgar/erlang-solutions_1.0_all.deb
+COPY docker/oe-entrypoint.sh /usr/local/bin/
+COPY docker/run_edgar.py /opt/openedgar/lexpredict_openedgar/run_edgar.py
+COPY docker/dot_env.sh /opt/openedgar
+RUN mkdir /data
+
+ENTRYPOINT ["oe-entrypoint.sh"]
\ No newline at end of file
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..989ec2e
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,38 @@
+# Dockerisation
+The image takes its default parameters from [default.env](docker/default.env).
+
+All the variables can be overridden at runtime as environment variables.
+
+## Download Tika
+Run the `download_tika.sh` script in `/tika`; it will download Tika server 1.21 into the `/tika`
+folder.
+
+## Docker
+Run the following from the repository root to build the image:
+
+    docker build -t dslcr.azurecr.io/openedgar:1.1 .
+
+## Run container
+It is wise to mount a local folder into the container so that you can access the
+downloaded documents.
+Example:
+
+    docker run --env-file vars.txt -v /Users/mirko/Projects/research-openedgar/data:/data dslcr.azurecr.io/openedgar:1.1
+
+Contents of `vars.txt`:
+
+    EDGAR_YEAR=2015
+    EDGAR_QUARTER=1
+    EDGAR_MONTH=1
+    CLIENT_TYPE=Local
+    S3_DOCUMENT_PATH=/data
+    DOWNLOAD_PATH=/data
+
+After the download has terminated, you have to stop the container:
+
+    $ docker ps
+
+    CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+    9e0ae247b61f dslcr.azurecr.io/openedgar:1.1 "oe-entrypoint.sh" 2 minutes ago Up 2 minutes priceless_bardeen
+
+    $ docker kill 9e
diff --git a/docker/default.env b/docker/default.env
new file mode 100755
index 0000000..65f3a67
--- /dev/null
+++ b/docker/default.env
@@ -0,0 +1,70 @@
+# PostgreSQL
+DATABASE_URL=${DATABASE_URL:="postgres://postgres:postgres@host.docker.internal:5432/openedgar"}
+CELERY_BROKER_URL=${CELERY_BROKER_URL:="amqp://openedgar:openedgar@localhost:5672/openedgar"}
+CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND:="rpc"}
+CELERY_RESULT_PERSISTENT=${CELERY_RESULT_PERSISTENT:="False"}
+DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:="openedgar"}
+
+# Domain name, used by caddy
+#DOMAIN_NAME=domain.com
+
+# General settings
+# DJANGO_READ_DOT_ENV_FILE=True
+# CLIENT_TYPE: S3, ADL, Local, Blob
+CLIENT_TYPE=${CLIENT_TYPE:="Local"}
+
+
+DJANGO_ADMIN_URL=${DJANGO_ADMIN_URL:=""}
+DJANGO_SETTINGS_MODULE=${DJANGO_SETTINGS_MODULE:="config.settings.production"}
+DJANGO_SECRET_KEY=${DJANGO_SECRET_KEY:="openedgar"}
+DJANGO_ALLOWED_HOSTS=${DJANGO_ALLOWED_HOSTS:="localhost"}
+
+# AWS Settings
+DJANGO_AWS_ACCESS_KEY_ID=${DJANGO_AWS_ACCESS_KEY_ID:=""}
+DJANGO_AWS_SECRET_ACCESS_KEY=${DJANGO_AWS_SECRET_ACCESS_KEY:=""}
+DJANGO_AWS_STORAGE_BUCKET_NAME=${DJANGO_AWS_STORAGE_BUCKET_NAME:=""}
+
+# Azure Data Lake Settings
+ADL_ACCOUNT=${ADL_ACCOUNT:=""}
+ADL_TENANT=${ADL_TENANT:=""}
+# Client ID
+ADL_CID=${ADL_CID:=""}
+# Client secret/password
+ADL_SECRET=${ADL_SECRET:=""}
+
+# Azure Blob Storage
+BLOB_CONNECTION_STRING=${BLOB_CONNECTION_STRING:=""}
+BLOB_CONTAINER=${BLOB_CONTAINER:="openedgar"}
+
+
+# Read rate limit
+CELERY_TASK_DEFAULT_RATE_LIMIT=${CELERY_TASK_DEFAULT_RATE_LIMIT:="10/s"}
+
+# Used with email
+DJANGO_MAILGUN_API_KEY=${DJANGO_MAILGUN_API_KEY:=""}
+DJANGO_SERVER_EMAIL=${DJANGO_SERVER_EMAIL:=""}
+MAILGUN_SENDER_DOMAIN=${MAILGUN_SENDER_DOMAIN:=""}
+EMAIL_BACKEND=${EMAIL_BACKEND:="django.core.mail.backends.console.EmailBackend"}
+
+# Security!
Better to use DNS for this task, but you can use redirect +DJANGO_SECURE_SSL_REDIRECT=${DJANGO_SECURE_SSL_REDIRECT:="False"} + +# django-allauth +DJANGO_ACCOUNT_ALLOW_REGISTRATION=${DJANGO_ACCOUNT_ALLOW_REGISTRATION:="True"} + +# AWS setup +S3_ACCESS_KEY=${S3_ACCESS_KEY:="ABCDEFGHIJKLMNOPQRST"} +S3_SECRET_KEY=${S3_SECRET_KEY:="abcdefghijklmnopqrstuvwxyz12345678901234"} +S3_BUCKET=${S3_BUCKET:=""} + +S3_PREFIX=${S3_PREFIX:="DATA"} +S3_COMPRESSION_LEVEL=${S3_COMPRESSION_LEVEL:="9"} + +# Download path +DOWNLOAD_PATH=${DOWNLOAD_PATH:="/data"} +S3_DOCUMENT_PATH=${S3_DOCUMENT_PATH:="/data"} + +# EDGAR PARAMETERS +EDGAR_YEAR=${EDGAR_YEAR:="2015"} +FORM_TYPES=${FORM_TYPES:="3, 10, 8-K, 10-Q, 10-K"} + diff --git a/docker/dot_env.sh b/docker/dot_env.sh new file mode 100755 index 0000000..4555d96 --- /dev/null +++ b/docker/dot_env.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +cat >/opt/openedgar/lexpredict_openedgar/.env << EOF +DATABASE_URL=$DATABASE_URL +CELERY_BROKER_URL=$CELERY_BROKER_URL +CELERY_RESULT_BACKEND=$CELERY_RESULT_BACKEND +CELERY_RESULT_PERSISTENT=$CELERY_RESULT_PERSISTENT +DJANGO_SECRET_KEY=$DJANGO_SECRET_KEY + +# Domain name, used by caddy +#DOMAIN_NAME=domain.com + +# General settings +# DJANGO_READ_DOT_ENV_FILE=True +# CLIENT_TYPE: AKS, ADLAKE, Local +CLIENT_TYPE=$CLIENT_TYPE + + +DJANGO_ADMIN_URL=$DJANGO_ADMIN_URL +DJANGO_SETTINGS_MODULE=$DJANGO_SETTINGS_MODULE +DJANGO_SECRET_KEY=$DJANGO_SECRET_KEY +DJANGO_ALLOWED_HOSTS=$DJANGO_ALLOWED_HOSTS + +CELERY_TASK_DEFAULT_RATE_LIMIT=$CELERY_TASK_DEFAULT_RATE_LIMIT + +# data Lake Settings +ADL_ACCOUNT=$ADL_ACCOUNT +ADL_TENANT=$ADL_TENANT +# Client ID +ADL_CID=$ADL_CID +# Client secret/password +ADL_SECRET=$ADL_SECRET + +# Azure Blob Storage +BLOB_CONNECTION_STRING=$BLOB_CONNECTION_STRING +BLOB_CONTAINER=$BLOB_CONTAINER + +# AWS Settings +DJANGO_AWS_ACCESS_KEY_ID=$DJANGO_AWS_ACCESS_KEY_ID +DJANGO_AWS_SECRET_ACCESS_KEY=$DJANGO_AWS_SECRET_ACCESS_KEY +DJANGO_AWS_STORAGE_BUCKET_NAME=$DJANGO_AWS_STORAGE_BUCKET_NAME + +# Used with email +DJANGO_MAILGUN_API_KEY=$DJANGO_MAILGUN_API_KEY +DJANGO_SERVER_EMAIL=$DJANGO_SERVER_EMAIL +MAILGUN_SENDER_DOMAIN=$MAILGUN_SENDER_DOMAIN +EMAIL_BACKEND=$EMAIL_BACKEND + +# Security! 
Better to use DNS for this task, but you can use redirect +DJANGO_SECURE_SSL_REDIRECT=$DJANGO_SECURE_SSL_REDIRECT + +# django-allauth +DJANGO_ACCOUNT_ALLOW_REGISTRATION=$DJANGO_ACCOUNT_ALLOW_REGISTRATION + +# AWS setup +S3_ACCESS_KEY=$S3_ACCESS_KEY +S3_SECRET_KEY=$S3_SECRET_KEY +S3_BUCKET=$S3_BUCKET + +S3_PREFIX=$S3_PREFIX +S3_COMPRESSION_LEVEL=$S3_COMPRESSION_LEVEL + +# Download path +DOWNLOAD_PATH=$DOWNLOAD_PATH +S3_DOCUMENT_PATH=$S3_DOCUMENT_PATH + +# EDGAR PARAMETERS +EDGAR_YEAR=$EDGAR_YEAR +FORM_TYPES=$FORM_TYPES +EOF \ No newline at end of file diff --git a/docker/erlang-solutions_1.0_all.deb b/docker/erlang-solutions_1.0_all.deb new file mode 100644 index 0000000..6cf8fef Binary files /dev/null and b/docker/erlang-solutions_1.0_all.deb differ diff --git a/docker/oe-entrypoint.sh b/docker/oe-entrypoint.sh new file mode 100755 index 0000000..03803ef --- /dev/null +++ b/docker/oe-entrypoint.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +cd /opt/openedgar/lexpredict_openedgar +export PYTHONIOENCODING=utf-8 +mkdir -p /data/logs + +source ../env/bin/activate +source /opt/openedgar/default.env + +source /opt/openedgar/dot_env.sh + +export C_FORCE_ROOT="true" + +service rabbitmq-server start + +rabbitmqctl add_user openedgar openedgar + +rabbitmqctl add_vhost openedgar + +rabbitmqctl set_permissions -p openedgar openedgar ".*" ".*" ".*" + +cd /opt/openedgar/tika + +java -jar tika-server-1.21.jar > /data/logs/tika.log 2>&1 & + +cd /opt/openedgar/lexpredict_openedgar + +source ../env/bin/activate +source /opt/openedgar/default.env + +# perform initial migration +python manage.py migrate + +celery -A lexpredict_openedgar.taskapp worker --loglevel=INFO > /data/logs/celery.log 2>&1 & + +python manage.py shell < run_edgar.py + +tail -f /data/logs/celery.log +#| grep -v "INFO rmeta/text (autodetecting type)" diff --git a/docker/run_edgar.py b/docker/run_edgar.py new file mode 100644 index 0000000..d0fca4e --- /dev/null +++ b/docker/run_edgar.py @@ -0,0 +1,21 @@ +import logging +import os +from openedgar.processes.edgar import download_filing_index_data, process_all_filing_index + +years = [t.upper().strip() for t in os.getenv("EDGAR_YEAR").split(",")] +quarter = os.getenv("EDGAR_QUARTER") +month = os.getenv("EDGAR_MONTH") +types = [t.upper().strip() for t in os.getenv("FORM_TYPES").split(",")] + +if len(quarter) == 0: + quarter = None +if len(month) == 0: + month = None + +print("Edgar analysis started") +for year in years: + print("Analysing year {} types {}".format(year, types)) + process_all_filing_index(year=year, quarter=quarter, month=month, form_type_list=types) + +for i in range(1, 10): + print("###############################################") diff --git a/lexpredict_openedgar/config/settings/base.py b/lexpredict_openedgar/config/settings/base.py index 02c2ad3..3f89365 100755 --- a/lexpredict_openedgar/config/settings/base.py +++ b/lexpredict_openedgar/config/settings/base.py @@ -332,8 +332,21 @@ S3_DOCUMENT_PATH = env('S3_DOCUMENT_PATH', default="openedgar") S3_PREFIX = env('S3_PREFIX', default="documents") S3_COMPRESSION_LEVEL = int(env('S3_COMPRESSION_LEVEL', default=6)) +DOWNLOAD_PATH = env('DOWNLOAD_PATH', default='openedgar') # Tika configuration TIKA_HOST = "localhost" TIKA_PORT = 9998 TIKA_ENDPOINT = "http://{0}:{1}/tika".format(TIKA_HOST, TIKA_PORT) + +# Azure data Lake +ADL_ACCOUNT = env('ADL_ACCOUNT', default="") +ADL_TENANT = env('ADL_TENANT', default="") +# Client ID +ADL_CID = env('ADL_CID', default="") +# Client secret/password +ADL_SECRET = env('ADL_SECRET', default="") + +# Azure 
Blob Storage +BLOB_CONNECTION_STRING = env('BLOB_CONNECTION_STRING', default="") +BLOB_CONTAINER = env("BLOB_CONTAINER", default="openedgar") diff --git a/lexpredict_openedgar/openedgar/clients/adl.py b/lexpredict_openedgar/openedgar/clients/adl.py new file mode 100755 index 0000000..5b2b535 --- /dev/null +++ b/lexpredict_openedgar/openedgar/clients/adl.py @@ -0,0 +1,123 @@ +# Libraries +import logging +import os +from typing import Union +import adal + +from msrestazure.azure_active_directory import AADTokenCredentials +from azure.datalake.store import core, lib, multithread +from config.settings.base import ADL_ACCOUNT, ADL_TENANT, ADL_CID, ADL_SECRET + +logger = logging.getLogger(__name__) + +if os.environ["CLIENT_TYPE"] == "ADL": + authority_host_uri = 'https://login.microsoftonline.com' + authority_uri = authority_host_uri + '/' + ADL_TENANT + RESOURCE = 'https://management.core.windows.net/' + + adlCreds = lib.auth(tenant_id=ADL_TENANT, + client_secret=ADL_SECRET, + client_id=ADL_CID, + resource=RESOURCE) + + context = adal.AuthenticationContext(authority_uri, api_version=None) + mgmt_token = context.acquire_token_with_client_credentials(RESOURCE, ADL_CID, ADL_SECRET) + armCreds = AADTokenCredentials(mgmt_token, ADL_CID, resource=RESOURCE) + + ## Create a filesystem client object + adlsFileSystemClient = core.AzureDLFileSystem(adlCreds, store_name=ADL_ACCOUNT) + + +class ADLClient: + """ + TODO: This class does not support the deflate option + """ + def __init__(self): + logger.info("Initialized AKS client") + + def path_exists(self, path: str, client=None): + """ + Check if an AKS path exists + :param path: + :return: true if AKS object exists, else false + """ + + return adlsFileSystemClient.exists(path) + + def get_buffer(self, remote_path: str, client=None, deflate: bool = True): + """ + Get a file from S3 given a path and optional client. + :param remote_path: S3 path under bucket + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: buffer bytes/str + """ + + with adlsFileSystemClient.open(remote_path, blocksize=2 ** 20) as f: + return f.read() + + def get_file(self, remote_path: str, local_path: str, client=None, deflate: bool = True): + """ + Save a local file from AKS given a path and optional client. + :param remote_path: AKS path under bucket + :param local_path: local path to save to + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + # Open and write buffer + with open(local_path, "wb") as out_file: + out_file.write(self.get_buffer(remote_path, client, deflate)) + + def get_buffer_segment(self, remote_path: str, start_pos: int, end_pos: int, client=None, deflate: bool = True): + """ + Get a file from S3 given a path and optional client. + :param remote_path: S3 path under bucket + :param start_pos: + :param end_pos: + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + # Retrieve buffer and return subset + buffer = self.get_buffer(remote_path, client, deflate) + return buffer[start_pos:end_pos] + + def put_buffer(self, remote_path: str, buffer: Union[str, bytes], client=None, deflate: bool = True, + write_bytes=True): + """ + Upload a buffer to AKS given a path. 
+ :param remote_path: AKS path + :param buffer: buffer to upload + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + + import tempfile + + temp = tempfile.NamedTemporaryFile(mode='w+b') + + try: + # Ensure we have bytes object + if isinstance(buffer, str): + upload_buffer = bytes(buffer, "utf-8") + elif isinstance(buffer, bytes): + upload_buffer = buffer + + temp.write(upload_buffer) + + multithread.ADLUploader(adlsFileSystemClient, lpath=temp.name, rpath=remote_path, nthreads=64, + overwrite=True, buffersize=4194304, blocksize=4194304) + finally: + temp.close() + + def put_file(self, remote_path: str, local_path: str): + """ + Save a local file from AKS. + :param remote_path: AKS remote path + :param local_path: local path to save to + :return: + """ + multithread.ADLUploader(adlsFileSystemClient, lpath=local_path, rpath=remote_path, nthreads=64, overwrite=True, + buffersize=4194304, blocksize=4194304) diff --git a/lexpredict_openedgar/openedgar/clients/blob.py b/lexpredict_openedgar/openedgar/clients/blob.py new file mode 100755 index 0000000..d78d51f --- /dev/null +++ b/lexpredict_openedgar/openedgar/clients/blob.py @@ -0,0 +1,106 @@ +# Libraries +import logging +import os +import zlib +from typing import Union + +from azure.storage.blob import BlockBlobService + +from config.settings.base import BLOB_CONNECTION_STRING, BLOB_CONTAINER, S3_COMPRESSION_LEVEL + +logger = logging.getLogger(__name__) +# Remove garbage logging from MS +logging.getLogger("azure.storage.common.storageclient").setLevel(logging.ERROR) + +if os.environ["CLIENT_TYPE"] == "Blob": + blob_service = BlockBlobService(connection_string=BLOB_CONNECTION_STRING) + blob_service.create_container(BLOB_CONTAINER) + + +class BlobClient: + + def __init__(self): + logger.info("Initialized Blob client") + + def path_exists(self, path: str, client=None): + """ + Check if an Blob path exists + :param path: + :return: true if Blob object exists, else false + """ + return blob_service.exists(BLOB_CONTAINER, path) + + def get_buffer(self, remote_path: str, client=None, deflate: bool = True): + """ + Get a file from Blob given a path and optional client. + :param remote_path: Blob path under bucket + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: buffer bytes/str + """ + if remote_path[0] == '/': + remote_path = remote_path[1:] + buffer = blob_service.get_blob_to_bytes(BLOB_CONTAINER, remote_path).content + if deflate: + return zlib.decompress(buffer) + else: + return buffer + + def get_file(self, remote_path: str, local_path: str, client=None, deflate: bool = True): + """ + Save a local file from Blob given a path and optional client. + :param remote_path: Blob path under bucket + :param local_path: local path to save to + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + with open(local_path, "wb") as out_file: + out_file.write(self.get_buffer(remote_path, client, deflate)) + + def get_buffer_segment(self, remote_path: str, start_pos: int, end_pos: int, client=None, deflate: bool = True): + """ + Get a file from S3 given a path and optional client. 
+ :param remote_path: S3 path under bucket + :param start_pos: + :param end_pos: + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + # Retrieve buffer and return subset + buffer = self.get_buffer(remote_path, client, deflate) + return buffer[start_pos:end_pos] + + def put_buffer(self, remote_path: str, buffer: Union[str, bytes], client=None, deflate: bool = True, + write_bytes=True): + """ + Upload a buffer to AKS given a path. + :param remote_path: AKS path + :param buffer: buffer to upload + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + # Ensure we have bytes object + if isinstance(buffer, str): + upload_buffer = bytes(buffer, "utf-8") + else: + upload_buffer = buffer + if remote_path[0] == '/': + remote_path = remote_path[1:] + if deflate: + upload_buffer = zlib.compress(upload_buffer, S3_COMPRESSION_LEVEL) + blob_service.create_blob_from_bytes(BLOB_CONTAINER, remote_path, upload_buffer) + + def put_file(self, remote_path: str, local_path: str, client=None, deflate: bool = True): + """ + Save a local file from AKS. + :param remote_path: AKS remote path + :param local_path: local path to save to + :param client: optional client to re-use + :param deflate: whether to automatically zlib deflate contents + :return: + """ + with open(local_path, "rb") as in_file: + self.put_buffer(remote_path, in_file.read(), client, deflate) diff --git a/lexpredict_openedgar/openedgar/clients/edgar.py b/lexpredict_openedgar/openedgar/clients/edgar.py index 70496d5..30d6015 100755 --- a/lexpredict_openedgar/openedgar/clients/edgar.py +++ b/lexpredict_openedgar/openedgar/clients/edgar.py @@ -39,13 +39,6 @@ # Setup logger logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) - def get_buffer(remote_path: str, base_path: str = HTTP_SEC_HOST): """ @@ -185,6 +178,91 @@ def list_index_by_year(year: int): return form_index_list +def list_index_by_quarter(year: int, quarter: int): + """ + Get list of index files for a given year and quarter. + :param year: filing year to retrieve + :param quarter: filing quarter to retrieve + :return: + """ + # Log entrance + logger.info("Locating form index list for {0}".format(year)) + + # Form index list + year = str(year) + form_index_list = [] + + # Get year directory list + year_index_uri = urllib.parse.urljoin(HTTP_SEC_INDEX_PATH, str(year) + "/") + year_root_list = list_path(year_index_uri) + print(year_root_list) + + # Get quarters + quarter_list = [f for f in year_root_list if "/QTR" in f] + + quarter_string_match = "QTR" + str(quarter) + + # Iterate over quarters + for qu in quarter_list: + if quarter_string_match in qu: + quarter_root_list = list_path(qu) + form_index_list.extend([q for q in quarter_root_list if "/form." in q.lower()]) + + # Cleanup double / + for i in range(len(form_index_list)): + form_index_list[i] = form_index_list[i].replace("//", "/") + + # Log exit + logger.info("Successfully located {0} form index files for {1}".format(len(form_index_list), year)) + + # Return + return form_index_list + + +def list_index_by_month(year: int, month: int): + """ + Get list of index files for a given year and month. 
+ :param year: filing year to retrieve + :param month: filing month to retrieve + :return: + """ + # Log entrance + logger.info("Locating form index list for {0}".format(year)) + + # Form index list + year = str(year) + form_index_list = [] + + # Get year directory list + year_index_uri = urllib.parse.urljoin(HTTP_SEC_INDEX_PATH, str(year) + "/") + year_root_list = list_path(year_index_uri) + print(year_root_list) + + # Get quarters + quarter_list = [f for f in year_root_list if "/QTR" in f] + + # company.20190102.idx + if int(month) < 10: + month = "0{}".format(month) + + date_string_match = year + month + + # Iterate over quarters + for qu in quarter_list: + quarter_root_list = list_path(qu) + form_index_list.extend([q for q in quarter_root_list if "/form." + date_string_match in q.lower()]) + + # Cleanup double / + for i in range(len(form_index_list)): + form_index_list[i] = form_index_list[i].replace("//", "/") + + # Log exit + logger.info("Successfully located {0} form index files for {1}".format(len(form_index_list), year)) + + # Return + return form_index_list + + def list_index(min_year: int = 1950, max_year: int = 2050): """ Get the list of form index files on SEC HTTP. diff --git a/lexpredict_openedgar/openedgar/clients/local.py b/lexpredict_openedgar/openedgar/clients/local.py index 2a7fa62..ff2f053 100644 --- a/lexpredict_openedgar/openedgar/clients/local.py +++ b/lexpredict_openedgar/openedgar/clients/local.py @@ -26,12 +26,6 @@ # Setup logger logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) class LocalClient: @@ -47,10 +41,12 @@ def put_buffer(self, file_path: str, buffer, write_bytes=True): if not os.path.exists(dir_name): os.makedirs(dir_name) if write_bytes: - mode="wb" + mode = "wb" + encoding = None else: - mode="w" - with open(file_path, mode=mode) as localfile: + mode = "w" + encoding = "utf-8" + with open(file_path, mode=mode, encoding=encoding) as localfile: localfile.write(buffer) def get_buffer(self, file_path: str): diff --git a/lexpredict_openedgar/openedgar/clients/s3.py b/lexpredict_openedgar/openedgar/clients/s3.py index 367f3aa..34e762e 100755 --- a/lexpredict_openedgar/openedgar/clients/s3.py +++ b/lexpredict_openedgar/openedgar/clients/s3.py @@ -38,12 +38,6 @@ # Setup logger logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) class S3Client: diff --git a/lexpredict_openedgar/openedgar/parsers/edgar.py b/lexpredict_openedgar/openedgar/parsers/edgar.py index 06a31c8..2e1dcc5 100755 --- a/lexpredict_openedgar/openedgar/parsers/edgar.py +++ b/lexpredict_openedgar/openedgar/parsers/edgar.py @@ -44,12 +44,6 @@ # Setup logger logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) def uudecode(buffer: Union[bytes, str]): @@ -125,7 +119,6 @@ def parse_index_file(file_name: str, double_gz: bool = False): with gzip.open(file_name, "rb") as index_file: index_buffer = index_file.read() except IOError as e: - 
logger.error("IOError parsing {0}: {1}".format(file_name, e)) # Read as plain binary with open(file_name, "rb") as index_file: index_buffer = index_file.read() @@ -135,6 +128,8 @@ def parse_index_file(file_name: str, double_gz: bool = False): index_buffer = zlib.decompress(index_buffer).decode("utf-8") logger.info("gz with valid header: decompressing {0} to {1} bytes.".format(file_name, len(index_buffer))) + else: + logger.info("IOError parsing {0}: {1}".format(file_name, e)) # Check for double-gz if double_gz: @@ -218,6 +213,29 @@ def extract_filing_header_field(buffer: Union[bytes, str], field: str): return buffer[p0:p1].strip() +def decode_filing(to_decode: Union[bytes, str]) -> Union[str, None]: + buffer = to_decode + if isinstance(buffer, bytes): + try: + # Start with UTF-8 + buffer = str(buffer.decode("utf-8")) + except UnicodeDecodeError as _: + try: + # Fallback to ISO 8859-1 + logger.warning("Falling back to ISO 8859-1 after failing to decode with UTF-8...") + buffer = str(buffer.decode("iso-8859-1")) + except UnicodeDecodeError as _: + try: + # Fallback to ISO 8859-15 + logger.warning("Falling back to ISO 8859-15 after failing to decode with UTF-8...") + buffer = str(buffer.decode("iso-8859-5")) + except UnicodeDecodeError as _: + # Give up if we can't + logger.error("Unable to decode with either UTF-8 or ISO 8859-1; giving up...") + return None + return buffer + + def parse_filing(buffer: Union[bytes, str], extract: bool = False): """ Parse a filing file by returning each document within @@ -243,18 +261,9 @@ def parse_filing(buffer: Union[bytes, str], extract: bool = False): # Typing if isinstance(buffer, bytes): - try: - # Start with UTF-8 - buffer = str(buffer.decode("utf-8")) - except UnicodeDecodeError as _: - try: - # Fallback to ISO 8859-1 - logger.warning("Falling back to ISO 8859-1 after failing to decode with UTF-8...") - buffer = str(buffer.decode("iso-8859-1")) - except UnicodeDecodeError as _: - # Give up if we can't - logger.error("Unable to decode with either UTF-8 or ISO 8859-1; giving up...") - return filing_data + buffer = decode_filing(buffer) + if buffer is None: + return filing_data # Check for SEC-HEADER block if "" in buffer or "" in buffer: diff --git a/lexpredict_openedgar/openedgar/process_text.py b/lexpredict_openedgar/openedgar/process_text.py new file mode 100644 index 0000000..9e9244a --- /dev/null +++ b/lexpredict_openedgar/openedgar/process_text.py @@ -0,0 +1,49 @@ +from typing import Union + +from bs4 import BeautifulSoup +import re + + +def html_to_text(html_doc: str) -> str: + """ + Convert html/xml to the pure text + :param html_doc: the document to convert + :return: + """ + soup = BeautifulSoup(html_doc, 'html.parser') + if soup.find('xbrl'): + return xbr(soup) + else: + return not_xbrl(soup) + + +def not_xbrl(soup: BeautifulSoup) -> str: + """ + Handle format not in XBLR + :param soup: + :return: + """ + doc = '' + for string in soup.strings: + string = string.strip() + if len(string) > 0: + doc += string + '\n' + return doc + + +def xbr(soup: BeautifulSoup) -> str: + """ + Handle XBLR format for extracting all the text fields + :param soup: + :return: + """ + doc = "" + for tag in soup.find_all(re.compile("[T,t]ext|[D,d]escription")): + if tag.string is not None: + inner_soup = BeautifulSoup(tag.string, 'html.parser') + string = "" + for string_inner in inner_soup.strings: + if string_inner is not None: + string = string + " " + string_inner + doc += string + '\n' + return doc diff --git a/lexpredict_openedgar/openedgar/processes/edgar.py 
b/lexpredict_openedgar/openedgar/processes/edgar.py index fa4b082..582e423 100755 --- a/lexpredict_openedgar/openedgar/processes/edgar.py +++ b/lexpredict_openedgar/openedgar/processes/edgar.py @@ -28,6 +28,9 @@ import os # Project import openedgar.clients.edgar +from config.settings.base import DOWNLOAD_PATH +from openedgar.clients.adl import ADLClient +from openedgar.clients.blob import BlobClient from openedgar.clients.s3 import S3Client from openedgar.clients.local import LocalClient import openedgar.clients.local @@ -37,37 +40,42 @@ # Logging setup logger = logging.getLogger(__name__) -logger.setLevel(logging.ERROR) -console = logging.StreamHandler() -console.setLevel(logging.ERROR) -formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) -def download_filing_index_data(year: int = None): +def download_filing_index_data(year: int = None, quarter: int = None, month: int = None): """ Download all filing index data. + :param month: + :param quarter: :param year: :return: """ # Get filing index list if year is not None: - filing_index_list = openedgar.clients.edgar.list_index_by_year(year) + if month is not None: + filing_index_list = openedgar.clients.edgar.list_index_by_month(year, month) + elif quarter is not None: + filing_index_list = openedgar.clients.edgar.list_index_by_quarter(year, quarter) + else: + filing_index_list = openedgar.clients.edgar.list_index_by_year(year) else: filing_index_list = openedgar.clients.edgar.list_index() path_list = [] configured_client = os.environ["CLIENT_TYPE"] - logger.info(msg="Configured client is: {}".format(configured_client)) + logger.info("Configured client is: {}".format(configured_client)) path_prefix = str() if configured_client is None or configured_client == "S3": # Create S3 client download_client = S3Client() + elif configured_client == "ADL": + download_client = ADLClient() + elif configured_client == "Blob": + download_client = BlobClient() else: download_client = LocalClient() - path_prefix = os.environ["DOWNLOAD_PATH"] + path_prefix = DOWNLOAD_PATH # Now iterate through list to check if already on S3 for filing_index_path in filing_index_list: @@ -81,10 +89,10 @@ def download_filing_index_data(year: int = None): try: filing_index = FilingIndex.objects.get(edgar_url=filing_index_path) is_processed = filing_index.is_processed - logger.info("Index {0} already exists in DB.".format(filing_index_path)) + logger.debug("Index {0} already exists in DB.".format(filing_index_path)) except FilingIndex.DoesNotExist: is_processed = False - logger.info("Index {0} does not exist in DB.".format(filing_index_path)) + logger.debug("Index {0} does not exist in DB.".format(filing_index_path)) # Check if exists; download and upload to S3 if missing if not download_client.path_exists(file_path): @@ -94,19 +102,19 @@ def download_filing_index_data(year: int = None): # Upload download_client.put_buffer(file_path, buffer) - logger.info("Retrieved {0} and uploaded to S3.".format(filing_index_path)) + logger.debug("Retrieved {0} and uploaded to S3.".format(filing_index_path)) path_list.append((file_path, True, is_processed)) else: - logger.info("Index {0} already exists on S3.".format(filing_index_path)) + logger.debug("Index {0} already exists on S3.".format(filing_index_path)) path_list.append((file_path, False, is_processed)) # Return list of updates return path_list -def process_all_filing_index(year: int = None, form_type_list: Iterable[str] = None, new_only: bool = False, - 
store_raw: bool = True, - store_text: bool = True): +def process_all_filing_index(year: int = None, quarter: int = None, month: int = None, + form_type_list: Iterable[str] = None, new_only: bool = False, + store_raw: bool = True, store_text: bool = True, store_processed: bool = True): """ Process all filing index data. :type year: optional year to process @@ -114,10 +122,11 @@ def process_all_filing_index(year: int = None, form_type_list: Iterable[str] = N :param new_only: :param store_raw: :param store_text: + :param store_processed: :return: """ # Get the list of file paths - file_path_list = download_filing_index_data(year) + file_path_list = download_filing_index_data(year, quarter, month) client_type = os.environ["CLIENT_TYPE"] or "S3" @@ -127,11 +136,11 @@ def process_all_filing_index(year: int = None, form_type_list: Iterable[str] = N if new_only and not is_processed: logger.info("Processing filing index for {0}...".format(s3_path)) _ = process_filing_index.delay(client_type, s3_path, form_type_list=form_type_list, store_raw=store_raw, - store_text=store_text) + store_text=store_text, store_processed=store_processed) elif not new_only: logger.info("Processing filing index for {0}...".format(s3_path)) _ = process_filing_index.delay(client_type, s3_path, form_type_list=form_type_list, store_raw=store_raw, - store_text=store_text) + store_text=store_text, store_processed=store_processed) else: logger.info("Skipping process_filing_index for {0}...".format(s3_path)) @@ -179,7 +188,7 @@ def search_filing_documents(term_list: Iterable[str], form_type_list: Iterable[s stem_search=stem_search) n += 1 - logger.info("Searching {0} documents for {1} terms...".format(n, len(term_list))) + logger.debug("Searching {0} documents for {1} terms...".format(n, len(term_list))) def export_filing_document_search(search_query_id: int, output_file_path: str): diff --git a/lexpredict_openedgar/openedgar/processes/s3.py b/lexpredict_openedgar/openedgar/processes/s3.py index 3e3cee1..d5cdd9a 100755 --- a/lexpredict_openedgar/openedgar/processes/s3.py +++ b/lexpredict_openedgar/openedgar/processes/s3.py @@ -31,12 +31,6 @@ # Setup logger logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) def is_access_denied_file(remote_path: str, client=None): diff --git a/lexpredict_openedgar/openedgar/tasks.py b/lexpredict_openedgar/openedgar/tasks.py index e4b2380..1da38c2 100755 --- a/lexpredict_openedgar/openedgar/tasks.py +++ b/lexpredict_openedgar/openedgar/tasks.py @@ -26,9 +26,11 @@ import datetime import hashlib import logging +import sys import tempfile import os import pathlib +import traceback from typing import Iterable, Union # Packages @@ -37,7 +39,9 @@ from celery import shared_task # Project -from config.settings.base import S3_DOCUMENT_PATH +from config.settings.base import S3_DOCUMENT_PATH, DOWNLOAD_PATH +from openedgar.clients.adl import ADLClient +from openedgar.clients.blob import BlobClient from openedgar.clients.s3 import S3Client from openedgar.clients.local import LocalClient import openedgar.clients.edgar @@ -49,16 +53,13 @@ import lexnlp.nlp.en.tokens # Logging setup +from openedgar.process_text import html_to_text + logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -console = logging.StreamHandler() -console.setLevel(logging.INFO) -formatter = 
logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') -console.setFormatter(formatter) -logger.addHandler(console) -def create_filing_documents(client, documents, filing, store_raw: bool = True, store_text: bool = True): +def create_filing_documents(client, documents, filing, store_raw: bool = True, store_text: bool = True, + store_processed=True): """ Create filing document records given a list of documents and a filing record. @@ -66,28 +67,14 @@ def create_filing_documents(client, documents, filing, store_raw: bool = True, s :param filing: Filing record :param store_raw: whether to store raw contents :param store_text: whether to store text contents + :param store_processed: whether to extract the text from the contents :return: """ # Get client if we're using S3 - # Iterate through documents document_records = [] for document in documents: - # Create DB object - filing_doc = FilingDocument() - filing_doc.filing = filing - filing_doc.type = document["type"] - filing_doc.sequence = document["sequence"] - filing_doc.file_name = document["file_name"] - filing_doc.content_type = document["content_type"] - filing_doc.description = document["description"] - filing_doc.sha1 = document["sha1"] - filing_doc.start_pos = document["start_pos"] - filing_doc.end_pos = document["end_pos"] - filing_doc.is_processed = True - filing_doc.is_error = len(document["content"]) > 0 - document_records.append(filing_doc) # Upload raw if requested if store_raw and len(document["content"]) > 0: @@ -111,6 +98,32 @@ def create_filing_documents(client, documents, filing, store_raw: bool = True, s logger.info("Text contents for filing={0}, sequence={1}, sha1={2} already exists on S3" .format(filing, document["sequence"], document["sha1"])) + if store_processed and document["content_text"] is not None: + text_path = pathlib.Path(S3_DOCUMENT_PATH, "processed", document["sha1"]).as_posix() + if not client.path_exists(text_path): + text = html_to_text(document["content_text"]) + client.put_buffer(text_path, text, write_bytes=False) + logger.info("Processed text contents for filing={0}, sequence={1}, sha1={2}" + .format(filing, document["sequence"], document["sha1"])) + else: + logger.info("Processed text contents for filing={0}, sequence={1}, sha1={2} already exists" + .format(filing, document["sequence"], document["sha1"])) + + # Create DB object + filing_doc = FilingDocument() + filing_doc.filing = filing + filing_doc.type = document["type"] + filing_doc.sequence = document["sequence"] + filing_doc.file_name = document["file_name"] + filing_doc.content_type = document["content_type"] + filing_doc.description = document["description"] + filing_doc.sha1 = document["sha1"] + filing_doc.start_pos = document["start_pos"] + filing_doc.end_pos = document["end_pos"] + filing_doc.is_processed = True + filing_doc.is_error = len(document["content"]) > 0 + document_records.append(filing_doc) + # Create in bulk FilingDocument.objects.bulk_create(document_records) return len(document_records) @@ -187,14 +200,17 @@ def create_filing_error(row, filing_path: str): @shared_task def process_filing_index(client_type: str, file_path: str, filing_index_buffer: Union[str, bytes] = None, - form_type_list: Iterable[str] = None, store_raw: bool = False, store_text: bool = False): + form_type_list: Iterable[str] = None, store_raw: bool = False, store_text: bool = False, + store_processed: bool = False): """ Process a filing index from an S3 path or buffer. 
+ :param client_type: :param file_path: S3 or local path to process; if filing_index_buffer is none, retrieved from here :param filing_index_buffer: buffer; if not present, s3_path must be set :param form_type_list: optional list of form type to process :param store_raw: :param store_text: + :param store_processed: :return: """ # Log entry @@ -202,6 +218,10 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: if client_type == "S3": client = S3Client() + elif client_type == "ADL": + client = ADLClient() + elif client_type == "Blob": + client = BlobClient() else: client = LocalClient() @@ -209,6 +229,9 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: if filing_index_buffer is None: logger.info("Retrieving filing index buffer for: {}...".format(file_path)) filing_index_buffer = client.get_buffer(file_path) + if filing_index_buffer is None: + logger.error("SOMETHING WRONG! FILING IS NONE! Client {} file {}".format(client_type, file_path)) + raise ValueError("Filing index is none!") # Write to disk to handle headaches temp_file = tempfile.NamedTemporaryFile(delete=False) @@ -217,7 +240,7 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: # Get main filing data structure filing_index_data = openedgar.parsers.edgar.parse_index_file(temp_file.name) - logger.info("Parsed {0} records from index".format(filing_index_data.shape[0])) + logger.warning("Parsed {0} records from index".format(filing_index_data.shape[0])) # Iterate through rows bad_record_count = 0 @@ -240,9 +263,10 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: logger.info("Filing record already exists: {0}".format(filing)) except Filing.MultipleObjectsReturned as e: # Create new filing record - logger.error("Multiple Filing records found for s3_path={0}, skipping...".format(filing_path)) - logger.info("Raw exception: {0}".format(e)) - continue + logger.warning("Multiple Filing records found for s3_path={0} .. 
Taking first one!".format(filing_path)) + filing = Filing.objects.filter(s3_path=filing_path).first() + logger.info("Filing record already exists: {0}".format(filing)) + logger.debug("Raw exception: {0}".format(e)) except Filing.DoesNotExist as f: # Create new filing record logger.info("No Filing record found for {0}, creating...".format(filing_path)) @@ -260,7 +284,7 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: continue # Upload - client.put_buffer(filing_path, filing_buffer) + client.put_buffer("{}/{}".format(DOWNLOAD_PATH,filing_path), filing_buffer) logger.info("Downloaded from EDGAR and uploaded to {}...".format(client_type)) else: @@ -269,7 +293,8 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: filing_buffer = client.get_buffer(filing_path) # Parse - filing_result = process_filing(client, filing_path, filing_buffer, store_raw=store_raw, store_text=store_text) + filing_result = process_filing(client, filing_path, filing_buffer, store_raw=store_raw, + store_text=store_text, store_processed=store_processed) if filing_result is None: logger.error("Unable to process filing.") bad_record_count += 1 @@ -303,19 +328,19 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: @shared_task def process_filing(client, file_path: str, filing_buffer: Union[str, bytes] = None, store_raw: bool = False, - store_text: bool = False): + store_text: bool = False, store_processed: bool = False): """ Process a filing from a path or filing buffer. :param file_path: path to process; if filing_buffer is none, retrieved from here :param filing_buffer: buffer; if not present, s3_path must be set :param store_raw: :param store_text: + :param store_processed :return: """ # Log entry logger.info("Processing filing {0}...".format(file_path)) - # Check for existing record first try: filing = Filing.objects.get(s3_path=file_path) @@ -332,6 +357,10 @@ def process_filing(client, file_path: str, filing_buffer: Union[str, bytes] = No if filing_buffer is None: logger.info("Retrieving filing buffer from S3...") filing_buffer = client.get_buffer(file_path) + filing_buffer = openedgar.parsers.edgar.decode_filing(filing_buffer) + if filing_buffer is None: + logger.error("Unable to read the filing {}".format(file_path)) + return None # Get main filing data structure filing_data = openedgar.parsers.edgar.parse_filing(filing_buffer, extract=store_text) @@ -409,13 +438,19 @@ def process_filing(client, file_path: str, filing_buffer: Union[str, bytes] = No # Create filing document records try: - create_filing_documents(client, filing_data["documents"], filing, store_raw=store_raw, store_text=store_text) + create_filing_documents(client, filing_data["documents"], filing, store_raw=store_raw, store_text=store_text, + store_processed=store_processed) filing.is_processed = True filing.is_error = False filing.save() return filing except Exception as e: # pylint: disable=broad-except - logger.error("Unable to create filing documents for {0}: {1}".format(filing, e)) + logger.error("423: Unable to create filing documents for {0}: {1}qGG".format(filing, e)) + exc_type, exc_value, exc_tb = sys.exc_info() + ex_txt = "" + for line in traceback.TracebackException(type(exc_type), exc_value, exc_tb).format(chain=None): + ex_txt += line + "\n" + logger.error(ex_txt) return None @@ -429,8 +464,6 @@ def extract_filing(client, file_path: str, filing_buffer: Union[str, bytes] = No """ # Get buffer - - if filing_buffer is None: logger.info("Retrieving 
filing buffer from S3...") filing_buffer = client.get_buffer(file_path) diff --git a/lexpredict_openedgar/requirements/full.txt b/lexpredict_openedgar/requirements/full.txt index f34945d..19e16c6 100755 --- a/lexpredict_openedgar/requirements/full.txt +++ b/lexpredict_openedgar/requirements/full.txt @@ -135,4 +135,5 @@ Werkzeug==0.14.1 whitenoise==3.3.1 widgetsnbextension==3.2.1 wrapt==1.10.11 +beautifulsoup4==4.7.1 https://github.com/LexPredict/lexpredict-lexnlp/archive/0.1.8.zip \ No newline at end of file diff --git a/tika/download_tika.sh b/tika/download_tika.sh index 7fcce88..53910ea 100644 --- a/tika/download_tika.sh +++ b/tika/download_tika.sh @@ -1 +1,2 @@ -wget http://www-us.apache.org/dist/tika/tika-server-1.18.jar +#!/bin/bash +wget http://www-us.apache.org/dist/tika/tika-server-1.21.jar
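Usage note: the new `quarter`/`month` arguments added to `process_all_filing_index` are driven by the `EDGAR_YEAR`, `EDGAR_QUARTER`, `EDGAR_MONTH` and `FORM_TYPES` environment variables in `docker/run_edgar.py`. The sketch below is a minimal standalone variant of that script, run through `python manage.py shell` as the entrypoint does; the `os.getenv(..., "")` defaults are an assumption added here so the script also behaves when the quarter/month variables are unset.

    import os
    from openedgar.processes.edgar import process_all_filing_index

    # Comma-separated inputs, e.g. EDGAR_YEAR="2015,2016", FORM_TYPES="10-K, 10-Q"
    years = [y.strip() for y in os.getenv("EDGAR_YEAR", "2015").split(",")]
    types = [t.upper().strip() for t in os.getenv("FORM_TYPES", "10-K").split(",")]

    # Optional narrowing; an empty string means "process the whole year".
    # Inside download_filing_index_data, month takes precedence over quarter.
    quarter = os.getenv("EDGAR_QUARTER", "") or None
    month = os.getenv("EDGAR_MONTH", "") or None

    for year in years:
        print("Analysing year {} types {}".format(year, types))
        process_all_filing_index(year=year, quarter=quarter, month=month,
                                 form_type_list=types)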