diff --git a/.fprettifyrc b/.fprettifyrc
new file mode 100644
index 0000000000..348b4ec8bf
--- /dev/null
+++ b/.fprettifyrc
@@ -0,0 +1,6 @@
+[fprettify]
+indent = 6
+whitespace_only = true
+case = upper
+strict_indent = false
+align_comments = true
diff --git a/.gitignore b/.gitignore
index 1c7dd37270..f08047acfb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,9 +20,10 @@ objects.mk
 ##################
 config/compilation_helper.sh.inc
 lib/archive/git.list
-
 # Compiled source #
 ###################
+src/*/*.o_to_save
+ypp/*/*.o_to_save
 *.com
 *.class
 *.dll
@@ -37,13 +38,13 @@ lib/archive/git.list
 *.f90
 *.i
 *.s
-*.swp*
 *dSYM
 *bk
 
 # source #
 ##########
 lib/archive/Ydriver
+lib/archive/Ydriver-src
 lib/yambo/Ydriver/yambo/include/editor.h
 lib/yambo/Ydriver/yambo/include/version.h
 include/version/version.h
@@ -63,7 +64,7 @@ include/system/*
 lib/install/make_iotk.inc
 lib/iotk/make.sys
 lib/iotk/make_iotk.inc
-lib/ydiago/make_ydiago.inc
+lib/ldiago/make_ldiago.inc
 lib/yambo
 lib/*/*stamp
 lib/*/package-installed
@@ -83,7 +84,7 @@ lib/*/petsc-*
 lib/*/slepc-*
 lib/*/futile*
 lib/*/yaml*
-lib/*/Ydiago*
+lib/*/Ldiago*
 lib/*/devicexlib*
 lib/hdf5/hdf5*
 lib/external/*
@@ -136,6 +137,11 @@ nohup.out
 autom4te.cache
 
 #Scripts
-gfortran*.sh
-pgi*.sh
-nvidia*.sh
+compile_yambo_gcc
+gfortran*.sh*
+nvfortran*.sh*
+ifx*.sh*
+git_nmd.sh
+
+#Configuation files
+.vscode/*.json
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000..ed89667cf6
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,20 @@
+# The Docker image that will be used to build your app
+image: ubuntu:20.04
+variables:
+  DEBIAN_FRONTEND: "noninteractive"
+create-pages:
+  pages:
+    # The folder that contains the files to be exposed at the Page URL
+    publish: doc
+  rules:
+    # This ensures that only pushes to the default branch ($CI_DEFAULT_BRANCH) will trigger a pages deploy
+    - if: $CI_COMMIT_REF_NAME == "develop" 
+    - if: $CI_COMMIT_REF_NAME == "master" 
+  # Functions that should be executed before the build script is run 
+  script:
+    - apt-get -qq update
+    - apt-get -qq install apt-transport-https ca-certificates gnupg software-properties-common wget
+    - wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
+    - apt-add-repository 'deb https://apt.kitware.com/ubuntu/ focal main'
+    - apt-get install -qq build-essential git gcc gfortran cmake libopenblas-dev libfftw3-dev libopenmpi-dev
+    - ./configure
diff --git a/.vscode/extensions.json_suggested b/.vscode/extensions.json_suggested
new file mode 100644
index 0000000000..6f4c0f02c0
--- /dev/null
+++ b/.vscode/extensions.json_suggested
@@ -0,0 +1,18 @@
+{
+    "recommendations": [
+      // Fortran language support
+      "krvajal.vscode-fortran-support",  
+      // Linting, IntelliSense, debugging (GDB)
+      "ms-vscode.cpptools",
+      // Python tests and helpers
+      "ms-python.python",
+      "ms-python.vscode-pylance",
+      // Debug (LLDB alternative)
+      "vadimcn.vscode-lldb",
+      // Color-coded comments (TODO, FIXME, etc.)
+      "aaron-bond.better-comments",
+      // Format Fortran code with fprettify
+      "fortran-lang.fortran-fprettify"
+    ]
+  }
+  
\ No newline at end of file
diff --git a/.vscode/launch.json_suggested b/.vscode/launch.json_suggested
new file mode 100644
index 0000000000..23b2943136
--- /dev/null
+++ b/.vscode/launch.json_suggested
@@ -0,0 +1,25 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+      {
+        "name": "Debug Yambo",
+        "type": "cppdbg",
+        "request": "launch",
+        "program": "/root/codes/lumen/bin/yambo", // Adjust to your Yambo binary path
+        "args": ["-F", "02_COHSEX", "-J", "debug_run", "-C", "debug_run", "-I", "debug_run"], // Adjust to your Yambo files 
+        "stopAtEntry": false,
+        "cwd": "${workspaceFolder}",
+        "environment": [],
+        "externalConsole": false,
+        "MIMode": "gdb",
+        "miDebuggerPath": "/usr/bin/gdb",
+        "setupCommands": [
+          {
+            "description": "Enable pretty printing",
+            "text": "-enable-pretty-printing",
+            "ignoreFailures": true
+          }
+        ]
+      }
+    ]
+  } 
\ No newline at end of file
diff --git a/.vscode/settings.json_suggested b/.vscode/settings.json_suggested
new file mode 100644
index 0000000000..78d408d26c
--- /dev/null
+++ b/.vscode/settings.json_suggested
@@ -0,0 +1,58 @@
+{
+  // File associations for proper syntax highlighting
+  "files.associations": {
+    "*.F": "FortranFreeForm",
+    "*.F90": "FortranFreeForm",
+    "*.inc": "FortranFreeForm"
+  },
+  // GitHub Copilot activation for Fortran
+  "github.copilot.enable": {
+    "Fortran": true,
+    "Fortran77": true
+  },
+  // Fortran linter configuration
+  "fortran.linter.compiler": "gfortran",
+  "fortran.linter.compilerPath": "/usr/bin/gfortran",
+  "fortran.linter.extraArgs": [
+    "-Wall",
+    "-Wextra",
+    "-pedantic",
+    "-std=f2008",
+    "-cpp",
+    "-D_HDF5_LIB",
+    "-D_HDF5_IO",
+    "-D_MPI",
+    "-DYAMBO_ALLOC",
+    "-DDEV_SUB_ALT(x)=x##_cpu"
+  ],
+  "fortran.linter.includePaths": [
+    "${workspaceFolder}/src/modules/",
+    "${workspaceFolder}/src/parser/",
+    "${workspaceFolder}/include/",
+    "${workspaceFolder}/include/headers/common",
+    "${workspaceFolder}/include/version",
+    "${workspaceFolder}/include/system",
+    "${workspaceFolder}/include/headers/parser"
+  ],
+  "fortran.fortls.preprocessor.definitions": {
+    "_HDF5_LIB": "",
+    "_HDF5_IO": "",
+    "_MPI": "",
+    "YAMBO_ALLOC(A,x)": "allocate(A x)",
+  },
+  "fortran.fortls.path": "/home/reho0001/.local/bin/fortls",
+  // Optional formatter
+  "fortran.formatting.formatter": "fprettify",
+  "fortran.formatting.fprettifyArgs": [
+    "/root/.local/bin/fprettify"
+  ],
+  "fortran.formatting.path": "/root/.local/bin/fprettify", // Change me: Path to the fprettify executable
+  "editor.formatOnSave": true,
+  // Enable verbose logging
+  "fortran.logging.level": "Debug",
+  "fortran.experimental.keepInitDiagnostics": false,
+  //This is needed if fortls is installed via pipx
+  "terminal.integrated.env.linux": {
+    "PATH": "${env:HOME}/.local/bin:${env:PATH}"
+  }
+}
\ No newline at end of file
diff --git a/AUTHORS b/AUTHORS
index 7303ea16f3..d494f63c98 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,60 +1,61 @@
 
-ACTIVE DEVELOPERS
-==================
-
-   NAME SURNAME                 (INITIALS)    (since year)
-   ------------			----------    ------------
- * Andrea Marini                   (AM)  	 (2001)
- * Myrta Gruening                  (MG)  	 (2004)
- * Daniele Varsano                 (DV)  	 (2004)
- * Conor Hogan                     (CH)  	 (2005)
- * Maurizia Palummo                (MP)  	 (2005)
- * Claudio Attaccalite             (CA)  	 (2009)
- * Davide Sangalli                 (DS)  	 (2009)
- * Elena Cannuccia                 (EC)  	 (2011)
- * Andrea Ferretti                 (AF)  	 (2013)
- * Alejandro Molina-Sánchez        (AMS) 	 (2015)
- * Miki Bonacci                    (MB)  	 (2018)
- * Dario Alejandro Leon-Valido     (DALV)	 (2018)
- * Fulvio Paleari                  (FP)  	 (2018)
- * Nicola Spallanzani              (NS)  	 (2019)
- * Pino D’Amico                    (PDA)  	 (2020) 
- * Alberto Guandalini              (AG)  	 (2020)
- * Riccardo Reho                   (RR)  	 (2022)
- * Giacomo Sesti                   (GS)  	 (2022)
- * Blanca Mellado Pinto            (BM)          (2023)
- * Nalabothula Muralidhar          (NM)          (2024)
-
-FORMER DEVELOPERS
-==================
+DEVELOPERS
+===========
 
    NAME SURNAME 		(INITIALS)   (period of activity)
    ------------			----------   --------------------
- * David Kammerlader               (DK)  	 (2010-2012)
- * Fabio Affinito                  (FA)  	 (2013-2019)
+ * Davide Sangalli                 (DS)  	 (2009-NOW)
+ * Andrea Marini                   (AM)  	 (2001-2025)
+ * Claudio Attaccalite             (CA)  	 (2009-NOW)
+ * Andrea Ferretti                 (AF)  	 (2013-NOW)
+ * Henrique Miranda                (HM)  	 (2016-2018)
+ * Daniele Varsano                 (DV)  	 (2004-NOW)
+ * Fulvio Paleari                  (FP)  	 (2018-NOW)
+ * Conor Hogan                     (CH)  	 (2005-2015)
+ * Nalabothula Muralidhar          (NM)          (2024-NOW)
+ * Alberto Guandalini              (AG)  	 (2020-2023)
+ * Myrta Gruening                  (MG)  	 (2004-NOW)
+ * Nicola Spallanzani              (NS)  	 (2019-NOW)
+ * Miki Bonacci                    (MB)  	 (2018-2020)
+ * Dario Alejandro Leon-Valido     (DALV)	 (2018-2024)
+ * Riccardo Reho                   (RR)  	 (2022-NOW)
+ * Ignacio Martin Alliati          (IMA) 	 (2020-2024)
  * Pedro Melo                      (PM)  	 (2013-2022)
- * Ivan Marri                      (IM)  	 (2014-2018)
+ * Blanca Mellado Pinto            (BM)          (2023-2025)
+ * Elena Molteni                   (EM)  	 (2019-2021)
+ * Alejandro Molina-Sánchez        (AMS) 	 (2015-2022)
+ * Ivan Marri                      (IM)  	 (2014-2019)
+ * Pietro Bonfa’                   (PB)  	 (2018-2018)
+ * Giacomo Rossi                   (GR)          (2023-NOW)
+ * Ryan McMillan                   (RM)  	 (2015-2015)
+ * Antimo Marrazzo                 (AR)  	 (2016-2020)
+ * Laura Bellentani                (LB)          (2024-NOW)
+ * Giacomo Sesti                   (GS)  	 (2022-NOW)
+ * Petru Milev                     (PMI)         (2025-NOW)
+ * Torsten Geirsson                (TG)          (2025-NOW)
  * Margherita Marsili              (MM)  	 (2014-2022)
+ * Pino D’Amico                    (PDA)  	 (2020-2021)
+ * Maurizia Palummo                (MP)  	 (2005-2020)
+ * Elena Cannuccia                 (EC)  	 (2011-2020)
+ * Fabio Affinito                  (FA)  	 (2013-2019)
  * Mike Atambo                     (MA)  	 (2015-2018)
- * Pietro Bonfa’                   (PB)  	 (2015-2018)
- * Ryan McMillan                   (RM)  	 (2015-2017)
- * Antimo Marrazzo                 (AR)  	 (2016-2020)
- * Henrique Miranda                (HM)  	 (2016-2018)
- * Elena Molteni                   (EM)  	 (2019-2021)
- * Ignacio Martin Alliati          (IMA) 	 (2020-2024)
+ * David Kammerlader               (DK)  	 (2010-2012)
 
 ACKNOWLEDGEMENTS
 =================
 
-In all source files the Yambo developers are included with their initials.
-Andrea Marini is the original developer of the code and still the main responsible for the code development.
+In all source files the developers are included with their initials.
 
-For aknowledging the Yambo Team please refer to the following published articles:
+Davide Sangallli and Claudio Attaccalite are the founders of the Lumen fork
 
-- A Marini, et al. "Yambo: an ab initio tool for excited state calculations",
-  Computer Physics Communications 180 (8), 1392 (2009)
+Andrea Marini is the original developer of the Yambo code
+
+For aknowledging the developers Team please refer to the following published articles:
 
 - D Sangalli, et al. "Many-body perturbation theory calculations using the yambo code",
   Journal of physics: Condensed matter 31 (32), 325902 (2019)
 
+- A Marini, et al. "Yambo: an ab initio tool for excited state calculations",
+  Computer Physics Communications 180 (8), 1392 (2009)
+
 For more info please refer to the README file
diff --git a/Makefile b/Makefile
index 67dfc3d26d..9af29d6eb9 100644
--- a/Makefile
+++ b/Makefile
@@ -34,8 +34,6 @@ changelog:
 	./sbin/gitchangelog.py > ChangeLog
 interfaces:
 	@for target in $(INTERFCS) ; do $(MAKE) $(MAKEFLAGS) $$target; if test ! -f "$(bindir)/$$target"; then echo "$$target build failed"; exit 1;fi ; done
-gpl:
-	@for target in $(GPL)      ; do $(MAKE) $(MAKEFLAGS) $$target; if test ! -f "$(bindir)/$$target"; then echo "$$target build failed"; exit 1;fi ; done
 core:
 	@for target in $(CORE)     ; do $(MAKE) $(MAKEFLAGS) $$target; if test ! -f "$(bindir)/$$target"; then echo "$$target build failed"; exit 1;fi ; done
 ph-project:
diff --git a/README.md b/README.md
index d431de551e..d9713d73cd 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
+## Lumen
+Lumen is a GPL code derived from Yambo 5.3. Here below the readme file from Yambo
+
 ## Yambo
 This is the distribution of the Yambo code.
 Yambo doesn't stand for anything like "Yet Another Many-Body cOde", for instance.  Unless you really want it to. 
diff --git a/config/configure.ac b/config/configure.ac
index 6f235d68f6..b9bc675a45 100644
--- a/config/configure.ac
+++ b/config/configure.ac
@@ -255,6 +255,7 @@ m4_include([config/m4/iotk.m4])
 m4_include([config/m4/etsf_io.m4])
 m4_include([config/m4/scalapack.m4])
 m4_include([config/m4/petsc_slepc.m4])
+m4_include([config/m4/magma.m4])
 m4_include([config/m4/libcuda.m4])
 m4_include([config/m4/device_xlib.m4])
 #
@@ -303,11 +304,14 @@ ACX_LIBCUDA
 # CUDA
 AC_SET_GPU
 # ============================================================================
+# Diago library
+AC_LDIAGO
+# ============================================================================
 # Device XLIB
 ACX_DEVXLIB
 # ============================================================================
-# Yambo Libs
-AC_YDIAGO
+# MAGMA
+AC_MAGMA_SETUP
 # ============================================================================
 # Prepare the REPORT file variables
 ACX_REPORT()
diff --git a/config/m4/acx_fortran_flags.m4 b/config/m4/acx_fortran_flags.m4
index 37797187dd..289a794e80 100644
--- a/config/m4/acx_fortran_flags.m4
+++ b/config/m4/acx_fortran_flags.m4
@@ -88,7 +88,12 @@ i?86*linux*)
        CPU_FLAG="-xHost"
        #CPU_FLAG=" "
        ;;
-      *2021* | *2022* | *2023* | *2024* | *2025* )
+      *2025* )
+       CPU_FLAG=" "
+       OMPFLAGS="-qopenmp"
+       FCMFLAG="-nofor-main"
+       ;;
+      *2021* | *2022* | *2023* | *2024* )
        CPU_FLAG=" "
        OMPFLAGS="-qopenmp -parallel"
        FCMFLAG="-nofor-main"
@@ -338,7 +343,13 @@ aarch*linux* | arm*linux* )
        #CPU_FLAG="-xHost"
        CPU_FLAG=" "
        ;;
-      *2020* | *2021* | *2022* | *2023* | *2024* | *2025* )
+      *2025* )
+       CPU_FLAG=" "
+       OMPFLAGS="-qopenmp"
+       FCMFLAG="-nofor-main"
+       CFLAGS="-O2 -std=gnu99"
+       ;;
+      *2020* | *2021* | *2022* | *2023* | *2024* )
        CPU_FLAG=" "
        OMPFLAGS="-qopenmp -parallel"
        FCMFLAG="-nofor-main"
diff --git a/config/m4/acx_report.m4 b/config/m4/acx_report.m4
index d8c19f765a..6495066baf 100644
--- a/config/m4/acx_report.m4
+++ b/config/m4/acx_report.m4
@@ -13,6 +13,9 @@ AC_DEFUN([ACX_REPORT],
 DP_check="-"
 if test "$enable_dp" = "yes" ; then DP_check="X"; fi
 #
+GAMMA_ONLY_check="-"
+if test "$enable_gamma_only" = "yes" ; then GAMMA_ONLY_check="X"; fi
+#
 KEEP_OBJS_check="-"
 if test "$enable_keep_objects" = "yes" ; then KEEP_OBJS_check="X"; fi
 #
@@ -124,14 +127,14 @@ if test "$internal_blacs" = "yes" ; then
   if test "$compile_blacs" = "no" ; then BLACS_check="I"; fi
 fi
 #
-if test "$compile_ydiago" = "yes"; then
-  if test x"$with_ydiago_branch" = "xnone"; then
-    YDIAGO_check="I";
+if test "$compile_ldiago" = "yes"; then
+  if test x"$with_ldiago_branch" = "xnone"; then
+    LDIAGO_check="I"; 
   else
-    YDIAGO_check="G";
+    LDIAGO_check="G"; 
   fi
 else
-  YDIAGO_check="-";
+  LDIAGO_check="-"; 
 fi
 #
 PETSC_check="-"
@@ -160,6 +163,14 @@ if test "$internal_libxc" = "yes" ; then
   if test "$compile_libxc" = "no" ; then LIBXC_check="I"; fi
 fi
 #
+MAGMA_check="-"
+if test "$internal_magma" = "yes" ; then
+  if test "$compile_magma" = "yes" ; then MAGMA_check="C"; fi
+  if test "$compile_magma" = "no"  ; then MAGMA_check="I"; fi
+elif test "$enable_magma" = "yes" ; then
+  MAGMA_check="E"
+fi
+#
 DEVXLIB_check="E"
 if test "$internal_devxlib" = "yes" ; then
   if test "$compile_devxlib" = "yes"; then DEVXLIB_check="C"; fi
@@ -217,6 +228,7 @@ if test "$hdf5" = "yes" ; then
 fi
 #
 AC_SUBST(DP_check)
+AC_SUBST(GAMMA_ONLY_check)
 AC_SUBST(KEEP_OBJS_check)
 AC_SUBST(TIME_profile_check)
 AC_SUBST(MEM_profile_check)
@@ -242,6 +254,7 @@ AC_SUBST(LAPACK_check)
 AC_SUBST(BLACS_check)
 AC_SUBST(SLK_check)
 AC_SUBST(ELPA_check)
+AC_SUBST(LDIAGO_check)
 AC_SUBST(PETSC_check)
 AC_SUBST(SLEPC_check)
 AC_SUBST(PETSC_info)
@@ -251,6 +264,7 @@ AC_SUBST(YDB_check)
 AC_SUBST(YPY_check)
 #
 AC_SUBST(LIBXC_check)
+AC_SUBST(MAGMA_check)
 AC_SUBST(DEVXLIB_check)
 AC_SUBST(LIBCUDA_check)
 AC_SUBST(MPI_check)
@@ -258,12 +272,14 @@ AC_SUBST(MPI_info)
 #
 # STRIPE [LIB] from paths
 #
-ACX_STRIPE_SUBPATH2($YDIAGO_LIBS,"LIB")
-YDIAGO_LIBS_R=$STRIPE
-ACX_STRIPE_SUBPATH2($YDIAGO_INCS,"INC")
-YDIAGO_INCS_R=$STRIPE
-AC_SUBST(YDIAGO_LIBS_R)
-AC_SUBST(YDIAGO_INCS_R)
+ACX_STRIPE_SUBPATH($LDIAGO_LIBS,"LIB")
+#ACX_STRIPE_SUBPATH2($LDIAGO_LIBS,"LIB")
+LDIAGO_LIBS_R=$STRIPE
+ACX_STRIPE_SUBPATH($LDIAGO_INCS,"INC")
+#ACX_STRIPE_SUBPATH2($LDIAGO_INCS,"INC")
+LDIAGO_INCS_R=$STRIPE
+AC_SUBST(LDIAGO_LIBS_R)
+AC_SUBST(LDIAGO_INCS_R)
 #
 ACX_STRIPE_SUBPATH($IOTK_LIBS,"LIB")
 IOTK_LIBS_R=$STRIPE
@@ -349,6 +365,13 @@ ELPA_INCS_R=$STRIPE
 AC_SUBST(ELPA_LIBS_R)
 AC_SUBST(ELPA_INCS_R)
 #
+ACX_STRIPE_SUBPATH($MAGMA_LIBS,"LIB")
+MAGMA_LIBS_R=$STRIPE
+ACX_STRIPE_SUBPATH($MAGMA_INCS,"INC")
+MAGMA_INCS_R=$STRIPE
+AC_SUBST(MAGMA_LIBS_R)
+AC_SUBST(MAGMA_INCS_R)
+#
 ACX_STRIPE_SUBPATH($BLACS_LIBS,"LIB")
 BLACS_LIBS_R=$STRIPE
 ACX_STRIPE_SUBPATH($BLACS_INCS,"INC")
@@ -370,6 +393,13 @@ SLEPC_INCS_R=$STRIPE
 AC_SUBST(SLEPC_LIBS_R)
 AC_SUBST(SLEPC_INCS_R)
 #
+ACX_STRIPE_SUBPATH($MAGMA_LIBS,"LIB")
+MAGMA_LIBS_R=$STRIPE
+ACX_STRIPE_SUBPATH($MAGMA_INCS,"INC")
+MAGMA_INCS_R=$STRIPE
+AC_SUBST(MAGMA_LIBS_R)
+AC_SUBST(MAGMA_INCS_R)
+#
 ACX_STRIPE_SUBPATH($LIBXC_LIBS,"LIB")
 LIBXC_LIBS_R=$STRIPE
 ACX_STRIPE_SUBPATH($LIBXC_INCS,"INC")
diff --git a/config/m4/acx_stripe_subpath.m4 b/config/m4/acx_stripe_subpath.m4
index 65180dadd0..e5fe3d7287 100644
--- a/config/m4/acx_stripe_subpath.m4
+++ b/config/m4/acx_stripe_subpath.m4
@@ -7,10 +7,10 @@
 #
 AC_DEFUN([ACX_STRIPE_SUBPATH],
 [
-TMP1=`echo $1 | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $1 | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [[ -z "${1// }" ]]; then
  STRIPE="$STRIPE ($2)"
 fi
@@ -18,10 +18,10 @@ fi
 
 AC_DEFUN([ACX_STRIPE_SUBPATH2],
 [
-TMP1=`echo $1 | sed 's/\//+/g'`
-TMP2=`echo $compdir | sed 's/\//+/g'`
+TMP1=`echo $1 | sed 's/\//\#/g'`
+TMP2=`echo $compdir | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(CMP\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [[ -z "${1// }" ]]; then
  STRIPE="$STRIPE ($2)"
 fi
diff --git a/config/m4/acx_version.m4 b/config/m4/acx_version.m4
index 77de35ea64..a1fd71bce1 100644
--- a/config/m4/acx_version.m4
+++ b/config/m4/acx_version.m4
@@ -8,6 +8,8 @@
 AC_DEFUN([ACX_VERSION],
 [
 cat << EOF > include/version/version.h
+#pragma once
+#define LUMEN_VERSION "$PACKAGE_STRING"
 #define YAMBO_VERSION $SVERSION
 #define YAMBO_SUBVERSION $SSUBVERSION
 #define YAMBO_PATCHLEVEL $SPATCHLEVEL
diff --git a/config/m4/gpu.m4 b/config/m4/gpu.m4
index e06d7466d3..b79a74d4a4 100644
--- a/config/m4/gpu.m4
+++ b/config/m4/gpu.m4
@@ -127,7 +127,7 @@ AC_ARG_WITH(rocm_path, [AS_HELP_STRING([--with-rocm-path=<path>],
             [Path to rocm install directory],[32])])
 #
 AC_ARG_WITH(mklgpu_libs, [AS_HELP_STRING([--with-mklgpu-libs=<libs>], 
-            [Use librocm library <libs>],[32])])
+            [Use the MKLGPU library <libs>],[32])])
 
 use_int_cuda_libs="no"
 use_gpu_libs="no"
diff --git a/config/m4/hdf5_f90.m4 b/config/m4/hdf5_f90.m4
index 95d2ff5934..7f1b4793b8 100644
--- a/config/m4/hdf5_f90.m4
+++ b/config/m4/hdf5_f90.m4
@@ -256,7 +256,7 @@ if test x"$enable_hdf5" = "xyes"; then
       compile_hdf5="no" ;
       AC_MSG_RESULT([already compiled]) ;
       #
-    elif test "$IO_LIB_VER" = "serial" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
+    elif test "$IO_LIB_VER" = "serial" &&  test "$mpibuild"  = "yes"  && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
       #
       compile_hdf5="no" ;
       IO_LIB_VER="parallel";
diff --git a/config/m4/lapack.m4 b/config/m4/lapack.m4
index 949c41d927..27d17889d1 100644
--- a/config/m4/lapack.m4
+++ b/config/m4/lapack.m4
@@ -43,7 +43,7 @@ fi
 # LAPACK linked to by default?  (is sometimes included in BLAS lib)
 if test $acx_lapack_ok = no; then
         save_LIBS="$LIBS"; LIBS="$LIBS $BLAS_LIBS $FLIBS"
-        AC_CHECK_FUNC($cheev, [acx_lapack_ok=yes])
+        AC_CHECK_FUNC($cheev, [acx_lapack_ok=yes LAPACK_LIBS="$BLAS_LIBS"])
         LIBS="$save_LIBS"
 fi
 
diff --git a/config/m4/libxc.m4 b/config/m4/libxc.m4
index 56919791e5..dabf52d0fe 100644
--- a/config/m4/libxc.m4
+++ b/config/m4/libxc.m4
@@ -69,12 +69,14 @@ if test x"$acx_libxc_ok" = xno ; then
   if test -d "$with_libxc_path"; then
      libxc_incdir="$with_libxc_path/include"
      libxc_libdir="$with_libxc_path/lib"
+     libxc_lib64dir="$with_libxc_path/lib64"
   fi
   if test -d "$with_libxc_includedir"; then libxc_incdir="$with_libxc_includedir" ; fi
   if test -d "$with_libxc_libdir";     then libxc_libdir="$with_libxc_libdir" ; fi
   #
   # dynamic linkage, separate Fortran interface
-  if test ! -z "$libxc_libdir"; then LIBXC_LIBS="-L$libxc_libdir -lxcf90 -lxcf03 -lxc"; fi
+  if test ! -z "$libxc_libdir";  then LIBXC_LIBS="-L$libxc_libdir -lxcf90 -lxcf03 -lxc"; fi
+  if test ! -z "$libxc_lib64dir"; then LIBXC_LIBS="-L$libxc_lib64dir $LIBXC_LIBS"; fi
   if test ! -z "$libxc_incdir"; then LIBXC_INCS="$IFLAG$libxc_incdir"; fi
   #
   if test ! -z "$with_libxc_libs" ; then LIBXC_LIBS="$with_libxc_libs" ; fi
diff --git a/config/m4/magma.m4 b/config/m4/magma.m4
new file mode 100644
index 0000000000..7628bb25a8
--- /dev/null
+++ b/config/m4/magma.m4
@@ -0,0 +1,210 @@
+#
+#        Copyright (C) 2000-2022 the YAMBO team
+#              http://www.yambo-code.org
+#
+# Authors (see AUTHORS file for details): AM
+#
+# This file is distributed under the terms of the GNU
+# General Public License. You can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation;
+# either version 2, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will
+# be useful, but WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+# MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+#
+AC_DEFUN([AC_MAGMA_SETUP],[
+#
+AC_ARG_ENABLE(magma,   AS_HELP_STRING([--enable-magma],[Enable suport for the BSE diagonalization using MAGMA. Default is no]))
+#
+AC_ARG_WITH(magma_libs,AS_HELP_STRING([--with-magma-libs=<libs>],[Use Magma libraries <libs>],[32]))
+AC_ARG_WITH(magma_incs,AS_HELP_STRING([--with-magma-incs=<incs>],[Use Magma includes <incs>],[32]))
+AC_ARG_WITH(magma_path, AS_HELP_STRING([--with-magma-path=<path>],[Path to the Magma install directory],[32]),[],[])
+AC_ARG_WITH(magma_libdir,AS_HELP_STRING([--with-magma-libdir=<path>],[Path to the Magma lib directory],[32]))
+AC_ARG_WITH(magma_includedir,AS_HELP_STRING([--with-magma-includedir=<path>],[Path to the Magma include directory],[32]))
+
+#
+def_magma=""
+magma="no"
+internal_magma="no"
+compile_magma="no"
+compile_magma_fmodules="no"
+#
+if test x"$enable_magma" = "x"; then enable_magma="no" ; fi
+#if test x"$enable_magma" = "xyes"; then enable_magma="yes"; fi
+#
+# MAGMA global options
+#
+if test  x"$with_magma_libs" = "xyes" ; then
+  enable_magma="yes" ;
+  compile_magma_fmodules="yes" ;
+  with_magma_libs="";
+elif test  x"$with_magma_libs" = "xno" ; then
+  enable_magma="no" ;
+  compile_magma_fmodules="no" ;
+  with_magma_libs="";
+fi
+#
+if test  x"$with_magma_libdir" != "x" ; then enable_magma="yes" ; fi
+if test  x"$with_magma_path"   != "x" ; then enable_magma="yes" ; fi
+if test  x"$with_magma_libs"   != "x" ; then enable_magma="yes" ; fi
+#
+# Set MAGMA LIBS and FLAGS from INPUT
+#
+if test -d "$with_magma_path" || test -d "$with_magma_libdir" || test x"$with_magma_libs" != "x" ; then
+  #
+  # external magma
+  #
+  if   test   x"$with_magma_libs" != "x" ; then  AC_MSG_CHECKING([for Magma using $with_magma_libs]) ;
+  elif test -d "$with_magma_libdir"      ; then  AC_MSG_CHECKING([for Magma in $with_magma_libdir]) ;
+  elif test -d "$with_magma_path"        ; then  AC_MSG_CHECKING([for Magma in $with_magma_path/lib]) ;
+  fi
+  #
+  if test -d "$with_magma_path" ; then 
+      try_magma_libdir="$with_magma_path/lib" ;
+      try_magma_incdir="$with_magma_path/include" ;
+  fi
+  #
+  if test -d "$with_magma_libdir"     ; then try_magma_libdir="$with_magma_libdir" ; fi
+  if test -d "$with_magma_includedir" ; then try_magma_incdir="$with_magma_includedir" ; fi
+  #
+  try_MAGMA_INCS="$IFLAG$try_magma_incdir" ;
+  try_MAGMA_LIBS="-L$try_magma_libdir -lmagma" ;
+  #
+  if test x"$with_magma_libs" != "x" ; then try_MAGMA_LIBS="$with_magma_libs" ; fi
+  if test x"$with_magma_incs" != "x" ; then try_MAGMA_INCS="$with_magma_incs" ; fi
+  #
+  if test -z "$try_MAGMA_LIBS" ; then AC_MSG_ERROR([No libs specified]) ; fi
+  if test -z "$try_MAGMA_INCS" ; then AC_MSG_ERROR([No include-dir specified]) ; fi
+  #
+  AC_LANG([Fortran])
+  #
+  save_fcflags="$FCFLAGS" ;
+  save_libs="$LIBS" ;
+  #
+  FCFLAGS="$try_MAGMA_INCS $save_fcflags";
+  LIBS="$try_MAGMA_LIBS $save_libs";
+  #
+  # check for magma with fortran-interfaces
+  AC_COMPILE_IFELSE(AC_LANG_PROGRAM([], [
+    use magma
+    implicit none
+    integer :: lda
+    !magma_devptr_t :: dA]),
+       [magmaf=yes], [magmaf=no]);
+  #
+  # check for c-style magma
+  AC_COMPILE_IFELSE(AC_LANG_PROGRAM([], [
+    ierr = magma_init(); ]),
+       [magmac=yes], [magmac=no]);
+  #
+  AC_MSG_RESULT([Library: $magmac;  Fortran support: $magmaf]) ;
+  #
+  if test "x$magmaf" = "xyes"; then
+    #
+    MAGMA_INCS="$try_MAGMA_INCS" ;
+    MAGMA_LIBS="$try_MAGMA_LIBS" ;
+    magma=yes;
+    compile_magma="no";
+    internal_magma="no";
+    def_magma="-D_MAGMA"
+    #
+  elif test "x$magmac" = "xyes"; then
+    #
+    MAGMA_LIBS="$try_MAGMA_LIBS" ;
+    MAGMA_INCS="${IFLAG}${extlibs_path}/${FCKIND}/${FC}/include" ; 
+    magma=yes;
+    compile_magma="no";
+    compile_magma_fmodules="yes";
+    def_magma="-D_MAGMA"
+    #
+  else
+    #
+    magma=no;
+    #
+  fi
+  # 
+  FCFLAGS="$save_fcflags" ;
+  LIBS="$save_libs" ;
+  # 
+fi
+#
+# TO BE FIXED: needs internal compilation support and paths 
+#              have to be corrected with GPU_SUPPORT folder
+#
+# Internal compilation
+#
+if test "x$enable_magma" = "xyes" && test "x$magma" = "xno" ; then
+  #
+  # internal magma
+  #
+  AC_MSG_CHECKING([for internal Magma library])
+  #
+  internal_magma="yes"
+  #
+  #if test "x$lapack_shared" = "x1" ; then
+  #  MAGMA_LIBS="${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.so" ;
+  #  #MAGMA_LIBS="" ;
+  #else
+    MAGMA_LIBS="${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.a" ;
+  #fi
+  MAGMA_INCS="${IFLAG}${extlibs_path}/${FCKIND}/${FC}/include" ;
+  #
+  magma=yes
+  def_magma="-D_MAGMA"
+  if test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.a" ; then
+    compile_magma="no" ;
+    compile_magma_fmodules="no" ;
+    AC_MSG_RESULT([already compiled]) ;
+  elif test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.so" ; then
+    compile_magma="no" ;
+    compile_magma_fmodules="no" ;
+    AC_MSG_RESULT([already compiled]) ;
+  else
+    compile_magma="yes" ;
+    compile_magma_fmodules="no" ;
+    #
+    NVIDIA_ARCH=
+    if test "x$with_cuda_cc" != "x" ; then
+       if test "$with_cuda_cc" -ge 60 &&  ! test "$with_cuda_cc" -ge 70 ; then NVIDIA_ARCH=Pascal ; fi
+       if test "$with_cuda_cc" -ge 70 &&  ! test "$with_cuda_cc" -ge 80 ; then NVIDIA_ARCH=Volta ; fi
+       if test "$with_cuda_cc" -ge 80 &&  ! test "$with_cuda_cc" -ge 90 ; then NVIDIA_ARCH=Hopper ; fi
+    fi
+    #
+    AC_MSG_RESULT([Compatible external Magma not found/specified. To be compiled.]) ;
+    AC_SUBST(NVIDIA_ARCH)
+    AC_CONFIG_FILES([lib/magma/make_magma.inc:lib/magma/make_magma.inc.in])
+  fi
+  #
+fi
+
+#
+# switch off internal magma compilation
+#
+deactivate_internal=no
+if test "x$compile_magma" = "xyes" && test "x$internal_magma" = "xyes" && test "x$deactivate_internal" = "xyes"  ; then
+  AC_MSG_RESULT([Internal Magma compilation not available yet. Deactivating it.]) ;
+  compile_magma="no"
+  def_magma=""
+  enable_magma="no"
+  MAGMA_INCS="" ;
+  MAGMA_LIBS="" ;
+fi
+#
+AC_SUBST(MAGMA_LIBS)
+AC_SUBST(MAGMA_INCS)
+AC_SUBST(def_magma)
+AC_SUBST(enable_magma)
+AC_SUBST(compile_magma)
+AC_SUBST(compile_magma_fmodules)
+AC_SUBST(internal_magma)
+#
+])
diff --git a/config/m4/netcdf_f90.m4 b/config/m4/netcdf_f90.m4
index 7e1cb7b1cd..57393cc4de 100644
--- a/config/m4/netcdf_f90.m4
+++ b/config/m4/netcdf_f90.m4
@@ -263,7 +263,7 @@ if test x"$enable_hdf5" = "xyes"; then
       compile_netcdf="no" ;
       AC_MSG_RESULT([already compiled]) ;
       #
-    elif test "$IO_LIB_VER" = "serial" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdf.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdff.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
+    elif test "$IO_LIB_VER" = "serial" &&  test "$mpibuild"  = "yes"   && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdf.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdff.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
       #
       compile_netcdf="no" ;
       IO_LIB_VER="parallel";
diff --git a/config/m4/petsc_slepc.m4 b/config/m4/petsc_slepc.m4
index e8fe9d485d..6740b27be9 100644
--- a/config/m4/petsc_slepc.m4
+++ b/config/m4/petsc_slepc.m4
@@ -38,6 +38,7 @@ compile_slepc="no"
 #
 AC_LANG_PUSH(C)
 AC_CHECK_LIB(dl, dlopen,  [use_libdl="yes";  ],[use_libdl="no";  ],[])
+AC_CHECK_LIB(stdc++, main, [use_libstdc="yes";], [use_libstdc="no";], [])
 AC_LANG_POP(C)
 #
 if test x"$enable_slepc_linalg" = "xyes"; then
@@ -81,7 +82,8 @@ if test -d "$with_petsc_path" || test -d "$with_petsc_libdir" || test x"$with_pe
   try_PETSC_INCS="$IFLAG$try_petsc_incdir" ;
   try_PETSC_LIBS="-L$try_petsc_libdir -lpetsc" ;
   #
-  if test "$use_libdl"    = "yes"; then try_PETSC_LIBS="$try_PETSC_LIBS -ldl -lstdc++"   ; fi
+  if test "$use_libdl"    = "yes"; then try_PETSC_LIBS="$try_PETSC_LIBS -ldl"   ; fi
+  if test "$use_libstdc"  = "yes"; then try_PETSC_LIBS="$try_PETSC_LIBS -lstdc++"   ; fi
   #
   if test x"$with_petsc_libs" != "x" ; then try_PETSC_LIBS="$with_petsc_libs" ; fi
   if test x"$with_petsc_incs" != "x" ; then try_PETSC_INCS="$with_petsc_incs" ; fi
@@ -137,11 +139,11 @@ if test "x$enable_petsc" = "xyes" && test "x$petsc" = "xno" ; then
   fi
   PETSC_INCS="${IFLAG}${extlibs_path}/${FCKIND}/${FC}/${build_precision}/include" ;
   #
-  if test "$use_libdl"    = "yes"; then PETSC_LIBS="$PETSC_LIBS -ldl -lstdc++"   ; fi
-  #
   petsc=yes
   if test -e "$PETSC_LIBS_DN" ; then
     PETSC_LIBS="$PETSC_LIBS_DN" ;
+    if test "$use_libdl"    = "yes"; then PETSC_LIBS="$PETSC_LIBS -ldl"   ; fi
+    if test "$use_libstdc"  = "yes"; then PETSC_LIBS="$PETSC_LIBS -lstdc++"   ; fi
     compile_petsc="no" ;
     if test "x$lapack_shared" = "x1" ; then
       AC_MSG_RESULT([dynamic already compiled]) ;
@@ -151,6 +153,8 @@ if test "x$enable_petsc" = "xyes" && test "x$petsc" = "xno" ; then
     fi
   elif test -e "$PETSC_LIBS_ST" ; then
     PETSC_LIBS="$PETSC_LIBS_ST" ;
+    if test "$use_libdl"    = "yes"; then PETSC_LIBS="$PETSC_LIBS -ldl"   ; fi
+    if test "$use_libstdc"  = "yes"; then PETSC_LIBS="$PETSC_LIBS -lstdc++"   ; fi
     compile_petsc="no" ;
     if test "x$lapack_shared" = "x1" ; then
       AC_MSG_RESULT([static found, despite dynamic lapack.]) ;
diff --git a/config/m4/scalapack.m4 b/config/m4/scalapack.m4
index fbbab4780d..942520a099 100644
--- a/config/m4/scalapack.m4
+++ b/config/m4/scalapack.m4
@@ -9,34 +9,28 @@
 AC_DEFUN([AC_SLK_SETUP],[
 
 AC_ARG_ENABLE(par_linalg,   AS_HELP_STRING([--enable-par-linalg],[Use parallel linear algebra. Default is no]))
-AC_ARG_WITH(blacs_libs,    [AS_HELP_STRING([--with-blacs-libs=(libs|mkl)],[Use BLACS libraries <libs> or setup MKL],[32])])
 AC_ARG_WITH(scalapack_libs,[AS_HELP_STRING([--with-scalapack-libs=(libs|mkl)],[Use SCALAPACK libraries <libs> or setup MKL],[32])])
 AC_ARG_WITH(elpa_libs,     [AS_HELP_STRING([--with-elpa-libs=(libs)],[Use ELPA libraries <libs>],[32])])
 AC_ARG_WITH(elpa_includedir,AS_HELP_STRING([--with-elpa-includedir=<path>],[Path to the elpa include directory],[32]))
 
 SCALAPACK_LIBS=""
-BLACS_LIBS=""
 ELPA_LIBS=""
 ELPA_INCS=""
 
 reset_LIBS="$LIBS"
 
 enable_scalapack="no"
-enable_blacs="no"
 enable_elpa="no"
 
 internal_slk="no"
-internal_blacs="no"
 internal_elpa="no"
 
 compile_slk="no"
-compile_blacs="no"
 compile_elpa="no"
 
 #
-# Set fortran linker names of BLACS/SCALAPACK functions to check for.
+# Set fortran linker names of SCALAPACK functions to check for.
 #
-blacs_routine="blacs_set"
 scalapack_routine="pcheev"
 elpa_routine="elpa_init"
 mpi_routine=MPI_Init
@@ -78,23 +72,9 @@ fi
 # Parse configure options
 #
 if test "$enable_par_linalg" = "yes" ; then
-  enable_blacs="internal" ;
   enable_scalapack="internal" ;
 fi
 #
-case $with_blacs_libs in
-  yes) enable_blacs="internal" ;;
-  no)  enable_blacs="no" ; enable_par_linalg="no" ;;
-  mkl) 
-    if test "$try_mkl_scalapack" = "no" ; then
-       enable_blacs="no" ; enable_par_linalg="no" 
-    else
-       enable_blacs="check"; BLACS_LIBS="$try_mkl_scalapack" 
-    fi
-    ;;
-  *) enable_blacs="check"; BLACS_LIBS="$with_blacs_libs" ;;
-esac
-#
 case $with_scalapack_libs in
   yes) enable_scalapack="internal" ;;
   no) enable_scalapack="no" ; enable_par_linalg="no" ;;
@@ -116,27 +96,10 @@ esac
 #
 if test "$mpibuild"  = "yes"; then
   #
-  if test "$enable_blacs" = "check" ; then
-    #
-    acx_blacs_save_LIBS="$BLACS_LIBS"
-    LIBS="$LIBS $FLIBS $LAPACK_LIBS $BLAS_LIBS"
-    # First, check BLACS_LIBS environment variable
-    if test "x$BLACS_LIBS" != x; then
-      save_LIBS="$LIBS"; LIBS="$BLACS_LIBS $LIBS"
-      AC_MSG_CHECKING([for $blacs_routine in $BLACS_LIBS])
-      AC_TRY_LINK_FUNC($blacs_routine, [enable_blacs="yes"], [enable_blacs="internal"; BLACS_LIBS=""])
-      AC_MSG_RESULT($enable_blacs)
-      BLACS_LIBS="$acx_blacs_save_LIBS"
-      LIBS="$save_LIBS"
-    else
-      enable_blacs="no";
-    fi
-    #
-  fi
   #
   if test "$enable_scalapack" = "check" ; then
     acx_scalapack_save_LIBS="$SCALAPACK_LIBS"
-    LIBS="$LIBS $FLIBS $LAPACK_LIBS $BLAS_LIBS $BLACS_LIBS"
+    LIBS="$LIBS $FLIBS $LAPACK_LIBS $BLAS_LIBS"
     # First, check SCALAPACK_LIBS environment variable
     if test "x$SCALAPACK_LIBS" != x; then
       save_LIBS="$LIBS"; LIBS="$SCALAPACK_LIBS $LIBS"
@@ -152,7 +115,7 @@ if test "$mpibuild"  = "yes"; then
   #
   if test "$enable_elpa" = "check" ; then
     acx_elpa_save_LIBS="$ELPA_LIBS"
-    LIBS="$LIBS $FLIBS $SCALAPACK_LIBS $LAPACK_LIBS $BLAS_LIBS $BLACS_LIBS"
+    LIBS="$LIBS $FLIBS $SCALAPACK_LIBS $LAPACK_LIBS $BLAS_LIBS"
     # First, check ELPA_LIBS environment variable
     if test "x$ELPA_LIBS" != x; then
       save_LIBS="$LIBS"; LIBS="$ELPA_LIBS $LIBS"
@@ -173,10 +136,8 @@ if test "$mpibuild"  = "yes"; then
   #
   if test x"$enable_par_linalg" = "xyes"; then
     if test x"$enable_int_linalg" = "xyes"; then
-      enable_blacs="internal";
       enable_scalapack="internal";
     else
-      if test "$enable_blacs"     = "no"; then enable_blacs="internal"    ; fi
       if test "$enable_scalapack" = "no"; then enable_scalapack="internal"; fi
     fi
   fi
@@ -189,16 +150,6 @@ if test "$mpibuild"  = "yes"; then
   #  fi
   #fi
   #
-  if test "$mpif_found" = "yes" && test "$enable_blacs" = "internal"; then
-    enable_blacs="yes";
-    internal_blacs="yes";
-    BLACS_LIBS="${extlibs_path}/${FCKIND}/${FC}/lib/libblacs.a ${extlibs_path}/${FCKIND}/${FC}/lib/libblacs_C_init.a ${extlibs_path}/${FCKIND}/${FC}/lib/libblacs_init.a";
-    if test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libblacs.a" && test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libblacs_init.a"; then
-      compile_blacs="no"
-    else
-      compile_blacs="yes"
-    fi
-  fi
   #
   if test "$mpif_found" = "yes" && test "$enable_scalapack" = "internal"; then
     enable_scalapack="yes"
@@ -225,7 +176,7 @@ if test "$mpibuild"  = "yes"; then
   #
 fi
 #
-if test "$enable_blacs" = "yes" && test "$enable_scalapack" = "yes" ; then
+if test "$enable_scalapack" = "yes" ; then
   def_scalapack="-D_SCALAPACK"
   if test "$enable_elpa" = "yes" ; then
     def_elpa="-D_ELPA"
@@ -239,25 +190,20 @@ if test "$enable_blacs" = "yes" && test "$enable_scalapack" = "yes" ; then
   fi
 else
   enable_scalapack="no"
-  enable_blacs="no"
   enable_elpa="no"
   def_scalapack=""
   def_elpa=""
-  BLACS_LIBS=""
   SCALAPACK_LIBS=""
   ELPA_LIBS=""
   ELPA_INCS=""
-  compile_blacs="no"
   compile_slk="no"
   compile_elpa="no"
-  internal_blacs="no"
   internal_slk="no"
   internal_elpa="no"
 fi
 #
 LIBS="$reset_LIBS"
 #
-AC_SUBST(BLACS_LIBS)
 AC_SUBST(SCALAPACK_LIBS)
 AC_SUBST(ELPA_LIBS)
 AC_SUBST(ELPA_INCS)
@@ -267,8 +213,6 @@ AC_SUBST(def_scalapack)
 AC_SUBST(def_elpa)
 AC_SUBST(compile_slk)
 AC_SUBST(internal_slk)
-AC_SUBST(compile_blacs)
-AC_SUBST(internal_blacs)
 AC_SUBST(compile_elpa)
 AC_SUBST(internal_elpa)
 
diff --git a/config/m4/yambo_diago.m4 b/config/m4/yambo_diago.m4
index 46ef4a9496..952a32e3ba 100644
--- a/config/m4/yambo_diago.m4
+++ b/config/m4/yambo_diago.m4
@@ -5,46 +5,46 @@
 #
 # Authors (see AUTHORS file for details): AM
 #
-AC_DEFUN([AC_YDIAGO],[
+AC_DEFUN([AC_LDIAGO],[
 
 #
-AC_ARG_WITH(ydiago-branch,[AS_HELP_STRING([--with-ydiago-branch=<branch>],[Use the <branch> of the ydiago repository.],[32])],,[with_ydiago_branch=none])
+AC_ARG_WITH(ldiago-branch,[AS_HELP_STRING([--with-ldiago-branch=<branch>],[Use the <branch> of the ldiago repository.],[32])],,[with_ldiago_branch=none])
 #
-AC_CONFIG_FILES([lib/ydiago/make_ydiago.inc:lib/ydiago/make_ydiago.inc.in])
+AC_CONFIG_FILES([lib/ldiago/make_ldiago.inc:lib/ldiago/make_ldiago.inc.in])
 
 if test "$def_scalapack" = "-D_SCALAPACK" ; then
 
-  compile_ydiago="yes"
+  compile_ldiago="yes"
   
-  # GPU flags are passed to ydiago compilation only if elpa library is available
+  # GPU flags are passed to ldiago compilation only if elpa library is available
   if test ! x"$def_gpu" = "x" && test "$def_elpa" = "-D_ELPA" ; then
-    ydiago_opt="$def_gpu $def_elpa"
-    YDIAGO_GPU_SUPPORT="$GPU_SUPPORT"
+    ldiago_opt="$def_gpu $def_elpa"
+    LDIAGO_GPU_SUPPORT="$GPU_SUPPORT"
   else
-    ydiago_opt="$def_elpa"
-    YDIAGO_GPU_SUPPORT="no_gpu"
+    ldiago_opt="$def_elpa"
+    LDIAGO_GPU_SUPPORT="no_gpu"
   fi
 
-  YDIAGO_LIBS="${extlibs_path}/${FCKIND}/${FC}/${YDIAGO_GPU_SUPPORT}/lib/libydiago.a"
-  YDIAGO_INCS="$IFLAG${extlibs_path}/${FCKIND}/${FC}/${YDIAGO_GPU_SUPPORT}/include/"
-  #YDIAGO_LIBS="${compdir}/lib/libydiago.a"
-  #YDIAGO_INCS="$IFLAG${compdir}/include/"
+  LDIAGO_LIBS="${extlibs_path}/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/lib/libldiago.a"
+  LDIAGO_INCS="$IFLAG${extlibs_path}/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/include/"
+  #LDIAGO_LIBS="${compdir}/lib/libldiago.a"
+  #LDIAGO_INCS="$IFLAG${compdir}/include/"
 
 else
 
-  compile_ydiago="no"
-  ydiago_opt=""
-  YDIAGO_LIBS=""
-  YDIAGO_INCS=""
+  compile_ldiago="no"
+  ldiago_opt=""
+  LDIAGO_LIBS=""
+  LDIAGO_INCS=""
 
 fi
 
-AC_SUBST(YDIAGO_LIBS)
-AC_SUBST(YDIAGO_INCS)
-AC_SUBST(YDIAGO_GPU_SUPPORT)
+AC_SUBST(LDIAGO_LIBS)
+AC_SUBST(LDIAGO_INCS)
+AC_SUBST(LDIAGO_GPU_SUPPORT)
 
-AC_SUBST(compile_ydiago)
-AC_SUBST(ydiago_opt)
-AC_SUBST(with_ydiago_branch)
+AC_SUBST(compile_ldiago)
+AC_SUBST(ldiago_opt)
+AC_SUBST(with_ldiago_branch)
 
 ])
diff --git a/config/m4/yambo_folders.m4 b/config/m4/yambo_folders.m4
index 29516f9485..80b39b6002 100644
--- a/config/m4/yambo_folders.m4
+++ b/config/m4/yambo_folders.m4
@@ -112,9 +112,9 @@ if ! test -d "$extlibs_path/${FCKIND}/${FC}/${GPU_SUPPORT}/lib";       then mkdi
 if ! test -d "$extlibs_path/${FCKIND}/${FC}/${GPU_SUPPORT}/include";   then mkdir    "$extlibs_path/${FCKIND}/${FC}/${GPU_SUPPORT}/include";    fi
 #
 if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago";                                 then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago";                                  fi
-if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}";           then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}";            fi
-if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/lib";       then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/lib";        fi
-if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/include";   then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/include";    fi
+if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}";           then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}";            fi
+if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/lib";       then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/lib";        fi
+if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/include";   then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/include";    fi
 #
 if  test -d "$extlibs_path/${FCKIND}/${FC}" ; then
  BIN_LIBRARIES=$extlibs_path/${FCKIND}/${FC}/bin/* ;
diff --git a/config/m4/yambo_specific.m4 b/config/m4/yambo_specific.m4
index c251bac812..61cfc63612 100644
--- a/config/m4/yambo_specific.m4
+++ b/config/m4/yambo_specific.m4
@@ -57,6 +57,15 @@ AC_SUBST(def_dp)
 AC_SUBST(build_precision)
 #
 # ============================================================================
+# VERSION for gamma only case
+AC_ARG_ENABLE(gamma_only, AC_HELP_STRING([--enable-gamma-only], [Gamma only build. Default is no.]))
+def_gamma_only=""
+if test x"$enable_gamma_only" = "x";    then enable_gamma_only="no";   fi
+if test x"$enable_gamma_only" = "xyes"; then def_gamma_only="-D_GAMMA_ONLY";  fi
+AC_SUBST(enable_gamma_only)
+AC_SUBST(def_gamma_only)
+#
+# ============================================================================
 #
 # Time Profiling (mod_timing)
 #
diff --git a/config/mk/global/actions/compile_external_libraries.mk b/config/mk/global/actions/compile_external_libraries.mk
index e58f57a2d7..8b1e56f08d 100644
--- a/config/mk/global/actions/compile_external_libraries.mk
+++ b/config/mk/global/actions/compile_external_libraries.mk
@@ -41,9 +41,11 @@ scalapack: lapack
 	@if test "$(do_slk)" = yes ; then LIBS="scalapack" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) scalapack-dl ; $(mk_external_lib); fi
 elpa: scalapack blacs
 	@if test "$(do_elpa)" = yes ; then LIBS="elpa" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) elpa-dl ; $(mk_external_lib); fi
+magma: lapack
+	@if test "$(do_magma)" = yes ; then LIBS="magma" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) magma-dl ; $(mk_external_lib); fi
 petsc:
 	@if test "$(do_petsc)" = yes ; then LIBS="petsc" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) petsc-dl; $(mk_external_lib); fi
 slepc: petsc
 	@if test "$(do_slepc)" = yes ; then LIBS="slepc" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) slepc-dl; $(mk_external_lib); fi
-ydiago: scalapack blacs elpa
-	@if test "$(do_ydiago)" = yes ; then LIBS="ydiago" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) ydiago-dl; $(mk_external_lib); fi
+ldiago: scalapack blacs elpa
+	@if test "$(do_ldiago)" = yes ; then LIBS="ldiago" ; BASE="lib"; $(MAKE) $(MAKEFLAGS) ldiago-dl; $(mk_external_lib); fi
diff --git a/config/mk/global/actions/compile_interfaces.mk b/config/mk/global/actions/compile_interfaces.mk
index 25b8ac9f48..b5c61900fb 100644
--- a/config/mk/global/actions/compile_interfaces.mk
+++ b/config/mk/global/actions/compile_interfaces.mk
@@ -7,9 +7,9 @@
 #
 # Variable definitions
 #
-I_PRECMP=
+I_PRECMP=-D_ELPH
 ifneq (,$(findstring p2y,$(MAKECMDGOALS)))
- I_PRECMP=$(p2ycpp)
+ I_PRECMP+=$(p2ycpp)
 endif
 #
 GOALS=a2y c2y
diff --git a/config/mk/global/actions/compile_internal_libraries.mk b/config/mk/global/actions/compile_internal_libraries.mk
index 7a6e35f5a4..7b7276411a 100644
--- a/config/mk/global/actions/compile_internal_libraries.mk
+++ b/config/mk/global/actions/compile_internal_libraries.mk
@@ -3,7 +3,7 @@
 #
 # Copyright (C) 2020 The Yambo Team
 #
-# Authors (see AUTHORS file for details): AM
+# Authors (see AUTHORS file for details): AM DS
 #
 qe_pseudo: 
 	@+LIBS="qe_pseudo"; BASE="lib" ; ADF="$(STAMP_DBLE)"; LAB=""; $(todo_lib); $(mk_lib)
@@ -13,3 +13,5 @@ math77:
 	@+LIBS="math77";    BASE="lib" ; ADF="$(STAMP_DBLE)"; LAB=""; $(todo_lib); $(mk_lib)
 local: 
 	@+LIBS="local" ;    BASE="lib" ; ADF="$(STAMP_DBLE)"; LAB=""; $(todo_lib); $(mk_lib)
+magma_fmodules: 
+	@+LIBS="magma_fmodules"; BASE="lib" ; ADF="$(STAMP_DBLE)"; LAB=""; $(todo_lib); $(mk_lib)
diff --git a/config/mk/global/actions/download_external_libraries.mk b/config/mk/global/actions/download_external_libraries.mk
index a1d3d1c6da..2abbe356c8 100644
--- a/config/mk/global/actions/download_external_libraries.mk
+++ b/config/mk/global/actions/download_external_libraries.mk
@@ -18,7 +18,7 @@ fftw-dl:
 fftqe-dl: 
 	@LIB2DO="fftqe"; $(get_external_libraries)
 yaml-dl: 
-	@LIB2DO="fftqe"; $(get_external_libraries)
+	@LIB2DO="yaml"; $(get_external_libraries)
 futile-dl: 
 	@LIB2DO="futile"; $(get_external_libraries)
 iotk-dl: 
@@ -37,9 +37,11 @@ scalapack-dl:
 	@LIB2DO="scalapack"; $(get_external_libraries)
 elpa-dl: 
 	@LIB2DO="elpa"; $(get_external_libraries)
+magma-dl: 
+	@LIB2DO="magma"; $(get_external_libraries)
 petsc-dl: 
 	@LIB2DO="petsc"; $(get_external_libraries)
 slepc-dl: 
 	@LIB2DO="slepc"; $(get_external_libraries)
-ydiago-dl:
-	@LIB2DO="ydiago"; $(get_external_libraries)
+ldiago-dl:
+	@LIB2DO="ldiago"; $(get_external_libraries)
diff --git a/config/mk/global/defs.mk.in b/config/mk/global/defs.mk.in
index 41dd285d75..d506098180 100644
--- a/config/mk/global/defs.mk.in
+++ b/config/mk/global/defs.mk.in
@@ -14,7 +14,8 @@ scalapack   = @def_scalapack@
 elpa        = @def_elpa@
 slepc       = @def_slepc@
 fft         = @def_fft@
-xcpp        = @def_netcdf@ @def_mpi@ @def_fft@ @def_slepc@ @def_scalapack@ @def_elpa@ @def_compiler@ @def_dp@ @def_openmp@ @def_time_profile@ @def_memory_profile@ @def_uspp@ @def_gpu@ @def_yaml@
+magma       = @def_magma@
+xcpp        = @def_netcdf@ @def_mpi@ @def_fft@ @def_slepc@ @def_scalapack@ @def_elpa@ @def_magma@ @def_compiler@ @def_dp@ @def_openmp@ @def_time_profile@ @def_memory_profile@ @def_uspp@ @def_gpu@ @def_yaml@ @def_gamma_only@
 p2ycpp      = @PW_CPP@
 keep_objs   = @enable_keep_objects@
 do_blacs    = @compile_blacs@
@@ -35,7 +36,9 @@ do_libxc    = @compile_libxc@
 do_devxlib  = @compile_devxlib@
 do_petsc    = @compile_petsc@
 do_slepc    = @compile_slepc@
-do_ydiago   = @compile_ydiago@
+do_ldiago   = @compile_ldiago@
+do_magma    = @compile_magma@
+do_magma_fmodules = @compile_magma_fmodules@
 shell       = @SHELL@
 package_bugreport = @PACKAGE_BUGREPORT@
 prefix      = @prefix@
diff --git a/config/mk/global/functions/help.mk b/config/mk/global/functions/help.mk
index df65dec470..3966e210b3 100644
--- a/config/mk/global/functions/help.mk
+++ b/config/mk/global/functions/help.mk
@@ -46,20 +46,15 @@ define yambo_help
   fi
   if [ "$(1)" = "intro" ] ; then \
    $(ECHO)  "\n *** Interfaces ***\n " ; \
-   $(ECHO)  " a2y =  ABINIT to Yambo interface ";\
-   $(ECHO)  " p2y =  QuantumEspresso to Yambo interface ";\
-   $(ECHO)  "        http://www.yambo-code.eu/wiki/index.php?title=Bulk_material:_h-BN ";\
-   $(ECHO)  " c2y =  CPMD to Yambo interface";\
+   $(ECHO)  " a2y =  Interface with ABINIT ";\
+   $(ECHO)  " p2y =  Interface with QuantumEspresso ";\
    $(ECHO)  "\n *** Main Components***\n" ;\
-   $(ECHO)  " yambo =  main Yambo code ";\
-   $(ECHO)  " ypp   =  Yambo Post Processing utility ";\
+   $(ECHO)  " yambo =  Main executable for GW and BSE ";\
+   $(ECHO)  " ypp   =  Post Processing utility ";\
    $(ECHO)  "\n *** Other projects ***\n" ;\
-   $(ECHO)  " yambo_sc =  Self-consistent (COHSEX, HF, DFT) project";\
-   $(ECHO)  " yambo_rt =  Real-time dynamics project";\
-   $(ECHO)  "             http://www.yambo-code.eu/wiki/index.php?title=Linear_response_from_real_time_simulations";\
-   $(ECHO)  " yambo_nl =  Non-linear optics project ";\
-   $(ECHO)  "             http://www.yambo-code.eu/wiki/index.php?title=Tutorials#Non_linear_response";\
-   $(ECHO)  " yambo_ph =  Electron-phonon coupling project ";\
-   $(ECHO)  "             http://www.yambo-code.eu/wiki/index.php?title=Tutorials#Electron_phonon_coupling\n";\
+   $(ECHO)  " yambo_sc =  Self-consistent module ";\
+   $(ECHO)  " yambo_rt =  Real-time dynamics module ";\
+   $(ECHO)  " yambo_nl =  Non-linear optics module ";\
+   $(ECHO)  " yambo_ph =  Electron-phonon coupling module ";\
   fi
 endef
diff --git a/config/mk/global/libraries.mk b/config/mk/global/libraries.mk
index c428e0dcea..791acced83 100644
--- a/config/mk/global/libraries.mk
+++ b/config/mk/global/libraries.mk
@@ -12,7 +12,11 @@ ifeq ($(wildcard config/mk/global/defs.mk),config/mk/global/defs.mk)
 endif
 include lib/archive/package.list
 #
-INT_LIBS      = qe_pseudo slatec math77 local
+ifeq ($(do_magma_fmodules),yes)
+  INT_LIBS = qe_pseudo slatec math77 local magma_fmodules
+else
+  INT_LIBS      = qe_pseudo slatec math77 local
+endif
 YAMBO_INT_LIBS= Yio 
 YLIBIO        = modules Yio
 YLIBIO_LD     = $(YLIBIO)
@@ -22,14 +26,14 @@ YLIBIO_LD     = $(YLIBIO)
 #
 BASIC_LIBS   = driver tools modules memory allocations matrices linear_algebra parallel parser communicate output common timing Yio io $(IO_MODE) \
                xc_functionals interface stop_and_restart wf_and_fft bz_ops coulomb
-BASIC_LIBS_LD= driver tools memory allocations communicate modules matrices linear_algebra bz_ops parallel parser output common timing Yio io $(IO_MODE) \
+BASIC_LIBS_LD= driver tools memory allocations communicate modules matrices linear_algebra Yio io $(IO_MODE) common bz_ops parallel parser output timing \
                xc_functionals interface stop_and_restart wf_and_fft coulomb
 
 MAIN_LIBS    = $(BASIC_LIBS) interpolate qp_control setup tddft dipoles pol_function qp acfdt bse
 MAIN_LIBS_LD = $(BASIC_LIBS_LD) interpolate qp_control setup tddft dipoles pol_function qp acfdt bse
 
-PJ_PHLIBS    = $(BASIC_LIBS) interpolate qp_control setup tddft dipoles pol_function el-ph qp acfdt bse
-PJ_PHLIBS_LD = $(BASIC_LIBS_LD) interpolate qp_control setup tddft dipoles pol_function el-ph qp acfdt bse
+PJ_PHLIBS    = $(BASIC_LIBS) interpolate qp_control setup tddft dipoles pol_function el-ph exc-ph qp acfdt bse
+PJ_PHLIBS_LD = $(BASIC_LIBS_LD) interpolate qp_control setup tddft dipoles pol_function el-ph bse exc-ph qp acfdt
 
 PJ_SCLIBS    = $(MAIN_LIBS) collisions hamiltonian sc
 PJ_SCLIBS_LD = $(MAIN_LIBS_LD) hamiltonian collisions sc
@@ -80,10 +84,10 @@ YPPNL_LIBS_LD      = $(YPPRT_LIBS_LD)
 #
 YPP_MAIN_LIBS      = $(BASIC_LIBS) interpolate qp_control setup interface tddft dipoles pol_function qp bse
 YPP_MAIN_LIBS_LD   = $(BASIC_LIBS_LD) interpolate qp_control setup interface tddft dipoles pol_function qp bse
+YPPPH_MAIN_LIBS    = $(YPP_MAIN_LIBS) el-ph exc-ph
+YPPPH_MAIN_LIBS_LD = $(YPP_MAIN_LIBS_LD) el-ph exc-ph
 YPPSC_MAIN_LIBS    = $(YPP_MAIN_LIBS) collisions hamiltonian sc
 YPPSC_MAIN_LIBS_LD = $(YPP_MAIN_LIBS_LD) collisions hamiltonian sc
-YPPPH_MAIN_LIBS    = $(YPP_MAIN_LIBS)
-YPPPH_MAIN_LIBS_LD = $(YPP_MAIN_LIBS_LD)
 YPPRT_MAIN_LIBS    = $(BASIC_LIBS) real_time_control interpolate qp_control setup interface \
                      dipoles pol_function qp bse collisions hamiltonian 
 YPPRT_MAIN_LIBS_LD = $(BASIC_LIBS_LD) real_time_control interpolate qp_control setup interface \
diff --git a/config/mk/local/makefile b/config/mk/local/makefile
index ca0a3d5d2d..380c96b00a 100644
--- a/config/mk/local/makefile
+++ b/config/mk/local/makefile
@@ -24,7 +24,7 @@ include $(compdir)/config/setup
 #
 idriver=$(IFLAG)$(includedir)/driver $(IFLAG)$(includedir)/version
 lf90include=$(IFLAG)$(includedir) $(IFLAG)$(includedir)/headers/common $(IFLAG)$(includedir)/headers/parser $(idriver)
-lf90libinclude=$(iiotk) $(inetcdff) $(inetcdf) $(ipetsc) $(islepc) $(ielpa) $(iydiago) $(ihdf5) $(ilibxc) $(idevxlib) $(icudalib) $(ifft) $(ifutile) $(iyaml) $(idriver)
+lf90libinclude=$(iiotk) $(inetcdff) $(inetcdf) $(ipetsc) $(islepc) $(imagma) $(ielpa) $(ildiago) $(ihdf5) $(ilibxc) $(idevxlib) $(icudalib) $(ifft) $(ifutile) $(iyaml) $(idriver)
 mfiles=find . -maxdepth 1 -name '*.mod'
 #
 # OBJECTS
diff --git a/config/report.in b/config/report.in
index a50aee5359..5d81566b77 100644
--- a/config/report.in
+++ b/config/report.in
@@ -1,5 +1,5 @@
 #
-# [VER] @SVERSION@.@SSUBVERSION@
+# Yambo Fork: @PACKAGE_STRING@
 #
 # Legend: [E]=external library [C]=internal lib to be compiled [I]=internal lib already compiled [X]=used [-]=not used
 # 
@@ -16,6 +16,7 @@
 # [EDITOR] @editor@
 # [ MAKE ] @MAKE@
 #
+# [@GAMMA_ONLY_check@] Gamma-only version of the code
 # [@DP_check@] Double precision
 # [@KEEP_OBJS_check@] Keep object files
 # [@TIME_profile_check@] Run-Time timing profile 
@@ -60,18 +61,19 @@
 # [@SLK_check@] SCALAPACK : @SCALAPACK_LIBS_R@ 
 # [@ELPA_check@] ELPA      : @ELPA_LIBS_R@ 
 #                 @ELPA_INCS_R@
-# [@BLACS_check@] BLACS     : @BLACS_LIBS_R@
 # [@FFT_check@] FFT       : @FFT_LIBS_R@
 #                 @FFT_INCS_R@
 # [@PETSC_check@] PETSC     : @PETSC_LIBS_R@ @PETSC_info@
 #                 @PETSC_INCS_R@ 
 # [@SLEPC_check@] SLEPC     : @SLEPC_LIBS_R@ @SLEPC_info@
 #                 @SLEPC_INCS_R@ 
+# [@MAGMA_check@] MAGMA     : @MAGMA_LIBS_R@
+#                 @MAGMA_INCS_R@ (fortran modules to be compiled: @compile_magma_fmodules@) 
 #
 # > OTHERs: @DEVXLIB_info@
 #
-# [@ydiago_check@] Ydiago    : @YDIAGO_LIBS_R@
-#                 @YDIAGO_INCS_R@
+# [@LDIAGO_check@] Ldiago    : @LDIAGO_LIBS_R@
+#                 @LDIAGO_INCS_R@
 # [@LIBXC_check@] LibXC     : @LIBXC_LIBS_R@
 #                 @LIBXC_INCS_R@
 # [@DEVXLIB_check@] DevXlib   : @DEVXLIB_LIBS_R@
@@ -86,8 +88,8 @@
 # FC kind = @FCKIND@ @FCVERSION@
 # MPI kind= @MPIKIND@
 #
-# [ CPP ] @CPP@ @CPPFLAGS_yambo@ @def_netcdf@ @def_mpi@ @def_fft@ @def_slepc@ @def_scalapack@ @def_elpa@ @def_compiler@ @def_dp@ @def_openmp@ @def_time_profile@ @def_uspp@ @def_gpu@ @def_yaml@ @PW_CPP@
-# [ FPP ] @FPP@ @def_netcdf@ @def_mpi@ @def_fft@ @def_slepc@ @def_scalapack@ @def_elpa@ @def_compiler@ @def_dp@ @def_openmp@ @def_time_profile@ @def_uspp@ @def_gpu@ @def_yaml@
+# [ CPP ] @CPP@ @CPPFLAGS_yambo@ @def_netcdf@ @def_mpi@ @def_fft@ @def_slepc@ @def_scalapack@ @def_elpa@ @def_magma@ @def_compiler@ @def_dp@ @def_openmp@ @def_time_profile@ @def_uspp@ @def_gpu@ @def_yaml@ @def_gamma_only@ @PW_CPP@
+# [ FPP ] @FPP@ @def_netcdf@ @def_mpi@ @def_fft@ @def_slepc@ @def_scalapack@ @def_elpa@ @def_magma@ @def_compiler@ @def_dp@ @def_openmp@ @def_time_profile@ @def_uspp@ @def_gpu@ @def_yaml@ @def_gamma_only@
 # [ CC  ] @CC@ @CFLAGS@
 # [ FC  ] @FC@ @FCFLAGS@ @OPENMPLIBS@ @GPU_FLAGS@
 # [ FCUF] @FCUFLAGS@ @GPU_FLAGS@
diff --git a/config/setup.in b/config/setup.in
index 029efdbba8..f0bbf04e78 100644
--- a/config/setup.in
+++ b/config/setup.in
@@ -53,7 +53,6 @@ llapack     = @LAPACK_LIBS@
 lblaspetsc  = @BLAS_PETSC_LIBS@
 llapackpetsc= @LAPACK_PETSC_LIBS@
 petsc_flgs  = @PETSC_FLAGS@
-lblacs      = @BLACS_LIBS@
 lscalapack  = @SCALAPACK_LIBS@
 lelpa       = @ELPA_LIBS@
 ielpa       = @ELPA_INCS@
@@ -94,7 +93,7 @@ drocmlib    = @LIBROCM_PATH@
 devxlib_flgs= @DEVXLIB_FLAGS@
 devxlib_clib= @DEVXLIB_CUDALIBS@
 gpu_support = @GPU_SUPPORT@
-ydiago_gpu_support = @YDIAGO_GPU_SUPPORT@
+ldiago_gpu_support = @LDIAGO_GPU_SUPPORT@
 lfft        = @FFT_LIBS@
 ifft        = @FFT_INCS@
 liotk       = @IOTK_LIBS@
@@ -105,8 +104,10 @@ lfutile     = @FUTILE_LIBS@
 ifutile     = @FUTILE_INCS@
 letsf       = @ETSF_LIBS@
 ietsf       = @ETSF_INCS@
-lydiago     = @YDIAGO_LIBS@
-iydiago     = @YDIAGO_INCS@
+lldiago     = @LDIAGO_LIBS@
+ildiago     = @LDIAGO_INCS@
+lmagma      = @MAGMA_LIBS@
+imagma      = @MAGMA_INCS@
 mpipath     = @MPI_PATH@
 #
 # VPATH
diff --git a/config/stamps_and_lists/project_dependencies.stamp b/config/stamps_and_lists/project_dependencies.stamp
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/configure b/configure
index 9b34337451..b8db5f37c2 100755
--- a/configure
+++ b/configure
@@ -1,8 +1,8 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.72 for Yambo 5.4.0 r.24143 h.77ed66f02.
+# Generated by GNU Autoconf 2.72 for Lumen 2.0.0.
 #
-# Report bugs to <yambo@yambo-code.org>.
+# Report bugs to <https://gitlab.com/lumen-code/lumen/-/issues>.
 #
 #
 # Copyright (C) 1992-1996, 1998-2017, 2020-2023 Free Software Foundation,
@@ -270,10 +270,10 @@ then :
     printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later."
   else
     printf "%s\n" "$0: Please tell bug-autoconf@gnu.org and
-$0: yambo@yambo-code.org about your system, including any
-$0: error possibly output before this message. Then install
-$0: a modern shell, or manually run the script under such a
-$0: shell if you do have one."
+$0: https://gitlab.com/lumen-code/lumen/-/issues about your
+$0: system, including any error possibly output before this
+$0: message. Then install a modern shell, or manually run
+$0: the script under such a shell if you do have one."
   fi
   exit 1
 fi ;;
@@ -601,12 +601,12 @@ MFLAGS=
 MAKEFLAGS=
 
 # Identity of this package.
-PACKAGE_NAME='Yambo'
-PACKAGE_TARNAME='yambo'
-PACKAGE_VERSION='5.4.0 r.24143 h.77ed66f02'
-PACKAGE_STRING='Yambo 5.4.0 r.24143 h.77ed66f02'
-PACKAGE_BUGREPORT='yambo@yambo-code.org'
-PACKAGE_URL=''
+PACKAGE_NAME='Lumen'
+PACKAGE_TARNAME='lumen'
+PACKAGE_VERSION='2.0.0'
+PACKAGE_STRING='Lumen 2.0.0'
+PACKAGE_BUGREPORT='https://gitlab.com/lumen-code/lumen/-/issues'
+PACKAGE_URL='www.lumen-code.org'
 
 ac_default_prefix=$PWD
 ac_unique_file="driver/yambo.F"
@@ -660,6 +660,8 @@ PETSC_INCS_R
 PETSC_LIBS_R
 BLACS_INCS_R
 BLACS_LIBS_R
+MAGMA_INCS_R
+MAGMA_LIBS_R
 ELPA_INCS_R
 ELPA_LIBS_R
 SCALAPACK_INCS_R
@@ -684,12 +686,13 @@ YAML_INCS_R
 YAML_LIBS_R
 IOTK_INCS_R
 IOTK_LIBS_R
-YDIAGO_INCS_R
-YDIAGO_LIBS_R
+LDIAGO_INCS_R
+LDIAGO_LIBS_R
 MPI_info
 MPI_check
 LIBCUDA_check
 DEVXLIB_check
+MAGMA_check
 LIBXC_check
 YPY_check
 YDB_check
@@ -697,6 +700,7 @@ SLEPC_info
 PETSC_info
 SLEPC_check
 PETSC_check
+LDIAGO_check
 ELPA_check
 SLK_check
 BLACS_check
@@ -719,19 +723,28 @@ CUDA_check
 MEM_profile_check
 TIME_profile_check
 KEEP_OBJS_check
+GAMMA_ONLY_check
 DP_check
-with_ydiago_branch
-ydiago_opt
-compile_ydiago
-YDIAGO_GPU_SUPPORT
-YDIAGO_INCS
-YDIAGO_LIBS
+internal_magma
+compile_magma_fmodules
+compile_magma
+enable_magma
+def_magma
+MAGMA_INCS
+MAGMA_LIBS
+NVIDIA_ARCH
 with_devxlib_branch
 DEVXLIB_info
 internal_devxlib
 compile_devxlib
 DEVXLIB_INCS
 DEVXLIB_LIBS
+with_ldiago_branch
+ldiago_opt
+compile_ldiago
+LDIAGO_GPU_SUPPORT
+LDIAGO_INCS
+LDIAGO_LIBS
 MKLGPU_LIBS
 LIBROCM_PATH
 LIBROCM_INCS
@@ -805,8 +818,6 @@ HDF5_INCS
 HDF5_LIBS
 internal_elpa
 compile_elpa
-internal_blacs
-compile_blacs
 internal_slk
 compile_slk
 def_elpa
@@ -816,7 +827,6 @@ enable_scalapack
 ELPA_INCS
 ELPA_LIBS
 SCALAPACK_LIBS
-BLACS_LIBS
 with_petsc_branch
 with_slepc_branch
 internal_slepc
@@ -936,6 +946,8 @@ MKMF_PREFIX
 def_memory_profile
 def_uspp
 def_time_profile
+def_gamma_only
+enable_gamma_only
 build_precision
 def_dp
 enable_dp
@@ -995,6 +1007,7 @@ enable_keep_objects
 enable_keep_src
 enable_keep_extlibs
 enable_dp
+enable_gamma_only
 enable_time_profile
 enable_uspp
 enable_memory_profile
@@ -1040,7 +1053,6 @@ with_petsc_includedir
 with_slepc_branch
 with_petsc_branch
 enable_par_linalg
-with_blacs_libs
 with_scalapack_libs
 with_elpa_libs
 with_elpa_includedir
@@ -1102,12 +1114,18 @@ with_rocm_libdir
 with_rocm_includedir
 with_rocm_path
 with_mklgpu_libs
+with_ldiago_branch
 with_devxlib_libs
 with_devxlib_path
 with_devxlib_libdir
 with_devxlib_includedir
 with_devxlib_branch
-with_ydiago_branch
+enable_magma
+with_magma_libs
+with_magma_incs
+with_magma_path
+with_magma_libdir
+with_magma_includedir
 '
       ac_precious_vars='build_alias
 host_alias
@@ -1675,7 +1693,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-'configure' configures Yambo 5.4.0 r.24143 h.77ed66f02 to adapt to many kinds of systems.
+'configure' configures Lumen 2.0.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1724,7 +1742,7 @@ Fine tuning of the installation directories:
   --infodir=DIR           info documentation [DATAROOTDIR/info]
   --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
   --mandir=DIR            man documentation [DATAROOTDIR/man]
-  --docdir=DIR            documentation root [DATAROOTDIR/doc/yambo]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/lumen]
   --htmldir=DIR           html documentation [DOCDIR]
   --dvidir=DIR            dvi documentation [DOCDIR]
   --pdfdir=DIR            pdf documentation [DOCDIR]
@@ -1741,7 +1759,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of Yambo 5.4.0 r.24143 h.77ed66f02:";;
+     short | recursive ) echo "Configuration of Lumen 2.0.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1754,6 +1772,7 @@ Optional Features:
   --enable-keep-src       Keep preprocessed.f90 file. Default is yes.
   --enable-keep-extlibs   Keep downloaded packages as tar.gz . Default is yes.
   --enable-dp             Double-precision build. Default is no.
+  --enable-gamma-only     Gamma only build. Default is no.
   --enable-time-profile   Extended timing profile of specific sections.
                           Default is yes.
   --enable-uspp           Enable Ultrasoft PP support
@@ -1797,6 +1816,8 @@ Optional Features:
                           The configure script will check CUDA installation
                           and report problems [default=yes]
   --enable-nvtx=<path>    Enable NVTX support [default=no]
+  --enable-magma          Enable suport for the BSE diagonalization using
+                          MAGMA. Default is no
 
 Optional Packages:
   --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@@ -1831,7 +1852,6 @@ Optional Packages:
                                 Path to the Petsc include directory
   --with-slepc-branch=<branch>  Use the <branch> of the slepc repository.
   --with-petsc-branch=<branch>  Use the <branch> of the petsc repository.
-  --with-blacs-libs=(libs|mkl)  Use BLACS libraries <libs> or setup MKL
   --with-scalapack-libs=(libs|mkl)
                                 Use SCALAPACK libraries <libs> or setup MKL
   --with-elpa-libs=(libs)       Use ELPA libraries <libs>
@@ -1891,7 +1911,8 @@ Optional Packages:
   --with-rocm-libdir=<path>     Path to the rocm lib directory
   --with-rocm-includedir=<path> Path to the rocm include directory
   --with-rocm-path=<path>       Path to rocm install directory
-  --with-mklgpu-libs=<libs>     Use librocm library <libs>
+  --with-mklgpu-libs=<libs>     Use the MKLGPU library <libs>
+  --with-ldiago-branch=<branch> Use the <branch> of the ldiago repository.
   --with-devxlib-libs=<libs>    Use devxlib libraries <libs>
   --with-devxlib-path=<path>    Path to devxlib install directory
   --with-devxlib-libdir=<path>  Path to the devxlib lib directory
@@ -1899,7 +1920,12 @@ Optional Packages:
                                 Path to the devxlib include directory
   --with-devxlib-branch=<branch>
                                 Use the <branch> of the devxlib repository.
-  --with-ydiago-branch=<branch> Use the <branch> of the ydiago repository.
+  --with-magma-libs=<libs>      Use Magma libraries <libs>
+  --with-magma-incs=<incs>      Use Magma includes <incs>
+  --with-magma-path=<path>      Path to the Magma install directory
+  --with-magma-libdir=<path>    Path to the Magma lib directory
+  --with-magma-includedir=<path>
+                                Path to the Magma include directory
 
 Some influential environment variables:
   CC          C compiler command
@@ -1923,7 +1949,8 @@ Some influential environment variables:
 Use these variables to override the choices made by 'configure' or to help
 it to find libraries and programs with nonstandard names/locations.
 
-Report bugs to <yambo@yambo-code.org>.
+Report bugs to <https://gitlab.com/lumen-code/lumen/-/issues>.
+Lumen home page: <www.lumen-code.org>.
 _ACEOF
 ac_status=$?
 fi
@@ -1987,7 +2014,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-Yambo configure 5.4.0 r.24143 h.77ed66f02
+Lumen configure 2.0.0
 generated by GNU Autoconf 2.72
 
 Copyright (C) 2023 Free Software Foundation, Inc.
@@ -2635,7 +2662,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by Yambo $as_me 5.4.0 r.24143 h.77ed66f02, which was
+It was created by Lumen $as_me 2.0.0, which was
 generated by GNU Autoconf 2.72.  Invocation command line was
 
   $ $0$ac_configure_args_raw
@@ -3409,10 +3436,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 SVERSION="5"
-SSUBVERSION="4"
+SSUBVERSION="3"
 SPATCHLEVEL="0"
-SREVISION="24143"
-SHASH="77ed66f02"
+SREVISION="26345"
+SHASH="5d014f95da"
 
 
 
@@ -3565,6 +3592,20 @@ if test x"$build_precision" = "x"; then as_fn_error $? "Non correct specificatio
 
 
 
+#
+# ============================================================================
+# VERSION for gamma only case
+# Check whether --enable-gamma_only was given.
+if test ${enable_gamma_only+y}
+then :
+  enableval=$enable_gamma_only;
+fi
+
+def_gamma_only=""
+if test x"$enable_gamma_only" = "x";    then enable_gamma_only="no";   fi
+if test x"$enable_gamma_only" = "xyes"; then def_gamma_only="-D_GAMMA_ONLY";  fi
+
+
 #
 # ============================================================================
 #
@@ -3713,6 +3754,7 @@ else case e in #(
 esac
 fi
 
+if test x"$with_echo" = "x"; then AC_PROG_ECHO; fi
 ECHO=$with_echo
 
 #
@@ -4817,6 +4859,7 @@ fi
 # Check whether to use -e or not to intepret \backslash commands
 #
 
+
 #
 # Copyright (C) 2002 M. Marques, A. Castro, A. Rubio, G. Bertsch
 #
@@ -5119,6 +5162,8 @@ fi
 # version.m4 -> version.h
 
 cat << EOF > include/version/version.h
+#pragma once
+#define LUMEN_VERSION "$PACKAGE_STRING"
 #define YAMBO_VERSION $SVERSION
 #define YAMBO_SUBVERSION $SSUBVERSION
 #define YAMBO_PATCHLEVEL $SPATCHLEVEL
@@ -7479,7 +7524,12 @@ i?86*linux*)
        CPU_FLAG="-xHost"
        #CPU_FLAG=" "
        ;;
-      *2021* | *2022* | *2023* | *2024* | *2025* )
+      *2025* )
+       CPU_FLAG=" "
+       OMPFLAGS="-qopenmp"
+       FCMFLAG="-nofor-main"
+       ;;
+      *2021* | *2022* | *2023* | *2024* )
        CPU_FLAG=" "
        OMPFLAGS="-qopenmp -parallel"
        FCMFLAG="-nofor-main"
@@ -7729,7 +7779,13 @@ aarch*linux* | arm*linux* )
        #CPU_FLAG="-xHost"
        CPU_FLAG=" "
        ;;
-      *2020* | *2021* | *2022* | *2023* | *2024* | *2025* )
+      *2025* )
+       CPU_FLAG=" "
+       OMPFLAGS="-qopenmp"
+       FCMFLAG="-nofor-main"
+       CFLAGS="-O2 -std=gnu99"
+       ;;
+      *2020* | *2021* | *2022* | *2023* | *2024* )
        CPU_FLAG=" "
        OMPFLAGS="-qopenmp -parallel"
        FCMFLAG="-nofor-main"
@@ -10775,6 +10831,31 @@ url_ydb=https://github.com/yambo-code/ydb.git
 #
 
 
+#
+#        Copyright (C) 2000-2022 the YAMBO team
+#              http://www.yambo-code.org
+#
+# Authors (see AUTHORS file for details): AM
+#
+# This file is distributed under the terms of the GNU
+# General Public License. You can redistribute it and/or
+# modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation;
+# either version 2, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will
+# be useful, but WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+# MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+#
+
+
 #
 #
 #        Copyright (C) 2000-2021 the YAMBO team
@@ -12623,7 +12704,7 @@ if test $acx_lapack_ok = no; then
 ac_fn_fc_check_func "$LINENO" "$cheev" "$as_ac_var"
 if eval test \"x\$"$as_ac_var"\" = x"yes"
 then :
-  acx_lapack_ok=yes
+  acx_lapack_ok=yes LAPACK_LIBS="$BLAS_LIBS"
 fi
 
         LIBS="$save_LIBS"
@@ -13442,6 +13523,64 @@ else case e in #(
 esac
 fi
 
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for main in -lstdc++" >&5
+printf %s "checking for main in -lstdc++... " >&6; }
+if test ${ac_cv_lib_stdcpp_main+y}
+then :
+  printf %s "(cached) " >&6
+else case e in #(
+  e) ac_check_lib_save_LIBS=$LIBS
+LIBS="-lstdc++  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+#ifdef F77_DUMMY_MAIN
+
+#  ifdef __cplusplus
+     extern "C"
+#  endif
+   int F77_DUMMY_MAIN() { return 1; }
+
+#endif
+#ifdef FC_DUMMY_MAIN
+#ifndef FC_DUMMY_MAIN_EQ_F77
+#  ifdef __cplusplus
+     extern "C"
+#  endif
+   int FC_DUMMY_MAIN() { return 1; }
+#endif
+#endif
+int
+main (void)
+{
+return main ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_stdcpp_main=yes
+else case e in #(
+  e) ac_cv_lib_stdcpp_main=no ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS ;;
+esac
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_stdcpp_main" >&5
+printf "%s\n" "$ac_cv_lib_stdcpp_main" >&6; }
+if test "x$ac_cv_lib_stdcpp_main" = xyes
+then :
+  use_libstdc="yes";
+else case e in #(
+  e) use_libstdc="no"; ;;
+esac
+fi
+
 ac_ext=${ac_fc_srcext-f}
 ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
 ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
@@ -13492,7 +13631,8 @@ printf %s "checking for Petsc in $with_petsc_path... " >&6; } ;
   try_PETSC_INCS="$IFLAG$try_petsc_incdir" ;
   try_PETSC_LIBS="-L$try_petsc_libdir -lpetsc" ;
   #
-  if test "$use_libdl"    = "yes"; then try_PETSC_LIBS="$try_PETSC_LIBS -ldl -lstdc++"   ; fi
+  if test "$use_libdl"    = "yes"; then try_PETSC_LIBS="$try_PETSC_LIBS -ldl"   ; fi
+  if test "$use_libstdc"  = "yes"; then try_PETSC_LIBS="$try_PETSC_LIBS -lstdc++"   ; fi
   #
   if test x"$with_petsc_libs" != "x" ; then try_PETSC_LIBS="$with_petsc_libs" ; fi
   if test x"$with_petsc_incs" != "x" ; then try_PETSC_INCS="$with_petsc_incs" ; fi
@@ -13566,11 +13706,11 @@ printf %s "checking for internal Petsc library... " >&6; }
   fi
   PETSC_INCS="${IFLAG}${extlibs_path}/${FCKIND}/${FC}/${build_precision}/include" ;
   #
-  if test "$use_libdl"    = "yes"; then PETSC_LIBS="$PETSC_LIBS -ldl -lstdc++"   ; fi
-  #
   petsc=yes
   if test -e "$PETSC_LIBS_DN" ; then
     PETSC_LIBS="$PETSC_LIBS_DN" ;
+    if test "$use_libdl"    = "yes"; then PETSC_LIBS="$PETSC_LIBS -ldl"   ; fi
+    if test "$use_libstdc"  = "yes"; then PETSC_LIBS="$PETSC_LIBS -lstdc++"   ; fi
     compile_petsc="no" ;
     if test "x$lapack_shared" = "x1" ; then
       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: dynamic already compiled" >&5
@@ -13583,6 +13723,8 @@ printf "%s\n" "The compilation may fail. In case remove the dynamic petsc libs."
     fi
   elif test -e "$PETSC_LIBS_ST" ; then
     PETSC_LIBS="$PETSC_LIBS_ST" ;
+    if test "$use_libdl"    = "yes"; then PETSC_LIBS="$PETSC_LIBS -ldl"   ; fi
+    if test "$use_libstdc"  = "yes"; then PETSC_LIBS="$PETSC_LIBS -lstdc++"   ; fi
     compile_petsc="no" ;
     if test "x$lapack_shared" = "x1" ; then
       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: static found, despite dynamic lapack." >&5
@@ -13774,13 +13916,6 @@ then :
 fi
 
 
-# Check whether --with-blacs_libs was given.
-if test ${with_blacs_libs+y}
-then :
-  withval=$with_blacs_libs;
-fi
-
-
 # Check whether --with-scalapack_libs was given.
 if test ${with_scalapack_libs+y}
 then :
@@ -13803,28 +13938,23 @@ fi
 
 
 SCALAPACK_LIBS=""
-BLACS_LIBS=""
 ELPA_LIBS=""
 ELPA_INCS=""
 
 reset_LIBS="$LIBS"
 
 enable_scalapack="no"
-enable_blacs="no"
 enable_elpa="no"
 
 internal_slk="no"
-internal_blacs="no"
 internal_elpa="no"
 
 compile_slk="no"
-compile_blacs="no"
 compile_elpa="no"
 
 #
-# Set fortran linker names of BLACS/SCALAPACK functions to check for.
+# Set fortran linker names of SCALAPACK functions to check for.
 #
-blacs_routine="blacs_set"
 scalapack_routine="pcheev"
 elpa_routine="elpa_init"
 mpi_routine=MPI_Init
@@ -13872,23 +14002,9 @@ fi
 # Parse configure options
 #
 if test "$enable_par_linalg" = "yes" ; then
-  enable_blacs="internal" ;
   enable_scalapack="internal" ;
 fi
 #
-case $with_blacs_libs in
-  yes) enable_blacs="internal" ;;
-  no)  enable_blacs="no" ; enable_par_linalg="no" ;;
-  mkl)
-    if test "$try_mkl_scalapack" = "no" ; then
-       enable_blacs="no" ; enable_par_linalg="no"
-    else
-       enable_blacs="check"; BLACS_LIBS="$try_mkl_scalapack"
-    fi
-    ;;
-  *) enable_blacs="check"; BLACS_LIBS="$with_blacs_libs" ;;
-esac
-#
 case $with_scalapack_libs in
   yes) enable_scalapack="internal" ;;
   no) enable_scalapack="no" ; enable_par_linalg="no" ;;
@@ -13910,42 +14026,10 @@ esac
 #
 if test "$mpibuild"  = "yes"; then
   #
-  if test "$enable_blacs" = "check" ; then
-    #
-    acx_blacs_save_LIBS="$BLACS_LIBS"
-    LIBS="$LIBS $FLIBS $LAPACK_LIBS $BLAS_LIBS"
-    # First, check BLACS_LIBS environment variable
-    if test "x$BLACS_LIBS" != x; then
-      save_LIBS="$LIBS"; LIBS="$BLACS_LIBS $LIBS"
-      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $blacs_routine in $BLACS_LIBS" >&5
-printf %s "checking for $blacs_routine in $BLACS_LIBS... " >&6; }
-      cat > conftest.$ac_ext <<_ACEOF
-      program main
-      call $blacs_routine
-      end
-_ACEOF
-if ac_fn_fc_try_link "$LINENO"
-then :
-  enable_blacs="yes"
-else case e in #(
-  e) enable_blacs="internal"; BLACS_LIBS="" ;;
-esac
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.beam \
-    conftest$ac_exeext conftest.$ac_ext
-      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_blacs" >&5
-printf "%s\n" "$enable_blacs" >&6; }
-      BLACS_LIBS="$acx_blacs_save_LIBS"
-      LIBS="$save_LIBS"
-    else
-      enable_blacs="no";
-    fi
-    #
-  fi
   #
   if test "$enable_scalapack" = "check" ; then
     acx_scalapack_save_LIBS="$SCALAPACK_LIBS"
-    LIBS="$LIBS $FLIBS $LAPACK_LIBS $BLAS_LIBS $BLACS_LIBS"
+    LIBS="$LIBS $FLIBS $LAPACK_LIBS $BLAS_LIBS"
     # First, check SCALAPACK_LIBS environment variable
     if test "x$SCALAPACK_LIBS" != x; then
       save_LIBS="$LIBS"; LIBS="$SCALAPACK_LIBS $LIBS"
@@ -13976,7 +14060,7 @@ printf "%s\n" "$enable_scalapack" >&6; }
   #
   if test "$enable_elpa" = "check" ; then
     acx_elpa_save_LIBS="$ELPA_LIBS"
-    LIBS="$LIBS $FLIBS $SCALAPACK_LIBS $LAPACK_LIBS $BLAS_LIBS $BLACS_LIBS"
+    LIBS="$LIBS $FLIBS $SCALAPACK_LIBS $LAPACK_LIBS $BLAS_LIBS"
     # First, check ELPA_LIBS environment variable
     if test "x$ELPA_LIBS" != x; then
       save_LIBS="$LIBS"; LIBS="$ELPA_LIBS $LIBS"
@@ -14010,10 +14094,8 @@ printf "%s\n" "$enable_elpa" >&6; }
   #
   if test x"$enable_par_linalg" = "xyes"; then
     if test x"$enable_int_linalg" = "xyes"; then
-      enable_blacs="internal";
       enable_scalapack="internal";
     else
-      if test "$enable_blacs"     = "no"; then enable_blacs="internal"    ; fi
       if test "$enable_scalapack" = "no"; then enable_scalapack="internal"; fi
     fi
   fi
@@ -14026,16 +14108,6 @@ printf "%s\n" "$enable_elpa" >&6; }
   #  fi
   #fi
   #
-  if test "$mpif_found" = "yes" && test "$enable_blacs" = "internal"; then
-    enable_blacs="yes";
-    internal_blacs="yes";
-    BLACS_LIBS="${extlibs_path}/${FCKIND}/${FC}/lib/libblacs.a ${extlibs_path}/${FCKIND}/${FC}/lib/libblacs_C_init.a ${extlibs_path}/${FCKIND}/${FC}/lib/libblacs_init.a";
-    if test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libblacs.a" && test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libblacs_init.a"; then
-      compile_blacs="no"
-    else
-      compile_blacs="yes"
-    fi
-  fi
   #
   if test "$mpif_found" = "yes" && test "$enable_scalapack" = "internal"; then
     enable_scalapack="yes"
@@ -14062,7 +14134,7 @@ printf "%s\n" "$enable_elpa" >&6; }
   #
 fi
 #
-if test "$enable_blacs" = "yes" && test "$enable_scalapack" = "yes" ; then
+if test "$enable_scalapack" = "yes" ; then
   def_scalapack="-D_SCALAPACK"
   if test "$enable_elpa" = "yes" ; then
     def_elpa="-D_ELPA"
@@ -14076,18 +14148,14 @@ if test "$enable_blacs" = "yes" && test "$enable_scalapack" = "yes" ; then
   fi
 else
   enable_scalapack="no"
-  enable_blacs="no"
   enable_elpa="no"
   def_scalapack=""
   def_elpa=""
-  BLACS_LIBS=""
   SCALAPACK_LIBS=""
   ELPA_LIBS=""
   ELPA_INCS=""
-  compile_blacs="no"
   compile_slk="no"
   compile_elpa="no"
-  internal_blacs="no"
   internal_slk="no"
   internal_elpa="no"
 fi
@@ -14107,9 +14175,6 @@ LIBS="$reset_LIBS"
 
 
 
-
-
-
 # ============================================================================
 # HDF5
 
@@ -14800,7 +14865,7 @@ printf %s "checking for internal HDF5 library... " >&6; };
       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: already compiled" >&5
 printf "%s\n" "already compiled" >&6; } ;
       #
-    elif test "$IO_LIB_VER" = "serial" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
+    elif test "$IO_LIB_VER" = "serial" &&  test "$mpibuild"  = "yes"  && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
       #
       compile_hdf5="no" ;
       IO_LIB_VER="parallel";
@@ -15246,7 +15311,7 @@ printf %s "checking for internal NETCDF library... " >&6; };
       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: already compiled" >&5
 printf "%s\n" "already compiled" >&6; } ;
       #
-    elif test "$IO_LIB_VER" = "serial" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdf.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdff.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
+    elif test "$IO_LIB_VER" = "serial" &&  test "$mpibuild"  = "yes"   && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdf.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libnetcdff.a" && test -e "${NETCDF_HDF5_PAR_PATH}/lib/libhdf5.a"; then
       #
       compile_netcdf="no" ;
       IO_LIB_VER="parallel";
@@ -15999,12 +16064,14 @@ if test x"$acx_libxc_ok" = xno ; then
   if test -d "$with_libxc_path"; then
      libxc_incdir="$with_libxc_path/include"
      libxc_libdir="$with_libxc_path/lib"
+     libxc_lib64dir="$with_libxc_path/lib64"
   fi
   if test -d "$with_libxc_includedir"; then libxc_incdir="$with_libxc_includedir" ; fi
   if test -d "$with_libxc_libdir";     then libxc_libdir="$with_libxc_libdir" ; fi
   #
   # dynamic linkage, separate Fortran interface
-  if test ! -z "$libxc_libdir"; then LIBXC_LIBS="-L$libxc_libdir -lxcf90 -lxcf03 -lxc"; fi
+  if test ! -z "$libxc_libdir";  then LIBXC_LIBS="-L$libxc_libdir -lxcf90 -lxcf03 -lxc"; fi
+  if test ! -z "$libxc_lib64dir"; then LIBXC_LIBS="-L$libxc_lib64dir $LIBXC_LIBS"; fi
   if test ! -z "$libxc_incdir"; then LIBXC_INCS="$IFLAG$libxc_incdir"; fi
   #
   if test ! -z "$with_libxc_libs" ; then LIBXC_LIBS="$with_libxc_libs" ; fi
@@ -16912,6 +16979,61 @@ fi
 
 #
 
+# ============================================================================
+# Diago library
+
+
+#
+
+# Check whether --with-ldiago-branch was given.
+if test ${with_ldiago_branch+y}
+then :
+  withval=$with_ldiago_branch;
+else case e in #(
+  e) with_ldiago_branch=none ;;
+esac
+fi
+
+#
+ac_config_files="$ac_config_files lib/ldiago/make_ldiago.inc:lib/ldiago/make_ldiago.inc.in"
+
+
+if test "$def_scalapack" = "-D_SCALAPACK" ; then
+
+  compile_ldiago="yes"
+
+  # GPU flags are passed to ldiago compilation only if elpa library is available
+  if test ! x"$def_gpu" = "x" && test "$def_elpa" = "-D_ELPA" ; then
+    ldiago_opt="$def_gpu $def_elpa"
+    LDIAGO_GPU_SUPPORT="$GPU_SUPPORT"
+  else
+    ldiago_opt="$def_elpa"
+    LDIAGO_GPU_SUPPORT="no_gpu"
+  fi
+
+  LDIAGO_LIBS="${extlibs_path}/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/lib/libldiago.a"
+  LDIAGO_INCS="$IFLAG${extlibs_path}/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/include/"
+  #LDIAGO_LIBS="${compdir}/lib/libldiago.a"
+  #LDIAGO_INCS="$IFLAG${compdir}/include/"
+
+else
+
+  compile_ldiago="no"
+  ldiago_opt=""
+  LDIAGO_LIBS=""
+  LDIAGO_INCS=""
+
+fi
+
+
+
+
+
+
+
+
+
+
 # ============================================================================
 # Device XLIB
 
@@ -17124,60 +17246,265 @@ LIBS="$acx_devxlib_save_LIBS"
 
 
 # ============================================================================
-# Yambo Libs
+# MAGMA
 
+#
+# Check whether --enable-magma was given.
+if test ${enable_magma+y}
+then :
+  enableval=$enable_magma;
+fi
 
 #
 
-# Check whether --with-ydiago-branch was given.
-if test ${with_ydiago_branch+y}
+# Check whether --with-magma_libs was given.
+if test ${with_magma_libs+y}
 then :
-  withval=$with_ydiago_branch;
-else case e in #(
-  e) with_ydiago_branch=none ;;
-esac
+  withval=$with_magma_libs;
 fi
 
-#
-ac_config_files="$ac_config_files lib/ydiago/make_ydiago.inc:lib/ydiago/make_ydiago.inc.in"
 
+# Check whether --with-magma_incs was given.
+if test ${with_magma_incs+y}
+then :
+  withval=$with_magma_incs;
+fi
 
-if test "$def_scalapack" = "-D_SCALAPACK" ; then
 
-  compile_ydiago="yes"
+# Check whether --with-magma_path was given.
+if test ${with_magma_path+y}
+then :
+  withval=$with_magma_path;
+fi
 
-  # GPU flags are passed to ydiago compilation only if elpa library is available
-  if test ! x"$def_gpu" = "x" && test "$def_elpa" = "-D_ELPA" ; then
-    ydiago_opt="$def_gpu $def_elpa"
-    YDIAGO_GPU_SUPPORT="$GPU_SUPPORT"
-  else
-    ydiago_opt="$def_elpa"
-    YDIAGO_GPU_SUPPORT="no_gpu"
-  fi
 
-  YDIAGO_LIBS="${extlibs_path}/${FCKIND}/${FC}/${YDIAGO_GPU_SUPPORT}/lib/libydiago.a"
-  YDIAGO_INCS="$IFLAG${extlibs_path}/${FCKIND}/${FC}/${YDIAGO_GPU_SUPPORT}/include/"
-  #YDIAGO_LIBS="${compdir}/lib/libydiago.a"
-  #YDIAGO_INCS="$IFLAG${compdir}/include/"
+# Check whether --with-magma_libdir was given.
+if test ${with_magma_libdir+y}
+then :
+  withval=$with_magma_libdir;
+fi
 
-else
 
-  compile_ydiago="no"
-  ydiago_opt=""
-  YDIAGO_LIBS=""
-  YDIAGO_INCS=""
+# Check whether --with-magma_includedir was given.
+if test ${with_magma_includedir+y}
+then :
+  withval=$with_magma_includedir;
+fi
+
 
+#
+def_magma=""
+magma="no"
+internal_magma="no"
+compile_magma="no"
+compile_magma_fmodules="no"
+#
+if test x"$enable_magma" = "x"; then enable_magma="no" ; fi
+#if test x"$enable_magma" = "xyes"; then enable_magma="yes"; fi
+#
+# MAGMA global options
+#
+if test  x"$with_magma_libs" = "xyes" ; then
+  enable_magma="yes" ;
+  compile_magma_fmodules="yes" ;
+  with_magma_libs="";
+elif test  x"$with_magma_libs" = "xno" ; then
+  enable_magma="no" ;
+  compile_magma_fmodules="no" ;
+  with_magma_libs="";
 fi
+#
+if test  x"$with_magma_libdir" != "x" ; then enable_magma="yes" ; fi
+if test  x"$with_magma_path"   != "x" ; then enable_magma="yes" ; fi
+if test  x"$with_magma_libs"   != "x" ; then enable_magma="yes" ; fi
+#
+# Set MAGMA LIBS and FLAGS from INPUT
+#
+if test -d "$with_magma_path" || test -d "$with_magma_libdir" || test x"$with_magma_libs" != "x" ; then
+  #
+  # external magma
+  #
+  if   test   x"$with_magma_libs" != "x" ; then  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Magma using $with_magma_libs" >&5
+printf %s "checking for Magma using $with_magma_libs... " >&6; } ;
+  elif test -d "$with_magma_libdir"      ; then  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Magma in $with_magma_libdir" >&5
+printf %s "checking for Magma in $with_magma_libdir... " >&6; } ;
+  elif test -d "$with_magma_path"        ; then  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Magma in $with_magma_path/lib" >&5
+printf %s "checking for Magma in $with_magma_path/lib... " >&6; } ;
+  fi
+  #
+  if test -d "$with_magma_path" ; then
+      try_magma_libdir="$with_magma_path/lib" ;
+      try_magma_incdir="$with_magma_path/include" ;
+  fi
+  #
+  if test -d "$with_magma_libdir"     ; then try_magma_libdir="$with_magma_libdir" ; fi
+  if test -d "$with_magma_includedir" ; then try_magma_incdir="$with_magma_includedir" ; fi
+  #
+  try_MAGMA_INCS="$IFLAG$try_magma_incdir" ;
+  try_MAGMA_LIBS="-L$try_magma_libdir -lmagma" ;
+  #
+  if test x"$with_magma_libs" != "x" ; then try_MAGMA_LIBS="$with_magma_libs" ; fi
+  if test x"$with_magma_incs" != "x" ; then try_MAGMA_INCS="$with_magma_incs" ; fi
+  #
+  if test -z "$try_MAGMA_LIBS" ; then as_fn_error $? "No libs specified" "$LINENO" 5 ; fi
+  if test -z "$try_MAGMA_INCS" ; then as_fn_error $? "No include-dir specified" "$LINENO" 5 ; fi
+  #
+  ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
 
+  #
+  save_fcflags="$FCFLAGS" ;
+  save_libs="$LIBS" ;
+  #
+  FCFLAGS="$try_MAGMA_INCS $save_fcflags";
+  LIBS="$try_MAGMA_LIBS $save_libs";
+  #
+  # check for magma with fortran-interfaces
+  cat > conftest.$ac_ext <<_ACEOF
+      program main
 
+    use magma
+    implicit none
+    integer :: lda
+    !magma_devptr_t :: dA
+      end
+_ACEOF
+if ac_fn_fc_try_compile "$LINENO"
+then :
+  magmaf=yes
+else case e in #(
+  e) magmaf=no ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext;
+  #
+  # check for c-style magma
+  cat > conftest.$ac_ext <<_ACEOF
+      program main
 
+    ierr = magma_init();
+      end
+_ACEOF
+if ac_fn_fc_try_compile "$LINENO"
+then :
+  magmac=yes
+else case e in #(
+  e) magmac=no ;;
+esac
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext;
+  #
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Library: $magmac;  Fortran support: $magmaf" >&5
+printf "%s\n" "Library: $magmac;  Fortran support: $magmaf" >&6; } ;
+  #
+  if test "x$magmaf" = "xyes"; then
+    #
+    MAGMA_INCS="$try_MAGMA_INCS" ;
+    MAGMA_LIBS="$try_MAGMA_LIBS" ;
+    magma=yes;
+    compile_magma="no";
+    internal_magma="no";
+    def_magma="-D_MAGMA"
+    #
+  elif test "x$magmac" = "xyes"; then
+    #
+    MAGMA_LIBS="$try_MAGMA_LIBS" ;
+    MAGMA_INCS="${IFLAG}${extlibs_path}/${FCKIND}/${FC}/include" ;
+    magma=yes;
+    compile_magma="no";
+    compile_magma_fmodules="yes";
+    def_magma="-D_MAGMA"
+    #
+  else
+    #
+    magma=no;
+    #
+  fi
+  #
+  FCFLAGS="$save_fcflags" ;
+  LIBS="$save_libs" ;
+  #
+fi
+#
+# TO BE FIXED: needs internal compilation support and paths
+#              have to be corrected with GPU_SUPPORT folder
+#
+# Internal compilation
+#
+if test "x$enable_magma" = "xyes" && test "x$magma" = "xno" ; then
+  #
+  # internal magma
+  #
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for internal Magma library" >&5
+printf %s "checking for internal Magma library... " >&6; }
+  #
+  internal_magma="yes"
+  #
+  #if test "x$lapack_shared" = "x1" ; then
+  #  MAGMA_LIBS="${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.so" ;
+  #  #MAGMA_LIBS="" ;
+  #else
+    MAGMA_LIBS="${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.a" ;
+  #fi
+  MAGMA_INCS="${IFLAG}${extlibs_path}/${FCKIND}/${FC}/include" ;
+  #
+  magma=yes
+  def_magma="-D_MAGMA"
+  if test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.a" ; then
+    compile_magma="no" ;
+    compile_magma_fmodules="no" ;
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: already compiled" >&5
+printf "%s\n" "already compiled" >&6; } ;
+  elif test -e "${extlibs_path}/${FCKIND}/${FC}/lib/libmagma.so" ; then
+    compile_magma="no" ;
+    compile_magma_fmodules="no" ;
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: already compiled" >&5
+printf "%s\n" "already compiled" >&6; } ;
+  else
+    compile_magma="yes" ;
+    compile_magma_fmodules="no" ;
+    #
+    NVIDIA_ARCH=
+    if test "x$with_cuda_cc" != "x" ; then
+       if test "$with_cuda_cc" -ge 60 &&  ! test "$with_cuda_cc" -ge 70 ; then NVIDIA_ARCH=Pascal ; fi
+       if test "$with_cuda_cc" -ge 70 &&  ! test "$with_cuda_cc" -ge 80 ; then NVIDIA_ARCH=Volta ; fi
+       if test "$with_cuda_cc" -ge 80 &&  ! test "$with_cuda_cc" -ge 90 ; then NVIDIA_ARCH=Hopper ; fi
+    fi
+    #
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Compatible external Magma not found/specified. To be compiled." >&5
+printf "%s\n" "Compatible external Magma not found/specified. To be compiled." >&6; } ;
 
+    ac_config_files="$ac_config_files lib/magma/make_magma.inc:lib/magma/make_magma.inc.in"
 
+  fi
+  #
+fi
+
+#
+# switch off internal magma compilation
+#
+deactivate_internal=no
+if test "x$compile_magma" = "xyes" && test "x$internal_magma" = "xyes" && test "x$deactivate_internal" = "xyes"  ; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Internal Magma compilation not available yet. Deactivating it." >&5
+printf "%s\n" "Internal Magma compilation not available yet. Deactivating it." >&6; } ;
+  compile_magma="no"
+  def_magma=""
+  enable_magma="no"
+  MAGMA_INCS="" ;
+  MAGMA_LIBS="" ;
+fi
+#
 
 
 
 
 
+
+
+#
+
 # ============================================================================
 # Prepare the REPORT file variables
 
@@ -17187,6 +17514,9 @@ fi
 DP_check="-"
 if test "$enable_dp" = "yes" ; then DP_check="X"; fi
 #
+GAMMA_ONLY_check="-"
+if test "$enable_gamma_only" = "yes" ; then GAMMA_ONLY_check="X"; fi
+#
 KEEP_OBJS_check="-"
 if test "$enable_keep_objects" = "yes" ; then KEEP_OBJS_check="X"; fi
 #
@@ -17298,14 +17628,14 @@ if test "$internal_blacs" = "yes" ; then
   if test "$compile_blacs" = "no" ; then BLACS_check="I"; fi
 fi
 #
-if test "$compile_ydiago" = "yes"; then
-  if test x"$with_ydiago_branch" = "xnone"; then
-    YDIAGO_check="I";
+if test "$compile_ldiago" = "yes"; then
+  if test x"$with_ldiago_branch" = "xnone"; then
+    LDIAGO_check="I";
   else
-    YDIAGO_check="G";
+    LDIAGO_check="G";
   fi
 else
-  YDIAGO_check="-";
+  LDIAGO_check="-";
 fi
 #
 PETSC_check="-"
@@ -17334,6 +17664,14 @@ if test "$internal_libxc" = "yes" ; then
   if test "$compile_libxc" = "no" ; then LIBXC_check="I"; fi
 fi
 #
+MAGMA_check="-"
+if test "$internal_magma" = "yes" ; then
+  if test "$compile_magma" = "yes" ; then MAGMA_check="C"; fi
+  if test "$compile_magma" = "no"  ; then MAGMA_check="I"; fi
+elif test "$enable_magma" = "yes" ; then
+  MAGMA_check="E"
+fi
+#
 DEVXLIB_check="E"
 if test "$internal_devxlib" = "yes" ; then
   if test "$compile_devxlib" = "yes"; then DEVXLIB_check="C"; fi
@@ -17394,6 +17732,7 @@ fi
 
 
 
+
 #
 
 
@@ -17420,6 +17759,7 @@ fi
 
 
 
+
 #
 
 
@@ -17429,47 +17769,50 @@ fi
 
 
 
+
 #
 # STRIPE [LIB] from paths
 #
 
-TMP1=`echo $YDIAGO_LIBS | sed 's/\//+/g'`
-TMP2=`echo $compdir | sed 's/\//+/g'`
-TMP3=`echo $TMP1 | sed "s/$TMP2/\(CMP\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+TMP1=`echo $LDIAGO_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
+TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
-YDIAGO_LIBS_R=$STRIPE
+#ACX_STRIPE_SUBPATH2($LDIAGO_LIBS,"LIB")
+LDIAGO_LIBS_R=$STRIPE
 
-TMP1=`echo $YDIAGO_INCS | sed 's/\//+/g'`
-TMP2=`echo $compdir | sed 's/\//+/g'`
-TMP3=`echo $TMP1 | sed "s/$TMP2/\(CMP\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+TMP1=`echo $LDIAGO_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
+TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
 
-YDIAGO_INCS_R=$STRIPE
+#ACX_STRIPE_SUBPATH2($LDIAGO_INCS,"INC")
+LDIAGO_INCS_R=$STRIPE
 
 
 #
 
-TMP1=`echo $IOTK_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $IOTK_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 IOTK_LIBS_R=$STRIPE
 
-TMP1=`echo $IOTK_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $IOTK_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17479,20 +17822,20 @@ IOTK_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $YAML_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $YAML_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 YAML_LIBS_R=$STRIPE
 
-TMP1=`echo $YAML_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $YAML_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17502,20 +17845,20 @@ YAML_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $FUTILE_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $FUTILE_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 FUTILE_LIBS_R=$STRIPE
 
-TMP1=`echo $FUTILE_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $FUTILE_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17525,20 +17868,20 @@ FUTILE_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $ETSF_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $ETSF_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 ETSF_LIBS_R=$STRIPE
 
-TMP1=`echo $ETSF_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $ETSF_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17548,20 +17891,20 @@ ETSF_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $NETCDFF_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $NETCDFF_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 NETCDFF_LIBS_R=$STRIPE
 
-TMP1=`echo $NETCDFF_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $NETCDFF_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17571,20 +17914,20 @@ NETCDFF_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $NETCDF_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $NETCDF_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 NETCDF_LIBS_R=$STRIPE
 
-TMP1=`echo $NETCDF_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $NETCDF_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17594,20 +17937,20 @@ NETCDF_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $HDF5_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $HDF5_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 HDF5_LIBS_R=$STRIPE
 
-TMP1=`echo $HDF5_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $HDF5_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17617,20 +17960,20 @@ HDF5_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $FFT_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $FFT_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 FFT_LIBS_R=$STRIPE
 
-TMP1=`echo $FFT_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $FFT_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17640,20 +17983,20 @@ FFT_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $BLAS_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $BLAS_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 BLAS_LIBS_R=$STRIPE
 
-TMP1=`echo $BLAS_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $BLAS_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17663,20 +18006,20 @@ BLAS_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $LAPACK_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LAPACK_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 LAPACK_LIBS_R=$STRIPE
 
-TMP1=`echo $LAPACK_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LAPACK_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17686,20 +18029,20 @@ LAPACK_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $SCALAPACK_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $SCALAPACK_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 SCALAPACK_LIBS_R=$STRIPE
 
-TMP1=`echo $SCALAPACK_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $SCALAPACK_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17709,20 +18052,20 @@ SCALAPACK_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $ELPA_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $ELPA_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 ELPA_LIBS_R=$STRIPE
 
-TMP1=`echo $ELPA_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $ELPA_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17732,20 +18075,43 @@ ELPA_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $BLACS_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $MAGMA_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
+TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
+if [ -z "${1// }" ]; then
+ STRIPE="$STRIPE ("LIB")"
+fi
+
+MAGMA_LIBS_R=$STRIPE
+
+TMP1=`echo $MAGMA_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
+TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
+if [ -z "${1// }" ]; then
+ STRIPE="$STRIPE ("INC")"
+fi
+
+MAGMA_INCS_R=$STRIPE
+
+
+#
+
+TMP1=`echo $BLACS_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 BLACS_LIBS_R=$STRIPE
 
-TMP1=`echo $BLACS_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $BLACS_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17755,20 +18121,20 @@ BLACS_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $PETSC_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $PETSC_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 PETSC_LIBS_R=$STRIPE
 
-TMP1=`echo $PETSC_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $PETSC_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17778,20 +18144,20 @@ PETSC_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $SLEPC_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $SLEPC_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 SLEPC_LIBS_R=$STRIPE
 
-TMP1=`echo $SLEPC_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $SLEPC_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17801,20 +18167,43 @@ SLEPC_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $LIBXC_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $MAGMA_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
+TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
+if [ -z "${1// }" ]; then
+ STRIPE="$STRIPE ("LIB")"
+fi
+
+MAGMA_LIBS_R=$STRIPE
+
+TMP1=`echo $MAGMA_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
+TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
+if [ -z "${1// }" ]; then
+ STRIPE="$STRIPE ("INC")"
+fi
+
+MAGMA_INCS_R=$STRIPE
+
+
+#
+
+TMP1=`echo $LIBXC_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 LIBXC_LIBS_R=$STRIPE
 
-TMP1=`echo $LIBXC_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LIBXC_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17824,20 +18213,20 @@ LIBXC_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $DEVXLIB_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $DEVXLIB_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 DEVXLIB_LIBS_R=$STRIPE
 
-TMP1=`echo $DEVXLIB_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $DEVXLIB_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17847,20 +18236,20 @@ DEVXLIB_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $LIBCUDA_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LIBCUDA_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 LIBCUDA_LIBS_R=$STRIPE
 
-TMP1=`echo $LIBCUDA_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LIBCUDA_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17870,20 +18259,20 @@ LIBCUDA_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $BLAS_PETSC_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $BLAS_PETSC_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 BLAS_PETSC_LIBS_R=$STRIPE
 
-TMP1=`echo $BLAS_PETSC_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $BLAS_PETSC_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -17893,20 +18282,20 @@ BLAS_PETSC_INCS_R=$STRIPE
 
 #
 
-TMP1=`echo $LAPACK_PETSC_LIBS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LAPACK_PETSC_LIBS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("LIB")"
 fi
 
 LAPACK_PETSC_LIBS_R=$STRIPE
 
-TMP1=`echo $LAPACK_PETSC_INCS | sed 's/\//+/g'`
-TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//+/g'`
+TMP1=`echo $LAPACK_PETSC_INCS | sed 's/\//\#/g'`
+TMP2=`echo $extlibs_path/${FCKIND}/${FC} | sed 's/\//\#/g'`
 TMP3=`echo $TMP1 | sed "s/$TMP2/\(LIB\)/g"`
-STRIPE=`echo $TMP3 | sed 's/+/\//g'`
+STRIPE=`echo $TMP3 | sed 's/\#/\//g'`
 if [ -z "${1// }" ]; then
  STRIPE="$STRIPE ("INC")"
 fi
@@ -18427,7 +18816,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by Yambo $as_me 5.4.0 r.24143 h.77ed66f02, which was
+This file was extended by Lumen $as_me 2.0.0, which was
 generated by GNU Autoconf 2.72.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -18483,7 +18872,8 @@ $config_files
 Configuration headers:
 $config_headers
 
-Report bugs to <yambo@yambo-code.org>."
+Report bugs to <https://gitlab.com/lumen-code/lumen/-/issues>.
+Lumen home page: <www.lumen-code.org>."
 
 _ACEOF
 ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"`
@@ -18491,7 +18881,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config='$ac_cs_config_escaped'
 ac_cs_version="\\
-Yambo config.status 5.4.0 r.24143 h.77ed66f02
+Lumen config.status 2.0.0
 configured by $0, generated by GNU Autoconf 2.72,
   with options \\"\$ac_cs_config\\"
 
@@ -18615,7 +19005,8 @@ do
     "lib/fftqe/c_defs.h") CONFIG_HEADERS="$CONFIG_HEADERS lib/fftqe/c_defs.h:lib/fftqe/c_defs.h.in" ;;
     "lib/fftqe/fftqe_defs.h") CONFIG_FILES="$CONFIG_FILES lib/fftqe/fftqe_defs.h:lib/fftqe/fftqe_defs.h.in" ;;
     "lib/iotk/make_iotk.inc") CONFIG_FILES="$CONFIG_FILES lib/iotk/make_iotk.inc:lib/iotk/make_iotk.inc.in" ;;
-    "lib/ydiago/make_ydiago.inc") CONFIG_FILES="$CONFIG_FILES lib/ydiago/make_ydiago.inc:lib/ydiago/make_ydiago.inc.in" ;;
+    "lib/ldiago/make_ldiago.inc") CONFIG_FILES="$CONFIG_FILES lib/ldiago/make_ldiago.inc:lib/ldiago/make_ldiago.inc.in" ;;
+    "lib/magma/make_magma.inc") CONFIG_FILES="$CONFIG_FILES lib/magma/make_magma.inc:lib/magma/make_magma.inc.in" ;;
     "include/headers/common/have_malloc.h") CONFIG_HEADERS="$CONFIG_HEADERS include/headers/common/have_malloc.h:include/headers/common/have_malloc.h.in" ;;
     "config/setup") CONFIG_FILES="$CONFIG_FILES config/setup" ;;
     "config/mk/global/defs.mk") CONFIG_FILES="$CONFIG_FILES config/mk/global/defs.mk" ;;
@@ -19327,9 +19718,9 @@ if ! test -d "$extlibs_path/${FCKIND}/${FC}/${GPU_SUPPORT}/lib";       then mkdi
 if ! test -d "$extlibs_path/${FCKIND}/${FC}/${GPU_SUPPORT}/include";   then mkdir    "$extlibs_path/${FCKIND}/${FC}/${GPU_SUPPORT}/include";    fi
 #
 if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago";                                 then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago";                                  fi
-if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}";           then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}";            fi
-if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/lib";       then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/lib";        fi
-if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/include";   then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${YDIAGO_GPU_SUPPORT}/include";    fi
+if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}";           then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}";            fi
+if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/lib";       then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/lib";        fi
+if ! test -d "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/include";   then mkdir    "$extlibs_path/${FCKIND}/${FC}/diago/${LDIAGO_GPU_SUPPORT}/include";    fi
 #
 if  test -d "$extlibs_path/${FCKIND}/${FC}" ; then
  BIN_LIBRARIES=$extlibs_path/${FCKIND}/${FC}/bin/* ;
diff --git a/doc/Discussion_AD_DS_20250225.dat b/doc/Discussion_AD_DS_20250225.dat
new file mode 100644
index 0000000000..6f06a2d10b
--- /dev/null
+++ b/doc/Discussion_AD_DS_20250225.dat
@@ -0,0 +1,40 @@
+
+
+-----------------------
+OVERALL philosophy
+-----------------------
+
+- dedicated subroutines which interfaces lapack or magma (single and double precision version), and serial or parallel (?)
+  
+  SERIAL_HERMITIAN_diagonalization            --> could be renamed HEEV
+  SERIAL_diagonalization (non hermitian case) --> could be renamed GEEV ?
+  SERIAL_inversion
+  SERIAL_SVD_inversion
+  SERIAL_lin_system (to be merge also with SERIAL_lin_system_gpu)
+  
+  PARALLEL_HERMITIAN_diagonalization            --> could be merged with SERIAL_HERMITIAN_diagonalization 
+  PARALLEL_diagonalization (non hermitian case) --> could be merged with SERIAL_diagonalization
+
+  PARALLEL_MbyM could be created (for now it does not exist)
+
+  The kind of operation could be managed via an handle, however the sobroutines must be cleaned by all extra operations, e.g.
+  allocation, parallel setup, etc ... which should be shifted inside LINEAR_ALGEBRA_init or similar subroutine
+
+  LINEAR_ALGEBRA_driver is never directly called, but the above subroutines are directly called. It only performs initializations
+
+
+-----------------------
+DONE
+-----------------------
+
+- compilation to be fixed
+
+- To be removed:
+    M_eigenvalues, heev and geev interfaces 
+
+- mod_wrapper --> wrapper yambo for single and double to blas --> when available calls devxlib single and double subroutines
+
+- mod_linear algebra has wrappers single and double to few selected lapack
+  --> LU_factorization
+  --> SV_decomposition
+
diff --git a/doc/RELEASE_NOTES b/doc/RELEASE_NOTES
new file mode 100644
index 0000000000..ccda43fa80
--- /dev/null
+++ b/doc/RELEASE_NOTES
@@ -0,0 +1,8 @@
+Lumen 2.0 (04/12/2025)
+----------------------
+https://www.lumen-code.org/wiki/index.php?title=Lumen_2.0
+
+Lumen 1.x (2018)
+---------------------
+https://www.attaccalite.com/lumen/
+
diff --git a/driver/yambo.F b/driver/yambo.F
index 40f8ac275c..a4656de8e0 100644
--- a/driver/yambo.F
+++ b/driver/yambo.F
@@ -20,6 +20,9 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d
 &                        l_slk_test
 #if defined _ELPH
  use drivers,       ONLY:l_elph_Hamiltonian
+#endif
+#if defined _ELPH && !defined _RT
+ use drivers,       ONLY:l_EXCPH_gkkp,l_EXCPH_optics,l_EXCPH_lifetime
 #endif
  use gpu_m,         ONLY:GPU_test_dim
  use X_m,           ONLY:X_t,i_X_kind_existing_DB
@@ -69,7 +72,11 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d
  myid    = pid
  infile  = inf
  yambo   = 0
- iinf    = 0 
+ iinf    = 0
+ !
+#if defined _GPU && defined _GAMMA_ONLY
+ call error("Gamma only mode is not compatible with GPU porting")
+#endif 
  !
  ! ... Internal Defaults
  !
@@ -115,7 +122,7 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d
  ! Basical 
  !
  launch_me= .not.(l_slk_test.or.l_gpu_test)
- if(launch_me) call setup(en,Xen,Ken,k,Xk)
+ if(launch_me) call setup(en,q,Xen,Ken,k,Xk)
  !
  ! Update the default variables
  !
@@ -226,6 +233,15 @@ integer function yambo(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_d
  if (launch_me) call K_driver(Xen,Ken,Xk,q,X(i_X_kind),Xw(i_X_kind),Dip)
  if (launch_me) call mem_manager_report
  !
+ ! EXCITON-PHONON 
+ ! ===============
+#if defined _ELPH && !defined _RT
+ if (l_EXCPH_gkkp)     call EXCPH_gkkp_driver(Ken,Xk,q,X(i_X_kind))
+ if (l_EXCPH_optics)   call EXCPH_optics(Ken,Xk,q,X(i_X_kind),Xw(3))
+ if (l_EXCPH_lifetime) call EXCPH_lifetime(Xk,q,X(i_X_kind))
+#endif
+ !
+ !
  ! ACFDT
  !=======
  !
diff --git a/driver/ypp.F b/driver/ypp.F
index 647fdfdefb..bb0cd52ae9 100644
--- a/driver/ypp.F
+++ b/driver/ypp.F
@@ -24,13 +24,13 @@ integer function ypp(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  use YPPm,                ONLY:l_bz_grids,l_dos,l_electrons,l_fix_syms,l_map_kpts,l_SOC_map,&
 &                              l_QP_DBs_create_and_modify,l_QP_DBs_manipulate,l_excitons,l_wannier,&
 &                              alat_used_for_output,l_QP_DB_expand,coo_out,coo_in,l_dipoles,&
-&                              DIPs_kind,l_sp_wf,l_mean_potential
+&                              DIPs_kind,l_sp_wf,l_mean_potential,l_magnons
 #if defined _YPP_RT
  use YPP_real_time,       ONLY:l_RealTime,l_RT_DBs,l_NL_X,l_NL_exc,l_RT_abs
 #endif
 #if defined _YPP_ELPH
  use YPP_ELPH,            ONLY:l_atomic_amplitude,l_eliashberg,l_phonons,l_gkkp,ELPH_general_gFsq, &
-&                              l_gkkp_dg,l_gkkp_plot,l_gkkp_db
+&                              l_gkkp_dg,l_gkkp_plot,l_gkkp_db,l_gkkp_sngl
 #endif
 #if defined _YPP_SC
  ! DS This is needed otherwise ypp_sc fails if compiling after ypp_ph and later yambo_sc
@@ -170,7 +170,7 @@ integer function ypp(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  else
    call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1,2,3/),MODE=DUMP,ID=ID)
  endif
- i_err=io_QINDX(k,q,ID)
+ i_err=io_QINDX(k,q,ID,"minus_q")
  if(i_err/=0) call error(' Error reading K-point grid, please run setup again!')
  !
  ! Rim energies and kpts
@@ -197,7 +197,7 @@ integer function ypp(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  ! Main SETUP
  !============
  !
- call setup(en,Xen,Ken,k,Xk)
+ call setup(en,q,Xen,Ken,k,Xk)
  !
  call PARALLEL_global_indexes(Xen,Xk,q," ",RESET=.TRUE.)
  !
@@ -237,7 +237,7 @@ integer function ypp(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  !
 #endif
  !
- l_flag=l_excitons.or.(l_dipoles.and.trim(DIPs_kind)=="EXCITONIC")
+ l_flag=l_excitons.or.l_magnons.or.(l_dipoles.and.trim(DIPs_kind)=="EXCITONIC")
 #if defined _YPP_RT
  l_flag=l_flag.or.l_RT_abs
 #endif
@@ -254,7 +254,8 @@ integer function ypp(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  !
  if ((l_gkkp.and..not.l_excitons).or.l_phonons) then
    ! =====     ========
-   if (l_gkkp)                call ELPH_databases(k,en,q)
+   if (l_gkkp_db)             call ELPH_databases(k,en,q)
+   if (l_gkkp_sngl)           call ELPH_sngl_db(k,en,q)
    if (l_eliashberg.or.l_dos) call ELPH_eliashberg_dos(k,en,q)
    if (l_atomic_amplitude)    call ELPH_atomic_amplitude(q)
    if (l_gkkp_dg)             call ELPH_double_grid(k,en,q)
diff --git a/include/driver/driver.h b/include/driver/driver.h
index 7663f515cc..91d866fead 100644
--- a/include/driver/driver.h
+++ b/include/driver/driver.h
@@ -1,26 +1,34 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
-struct yambo_seed_struct command_line(int argc, char *argv[],struct options_struct options[], struct tool_struct t, int *use_editor, int *use_mpi, int n_options);
-void input_file(struct yambo_seed_struct y,struct tool_struct t, int *use_editor);
-void launcher(int argc, char *argv[],int np, int pid, struct yambo_seed_struct y,int *use_editor , int *use_mpi );
-void options_maker(struct options_struct options[], int n_options);
-struct tool_struct tool_init( );
-void usage(struct options_struct options[], struct tool_struct t, char* what, int n_options);
-struct tool_struct versions( );
-void title(FILE *file_name,char *cmnt, struct tool_struct t);
-int use_me(struct options_struct options[], struct tool_struct t, int i_opt);
+#pragma once
+
+#include <stdio.h>
+
+#include "kind.h"
+#include "yambo_driver.h"
+
+struct yambo_seed_struct command_line(int argc, char *argv[],
+                                      struct options_struct *options,
+                                      struct tool_struct t, int *use_editor,
+                                      int *use_mpi, int n_options);
+void input_file(struct yambo_seed_struct y, struct tool_struct t,
+                int *use_editor);
+void launcher(int argc, char *argv[], int np, int pid,
+              struct yambo_seed_struct y, int *use_editor, int *use_mpi);
+void options_maker(struct options_struct *options, int n_options);
+struct tool_struct tool_init();
+void usage(struct options_struct *options, struct tool_struct t, char *what,
+           int n_options);
+struct tool_struct versions();
+void title_lumen(FILE *file_name, char *cmnt, struct tool_struct t);
+void title_yambo(FILE *file_name, char *cmnt, struct tool_struct t);
+int use_me(struct options_struct *options, struct tool_struct t, int i_opt);
 char *running_tool();
 char *running_project();
 char *running_libraries();
 char *runlevel(int *runid, int *id);
-void options_help(struct options_struct options[],int *i_opt);
-void options_control(struct options_struct options[],int *i_opt);
-void options_ypp(struct options_struct options[],int *i_opt);
-void options_yambo(struct options_struct options[],int *i_opt);
-void options_projects(struct options_struct options[],int *i_opt);
-void options_interfaces(struct options_struct options[],int *i_opt);
diff --git a/include/driver/editor.h.in b/include/driver/editor.h.in
index 69b2626084..e96797cd09 100644
--- a/include/driver/editor.h.in
+++ b/include/driver/editor.h.in
@@ -5,4 +5,6 @@
  
   Authors (see AUTHORS file for details): AM
 */
- char *editor="@editor@ ";
+#pragma once
+
+char *editor="@editor@ ";
diff --git a/include/driver/fortran_arguments.h b/include/driver/fortran_arguments.h
deleted file mode 100644
index 7204a677de..0000000000
--- a/include/driver/fortran_arguments.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
-  License-Identifier: GPL
- 
-  Copyright (C) 2020 The Yambo Team
- 
-  Authors (see AUTHORS file for details): DS
-*/
-&np,&pid,
-&y.string_N,&y.in_file_N,&y.in_dir_N,&y.out_dir_N,&y.com_dir_N,&y.job_N,
- y.string,   y.in_file,   y.in_dir,   y.out_dir,   y.com_dir,   y.job,
- y.string_N, y.in_file_N, y.in_dir_N, y.out_dir_N, y.com_dir_N, y.job_N
diff --git a/include/driver/fortran_driver.h b/include/driver/fortran_driver.h
index d4f987d7b5..28fa698edc 100644
--- a/include/driver/fortran_driver.h
+++ b/include/driver/fortran_driver.h
@@ -1,62 +1,29 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2019 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): DS
 */
-/* 
- Tool drivers 
-*/
-/* YAMBO 
+/*
+ Tool drivers
 */
-#if defined _yambo 
- #if defined _FORTRAN_US
-  int yambo_
- #else
-  int yambo
- #endif
-#endif
-/* YPP */
-#if defined _ypp
- #if defined _FORTRAN_US
-  int ypp_
- #else
-  int ypp
- #endif
-#endif
-/* A2Y */
-#if defined _a2y
- #if defined _FORTRAN_US
-  int a2y_
- #else
-  int a2y
- #endif
-#endif
-/* C2Y */
-#if defined _c2y
- #if defined _FORTRAN_US
-  int c2y_
- #else
-  int c2y
- #endif
-#endif
-/* P2Y */
-#if defined _p2y
- #if defined _FORTRAN_US
-  int p2y_
- #else
-  int p2y
- #endif
-#endif
-/* E2Y */
-#if defined _e2y
- #if defined _FORTRAN_US
-  int e2y_
- #else
-  int e2y
- #endif
-#endif
- (int *, int *,int *,int *,int *,int *,int *,int *,
-  char *string, char *in_file, char *in_dir, char *out_dir, char *com_dir, char *job,
-  int string_N, int in_file_N, int in_dir_N, int out_dir_N, int com_dir_N, int job_N);
+/* YAMBO
+ */
+#pragma once
+#include "wrapper.h"
+
+// Common prototype for all FORTRAN driver entry points
+typedef int FortranDriverFn(int *, int *, int *, int *, int *, int *, int *,
+                            int *, char *string, char *in_file, char *in_dir,
+                            char *out_dir, char *com_dir, char *job,
+                            int string_N, int in_file_N, int in_dir_N,
+                            int out_dir_N, int com_dir_N, int job_N);
+
+// Declare driver functions
+extern FortranDriverFn F90_FUNC(yambo);
+extern FortranDriverFn F90_FUNC(ypp);
+extern FortranDriverFn F90_FUNC(a2y);
+extern FortranDriverFn F90_FUNC(p2y);
+extern FortranDriverFn F90_FUNC(c2y);
+extern FortranDriverFn F90_FUNC(e2y);
diff --git a/include/driver/kind.h b/include/driver/kind.h
index bc6df98722..cf6bb3fc67 100644
--- a/include/driver/kind.h
+++ b/include/driver/kind.h
@@ -1,58 +1,60 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
 
+#pragma once
+
 typedef struct tool_struct
 {
- char *editor;
- char *tool;
- char *bin;
- char *desc;
- char version_string[500];
- char hash[500];
- char *pj;
- int  version;
- int  subversion;
- int  patchlevel;
- int  revision;
+    char *editor;
+    char *tool;
+    char *bin;
+    char *desc;
+    char version_string[500];
+    char lumenver[500];
+    char hash[500];
+    char *pj;
+    int version;
+    int subversion;
+    int patchlevel;
+    int revision;
 } tool_struct;
 
 typedef struct yambo_seed_struct
 {
- char string[500];
- char *in_file;
- char *in_dir;
- char *out_dir;
- char *com_dir;
- char *job;
- char *parenv_file;
- int  string_N;
- int  in_file_N;
- int  in_dir_N;
- int  out_dir_N;
- int  com_dir_N;
- int  job_N;
+    char string[500];
+    char *in_file;
+    char *in_dir;
+    char *out_dir;
+    char *com_dir;
+    char *job;
+    char *parenv_file;
+    int string_N;
+    int in_file_N;
+    int in_dir_N;
+    int out_dir_N;
+    int com_dir_N;
+    int job_N;
 } yambo_seed_struct;
 
 typedef struct options_struct
 {
- int  short_opt;
- char *long_opt;
- char *short_desc;
- char long_desc[20][128];
- char *bin;
- char *no_bin;
- char *yambo_string;
- char *section;
- int   int_var;
- int   float_var;
- int   char_var;
- int   optional_var;
- int   serial_var;
+    int short_opt;
+    char *long_opt;
+    char *short_desc;
+    char long_desc[20][128];
+    char *bin;
+    char *no_bin;
+    char *yambo_string;
+    char *section;
+    int int_var;
+    int float_var;
+    int char_var;
+    int optional_var;
+    int serial_var;
 } options_struct;
-
diff --git a/include/driver/tool.h b/include/driver/tool.h
deleted file mode 100644
index d6438e810d..0000000000
--- a/include/driver/tool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-  License-Identifier: GPL
- 
-  Copyright (C) 2019 The Yambo Team
- 
-  Authors (see AUTHORS file for details): DS
-
-  tool & desc
-
-*/
-#if defined _yambo 
- char *tool="yambo";
- char *tool_desc="A shiny pot of fun and happiness [C.D.Hogan]";
-#endif
-#if defined _ypp
- char *tool="ypp";
- char *tool_desc="Y(ambo) P(ost)/(re) P(rocessor)";
-#endif
-#if defined _a2y
- char *tool="a2y";
- char *tool_desc="A(binit) 2 Y(ambo) interface";
-#endif
-#if defined _c2y
- char *tool="c2y";
- char *tool_desc="C(pmd) 2 Y(ambo) interface";
-#endif
-#if defined _p2y
- char *tool="p2y";
- char *tool_desc="P(Wscf) 2 Y(ambo) interface";
-#endif
-#if defined _e2y
- char *tool="e2y";
- char *tool_desc="E(TSF) 2 Y(ambo) interface (0.6)";
-#endif
diff --git a/include/driver/wrapper.h b/include/driver/wrapper.h
index 596f7aff4a..00e2c4d00f 100644
--- a/include/driver/wrapper.h
+++ b/include/driver/wrapper.h
@@ -1,23 +1,25 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): DS
 */
 /*
  C wrapper
-*/ 
+*/
+#pragma once
+
 #if defined _C_US
- #define C_FUNC(name,NAME) name ## _
+#define C_FUNC(name, NAME) name##_
 #else
- #define C_FUNC(name,NAME) name
+#define C_FUNC(name, NAME) name
 #endif
 /*
  F90 wrapper
 */
 #if defined _FORTRAN_US
- #define F90_FUNC(name) name ## _
+#define F90_FUNC(name) name##_
 #else
- #define F90_FUNC(name) name
+#define F90_FUNC(name) name
 #endif
diff --git a/include/driver/yambo_driver.h b/include/driver/yambo_driver.h
index e6f33c8361..9f5d970ae4 100644
--- a/include/driver/yambo_driver.h
+++ b/include/driver/yambo_driver.h
@@ -1,13 +1,23 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2019 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
+#pragma once
+
+#include "kind.h"
+
 int load_environments(char *file_name);
-void options_control(struct options_struct options[],int *i_opt);
-void options_yambo(struct options_struct options[],int *i_opt);
-void options_projects(struct options_struct options[],int *i_opt);
-void options_interfaces(struct options_struct options[],int *i_opt);
-void options_ypp(struct options_struct options[],int *i_opt);
+
+typedef void OptionsFn(struct options_struct *options, int *i_opt);
+
+// Declare all option handler functions
+extern OptionsFn options_control;
+extern OptionsFn options_yambo;
+extern OptionsFn options_projects;
+extern OptionsFn options_interfaces;
+extern OptionsFn options_ypp;
+extern OptionsFn options_help;
+
diff --git a/include/headers/common/dev_defs.h b/include/headers/common/dev_defs.h
index 6189f408f1..49ff306286 100644
--- a/include/headers/common/dev_defs.h
+++ b/include/headers/common/dev_defs.h
@@ -44,7 +44,7 @@
 
 #if defined _OPENACC
 #  define DEV_ACC $acc
-#  define DEV_ACC_DEBUG !!!!
+#  define DEV_ACC_DEBUG $acc
 #else
 #  define DEV_ACC !!!!
 #  define DEV_ACC_DEBUG !!!!
diff --git a/include/headers/common/parallel_memory.h b/include/headers/common/parallel_memory.h
index da6c88b966..14cd22a7c6 100644
--- a/include/headers/common/parallel_memory.h
+++ b/include/headers/common/parallel_memory.h
@@ -7,22 +7,17 @@
  
 */
 
- use y_memory,       ONLY:MEM_global_mesg
- use parallel_m,   ONLY:PAR_COM_HOST
- use parallel_int, ONLY:PP_redux_wait,PP_wait
-
-#include<y_memory.h>
-
- integer(IPL) :: HOST_SIZE(1),LOCAL_SIZE(1)
-
 #if defined _MPI
 
  /* With MPI */ 
 
 #define CHECK_ALLOC_A(SIZE) \
-  LOCAL_SIZE=SIZE NEWLINE \
-  HOST_SIZE=SIZE NEWLINE \
+  HOST_SIZE=0 NEWLINE \
+  HOST_SIZE(1)=product(SIZE) NEWLINE \
   call PP_redux_wait(HOST_SIZE,COMM=PAR_COM_HOST%COMM) NEWLINE \
+  do isize_mem=2,size(HOST_size) NEWLINE \
+    HOST_size(isize_mem)=1 NEWLINE \
+  enddo NEWLINE \
   if (PAR_COM_HOST%CPU_id==0 ) then
 
 #define CHECK_ALLOC_B(x,HOST_SIZE) \
@@ -81,24 +76,40 @@
 
 #endif
 
+#define YAMBO_ALLOC1(x,SIZE) \
+    YAMBO_ALLOC(x,(SIZE(1)))
+#define YAMBO_ALLOC2(x,SIZE) \
+    YAMBO_ALLOC(x,(SIZE(1),SIZE(2)))
+#define YAMBO_ALLOC3(x,SIZE) \
+    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3)))
+#define YAMBO_ALLOC4(x,SIZE) \
+    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3),SIZE(4)))
+#define YAMBO_ALLOC5(x,SIZE) \
+    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3),SIZE(4),SIZE(5)))
+#define YAMBO_ALLOC6(x,SIZE) \
+    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3),SIZE(4),SIZE(5),SIZE(6)))
+
 #define YAMBO_PAR_ALLOC1(x,SIZE) \
   YAMBO_PAR_ALLOC_CHECK1(x,SIZE) NEWLINE \
-  YAMBO_ALLOC1(x,LOCAL_SIZE)
+  YAMBO_ALLOC1(x,SIZE)
 #define YAMBO_PAR_ALLOC2(x,SIZE) \
   YAMBO_PAR_ALLOC_CHECK2(x,SIZE) NEWLINE \
-  YAMBO_ALLOC2(x,LOCAL_SIZE)
+  YAMBO_ALLOC2(x,SIZE)
 #define YAMBO_PAR_ALLOC3(x,SIZE) \
   YAMBO_PAR_ALLOC_CHECK3(x,SIZE) NEWLINE \
-  YAMBO_ALLOC3(x,LOCAL_SIZE)
+  YAMBO_ALLOC3(x,SIZE)
 #define YAMBO_PAR_ALLOC4(x,SIZE) \
   YAMBO_PAR_ALLOC_CHECK4(x,SIZE) NEWLINE \
-  YAMBO_ALLOC4(x,LOCAL_SIZE)
+  YAMBO_ALLOC4(x,SIZE)
 #define YAMBO_PAR_ALLOC5(x,SIZE) \
   YAMBO_PAR_ALLOC_CHECK5(x,SIZE) NEWLINE \
-  YAMBO_ALLOC5(x,LOCAL_SIZE)
+  YAMBO_ALLOC5(x,SIZE)
 #define YAMBO_PAR_ALLOC6(x,SIZE) \
   YAMBO_PAR_ALLOC_CHECK6(x,SIZE) NEWLINE \
-  YAMBO_ALLOC6(x,LOCAL_SIZE)
+  YAMBO_ALLOC6(x,SIZE)
+
+#define SIMPLE_ALLOC(x,SIZE) \
+  allocate(x SIZE, &NEWLINE& stat=MEM_err,errmsg=MEM_msg)
 
 #define SIMPLE_ALLOC1(x,SIZE) \
     SIMPLE_ALLOC(x,(SIZE(1)))
diff --git a/include/headers/common/y_memory.h b/include/headers/common/y_memory.h
index ebda759bed..80a8a77d62 100644
--- a/include/headers/common/y_memory.h
+++ b/include/headers/common/y_memory.h
@@ -3,35 +3,10 @@
  
   Copyright (C) 2016 The Yambo Team
  
-  Authors (see AUTHORS file for details): HM AM
+  Authors (see AUTHORS file for details): HM AM DS
  
 */
 
- use pars,         ONLY:IPL
- use y_memory,     ONLY:MEM_err,MEM_msg,MEM_count,MEM_global_mesg
-#if defined _OPENACC || defined _OPENMP_GPU
- use y_memory,     ONLY:MEM_count_d
- use devxlib,      ONLY:devxlib_map,devxlib_unmap,devxlib_mapped,devxlib_memcpy_h2d
-#endif
-
- implicit none
-
-#define YAMBO_ALLOC1(x,SIZE) \
-    YAMBO_ALLOC(x,(SIZE(1)))
-#define YAMBO_ALLOC2(x,SIZE) \
-    YAMBO_ALLOC(x,(SIZE(1),SIZE(2)))
-#define YAMBO_ALLOC3(x,SIZE) \
-    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3)))
-#define YAMBO_ALLOC4(x,SIZE) \
-    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3),SIZE(4)))
-#define YAMBO_ALLOC5(x,SIZE) \
-    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3),SIZE(4),SIZE(5)))
-#define YAMBO_ALLOC6(x,SIZE) \
-    YAMBO_ALLOC(x,(SIZE(1),SIZE(2),SIZE(3),SIZE(4),SIZE(5),SIZE(6)))
-
-#define SIMPLE_ALLOC(x,SIZE) \
-  allocate(x SIZE, &NEWLINE& stat=MEM_err,errmsg=MEM_msg)
-
 #define YAMBO_ALLOC_P(x,SIZE) \
   allocate(x SIZE, &NEWLINE& stat=MEM_err,errmsg=MEM_msg)NEWLINE \
   if (     associated(x)) &NEWLINE& call MEM_count(QUOTES x QUOTES,x)NEWLINE \
diff --git a/include/headers/common/yambo_wfs.h b/include/headers/common/yambo_wfs.h
new file mode 100644
index 0000000000..d7fe0ef20d
--- /dev/null
+++ b/include/headers/common/yambo_wfs.h
@@ -0,0 +1,48 @@
+/*
+        Copyright (C) 2000-2020 the YAMBO team
+              http://www.yambo-code.org
+
+ Authors (see AUTHORS file for details): DS
+ 
+ This file is distributed under the terms of the GNU 
+ General Public License. You can redistribute it and/or 
+ modify it under the terms of the GNU General Public 
+ License as published by the Free Software Foundation; 
+ either version 2, or (at your option) any later version.
+
+ This program is distributed in the hope that it will 
+ be useful, but WITHOUT ANY WARRANTY; without even the 
+ implied warranty of MERCHANTABILITY or FITNESS FOR A 
+ PARTICULAR PURPOSE.  See the GNU General Public License 
+ for more details.
+
+ You should have received a copy of the GNU General Public 
+ License along with this program; if not, write to the Free 
+ Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+ MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+*/
+
+#ifdef _GAMMA_ONLY
+#  define WF_RSPACE   real
+#else
+#  define WF_RSPACE   complex
+#endif
+
+
+#ifdef _GAMMA_ONLY
+#  define wfconjg(x)   x
+#else
+#  define wfconjg(x)   conjg(x)
+#endif
+
+#ifdef _GAMMA_ONLY
+#  define wfaimag(x)   x
+#else
+#  define wfaimag(x)   aimag(x)
+#endif
+
+#ifdef _GAMMA_ONLY
+#  define wfcmplx(x,y)  real(x,y)
+#else
+#  define wfcmplx(x,y)  cmplx(x,y)
+#endif
diff --git a/include/headers/parser/gsl_complex.h b/include/headers/parser/gsl_complex.h
index f9fa463b1c..269d40e600 100644
--- a/include/headers/parser/gsl_complex.h
+++ b/include/headers/parser/gsl_complex.h
@@ -1,17 +1,17 @@
 /* complex/gsl_complex.h
- * 
+ *
  * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman, Brian Gough
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or (at
  * your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
@@ -23,80 +23,94 @@
 #undef __BEGIN_DECLS
 #undef __END_DECLS
 #ifdef __cplusplus
-# define __BEGIN_DECLS extern "C" {
-# define __END_DECLS }
+#define __BEGIN_DECLS \
+    extern "C"        \
+    {
+#define __END_DECLS }
 #else
-# define __BEGIN_DECLS /* empty */
-# define __END_DECLS /* empty */
+#define __BEGIN_DECLS /* empty */
+#define __END_DECLS   /* empty */
 #endif
 
 __BEGIN_DECLS
 
-
 /* two consecutive built-in types as a complex number */
-typedef double *       gsl_complex_packed ;
-typedef float *        gsl_complex_packed_float  ;
-typedef long double *  gsl_complex_packed_long_double ;
-
-typedef const double *       gsl_const_complex_packed ;
-typedef const float *        gsl_const_complex_packed_float  ;
-typedef const long double *  gsl_const_complex_packed_long_double ;
+typedef double* gsl_complex_packed;
+typedef float* gsl_complex_packed_float;
+typedef long double* gsl_complex_packed_long_double;
 
+typedef const double* gsl_const_complex_packed;
+typedef const float* gsl_const_complex_packed_float;
+typedef const long double* gsl_const_complex_packed_long_double;
 
 /* 2N consecutive built-in types as N complex numbers */
-typedef double *       gsl_complex_packed_array ;
-typedef float *        gsl_complex_packed_array_float  ;
-typedef long double *  gsl_complex_packed_array_long_double ;
-
-typedef const double *       gsl_const_complex_packed_array ;
-typedef const float *        gsl_const_complex_packed_array_float  ;
-typedef const long double *  gsl_const_complex_packed_array_long_double ;
+typedef double* gsl_complex_packed_array;
+typedef float* gsl_complex_packed_array_float;
+typedef long double* gsl_complex_packed_array_long_double;
 
+typedef const double* gsl_const_complex_packed_array;
+typedef const float* gsl_const_complex_packed_array_float;
+typedef const long double* gsl_const_complex_packed_array_long_double;
 
 /* Yes... this seems weird. Trust us. The point is just that
    sometimes you want to make it obvious that something is
    an output value. The fact that it lacks a 'const' may not
    be enough of a clue for people in some contexts.
  */
-typedef double *       gsl_complex_packed_ptr ;
-typedef float *        gsl_complex_packed_float_ptr  ;
-typedef long double *  gsl_complex_packed_long_double_ptr ;
-
-typedef const double *       gsl_const_complex_packed_ptr ;
-typedef const float *        gsl_const_complex_packed_float_ptr  ;
-typedef const long double *  gsl_const_complex_packed_long_double_ptr ;
+typedef double* gsl_complex_packed_ptr;
+typedef float* gsl_complex_packed_float_ptr;
+typedef long double* gsl_complex_packed_long_double_ptr;
 
+typedef const double* gsl_const_complex_packed_ptr;
+typedef const float* gsl_const_complex_packed_float_ptr;
+typedef const long double* gsl_const_complex_packed_long_double_ptr;
 
 typedef struct
-  {
+{
     long double dat[2];
-  }
-gsl_complex_long_double;
+} gsl_complex_long_double;
 
 typedef struct
-  {
+{
     double dat[2];
-  }
-gsl_complex;
+} gsl_complex;
 
 typedef struct
-  {
+{
     float dat[2];
-  }
-gsl_complex_float;
+} gsl_complex_float;
 
-#define GSL_REAL(z)     ((z).dat[0])
-#define GSL_IMAG(z)     ((z).dat[1])
+#define GSL_REAL(z) ((z).dat[0])
+#define GSL_IMAG(z) ((z).dat[1])
 #define GSL_COMPLEX_P(zp) ((zp)->dat)
-#define GSL_COMPLEX_P_REAL(zp)  ((zp)->dat[0])
-#define GSL_COMPLEX_P_IMAG(zp)  ((zp)->dat[1])
-#define GSL_COMPLEX_EQ(z1,z2) (((z1).dat[0] == (z2).dat[0]) && ((z1).dat[1] == (z2).dat[1]))
-
-#define GSL_SET_COMPLEX(zp,x,y) do {(zp)->dat[0]=(x); (zp)->dat[1]=(y);} while(0)
-#define GSL_SET_REAL(zp,x) do {(zp)->dat[0]=(x);} while(0)
-#define GSL_SET_IMAG(zp,y) do {(zp)->dat[1]=(y);} while(0)
-
-#define GSL_SET_COMPLEX_PACKED(zp,n,x,y) do {*((zp)+2*(n))=(x); *((zp)+(2*(n)+1))=(y);} while(0)
+#define GSL_COMPLEX_P_REAL(zp) ((zp)->dat[0])
+#define GSL_COMPLEX_P_IMAG(zp) ((zp)->dat[1])
+#define GSL_COMPLEX_EQ(z1, z2) \
+    (((z1).dat[0] == (z2).dat[0]) && ((z1).dat[1] == (z2).dat[1]))
+
+#define GSL_SET_COMPLEX(zp, x, y) \
+    do                            \
+    {                             \
+        (zp)->dat[0] = (x);       \
+        (zp)->dat[1] = (y);       \
+    } while (0)
+#define GSL_SET_REAL(zp, x) \
+    do                      \
+    {                       \
+        (zp)->dat[0] = (x); \
+    } while (0)
+#define GSL_SET_IMAG(zp, y) \
+    do                      \
+    {                       \
+        (zp)->dat[1] = (y); \
+    } while (0)
+
+#define GSL_SET_COMPLEX_PACKED(zp, n, x, y) \
+    do                                      \
+    {                                       \
+        *((zp) + 2 * (n)) = (x);            \
+        *((zp) + (2 * (n) + 1)) = (y);      \
+    } while (0)
 
 __END_DECLS
 
diff --git a/include/headers/parser/gsl_complex_math.h b/include/headers/parser/gsl_complex_math.h
index 77e25d7308..5521eb1185 100644
--- a/include/headers/parser/gsl_complex_math.h
+++ b/include/headers/parser/gsl_complex_math.h
@@ -1,17 +1,17 @@
 /* complex/gsl_complex_math.h
- * 
+ *
  * Copyright (C) 1996, 1997, 1998, 1999, 2000 Jorma Olavi T�htinen, Brian Gough
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or (at
  * your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
@@ -24,116 +24,118 @@
 #undef __BEGIN_DECLS
 #undef __END_DECLS
 #ifdef __cplusplus
-#define __BEGIN_DECLS extern "C" {
+#define __BEGIN_DECLS \
+    extern "C"        \
+    {
 #define __END_DECLS }
 #else
-#define __BEGIN_DECLS           /* empty */
-#define __END_DECLS             /* empty */
+#define __BEGIN_DECLS /* empty */
+#define __END_DECLS   /* empty */
 #endif
 
 __BEGIN_DECLS
 
 /* Complex numbers */
 
-gsl_complex gsl_complex_rect (double x, double y);  /* r= real+i*imag */
-gsl_complex gsl_complex_polar (double r, double theta); /* r= r e^(i theta) */
+gsl_complex gsl_complex_rect(double x, double y);      /* r= real+i*imag */
+gsl_complex gsl_complex_polar(double r, double theta); /* r= r e^(i theta) */
 
 #ifdef HAVE_INLINE
-extern inline gsl_complex
-gsl_complex_rect (double x, double y)
-{				/* return z = x + i y */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, x, y);
-  return z;
+extern inline gsl_complex gsl_complex_rect(double x, double y)
+{ /* return z = x + i y */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, x, y);
+    return z;
 }
 #endif
 
-#define GSL_COMPLEX_ONE (gsl_complex_rect(1.0,0.0))
-#define GSL_COMPLEX_ZERO (gsl_complex_rect(0.0,0.0))
-#define GSL_COMPLEX_NEGONE (gsl_complex_rect(-1.0,0.0))
+#define GSL_COMPLEX_ONE (gsl_complex_rect(1.0, 0.0))
+#define GSL_COMPLEX_ZERO (gsl_complex_rect(0.0, 0.0))
+#define GSL_COMPLEX_NEGONE (gsl_complex_rect(-1.0, 0.0))
 
 /* Properties of complex numbers */
 
-double gsl_complex_arg (gsl_complex z); /* return arg(z), -pi< arg(z) <=+pi */
-double gsl_complex_abs (gsl_complex z);   /* return |z|   */
-double gsl_complex_abs2 (gsl_complex z);  /* return |z|^2 */
-double gsl_complex_logabs (gsl_complex z); /* return log|z| */
+double gsl_complex_arg(gsl_complex z);    /* return arg(z), -pi< arg(z) <=+pi */
+double gsl_complex_abs(gsl_complex z);    /* return |z|   */
+double gsl_complex_abs2(gsl_complex z);   /* return |z|^2 */
+double gsl_complex_logabs(gsl_complex z); /* return log|z| */
 
 /* Complex arithmetic operators */
 
-gsl_complex gsl_complex_add (gsl_complex a, gsl_complex b);  /* r=a+b */
-gsl_complex gsl_complex_sub (gsl_complex a, gsl_complex b);  /* r=a-b */
-gsl_complex gsl_complex_mul (gsl_complex a, gsl_complex b);  /* r=a*b */
-gsl_complex gsl_complex_div (gsl_complex a, gsl_complex b);  /* r=a/b */
-                                                           
-gsl_complex gsl_complex_add_real (gsl_complex a, double x);  /* r=a+x */
-gsl_complex gsl_complex_sub_real (gsl_complex a, double x);  /* r=a-x */
-gsl_complex gsl_complex_mul_real (gsl_complex a, double x);  /* r=a*x */
-gsl_complex gsl_complex_div_real (gsl_complex a, double x);  /* r=a/x */
-
-gsl_complex gsl_complex_add_imag (gsl_complex a, double y);  /* r=a+iy */
-gsl_complex gsl_complex_sub_imag (gsl_complex a, double y);  /* r=a-iy */
-gsl_complex gsl_complex_mul_imag (gsl_complex a, double y);  /* r=a*iy */
-gsl_complex gsl_complex_div_imag (gsl_complex a, double y);  /* r=a/iy */
-
-gsl_complex gsl_complex_conjugate (gsl_complex z);  /* r=conj(z) */
-gsl_complex gsl_complex_inverse (gsl_complex a);    /* r=1/a */
-gsl_complex gsl_complex_negative (gsl_complex a);    /* r=-a */
+gsl_complex gsl_complex_add(gsl_complex a, gsl_complex b); /* r=a+b */
+gsl_complex gsl_complex_sub(gsl_complex a, gsl_complex b); /* r=a-b */
+gsl_complex gsl_complex_mul(gsl_complex a, gsl_complex b); /* r=a*b */
+gsl_complex gsl_complex_div(gsl_complex a, gsl_complex b); /* r=a/b */
+
+gsl_complex gsl_complex_add_real(gsl_complex a, double x); /* r=a+x */
+gsl_complex gsl_complex_sub_real(gsl_complex a, double x); /* r=a-x */
+gsl_complex gsl_complex_mul_real(gsl_complex a, double x); /* r=a*x */
+gsl_complex gsl_complex_div_real(gsl_complex a, double x); /* r=a/x */
+
+gsl_complex gsl_complex_add_imag(gsl_complex a, double y); /* r=a+iy */
+gsl_complex gsl_complex_sub_imag(gsl_complex a, double y); /* r=a-iy */
+gsl_complex gsl_complex_mul_imag(gsl_complex a, double y); /* r=a*iy */
+gsl_complex gsl_complex_div_imag(gsl_complex a, double y); /* r=a/iy */
+
+gsl_complex gsl_complex_conjugate(gsl_complex z); /* r=conj(z) */
+gsl_complex gsl_complex_inverse(gsl_complex a);   /* r=1/a */
+gsl_complex gsl_complex_negative(gsl_complex a);  /* r=-a */
 
 /* Elementary Complex Functions */
 
-gsl_complex gsl_complex_sqrt (gsl_complex z);  /* r=sqrt(z) */
-gsl_complex gsl_complex_sqrt_real (double x);  /* r=sqrt(x) (x<0 ok) */
+gsl_complex gsl_complex_sqrt(gsl_complex z); /* r=sqrt(z) */
+gsl_complex gsl_complex_sqrt_real(double x); /* r=sqrt(x) (x<0 ok) */
 
-gsl_complex gsl_complex_pow (gsl_complex a, gsl_complex b);  /* r=a^b */
-gsl_complex gsl_complex_pow_real (gsl_complex a, double b);  /* r=a^b */
+gsl_complex gsl_complex_pow(gsl_complex a, gsl_complex b); /* r=a^b */
+gsl_complex gsl_complex_pow_real(gsl_complex a, double b); /* r=a^b */
 
-gsl_complex gsl_complex_exp (gsl_complex a);    /* r=exp(a) */
-gsl_complex gsl_complex_log (gsl_complex a);    /* r=log(a) (base e) */
-gsl_complex gsl_complex_log10 (gsl_complex a);  /* r=log10(a) (base 10) */
-gsl_complex gsl_complex_log_b (gsl_complex a, gsl_complex b);   /* r=log_b(a) (base=b) */
+gsl_complex gsl_complex_exp(gsl_complex a);   /* r=exp(a) */
+gsl_complex gsl_complex_log(gsl_complex a);   /* r=log(a) (base e) */
+gsl_complex gsl_complex_log10(gsl_complex a); /* r=log10(a) (base 10) */
+gsl_complex gsl_complex_log_b(gsl_complex a,
+                              gsl_complex b); /* r=log_b(a) (base=b) */
 
 /* Complex Trigonometric Functions */
 
-gsl_complex gsl_complex_sin (gsl_complex a);  /* r=sin(a) */
-gsl_complex gsl_complex_cos (gsl_complex a);  /* r=cos(a) */
-gsl_complex gsl_complex_sec (gsl_complex a);  /* r=sec(a) */
-gsl_complex gsl_complex_csc (gsl_complex a);  /* r=csc(a) */
-gsl_complex gsl_complex_tan (gsl_complex a);  /* r=tan(a) */
-gsl_complex gsl_complex_cot (gsl_complex a);  /* r=cot(a) */
+gsl_complex gsl_complex_sin(gsl_complex a); /* r=sin(a) */
+gsl_complex gsl_complex_cos(gsl_complex a); /* r=cos(a) */
+gsl_complex gsl_complex_sec(gsl_complex a); /* r=sec(a) */
+gsl_complex gsl_complex_csc(gsl_complex a); /* r=csc(a) */
+gsl_complex gsl_complex_tan(gsl_complex a); /* r=tan(a) */
+gsl_complex gsl_complex_cot(gsl_complex a); /* r=cot(a) */
 
 /* Inverse Complex Trigonometric Functions */
 
-gsl_complex gsl_complex_arcsin (gsl_complex a);  /* r=arcsin(a) */
-gsl_complex gsl_complex_arcsin_real (double a);  /* r=arcsin(a) */
-gsl_complex gsl_complex_arccos (gsl_complex a);  /* r=arccos(a) */
-gsl_complex gsl_complex_arccos_real (double a);  /* r=arccos(a) */
-gsl_complex gsl_complex_arcsec (gsl_complex a);  /* r=arcsec(a) */
-gsl_complex gsl_complex_arcsec_real (double a);  /* r=arcsec(a) */
-gsl_complex gsl_complex_arccsc (gsl_complex a);  /* r=arccsc(a) */
-gsl_complex gsl_complex_arccsc_real (double a);  /* r=arccsc(a) */
-gsl_complex gsl_complex_arctan (gsl_complex a);  /* r=arctan(a) */
-gsl_complex gsl_complex_arccot (gsl_complex a);  /* r=arccot(a) */
+gsl_complex gsl_complex_arcsin(gsl_complex a); /* r=arcsin(a) */
+gsl_complex gsl_complex_arcsin_real(double a); /* r=arcsin(a) */
+gsl_complex gsl_complex_arccos(gsl_complex a); /* r=arccos(a) */
+gsl_complex gsl_complex_arccos_real(double a); /* r=arccos(a) */
+gsl_complex gsl_complex_arcsec(gsl_complex a); /* r=arcsec(a) */
+gsl_complex gsl_complex_arcsec_real(double a); /* r=arcsec(a) */
+gsl_complex gsl_complex_arccsc(gsl_complex a); /* r=arccsc(a) */
+gsl_complex gsl_complex_arccsc_real(double a); /* r=arccsc(a) */
+gsl_complex gsl_complex_arctan(gsl_complex a); /* r=arctan(a) */
+gsl_complex gsl_complex_arccot(gsl_complex a); /* r=arccot(a) */
 
 /* Complex Hyperbolic Functions */
 
-gsl_complex gsl_complex_sinh (gsl_complex a);  /* r=sinh(a) */
-gsl_complex gsl_complex_cosh (gsl_complex a);  /* r=coshh(a) */
-gsl_complex gsl_complex_sech (gsl_complex a);  /* r=sech(a) */
-gsl_complex gsl_complex_csch (gsl_complex a);  /* r=csch(a) */
-gsl_complex gsl_complex_tanh (gsl_complex a);  /* r=tanh(a) */
-gsl_complex gsl_complex_coth (gsl_complex a);  /* r=coth(a) */
+gsl_complex gsl_complex_sinh(gsl_complex a); /* r=sinh(a) */
+gsl_complex gsl_complex_cosh(gsl_complex a); /* r=coshh(a) */
+gsl_complex gsl_complex_sech(gsl_complex a); /* r=sech(a) */
+gsl_complex gsl_complex_csch(gsl_complex a); /* r=csch(a) */
+gsl_complex gsl_complex_tanh(gsl_complex a); /* r=tanh(a) */
+gsl_complex gsl_complex_coth(gsl_complex a); /* r=coth(a) */
 
 /* Inverse Complex Hyperbolic Functions */
 
-gsl_complex gsl_complex_arcsinh (gsl_complex a);  /* r=arcsinh(a) */
-gsl_complex gsl_complex_arccosh (gsl_complex a);  /* r=arccosh(a) */
-gsl_complex gsl_complex_arccosh_real (double a);  /* r=arccosh(a) */
-gsl_complex gsl_complex_arcsech (gsl_complex a);  /* r=arcsech(a) */
-gsl_complex gsl_complex_arccsch (gsl_complex a);  /* r=arccsch(a) */
-gsl_complex gsl_complex_arctanh (gsl_complex a);  /* r=arctanh(a) */
-gsl_complex gsl_complex_arctanh_real (double a);  /* r=arctanh(a) */
-gsl_complex gsl_complex_arccoth (gsl_complex a);  /* r=arccoth(a) */
+gsl_complex gsl_complex_arcsinh(gsl_complex a); /* r=arcsinh(a) */
+gsl_complex gsl_complex_arccosh(gsl_complex a); /* r=arccosh(a) */
+gsl_complex gsl_complex_arccosh_real(double a); /* r=arccosh(a) */
+gsl_complex gsl_complex_arcsech(gsl_complex a); /* r=arcsech(a) */
+gsl_complex gsl_complex_arccsch(gsl_complex a); /* r=arccsch(a) */
+gsl_complex gsl_complex_arctanh(gsl_complex a); /* r=arctanh(a) */
+gsl_complex gsl_complex_arctanh_real(double a); /* r=arctanh(a) */
+gsl_complex gsl_complex_arccoth(gsl_complex a); /* r=arccoth(a) */
 
 __END_DECLS
 
diff --git a/include/headers/parser/parser.h b/include/headers/parser/parser.h
index 9c23ea93ea..532782962e 100644
--- a/include/headers/parser/parser.h
+++ b/include/headers/parser/parser.h
@@ -46,12 +46,18 @@ int parse_block_complex(char *name, int l, int col, gsl_complex *r);
 int parse_block_string(char *name, int l, int col, char **r);
 
 /* from parse_exp.c */
-typedef struct parse_result{
-  union {
-	  gsl_complex c;
-		char *s;
-	} value;
-	enum {PR_CMPLX, PR_STR} type;
+typedef struct parse_result
+{
+    union
+    {
+        gsl_complex c;
+        char *s;
+    } value;
+    enum
+    {
+        PR_CMPLX,
+        PR_STR
+    } type;
 } parse_result;
 
 int parse_exp(char *exp, parse_result *t);
diff --git a/include/headers/parser/symbols.h b/include/headers/parser/symbols.h
index ccc5cd0d89..78883fc518 100644
--- a/include/headers/parser/symbols.h
+++ b/include/headers/parser/symbols.h
@@ -22,41 +22,49 @@
 
 #include <gsl_complex.h>
 
-typedef struct sym_block_line{
-	int n;
-	char **fields;
-}sym_block_line;
+typedef struct sym_block_line
+{
+    int n;
+    char **fields;
+} sym_block_line;
 
-typedef struct sym_block{
-	int n;
-	sym_block_line *lines;
-}sym_block;
+typedef struct sym_block
+{
+    int n;
+    sym_block_line *lines;
+} sym_block;
 
-typedef enum{
-	S_CMPLX, S_STR, S_BLOCK, S_FNCT
-}symrec_type;
+typedef enum
+{
+    S_CMPLX,
+    S_STR,
+    S_BLOCK,
+    S_FNCT
+} symrec_type;
 
 /* Data type for links in the chain of symbols. */
-typedef struct symrec{
-	char *name;                  /* name of symbol */
-	symrec_type type;            /* type of symbol: either VAR or FNCT */
+typedef struct symrec
+{
+    char *name;       /* name of symbol */
+    symrec_type type; /* type of symbol: either VAR or FNCT */
 
-	union {
-		gsl_complex c;             /* value of a VAR */
-		char *str;                 /* value of a STRING */
-		sym_block *block;          /* to store blocks */
-		gsl_complex (*fnctptr)(gsl_complex);  /* value of a FNCT */
-	} value;
+    union
+    {
+        gsl_complex c;            /* value of a VAR */
+        char *str;                /* value of a STRING */
+        sym_block *block;         /* to store blocks */
+        gsl_complex (*fnctptr)(gsl_complex);  /* value of a FNCT */
+    } value;
 
-	struct symrec *next;         /* link field */
+    struct symrec *next; /* link field */
 } symrec;
 
 /* The symbol table: a chain of struct symrec. */
 extern symrec *sym_table;
 
-symrec *putsym (char *sym_name, symrec_type sym_type);
-symrec *getsym (char *sym_name);
-int rmsym (char *sym_name);
+symrec *putsym(char *sym_name, symrec_type sym_type);
+symrec *getsym(char *sym_name);
+int rmsym(char *sym_name);
 void sym_init_table();
 void sym_clear_reserved();
 void sym_end_table();
diff --git a/include/version/version.m4 b/include/version/version.m4
index 74319f4890..55a41fef90 100644
--- a/include/version/version.m4
+++ b/include/version/version.m4
@@ -1,9 +1,9 @@
-AC_INIT(Yambo, 5.4.0 r.24143 h.77ed66f02, yambo@yambo-code.org)
+AC_INIT(Lumen, 2.0.0, https://gitlab.com/lumen-code/lumen/-/issues, lumen ,www.lumen-code.org)
 SVERSION="5"
-SSUBVERSION="4"
+SSUBVERSION="3"
 SPATCHLEVEL="0"
-SREVISION="24143"
-SHASH="77ed66f02"
+SREVISION="26345"
+SHASH="5d014f95da"
 AC_SUBST(SVERSION)
 AC_SUBST(SSUBVERSION)
 AC_SUBST(SPATCHLEVEL)
diff --git a/interfaces/a2y/.objects b/interfaces/a2y/.objects
index 532c609d6a..af9bf0d8f1 100644
--- a/interfaces/a2y/.objects
+++ b/interfaces/a2y/.objects
@@ -1 +1 @@
-objs = netcdf_data.o a2y.o a2y_db1.o a2y_wf.o a2y_kb_pp.o
+objs = netcdf_data.o a2y.o a2y_db1.o a2y_wf.o a2y_kb_pp.o a2y_gkkp.o
diff --git a/interfaces/a2y/DOUBLE_project.dep b/interfaces/a2y/DOUBLE_project.dep
index 2672db47c2..e008ad9ccf 100644
--- a/interfaces/a2y/DOUBLE_project.dep
+++ b/interfaces/a2y/DOUBLE_project.dep
@@ -1,5 +1,6 @@
  a2y.o
  a2y_db1.o
+ a2y_gkkp.o
  a2y_kb_pp.o
  a2y_wf.o
  netcdf_data.o
diff --git a/interfaces/a2y/a2y.F b/interfaces/a2y/a2y.F
index ef8b11f5cd..391b39c5fa 100644
--- a/interfaces/a2y/a2y.F
+++ b/interfaces/a2y/a2y.F
@@ -9,21 +9,23 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  !
  use netcdf
  use netcdf_data
+ use stderr,              ONLY: STRING_remove
  use vec_operate,         ONLY: sort
- use pars,                ONLY: SP,cZERO
+ use pars,                ONLY: SP,lchlen,cZERO
  use LOGO,                ONLY: pickup_a_random
- use com,                 ONLY: msg,core_io_path
+ use com,                 ONLY: msg,core_io_path,jobstr
  use it_m,                ONLY: nrnlvls,rnlvls,rstatus
  use parallel_m,          ONLY: ncpu,myid
  use electrons,           ONLY: levels,E_reset,n_spinor,n_sp_pol
  use D_lattice,           ONLY: n_atoms_species_max
  use pseudo,              ONLY: PP_free,pp_n_l_times_proj_max
- use R_lattice,           ONLY: bz_samp,bz_samp_reset,nkibz
+ use R_lattice,           ONLY: bz_samp,bz_samp_reset,nkibz,nqibz,nqbz
  use wave_func,           ONLY: wf_ncx,io_WF,wf_nb_io_groups,wf_nb_io,wf_igk,wf_nc_k
  use IO_int,              ONLY: io_control,IO_and_Messaging_switch
- use IO_m,                ONLY: OP_WR_CL,NONE,OP_APP_CL,serial_number
+ use IO_m,                ONLY: OP_WR,WR,WR_CL,OP_WR_CL,NONE,OP_APP_CL,serial_number
  use mod_com2y,           ONLY: interface_presets, force_noWFs
  use mod_wf2y,            ONLY: WF_splitter
+ use elph,                ONLY: elph_nQ
  !
  implicit none
  integer,          intent(in) :: lnstr,iind,iod,ijs,np,pid,icd
@@ -38,15 +40,16 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  type(levels)                 :: en
  type(bz_samp)                :: k
  !
- integer                      :: ID,io_err,i_sp_pol,ik,ifrag,ib_grp
- integer,     external        :: io_DB1, io_KB_abinit
+ integer                      :: ID,io_err,i_sp_pol,ik,iq,ifrag,ib_grp
+ integer,     external        :: io_DB1, io_KB_abinit, io_ELPH
+ character(lchlen)            :: wf_file,gkkp_file
  complex(SP), allocatable     :: wf_disk(:,:,:)
  !
  logical, external            :: file_exists
  ! 
  ! Work Space
  !
- integer                     :: ncid, netcdf_error
+ integer                     :: ncid_wf, ncid_gkkp, grpid_gkkp, netcdf_error
  !
  ! Presets
  !=========
@@ -100,32 +103,34 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  !
  call msg('s','Checking input file ...')
  !
- if      (index(trim(inf),'KSS')/=0) then
+ wf_file=trim(inf)
+ !
+ if      (index(trim(wf_file),'KSS')/=0) then
    NETCDF_kind='KSS'
- else if (index(trim(inf),'WFK')/=0) then
+ else if (index(trim(wf_file),'WFK')/=0) then
    NETCDF_kind='WFK'
+   gkkp_file=STRING_remove(wf_file,"WFK",replace="GKKP")
  else
    call msg('ln','File kind not supported! Use either KSS or WFK filename.')
  end if
  !
- call msg('s','NETCDF file ',trim(inf))
+ call msg('s','DBs path set to ',trim(core_io_path))
  !
- if(.not.file_exists(trim(inf))) then
-   call msg('ln','File not found! Specify -F filename.')
+ if(.not.file_exists(trim(wf_file))) then
+   call msg('ln','File '//trim(wf_file)//' found! Specify -F filename.')
    return
  endif
  !
- call msg('s','DBs path set to ',trim(core_io_path))
- !
  ! Open ETSF file for reading
  !
- netcdf_error = nf90_open(path = trim(inf), mode = NF90_NOWRITE, ncid = ncid)
- call netcdf_check(ncid,netcdf_error, 0)
+ call msg('s','NETCDF Wave-functions file ',trim(wf_file))
+ netcdf_error = nf90_open(path = trim(wf_file), mode = NF90_NOWRITE, ncid = ncid_wf)
+ call netcdf_check(ncid_wf,netcdf_error, 0)
  !
  ! DB1
  !==========================================
  !
- call a2y_db1(en,k,ncid)
+ call a2y_db1(en,k,ncid_wf)
  !
  if (force_noWFs) then
    !
@@ -136,6 +141,7 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
    io_err=io_DB1(en,k,ID)
    call msg('ln','done ==')
    return
+   !
  endif
  !
  call WF_splitter()
@@ -164,7 +170,7 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
        !
        ! Read from the etsf-nc file
        !
-       call a2y_wf(wf_disk,i_sp_pol,ik,ib_grp,ncid)
+       call a2y_wf(wf_disk,i_sp_pol,ik,ib_grp,ncid_wf)
        ! 
        ! Write to the YAMBO WF database
        ! 
@@ -172,9 +178,9 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
        if (ifrag> 1.or. ib_grp> 1) call io_control(ACTION=OP_APP_CL,COM=NONE,SEC=(/ifrag+1,ib_grp/),ID=ID)
        io_err=io_WF(ID,wf_disk)
        !
-       if(trim(NETCDF_kind)=='KSS') call a2y_kb_pp(ik,ncid) 
+       if(trim(NETCDF_kind)=='KSS') call a2y_kb_pp(ik,ncid_wf) 
        if( pp_n_l_times_proj_max/=-1 ) then
-         if(trim(NETCDF_kind)=='WFK') call a2y_kb_pp_wfk(ik,i_sp_pol,ncid) 
+         if(trim(NETCDF_kind)=='WFK') call a2y_kb_pp_wfk(ik,i_sp_pol,ncid_wf) 
          if (n_atoms_species_max>0.and.ib_grp==1) then
            if (ifrag==1) call io_control(ACTION=OP_WR_CL,COM=NONE,SEC=(/1,2/),ID=ID)
            if (ifrag> 1) call io_control(ACTION=OP_APP_CL,COM=NONE,SEC=(/ifrag+1/),ID=ID)
@@ -204,10 +210,54 @@ integer function a2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  call msg('s',' == Writing DB1 ...')
  call io_control(ACTION=OP_WR_CL,COM=NONE,SEC=(/1,2/),ID=ID)
  io_err=io_DB1(en,k,ID)
- call msg('ln','done ==')
+ call msg('l','done ==')
  !
  ! Close file
  !
- netcdf_error = nf90_close(ncid)
+ netcdf_error = nf90_close(ncid_wf)
+ !
+ if(file_exists(trim(gkkp_file))) then
+   !
+   jobstr="GKKP_abinit"
+   !
+   call msg('s','NETCDF Gkkp file ',trim(gkkp_file))
+   netcdf_error = nf90_open(path = trim(gkkp_file), mode = NF90_NOWRITE, ncid = ncid_gkkp)
+   call netcdf_check(ncid_gkkp,netcdf_error, 0)
+   !
+   do i_sp_pol=1,n_sp_pol
+     !
+     call a2y_gkkp_header(i_sp_pol,ncid_gkkp,grpid_gkkp)
+     !
+     call msg('s',' == Writing DB3 (gkkp) ...')
+     !
+     do iq=1,elph_nQ
+       !
+       ! Read from the etsf-nc file
+       !
+       call a2y_gkkp(iq,i_sp_pol,ncid_gkkp,grpid_gkkp)
+       !
+       if(i_sp_pol==1.and.iq==1) then
+         call io_control(ACTION=OP_WR,SEC=(/1/),ID=ID)
+         if(elph_nQ==nqibz) io_err=io_ELPH(ID,'gkkp')
+         if(elph_nQ==nqbz)  io_err=io_ELPH(ID,'gkkp_expanded')
+       endif
+       !
+       ifrag=iq+elph_nQ*(i_sp_pol-1)
+       if(ifrag <elph_nQ*n_sp_pol) call io_control(ACTION=WR,   SEC=(/ifrag+1/),ID=ID)
+       if(ifrag==elph_nQ*n_sp_pol) call io_control(ACTION=WR_CL,SEC=(/ifrag+1/),ID=ID)
+       if(elph_nQ==nqibz) io_err=io_ELPH(ID,'gkkp')
+       if(elph_nQ==nqbz)  io_err=io_ELPH(ID,'gkkp_expanded')
+       !
+     enddo
+     !
+   enddo
+   !
+   netcdf_error = nf90_close(ncid_gkkp)
+   !
+   call msg('l','done ==')
+   !
+ endif
+ !
+ call msg('ln',' ')
  !
 end function
diff --git a/interfaces/a2y/a2y_db1.F b/interfaces/a2y/a2y_db1.F
index 212cf8f274..8b8d523dfe 100644
--- a/interfaces/a2y/a2y_db1.F
+++ b/interfaces/a2y/a2y_db1.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CH DS HM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine a2y_db1(en,k,ncid)
  !
  use netcdf
@@ -17,8 +21,9 @@ subroutine a2y_db1(en,k,ncid)
  use R_lattice,            only : ng_vec, bz_samp, nkibz
  use D_lattice,            only : nsym, n_atoms, n_atomic_species, atom_mass
  use mod_com2y,            only : print_interface_dimensions,  artificial_spin_pol
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),   intent(inout) :: en
  type(bz_samp),  intent(inout) :: k
@@ -644,7 +649,9 @@ subroutine import_gwdata_group_kss
     use pseudo,                only : PP_alloc_abinit, pp_n_l_times_proj_max,&
 &                                     pp_n_l_comp, pp_kbs,pp_kb,pp_kbd,&
 &                                     pp_table,l_many_proj,pp_n_l_max
-#include<y_memory.h>
+    use y_memory_alloc
+    !
+    implicit none
     real(SP),allocatable :: pp_kbs_(:,:)
     integer :: max_number_of_angular_momenta,max_number_of_projectors
     integer :: ia, nproj, ip, il, varid, start(3), count(3)
@@ -703,7 +710,9 @@ subroutine import_gwdata_group_wfk
     use pseudo,                only : PP_alloc_abinit, pp_n_l_times_proj_max,&
 &                                     pp_n_l_comp, pp_kbs,pp_kb,pp_kbd,&
 &                                     pp_table,l_many_proj,pp_n_l_max
-#include<y_memory.h>
+    use y_memory_alloc
+    !
+    implicit none
     real(SP),allocatable :: pp_kbs_(:,:)
     integer :: lmnmax
     integer, allocatable :: indlmn(:,:,:)
diff --git a/interfaces/a2y/a2y_gkkp.F b/interfaces/a2y/a2y_gkkp.F
new file mode 100644
index 0000000000..884d4ea8bd
--- /dev/null
+++ b/interfaces/a2y/a2y_gkkp.F
@@ -0,0 +1,335 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): DS
+!
+! headers
+!
+#include<y_memory.h>
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine a2y_gkkp_header(i_sp_pol,ncid,grpid)
+ !
+ use netcdf
+ use netcdf_data
+ use pars,                  ONLY:DP,SP,cZERO
+ use stderr,                ONLY:intc
+ use com,                   ONLY:msg
+ use ALLOC,                 ONLY:ELPH_alloc
+ use R_lattice,             ONLY:nkibz,nkbz,nqibz,nqbz,k_pt,k_sstar,q_pt,q_sstar
+ use D_lattice,             ONLY:n_atoms
+ use ELPH,                  ONLY:ph_modes,elph_nb,elph_bands,l_GKKP_hosts_bare_dV,l_GKKP_hosts_DW,&
+&                                elph_nQ,elph_nk_bz,elph_grids_are_expanded,elph_use_q_grid,&
+&                                PH_qpt,PH_kpt_bz,GKKP,PH_freqs_sq
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,     intent(in)  :: i_sp_pol,ncid
+ integer,     intent(out) :: grpid
+ !
+ ! Work Space
+ !
+ integer   :: varid,nk_gkkp,nq_gkkp,netcdf_error
+ !
+ l_GKKP_hosts_bare_dV=.false.
+ l_GKKP_hosts_DW     =.false.
+ elph_use_q_grid     =.true.
+ !
+ ! global dimensions
+ ! TODO: add checks vs WFK file
+ !
+ !n_atoms     = netcdf_get_dimension(ncid, "natom")
+ ph_modes    = netcdf_get_dimension(ncid, "natom3")
+ ! 
+ nkibz = netcdf_get_dimension(ncid, "gstore_nkibz")
+ nkbz  = netcdf_get_dimension(ncid, "gstore_nkbz" )
+ nqibz = netcdf_get_dimension(ncid, "gstore_nqibz")
+ nqbz  = netcdf_get_dimension(ncid, "gstore_nqbz" )
+ !
+ ! dimensions specific of gkk_spin group
+ !
+ netcdf_error = nf90_inq_grp_ncid(ncid, "gqk_spin"//trim(intc(i_sp_pol)), grpid)
+ !
+ elph_nb    = netcdf_get_dimension(grpid, "nb")
+ elph_nk_bz = netcdf_get_dimension(grpid, "glob_nk")
+ elph_nQ    = netcdf_get_dimension(grpid, "glob_nq")
+ !
+ elph_bands=(/1,elph_nb/)
+ !
+ call msg('s','ELPH dims',(/elph_bands,ph_modes,elph_nk_bz,elph_nQ/))
+ !
+ call ELPH_alloc("PHONONS dV LEVELS",GKKP=GKKP)
+ !
+ ! K-points
+ !
+ call msg('s','ELPH K-points mesh (BZ) ...')
+ call import_kpoints_group()
+ call msg('l','done')
+ !
+ ! Q-points
+ !
+ call msg('s','ELPH Q-points mesh (IBZ) ...')
+ call import_qpoints_group()
+ call msg('l','done')
+ !
+ call import_ph_frequencies()
+ !
+ contains
+  !
+  ! This is a duplication, to be fixed
+  !
+  ! K-points (convert to new units)
+  !===========================================================
+  subroutine import_kpoints_group()
+   use pars,                 only : pi
+   use R_lattice,            only : b
+   use D_lattice,            only : alat
+   implicit none
+   integer :: ik
+   real(DP), allocatable :: gstore_kbz(:,:)
+   real(DP), allocatable :: abi_kibz(:,:)
+   integer,  allocatable :: abi_kbz2ibz(:,:)
+   !
+   ! k-points of gstore, for now always in the BZ
+   !==============================================
+   allocate(gstore_kbz(3, elph_nk_bz))
+   !
+   varid = netcdf_inquire_varid(ncid, "gstore_kbz")
+   netcdf_error = nf90_get_var(ncid, varid, gstore_kbz)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   do ik = 1,elph_nk_bz
+     PH_kpt_bz(ik,:)=matmul(transpose(b),gstore_kbz(:,ik))*alat(:)/2.0_SP/pi
+   enddo
+   !
+   deallocate(gstore_kbz)
+   !
+   ! k-points in the IBZ
+   !=====================
+   allocate(abi_kibz(3, nkibz))
+   !
+   varid = netcdf_inquire_varid(ncid, "reduced_coordinates_of_kpoints")
+   netcdf_error = nf90_get_var(ncid, varid, abi_kibz)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   allocate(k_pt(nkibz,3) )
+   do ik = 1,nkibz
+     k_pt(ik,:)=matmul(transpose(b),abi_kibz(:,ik))*alat(:)/2.0_SP/pi
+   enddo
+   !
+   deallocate(abi_kibz)
+   !
+   ! symmetries to move from the BZ to IBZ
+   !=======================================
+   allocate(abi_kbz2ibz(6, nkbz))
+   !
+   ! Here I will have to load the k-points symmetries, etc ...
+   varid = netcdf_inquire_varid(ncid, "gstore_kbz2ibz")
+   netcdf_error = nf90_get_var(ncid, varid, abi_kbz2ibz)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   allocate(k_sstar(nkbz,2) )
+   do ik = 1,nkbz
+     k_sstar(ik,:)=abi_kbz2ibz(1:2,ik)
+   enddo
+   !
+   deallocate(abi_kbz2ibz)
+   !
+  end subroutine import_kpoints_group
+  !
+  ! Q-points (convert to new units)
+  !=================================
+  subroutine import_qpoints_group()
+   use pars,                 only : pi
+   use R_lattice,            only : b
+   use D_lattice,            only : alat
+   implicit none
+   integer :: iq
+   real(DP), allocatable :: gstore_q(:,:)
+   real(DP), allocatable :: abi_qibz(:,:)
+   integer,  allocatable :: abi_qbz2ibz(:,:)
+   !
+   allocate(gstore_q(3, elph_nQ))
+   !
+   if(elph_nQ==nqibz) varid = netcdf_inquire_varid(ncid, "gstore_qibz")
+   if(elph_nQ==nqbz ) varid = netcdf_inquire_varid(ncid, "gstore_qbz")
+   netcdf_error = nf90_get_var(ncid, varid, gstore_q)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   ! warning: here I et a -q
+   call warning(" setting PH_qpt=-qpt_abi")
+   do iq = 1,elph_nQ
+     PH_qpt(iq,:)=-matmul(transpose(b),gstore_q(:,iq))*alat(:)/2.0_SP/pi
+   enddo
+   !
+   deallocate(gstore_q)
+   !
+   ! q-points in the IBZ
+   !=====================
+   allocate(abi_qibz(3, nqibz))
+   !
+   varid = netcdf_inquire_varid(ncid, "reduced_coordinates_of_kpoints")
+   netcdf_error = nf90_get_var(ncid, varid, abi_qibz)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   allocate(q_pt(nqibz,3) )
+   do iq = 1,nqibz
+     q_pt(iq,:)=matmul(transpose(b),abi_qibz(:,iq))*alat(:)/2.0_SP/pi
+   enddo
+   !
+   deallocate(abi_qibz)
+   !
+   ! symmetries to move from the BZ to IBZ
+   !=======================================
+   allocate(abi_qbz2ibz(6, nqbz))
+   !
+   ! Here I will have to load the k-points symmetries, etc ...
+   varid = netcdf_inquire_varid(ncid, "gstore_qbz2ibz")
+   netcdf_error = nf90_get_var(ncid, varid, abi_qbz2ibz)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   allocate(q_sstar(nqbz,2) )
+   do iq = 1,nqbz
+     q_sstar(iq,:)=abi_qbz2ibz(1:2,iq)
+   enddo
+   !
+   deallocate(abi_qbz2ibz)
+   !
+  end subroutine import_qpoints_group
+  !
+  ! PH_frequencies
+  !=================
+  subroutine import_ph_frequencies()
+   use pars,                 only : pi
+   use R_lattice,            only : b
+   use D_lattice,            only : alat
+   implicit none
+   integer :: iq,iqibz
+   real(DP), allocatable :: phfreqs_ibz(:,:)
+   !
+   allocate(phfreqs_ibz(ph_modes,nqibz))
+   !
+   varid = netcdf_inquire_varid(ncid, "phfreqs_ibz")
+   netcdf_error = nf90_get_var(ncid, varid, phfreqs_ibz)
+   call netcdf_check(ncid,netcdf_error,varid)
+   !
+   if (elph_nQ==nqbz) then
+     do iq = 1,nqbz
+       iqibz=q_sstar(iq,1)
+       PH_freqs_sq(iq,:)=real(phfreqs_ibz(:,iqibz),SP)**2
+     enddo
+   else
+     do iqibz = 1,nqibz
+       PH_freqs_sq(iqibz,:)=real(phfreqs_ibz(:,iqibz),SP)**2
+     enddo
+   endif
+   !
+   deallocate(phfreqs_ibz)
+   !
+  end subroutine import_ph_frequencies
+  !
+end subroutine a2y_gkkp_header
+ !
+ !  TO FIX
+ ! call io_bulk(ID_frag,VAR="E_K_PLUS_Q"//trim(intc(iq)),VAR_SZ=(/elph_nb,nkbz,1/) )
+ ! call io_bulk(ID_frag,R3=GKKP%E_kpq(iq)%E)
+ !
+ !
+subroutine a2y_gkkp(iq,i_sp_pol,ncid,grpid)
+ !
+ ! Reads and returns g_kkp matrix elements for each q point
+ !
+ use netcdf
+ use netcdf_data
+ use D_lattice,             ONLY: n_atoms
+ use pars,                  ONLY: DP,SP
+ use elph,                  ONLY: GKKP,elph_nb,ph_modes,elph_nQ,elph_nk_bz,PH_pol_vector
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,     intent(in)  :: iq,i_sp_pol,ncid,grpid
+ !
+ ! Workspace
+ !
+ integer    :: varid,netcdf_error,ik,i_modes,ipos(6),isize(6),i_atom,xyz,i_n,i_m
+ real(DP), allocatable :: gstore_abinit(:,:,:,:,:,:),pheigvec_cart_ibz(:,:,:,:,:)
+ !
+ ! (natom3, glob_nk, nb, glob_nq, nb, cplex) 
+ !
+ allocate(gstore_abinit(2,elph_nb,elph_nb,ph_modes,elph_nk_bz,1))
+ !AMBO_ALLOC(gstore_abinit,(2,elph_nb,elph_nb,ph_modes,elph_nQ))
+ !
+ ipos =(/1,1,1,1,1,iq/)
+ isize=(/2,elph_nb,elph_nb,ph_modes,elph_nk_bz,1/)
+ !
+ varid = netcdf_inquire_varid(grpid, "gvals")
+ netcdf_error = nf90_get_var(grpid, varid, gstore_abinit, ipos, isize)
+ !
+ !call ELPH_alloc("dV")
+ !
+ ! (/2,ph_modes,elph_nb,elph_nb,nkbz/)
+ !
+ do ik=1,elph_nk_bz
+   do i_modes=1,ph_modes
+     ! Here we should invert the band indexes and do the complex conjugate
+     ! See eq.(5) of the notes of Fulvio
+     ! This operation however is already performed inside the subrtouine
+     ! src/el-ph/ELPH_databases_load.F
+     !
+     ! See comment there, reported also here.
+     ! There has been a key error in the definition of the el-ph SE of the BKE (Eq. 30_9_11.18)
+     ! where the band indexes have been exchanged. Instead of doing a global change of the notes
+     ! and of the code I decided, here, to exchange the indexes:
+     !
+     do i_n=1,elph_nb
+       do i_m=1,elph_nb
+         GKKP%dVc(i_modes,i_m,i_n,ik,1)=cmplx(gstore_abinit(1,i_m,i_n,i_modes,ik,1),&
+         &                                    gstore_abinit(2,i_m,i_n,i_modes,ik,1),kind=SP)
+       enddo
+     enddo
+   enddo
+ enddo
+ !
+ deallocate(gstore_abinit)
+ !
+ ! Polarization vectors
+ !
+ allocate(pheigvec_cart_ibz(2,3,n_atoms,ph_modes,1))
+ !
+ ipos(1:5) =(/1,1,1,1,iq/)
+ isize(1:5)=(/2,3,n_atoms,ph_modes,1/)
+ !
+ varid = netcdf_inquire_varid(ncid, "pheigvec_cart_ibz")
+ !varid = netcdf_inquire_varid(ncid, "phdispl_cart_ibz")
+ netcdf_error = nf90_get_var(ncid, varid, pheigvec_cart_ibz, ipos(1:5), isize(1:5))
+ !
+ do xyz=1,3
+   do i_atom=1,n_atoms
+     PH_pol_vector(:,i_atom,xyz,iq)=cmplx(pheigvec_cart_ibz(1,xyz,i_atom,:,1),pheigvec_cart_ibz(2,xyz,i_atom,:,1),kind=SP)
+   enddo
+ enddo
+ ! 
+ deallocate(pheigvec_cart_ibz)
+ !
+end subroutine a2y_gkkp
diff --git a/interfaces/c2y/c2y.F b/interfaces/c2y/c2y.F
index be39cae7e6..6dee2f3ec3 100644
--- a/interfaces/c2y/c2y.F
+++ b/interfaces/c2y/c2y.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function c2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir,js)
  !
  use pars,                ONLY:SP,lchlen
@@ -19,8 +23,9 @@ integer function c2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  use mod_com2y,           ONLY:interface_presets,force_noWFs
  use mod_wf2y,            ONLY:WF_splitter
  use parallel_int,        ONLY:PARALLEL_live_message
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(levels)                 :: en 
  type(bz_samp)                :: k 
  integer,          intent(in) :: lnstr,iind,iod,ijs,np,pid,icd
diff --git a/interfaces/c2y/c2y_db1.F b/interfaces/c2y/c2y_db1.F
index 8a14f0fb35..3945690522 100644
--- a/interfaces/c2y/c2y_db1.F
+++ b/interfaces/c2y/c2y_db1.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine c2y_db1(en,k,file_name_)
  !
  use pars,                  ONLY: DP,pi
@@ -21,7 +25,9 @@ subroutine c2y_db1(en,k,file_name_)
  use xc_functionals,        ONLY: XC_EXCHANGE_CORRELATION,XC_LDA_C_PW
  use mod_com2y,             ONLY: print_interface_dimensions,symmetries_check_and_load,&
 &                                 alat_mult_factor
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  character(*)                   :: file_name_
  type(levels),     intent(out)  :: en
  type(bz_samp),    intent(out)  :: k  
diff --git a/interfaces/p2y/mod_p2y.F b/interfaces/p2y/mod_p2y.F
index 0b40c037b3..69ac65a6e4 100644
--- a/interfaces/p2y/mod_p2y.F
+++ b/interfaces/p2y/mod_p2y.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AF DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 module P2Ym
  !
  use pw_export_module
@@ -18,8 +22,9 @@ module P2Ym
  use parallel_m,            ONLY : myid
  use parallel_int,          ONLY : PP_bcast   
  use units,                 ONLY : Da2AU
- ! 
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  character(lchlen) :: index_filename
  character(lchlen) :: dftdata_fmt
@@ -309,8 +314,9 @@ end subroutine get_dimensions
  subroutine get_atoms
    use D_lattice,   ONLY:n_atoms_species_max,n_atomic_species,n_atoms_species, &
 &                        atom_pos, Z_species,atomic_number,atom_mass,atoms_map
-   ! 
-#include<y_memory.h>
+   use y_memory_alloc
+   !
+   implicit none
    !
    ! Work Space
    !
@@ -547,8 +553,9 @@ subroutine get_k_points(k)
    use R_lattice,   ONLY:bz_samp
    use D_lattice,   ONLY:alat
    use vec_operate, ONLY:v_is_zero 
+   use y_memory_alloc
    !
-#include<y_memory.h> 
+   implicit none
    !
    type(bz_samp) :: k
    !
@@ -617,7 +624,9 @@ subroutine get_R_vectors
    use pars,        only: pi
    use R_lattice,   ONLY: b, ng_vec, g_vec
    use D_lattice,   ONLY: alat
-#include<y_memory.h>
+   use y_memory_alloc
+   !
+   implicit none
    !
    YAMBO_ALLOC(g_vec,(ng_vec,3)) ! The YAMBO array
    !
diff --git a/interfaces/p2y/p2y.F b/interfaces/p2y/p2y.F
index e2ca38bba8..10bb640719 100644
--- a/interfaces/p2y/p2y.F
+++ b/interfaces/p2y/p2y.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CH AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function p2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir,js)
  !
  use P2Ym,                ONLY:pw_init, pw_close, dftdata_fmt
@@ -25,8 +29,9 @@ integer function p2y(np,pid,lnstr,iinf,iind,iod,icd,ijs,instr,inf,ind,od,com_dir
  use mod_com2y,           ONLY:interface_presets,force_noWFs,verboseIO
  use stderr,              ONLY:intc
  use qexml_module
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)                 :: en
  type(bz_samp)                :: k
diff --git a/interfaces/p2y/p2y_atmproj.F b/interfaces/p2y/p2y_atmproj.F
index 8d8fe3b81f..fbec1e7819 100644
--- a/interfaces/p2y/p2y_atmproj.F
+++ b/interfaces/p2y/p2y_atmproj.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine p2y_atmproj(filename,k)
  !
  use pars,                  ONLY:SP,DP,schlen
@@ -16,8 +20,9 @@ subroutine p2y_atmproj(filename,k)
  !
  use atmproj_tools_module,  ONLY:atmproj_read_ext !,nwfcx
  use qe_pseudo_m,           ONLY:nsp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(schlen), intent(in) :: filename
  type(bz_samp),     intent(in) :: k
diff --git a/interfaces/p2y/p2y_db1.F b/interfaces/p2y/p2y_db1.F
index 9c38e2d748..898dd13f5e 100644
--- a/interfaces/p2y/p2y_db1.F
+++ b/interfaces/p2y/p2y_db1.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CH
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine p2y_db1(en,k)
  !
  use P2Ym,                  ONLY : get_dimensions,get_cell,get_atoms,get_symmetries,get_xc,get_k_points,get_more,get_energies,&
@@ -16,7 +20,9 @@ subroutine p2y_db1(en,k)
  use R_lattice,             ONLY : ng_vec,g_vec,bz_samp
  use wave_func,             ONLY : wf_ng, wf_nc_k, wf_ncx, wf_igk
  use mod_com2y,             ONLY : force_noWFs
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  type(levels),     intent(out)  :: en     ! Energies
  type(bz_samp),    intent(out)  :: k      ! K/Q points
  !---------------------------------------------------------------------*
diff --git a/interfaces/p2y/p2y_wf.F b/interfaces/p2y/p2y_wf.F
index 17e125ae76..6ab68b47e5 100644
--- a/interfaces/p2y/p2y_wf.F
+++ b/interfaces/p2y/p2y_wf.F
@@ -4,6 +4,10 @@
 ! Copyright (C) 2006 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): CH AF
+!
+! headers
+!
+#include<y_memory.h>
 !
  subroutine p2y_wf(wf_disk,i_sp_pol,ikibz, ib_grp)
  !
@@ -18,8 +22,9 @@ subroutine p2y_wf(wf_disk,i_sp_pol,ikibz, ib_grp)
  use pw_data,               ONLY: n_spin_pw_, gamma_only_, noncolin_, igkv_
  use qexml_module
  use qexsd_module
- ! 
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer,     intent(in)   :: i_sp_pol,ikibz, ib_grp
  complex(SP), intent(out)  :: wf_disk(wf_ncx,n_spinor,wf_nb_io)
diff --git a/interfaces/p2y/qe_pseudo_init.F b/interfaces/p2y/qe_pseudo_init.F
index 9aa2d084df..8e99cb7aae 100644
--- a/interfaces/p2y/qe_pseudo_init.F
+++ b/interfaces/p2y/qe_pseudo_init.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF IM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine qe_pseudo_init()
  !
  use pars,              ONLY: DP, pi_DP
@@ -30,8 +34,9 @@ subroutine qe_pseudo_init()
  use gvecw,             ONLY: qe_ecutwfc => ecutwfc, qe_gcutw => gcutw
  !
  use matrix_inversion,  ONLY: invmat
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  real(DP), parameter :: Ha2Ry=2.0_DP
  character(14)       :: subname="qe_pseudo_init"
diff --git a/lib/archive/Makefile.loc b/lib/archive/Makefile.loc
index 3eb012eba6..64588f97dd 100644
--- a/lib/archive/Makefile.loc
+++ b/lib/archive/Makefile.loc
@@ -14,8 +14,8 @@ all: $(EXT_LIBS)
 #
 # Libraries (GIT + archives)
 #
-ydiago:
-	@+URL="$(url_Ydiago)"  ;   GBRANCH="$(branch_Ydiago)"   ;  GIT="$(git_Ydiago)";  LIB="$(pkgname_Ydiago)";  $(call getsrc_git,"Ydiago");
+ldiago:
+	@+URL="$(url_Ldiago)"  ;   GBRANCH="$(branch_Ldiago)"   ;  GIT="$(git_Ldiago)";  LIB="$(pkgname_Ldiago)";  $(call getsrc_git,"Ldiago");
 devxlib:
 	@+URL="$(url_devxlib)" ;  GBRANCH="$(branch_devxlib)" ;  GIT="$(git_devxlib)"; LIB="$(pkgname_devxlib)" ; $(call getsrc_git,"devxlib")
 #
@@ -45,6 +45,8 @@ scalapack:
 	@+URL="$(url_scalapack)"; LIB="$(pkgname_scalapack)"; $(getsrc)
 elpa:
 	@+URL="$(url_elpa)"     ; LIB="$(pkgname_elpa)"; $(getsrc)
+magma:
+	@+URL="$(url_magma)"    ; LIB="$(pkgname_magma)"; $(getsrc)
 petsc:
 	@+URL="$(url_petsc)"    ; LIB="$(pkgname_petsc)";  GBRANCH="$(branch_petsc)"  ;  GIT="$(git_petsc)" ;  $(call getsrc_git,"petsc");
 slepc:
@@ -65,7 +67,7 @@ clean_git_folders:
 #
 clean_tgz:
 	if test ! -e keep-extlibs-stamp ; then \
-          if test -s $(tarball_Ydiago)   && test "$(keep_Ydiago)"    != "yes" ;  then rm $(tarball_Ydiago)    ; fi ; \
+          if test -s $(tarball_Ldiago)   && test "$(keep_Ldiago)"    != "yes" ;  then rm $(tarball_Ldiago)    ; fi ; \
           if test -s $(tarball_libxc)    && test "$(keep_libxc)"     != "yes" ;  then rm $(tarball_libxc)     ; fi ; \
           if test -s $(tarball_iotk)     && test "$(keep_iotk)"      != "yes" ;  then rm $(tarball_iotk)      ; fi ; \
           if test -s $(tarball_netcdf)   && test "$(keep_netcdf)"    != "yes" ;  then rm $(tarball_netcdf)    ; fi ; \
@@ -75,6 +77,7 @@ clean_tgz:
           if test -s $(tarball_lapack)   && test "$(keep_lapack)"    != "yes" ;  then rm $(tarball_lapack)    ; fi ; \
           if test -s $(tarball_scalapack)&& test "$(keep_scalapack)" != "yes" ;  then rm $(tarball_scalapack) ; fi ; \
           if test -s $(tarball_elpa)     && test "$(keep_elpa)"      != "yes" ;  then rm $(tarball_elpa)      ; fi ; \
+          if test -s $(tarball_magma)    && test "$(keep_magma)"     != "yes" ;  then rm $(tarball_magma)     ; fi ; \
           if test -s $(tarball_blacs)    && test "$(keep_blacs)"     != "yes" ;  then rm $(tarball_blacs)     ; fi ; \
           if test -s $(tarball_petsc)    && test "$(keep_petsc)"     != "yes" ;  then rm $(tarball_petsc)     ; fi ; \
           if test -s $(tarball_slepc)    && test "$(keep_slepc)"     != "yes" ;  then rm $(tarball_slepc)     ; fi ; \
diff --git a/lib/archive/git.list.in b/lib/archive/git.list.in
index 5701bc7844..57e46b27be 100644
--- a/lib/archive/git.list.in
+++ b/lib/archive/git.list.in
@@ -5,7 +5,7 @@
 # 
 # Authors (see AUTHORS file for details): AM, DS
 #
-branch_Ydiago=@with_ydiago_branch@
+branch_Ldiago=@with_ldiago_branch@
 branch_devxlib=@with_devxlib_branch@
 branch_slepc=@with_slepc_branch@
 branch_petsc=@with_petsc_branch@
diff --git a/lib/archive/package.list b/lib/archive/package.list
index 66a7ee93a3..85c3f27250 100644
--- a/lib/archive/package.list
+++ b/lib/archive/package.list
@@ -1,20 +1,20 @@
 #
 # Libs list
 #
-EXT_LIBS= yaml futile iotk hdf5 netcdf netcdff etsf_io libxc lapack blacs scalapack elpa petsc slepc fftw fftqe ydiago devxlib
+EXT_LIBS= yaml futile iotk hdf5 netcdf netcdff etsf_io libxc lapack scalapack elpa magma petsc slepc fftw fftqe ldiago devxlib
 #
 # storing options
 #
-keep_Ydiago=yes
+keep_Ldiago=yes
 keep_iotk=yes
 keep_netcdf=no
 keep_netcdff=no
 keep_etsf_io=no
 keep_libxc=no
 keep_lapack=no
-keep_blacs=no
 keep_scalapack=no
 keep_elpa=no
+keep_magma=no
 keep_petsc=no
 keep_slepc=no
 keep_fftw=no
@@ -26,20 +26,22 @@ keep_hdf5=no
 #
 # package versions
 #
-version_Ydiago=0.4.0
+version_Ldiago=0.5.1
 version_iotk=y1.2.2
-version_netcdf=4.9.0
-version_netcdff=4.6.0
-version_hdf5=1.12.2
+version_netcdf=4.9.3
+version_netcdff=4.6.2
+version_hdf5=1.14.6
 version_etsf_io=1.0.4
 version_libxc=5.2.3
 version_lapack=3.12.0
-version_blacs=missing
-version_scalapack=2.2.1
+version_scalapack=2.2.2
 #version_elpa=release_2024_05_001   # This was neeeded for gitlab
-version_elpa=2024.05.001
-version_slepc=3.22.2
-version_petsc=3.22.2
+version_elpa=2025.06.002
+version_magma=2.8.0
+#version_slepc=3.22.2  # This is the latest version which does not require fortran 2018
+#version_petsc=3.22.2  # It is needed to compile with intel compilers
+version_slepc=3.24.1
+version_petsc=3.24.1
 version_fftw=3.3.10
 version_fftqe=missing
 version_yaml=0.2.2
@@ -48,7 +50,7 @@ version_devxlib=0.8.5
 #
 # package names
 #
-pkgname_Ydiago=Ydiago-$(version_Ydiago)
+pkgname_Ldiago=Ldiago-$(version_Ldiago)
 pkgname_iotk=iotk-$(version_iotk)
 pkgname_netcdf=netcdf-c-$(version_netcdf)
 pkgname_netcdff=netcdf-fortran-$(version_netcdff)
@@ -56,9 +58,9 @@ pkgname_hdf5=hdf5-$(version_hdf5)
 pkgname_etsf_io=etsf_io-$(version_etsf_io)
 pkgname_libxc=libxc-$(version_libxc)
 pkgname_lapack=lapack-$(version_lapack)
-pkgname_blacs=blacs
 pkgname_scalapack=scalapack-$(version_scalapack)
 pkgname_elpa=elpa-$(version_elpa)
+pkgname_magma=magma-$(version_magma)
 pkgname_slepc=slepc-$(version_slepc)
 pkgname_petsc=petsc-$(version_petsc)
 pkgname_fftw=fftw-$(version_fftw)
@@ -69,17 +71,17 @@ pkgname_devxlib=devicexlib-$(version_devxlib)
 #
 # tarball names
 #
-tarball_Ydiago=$(version_Ydiago).tar.gz
+tarball_Ldiago=$(version_Ldiago).tar.gz
 tarball_iotk=$(pkgname_iotk).tar.gz
 tarball_netcdf=v$(version_netcdf).tar.gz
 tarball_netcdff=v$(version_netcdff).tar.gz
-tarball_hdf5=hdf5-1_12_2.tar.gz
+tarball_hdf5=hdf5-1.14.6.tar.gz
 tarball_etsf_io=$(pkgname_etsf_io).tar.gz
 tarball_libxc=$(pkgname_libxc).tar.gz
 tarball_lapack=v$(version_lapack).tar.gz
 tarball_scalapack=v$(version_scalapack).tar.gz
 tarball_elpa=$(pkgname_elpa).tar.gz
-tarball_blacs=$(pkgname_blacs).tar.gz
+tarball_magma=$(pkgname_magma).tar.gz
 tarball_slepc=slepc-v$(version_slepc).tar.gz
 tarball_petsc=petsc-v$(version_petsc).tar.gz
 tarball_fftw=$(pkgname_fftw).tar.gz
@@ -90,7 +92,6 @@ tarball_devxlib=$(pkgname_devxlib).tar.gz
 #
 # External URL's on Github
 #
-url_Ydiago=https://github.com/yambo-code/Ydiago/archive/refs/tags/$(tarball_Ydiago)
 url_lapack=https://github.com/Reference-LAPACK/lapack/archive/refs/tags/$(tarball_lapack)
 url_scalapack=https://github.com/Reference-ScaLAPACK/scalapack/archive/refs/tags/$(tarball_scalapack)
 url_hdf5=https://github.com/HDFGroup/hdf5/archive/refs/tags/$(tarball_hdf5)
@@ -99,6 +100,7 @@ url_netcdff=https://github.com/Unidata/netcdf-fortran/archive/refs/tags/$(tarbal
 #
 # External URL's on Gitlab
 #
+url_Ldiago=https://gitlab.com/lumen-code/Ldiago/-/archive/$(version_Ldiago)/$(tarball_Ldiago)
 url_slepc=https://gitlab.com/slepc/slepc/-/archive/v$(version_slepc)/$(tarball_slepc)
 url_petsc=https://gitlab.com/petsc/petsc/-/archive/v$(version_petsc)/$(tarball_petsc)
 url_devxlib=https://gitlab.com/max-centre/components/devicexlib/-/archive/$(version_devxlib)/$(tarball_devxlib)
@@ -111,6 +113,7 @@ url_libxc=https://gitlab.com/libxc/libxc/-/archive/$(version_libxc)/$(tarball_li
 #
 url_fftw=https://fftw.org/$(tarball_fftw)
 url_elpa=https://elpa.mpcdf.mpg.de/software/tarball-archive/Releases/$(version_elpa)/$(tarball_elpa)
+url_magma=https://icl.utk.edu/projectsfiles/magma/downloads/$(tarball_magma)
 #
 # Internal URL's
 #
@@ -118,21 +121,20 @@ url_base=https://github.com/yambo-code/yambo-libraries/raw/master/external/
 #
 url_iotk=$(url_base)/$(tarball_iotk)
 url_etsf_io=$(url_base)/$(tarball_etsf_io)
-url_blacs=$(url_base)/$(tarball_blacs)
 url_fftqe=$(url_base)/$(tarball_fftqe)
 url_yaml=$(url_base)/$(tarball_yaml)
 url_futile=$(url_base)/$(tarball_futile)
 #
 # git repositories
 #
-git_Ydiago=git@github.com:yambo-code/Ydiago.git
+git_Ldiago=git@gitlab.com:lumen-code/Ldiago.git
 git_devxlib=git@gitlab.com:max-centre/components/devicexlib.git
 git_slepc=git@gitlab.com:slepc/slepc.git
 git_petsc=git@gitlab.com:petsc/petsc.git
 #
 # git default branches
 #
-branch_Ydiago=none
+branch_Ldiago=none
 branch_devxlib=none
 branch_slepc=none
 branch_petsc=none
diff --git a/lib/blacs/Bmake.inc_lib b/lib/blacs/Bmake.inc_lib
deleted file mode 100644
index d653a75a71..0000000000
--- a/lib/blacs/Bmake.inc_lib
+++ /dev/null
@@ -1,215 +0,0 @@
-#=============================================================================
-#====================== SECTION 1: PATHS AND LIBRARIES =======================
-#=============================================================================
-#  The following macros specify the name and location of libraries required by
-#  the BLACS and its tester.
-#=============================================================================
- 
-#  --------------------------------------
-#  Make sure we've got a consistent shell
-#  --------------------------------------
-   SHELL = $(shell)
-
-#  -----------------------------
-#  The top level BLACS directory
-#  -----------------------------
-   BTOPdir = $(prefix)/lib/blacs/blacs
-
-#  ---------------------------------------------------------------------------
-#  The communication library your BLACS have been written for.
-#  Known choices (and the machines they run on) are:
-#
-#     COMMLIB   MACHINE
-#     .......   ..............................................................
-#     CMMD      Thinking Machine's CM-5
-#     MPI       Wide variety of systems
-#     MPL       IBM's SP series (SP1 and SP2)
-#     NX        Intel's supercomputer series (iPSC2, iPSC/860, DELTA, PARAGON)
-#     PVM       Most unix machines; See PVM User's Guide for details
-#  ---------------------------------------------------------------------------
-   COMMLIB = MPI
-
-#  -------------------------------------------------------------
-#  The platform identifier to suffix to the end of library names
-#  -------------------------------------------------------------
-   PLAT = LINUX
-#  ----------------------------------------------------------
-#  Name and location of the BLACS library.  See section 2 for 
-#  details on BLACS debug level (BLACSDBGLVL).
-#  ----------------------------------------------------------
-   BLACSdir    = $(BTOPdir)
-   BLACSDBGLVL = 0
-   BLACSFINIT  = $(BLACSdir)/libblacs_init.a
-   BLACSCINIT  = $(BLACSdir)/libblacs_C_init.a
-   BLACSLIB    = $(BLACSdir)/libblacs.a
-
-#  Name and location of the MPI library.
-#  -------------------------------------
-#   MPIdir = $(mpidir)
-   MPILIBdir = $(mpildir)
-   MPIINCdir = $(mpiidir)
-
-#  -------------------------------------
-#  All libraries required by the tester.
-#  -------------------------------------
-   BTLIBS = $(BLACSFINIT) $(BLACSLIB) $(BLACSFINIT) $(MPI_LIBS) 
-
-#  ----------------------------------------------------------------
-#  The directory to put the installation help routines' executables
-#  ----------------------------------------------------------------
-   INSTdir = $(BTOPdir)/INSTALL/EXE
-
-#  ------------------------------------------------
-#  The name and location of the tester's executable
-#  ------------------------------------------------
-   TESTdir = $(BTOPdir)/TESTING/EXE
-   FTESTexe = $(TESTdir)/xFbtest_$(COMMLIB)-$(PLAT)-$(BLACSDBGLVL)
-   CTESTexe = $(TESTdir)/xCbtest_$(COMMLIB)-$(PLAT)-$(BLACSDBGLVL)
-#=============================================================================
-#=============================== End SECTION 1 ===============================
-#=============================================================================
- 
-
-#=============================================================================
-#========================= SECTION 2: BLACS INTERNALS ========================
-#=============================================================================
-#  The following macro definitions set preprocessor values for the BLACS.
-#  The file Bconfig.h sets these values if they are not set by the makefile.
-#  User's compiling only the tester can skip this entire section.
-#  NOTE: The MPI defaults have been set for MPICH.
-#=============================================================================
-
-#  -----------------------------------------------------------------------
-#  The directory to find the required communication library include files, 
-#  if they are required by your system.
-#  -----------------------------------------------------------------------
-   SYSINC = $(impi) -I$(libdir)/blacs/blacs/SRC/MPI
-
-#  ---------------------------------------------------------------------------
-#  The Fortran 77 to C interface to be used.  If you are unsure of the correct
-#  setting for your platform, compile and run BLACS/INSTALL/xintface.
-#  Choices are: Add_, NoChange, UpCase, or f77IsF2C.
-#  ---------------------------------------------------------------------------
-   INTFACE =-DAdd_
-
-#  ------------------------------------------------------------------------
-#  Allows the user to vary the topologies that the BLACS default topologies
-#  (TOP = ' ') correspond to.  If you wish to use a particular topology
-#  (as opposed to letting the BLACS make the choice), uncomment the
-#  following macros, and replace the character in single quotes with the
-#  topology of your choice.
-#  ------------------------------------------------------------------------
-#  DEFBSTOP   = -DDefBSTop="'1'"
-#  DEFCOMBTOP = -DDefCombTop="'1'"
-
-#  -------------------------------------------------------------------
-#  If your MPI_Send is locally-blocking, substitute the following line
-#  for the empty macro definition below.
-#  SENDIS = -DSndIsLocBlk
-#  -------------------------------------------------------------------
-   SENDIS =
-
-#  --------------------------------------------------------------------
-#  If your MPI handles packing of non-contiguous messages by copying to
-#  another buffer or sending extra bytes, better performance may be
-#  obtained by replacing the empty macro definition below with the
-#  macro definition on the following line.
-#  BUFF = -DNoMpiBuff
-#  --------------------------------------------------------------------
-   BUFF = 
-
-#  -----------------------------------------------------------------------
-#  If you know something about your system, you may make it easier for the
-#  BLACS to translate between C and fortran communicators.  If the empty
-#  macro defininition is left alone, this translation will cause the C
-#  BLACS to globally block for MPI_COMM_WORLD on calls to BLACS_GRIDINIT
-#  and BLACS_GRIDMAP.  If you choose one of the options for translating
-#  the context, neither the C or fortran calls will globally block.
-#  If you are using MPICH, or a derivitive system, you can replace the 
-#  empty macro definition below with the following (note that if you let
-#  MPICH do the translation between C and fortran, you must also indicate
-#  here if your system has pointers that are longer than integers.  If so,
-#  define -DPOINTER_64_BITS=1.)  For help on setting TRANSCOMM, you can
-#  run BLACS/INSTALL/xtc_CsameF77 and BLACS/INSTALL/xtc_UseMpich as
-#  explained in BLACS/INSTALL/README.
-#   TRANSCOMM = -DUseMpich
-#
-#  If you know that your MPI uses the same handles for fortran and C
-#  communicators, you can replace the empty macro definition below with
-#  the macro definition on the following line.
-#  TRANSCOMM = -DCSameF77
-#  -----------------------------------------------------------------------
-#  TRANSCOMM =
-
-#  --------------------------------------------------------------------------
-#  You may choose to have the BLACS internally call either the C or Fortran77
-#  interface to MPI by varying the following macro.  If TRANSCOMM is left
-#  empty, the C interface BLACS_GRIDMAP/BLACS_GRIDINIT will globally-block if
-#  you choose to use the fortran internals, and the fortran interface will
-#  block if you choose to use the C internals.  It is recommended that the 
-#  user leave this macro definition blank, unless there is a strong reason
-#  to prefer one MPI interface over the other.
-#  WHATMPI = -DUseF77Mpi
-#  WHATMPI = -DUseCMpi
-#  --------------------------------------------------------------------------
-   WHATMPI =
-
-#  ---------------------------------------------------------------------------
-#  Some early versions of MPICH and its derivatives cannot handle user defined
-#  zero byte data types.  If your system has this problem (compile and run
-#  BLACS/INSTALL/xsyserrors to check if unsure), replace the empty macro
-#  definition below with the macro definition on the following line.
-#  SYSERRORS = -DZeroByteTypeBug
-#  ---------------------------------------------------------------------------
-   SYSERRORS =
-
-#  ------------------------------------------------------------------
-#  These macros set the debug level for the BLACS.  The fastest
-#  code is produced by BlacsDebugLvl 0.  Higher levels provide
-#  more debug information at the cost of performance.  Present levels
-#  of debug are:
-#  0 : No debug information
-#  1 : Mainly parameter checking.
-#  ------------------------------------------------------------------
-   DEBUGLVL = -DBlacsDebugLvl=$(BLACSDBGLVL)
-
-#  -------------------------------------------------------------------------
-#  All BLACS definitions needed for compile (DEFS1 contains definitions used
-#  by all BLACS versions).
-#  -------------------------------------------------------------------------
-   DEFS1 = -DSYSINC $(SYSINC) $(INTFACE) $(DEFBSTOP) $(DEFCOMBTOP) $(DEBUGLVL)
-   BLACSDEFS = $(DEFS1) $(SENDIS) $(BUFF) $(TRANSCOMM) $(WHATMPI) $(SYSERRORS)
-#=============================================================================
-#=============================== End SECTION 2 ===============================
-#=============================================================================
- 
-
-#=============================================================================
-#=========================== SECTION 3: COMPILERS ============================
-#=============================================================================
-#  The following macros specify compilers, linker/loaders, the archiver,
-#  and their options.  Some of the fortran files need to be compiled with no
-#  optimization.  This is the F77NO_OPTFLAG.  The usage of the remaining
-#  macros should be obvious from the names.
-#=============================================================================
-   F77            = $(fc)
-   F77NO_OPTFLAGS = $(fcuflags)
-   F77FLAGS       = $(fcflags)
-   F77LOADER      = $(fc)
-   F77LOADFLAGS   = $(fcflags)
-   CC             = $(cc)
-   CCFLAGS        = $(cflags)
-   CCLOADER       = $(cc)
-   CCLOADFLAGS    = $(cflags)
-
-#  --------------------------------------------------------------------------
-#  The archiver and the flag(s) to use when building an archive (library).
-#  Also the ranlib routine.  If your system has no ranlib, set RANLIB = echo.
-#  --------------------------------------------------------------------------
-   ARCH      = $(ar)
-   ARCHFLAGS = $(arflags)
-   RANLIB    = ranlib
-
-#=============================================================================
-#=============================== End SECTION 3 ===============================
-#=============================================================================
diff --git a/lib/blacs/Makefile.lib b/lib/blacs/Makefile.lib
deleted file mode 100644
index 6111b18f19..0000000000
--- a/lib/blacs/Makefile.lib
+++ /dev/null
@@ -1,57 +0,0 @@
-help :
-	@ echo
-	@ echo "Make sure you are using the correct Bmake.inc for your system."
-	@ echo "At this level, assuming you have downloaded the necessary files,"
-	@ echo "you may make the BLACS tester (make tester), or one of the BLACS"
-	@ echo "versions (make cmmd, make mpl, make nx, or make pvm)"
-	@ echo "You can define the make macro 'what' to perform a specific action."
-	@ echo "(eg., make tester what=clean)"
-	@ echo "There are short README files in TESTING/ and SRC/."
-	@ echo
-
-all : mpi 
-
-clean: cleanall
-
-cleanall:
-	( cd SRC/MPI ; make clean )
-#	( cd TESTING ; make clean )
-#	( cd SRC/CMMD ; make clean )
-#	( cd SRC/MPL  ; make clean )
-#	( cd SRC/NX   ; make clean )
-#	( cd SRC/PVM  ; make clean )
-
-testing: tester
-xbtest : tester
-test   : tester
-tester :
-	( cd TESTING ; make $(what) )
-
-CM5  : CMMD
-cmmd : CMMD
-CMMD :
-	( cd SRC/CMMD ; make $(what) )
-
-SP1 : MPL
-SP2 : MPL
-mpl : MPL
-MPL :
-	( cd SRC/MPL ; make $(what) )
-
-intel   : NX
-ipsc2   : NX
-i860    : NX
-delta   : NX
-gamma   : NX
-paragon : NX
-nx      : NX
-NX :
-	( cd SRC/NX ; make $(what) )
-
-pvm : PVM
-PVM :
-	( cd SRC/PVM ; make $(what) )
-
-mpi : MPI
-MPI :
-	( cd SRC/MPI ; make $(what) )
diff --git a/lib/ldiago/Makefile.loc b/lib/ldiago/Makefile.loc
new file mode 100644
index 0000000000..7545f0528e
--- /dev/null
+++ b/lib/ldiago/Makefile.loc
@@ -0,0 +1,67 @@
+#
+#===============================
+# Yambo package
+#===============================
+#
+include ../../config/setup
+include ../archive/package.list
+#
+LIBNAME=libldiago.a
+LIBPATH=$(libs_prefix)/$(fc_kind)/${fc}/diago/$(ldiago_gpu_support)
+#LIBPATH=$(compdir)
+LIBRARY=$(LIBPATH)/lib/$(LIBNAME)
+#
+PACKAGE=$(pkgname_Ldiago)
+#
+include ../config/external_libs_commons.mk
+include ../config/external_libs_defs.mk
+#
+#
+# MAIN target
+#
+all: $(LIBRARY)
+#
+uncompress:
+	@$(uncompress)
+
+configure: uncompress
+	@if ! test -e configured.stamp; then \
+	 rm -f ${compdir}/log/config_$(PACKAGE).log; \
+	 echo "\t[$(PACKAGE)] configuration"; \
+	 if test -e ${compdir}/lib/ldiago/make_ldiago.inc ; then \
+	   echo "cp ${compdir}/lib/ldiago/make_ldiago.inc ${compdir}/lib/ldiago/$(PACKAGE)/src/make.inc" >> ${compdir}/log/config_$(PACKAGE).log ; \
+	   cp ${compdir}/lib/ldiago/make_ldiago.inc ${compdir}/lib/ldiago/$(PACKAGE)/src/make.inc ; \
+	 fi ; \
+	 touch configured.stamp;\
+	fi
+#
+compile: uncompress configure
+	@if ! test -e compiled.stamp ; then \
+	  cd ${compdir}/lib/ldiago/$(PACKAGE)/src ; make ; \
+	  cd ${compdir}/lib/ldiago/ ; touch compiled.stamp; \
+	fi
+
+install: uncompress configure compile
+	@if ! test -e installed.stamp ; then \
+	 echo "\t[$(PACKAGE)] installation"; \
+	 echo "cp ${compdir}/lib/ldiago/$(PACKAGE)/src/libdiago.a $(LIBPATH)/lib/libldiago.a" >> ${compdir}/log/install_$(PACKAGE).log ; \
+	 echo "cp ${compdir}/lib/ldiago/$(PACKAGE)/src/ldiago_interface.mod $(LIBPATH)/include/" >> ${compdir}/log/install_$(PACKAGE).log ; \
+	 echo "chmod +x $(LIBPATH)/lib/libldiago.a" >> ${compdir}/log/install_$(PACKAGE).log ; \
+	 cp ${compdir}/lib/ldiago/$(PACKAGE)/src/libdiago.a $(LIBPATH)/lib/libldiago.a ;\
+	 cp ${compdir}/lib/ldiago/$(PACKAGE)/src/ldiago_interface.mod $(LIBPATH)/include/ ;\
+	 chmod +x $(LIBPATH)/lib/libldiago.a ; \
+	 touch ${compdir}/config/stamps_and_lists/libldiago.a.stamp;\
+	 echo ldiago_interface >> ${compdir}/src/bse/modules.list;\
+	 touch installed.stamp;\
+	fi
+#
+$(LIBRARY): uncompress configure compile install
+#
+# cleaning
+#
+clean:
+	@$(clean_the_lib)
+#
+clean_all: clean
+	@$(rm_the_lib)
+#
diff --git a/lib/ydiago/make_ydiago.inc.in b/lib/ldiago/make_ldiago.inc.in
similarity index 83%
rename from lib/ydiago/make_ydiago.inc.in
rename to lib/ldiago/make_ldiago.inc.in
index 0a90adad02..135562bae1 100644
--- a/lib/ydiago/make_ydiago.inc.in
+++ b/lib/ldiago/make_ldiago.inc.in
@@ -5,15 +5,15 @@
 #
 # Authors (see AUTHORS file for details): DS
 
-# make.inc from Ydiago repository
+# make.inc from Ldiago repository
 
 CC               = @CC@
 FC               = @FC@
 AR               = @AR@
 CPP              = @CPP@ 
 
-CFLAGS       	 = @CFLAGS@  #-g -fsanitize=address -fno-omit-frame-pointer 
-YAMBO_FLAGS   	 = @def_mpi@ @def_scalapack@ @def_compiler@ @def_dp@ @def_openmp@ @def_gpu@ @def_elpa@
+CFLAGS       	 = -O3 #@CFLAGS@  #-g -fsanitize=address -fno-omit-frame-pointer 
+YAMBO_FLAGS   	 = @def_mpi@ @def_scalapack@ @def_compiler@ @def_dp@ @def_openmp@ @ldiago_opt@
 ELPA_INC         = @ELPA_INCS@ @LIBCUDA_INCS@ @GPU_INCS@ @LIBROCM_INCS@
 
 #
@@ -60,6 +60,6 @@ RANLIB           = echo
 # libraries
 
 #
-#LIBS             = $(extlibs_prefix)/$(fc_kind)/$(f90)/lib/libydiago.a
-LIBS             = ./libydiago.a
+#LIBS             = $(extlibs_prefix)/$(fc_kind)/$(f90)/$(ldiago_gpu_support)/lib/libldiago.a
+LIBS             = ./libldiago.a
 IFLAGS           = -I./ -I../ -I../include
diff --git a/lib/blacs/Makefile.loc b/lib/magma/Makefile.loc
similarity index 69%
rename from lib/blacs/Makefile.loc
rename to lib/magma/Makefile.loc
index 164bc2eaa5..6703f930a2 100644
--- a/lib/blacs/Makefile.loc
+++ b/lib/magma/Makefile.loc
@@ -5,16 +5,17 @@
 #
 include ../../config/setup
 include ../archive/package.list
-
-LIBNAME=libblacs.a
-LIBPATH=$(libs_prefix)/$(fc_kind)/${fc}
+#
+LIBNAME=libmagma.a
+LIBPATH=$(libs_prefix)/$(fc_kind)/${fc}/
 LIBRARY=$(LIBPATH)/lib/$(LIBNAME)
 #
-PACKAGE=$(pkgname_blacs)
+PACKAGE=$(pkgname_magma)
 #
 include ../config/external_libs_commons.mk
 include ../config/external_libs_defs.mk
 #
+#
 all: $(LIBRARY)
 #
 uncompress:
@@ -24,19 +25,21 @@ configure: uncompress
 	@if test -d $(PACKAGE) && ! test -f configured.stamp ; then \
          echo "\t[$(PACKAGE)] configuration"; \
 	 cd $(PACKAGE);  \
-	 cat $(compdir)/config/setup $(srcdir)/lib/blacs/Bmake.inc_lib > Bmake.inc ; \
-	 cp  $(srcdir)/lib/blacs/Makefile.lib  Makefile ;  \
+	 cat $(compdir)/config/setup $(srcdir)/lib/magma/make_magma.inc > make.inc ; \
          touch ../configured.stamp;\
 	fi
-	
-compile: uncompress configure
-	@$(call compile,mpi)
 
+#	 cp  $(srcdir)/lib/magma/Makefile.lib  Makefile ;  \
+
+compile: uncompress configure
+	@$(call compile)
+	
 install: uncompress configure compile
 	@if ! test -e installed.stamp ; then \
          echo "\t[$(PACKAGE)] installation"; \
          cd $(PACKAGE); cp *.a $(LIBPATH)/lib ; \
-	 chmod +x $(LIBPATH)/lib/*blacs*.a; \
+         cp include/*.h $(LIBPATH)/include ; \
+         cp include/*.mod $(LIBPATH)/include ; \
 	 touch ../installed.stamp;\
 	fi
 
@@ -45,7 +48,8 @@ $(LIBRARY): uncompress configure compile install
 # cleaning
 #
 clean:
-	@$(clean_the_lib)
+	@$(call clean_the_lib,clean)
 
 clean_all: clean
 	@$(rm_the_lib)
+#
diff --git a/lib/magma/make_magma.inc.in b/lib/magma/make_magma.inc.in
new file mode 100644
index 0000000000..6921b5dd21
--- /dev/null
+++ b/lib/magma/make_magma.inc.in
@@ -0,0 +1,126 @@
+#//////////////////////////////////////////////////////////////////////////////
+#   -- MAGMA (version 2.8.0) --
+#      Univ. of Tennessee, Knoxville
+#      Univ. of California, Berkeley
+#      Univ. of Colorado, Denver
+#      @date March 2024
+#//////////////////////////////////////////////////////////////////////////////
+
+
+
+# --------------------
+# configuration
+
+# should MAGMA be built on CUDA (NVIDIA only) or ROCM (AMD or NVIDIA)
+# enter 'cuda' or 'hip' respectively
+BACKEND     ?= cuda
+
+# set these to their real paths
+CUDADIR     ?= @LIBCUDA_PATH@
+ROCM_PATH   ?= @LIBROCM_PATH@
+
+USE_FORTRAN ?=yes
+
+# require either hip or cuda
+ifeq (,$(findstring $(BACKEND),hip cuda))
+    $(error "'BACKEND' should be either 'cuda' or 'hip' (got $(BACKEND))")
+endif
+
+# --------------------
+# programs
+
+# set compilers
+CC          ?= gcc
+CXX         ?= g++
+FORT        ?= @FC@
+HIPCC       ?= hipcc
+NVCC        ?= nvcc
+DEVCC       ?= NONE
+
+# set from 'BACKEND'
+ifeq ($(BACKEND),cuda)
+    DEVCC = $(NVCC)
+else ifeq ($(BACKEND),hip)
+    DEVCC = $(HIPCC)
+endif
+
+# and utilities
+ARCH      = ar
+ARCHFLAGS = cr
+RANLIB    = ranlib
+
+
+# --------------------
+# flags/settings
+
+# set our GPU targets
+ifeq ($(BACKEND),cuda)
+    GPU_TARGET = @NVIDIA_ARCH@
+else ifeq ($(BACKEND),hip)
+    GPU_TARGET = gfx900 gfx906 gfx908
+endif
+
+# Use -fPIC to make shared (.so) and static (.a) library;
+# can be commented out if making only static library.
+FPIC      = -fPIC
+
+# now, generate our flags
+CFLAGS      = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -fopenmp -std=c99
+CXXFLAGS    = -O3 $(FPIC) -DNDEBUG -DADD_ -Wall -fopenmp -std=c++11
+FFLAGS      = -O3 $(FPIC) -DNDEBUG -DADD_ 
+F90FLAGS    = -O3 $(FPIC) -DNDEBUG -DADD_ 
+LDFLAGS     =     $(FPIC)                       -fopenmp
+
+DEVCCFLAGS  = -O3         -DNDEBUG -DADD_
+
+# DEVCCFLAGS are populated later in `backend-specific`
+
+
+# --------------------
+# libraries
+
+# gcc with OpenBLAS (includes LAPACK)
+LIB       += $(lblas) $(llapack)
+
+# --------------------
+# directories
+
+# define library directories preferably in your environment, or here.
+LIBDIR    += -L$(libs_prefix)/$(fc_kind)/${fc}/lib
+INC       += -I$(libs_prefix)/$(fc_kind)/${fc}/include
+
+
+# --------------------
+# checks
+
+# check for openblas
+#-include make.check-openblas
+
+
+# --------------------
+# backend-specific
+
+# add appropriate cuda flags
+ifeq ($(BACKEND),cuda)
+    -include make.check-cuda
+
+    DEVCCFLAGS += -Xcompiler "$(FPIC)" -std=c++11
+
+    # link with cuda specific libraries
+    INC    += -I$(CUDADIR)/include
+    LIBDIR += -L$(CUDADIR)/lib64
+    LIB    += -lcublas -lcusparse -lcudart -lcudadevrt
+endif
+
+# add appropriate ROCM flags
+ifeq ($(BACKEND),hip)
+    -include make.check-hip
+
+    DEVCCFLAGS += $(FPIC) -std=c++11
+
+    INC    += -I$(ROCM_PATH)/include
+    LIBDIR += -L$(ROCM_PATH)/lib
+    LIB    += -lhipblas -lhipsparse
+endif
+
+
diff --git a/lib/magma_fmodules/.objects b/lib/magma_fmodules/.objects
new file mode 100644
index 0000000000..77f516e43f
--- /dev/null
+++ b/lib/magma_fmodules/.objects
@@ -0,0 +1,5 @@
+#if defined _MAGMA
+MAGMA_objects    = magma.o  magma_param.o \
+                   magma_cfortran.o  magma_zfortran.o  
+#endif
+objs = $(MAGMA_objects)
diff --git a/lib/magma_fmodules/DOUBLE_project.dep b/lib/magma_fmodules/DOUBLE_project.dep
new file mode 100644
index 0000000000..5efbe90e72
--- /dev/null
+++ b/lib/magma_fmodules/DOUBLE_project.dep
@@ -0,0 +1,7 @@
+ magma.o
+ magma_cfortran.o
+ magma_dfortran.o
+ magma_param.o
+ magma_sfortran.o
+ magma_zfortran.o
+
diff --git a/lib/magma_fmodules/magma.F b/lib/magma_fmodules/magma.F
new file mode 100644
index 0000000000..b86b503f84
--- /dev/null
+++ b/lib/magma_fmodules/magma.F
@@ -0,0 +1,373 @@
+!
+!   -- MAGMA (version 2.8.0) --
+!      Univ. of Tennessee, Knoxville
+!      Univ. of California, Berkeley
+!      Univ. of Colorado, Denver
+!      @date March 2024
+!
+
+module magma
+
+    use magma_param
+    use iso_fortran_env, ONLY: int64
+
+#define magma_devptr_t integer(int64)
+
+    use magma_zfortran
+    use magma_cfortran
+!    use magma_dfortran
+!    use magma_sfortran
+
+!    use magmablas_zfortran
+!    use magmablas_dfortran
+!    use magmablas_cfortran
+!    use magmablas_sfortran
+
+    !---- Fortran interfaces to MAGMA subroutines ----
+    interface
+
+    !! -------------------------------------------------------------------------
+    !! initialize
+    subroutine magmaf_init( )
+    end subroutine
+
+    subroutine magmaf_finalize(  )
+    end subroutine
+
+    !! -------------------------------------------------------------------------
+    !! version
+    subroutine magmaf_version( major, minor, micro )
+        integer         :: major, minor, micro
+    end subroutine
+
+    subroutine magmaf_print_environment()
+    end subroutine
+
+    !! -------------------------------------------------------------------------
+    !! device support
+    integer function magmaf_num_gpus()
+    end function
+
+    integer function magmaf_getdevice_arch()
+    end function
+
+    subroutine magmaf_getdevice( dev )
+        integer         :: dev
+    end subroutine
+
+    subroutine magmaf_setdevice( dev )
+        integer         :: dev
+    end subroutine
+
+    function magmaf_mem_size( queue )
+        import int64
+        integer(kind=8) :: magmaf_mem_size
+        magma_devptr_t  :: queue
+    end function
+
+    !! -------------------------------------------------------------------------
+    !! queue support
+    subroutine magmaf_queue_create( dev, queue )
+        import int64
+        integer        :: dev
+        magma_devptr_t :: queue
+    end subroutine
+
+    subroutine magmaf_queue_destroy( queue )
+        import int64
+        magma_devptr_t :: queue
+    end subroutine
+
+    subroutine magmaf_queue_sync( queue )
+        import int64
+        magma_devptr_t :: queue
+    end subroutine
+
+    integer function magmaf_queue_get_device( queue )
+        import int64
+        magma_devptr_t :: queue
+    end function
+
+    !! -------------------------------------------------------------------------
+    !! GPU allocation
+    integer function magmaf_malloc( ptr, bytes )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: bytes
+    end function
+
+    integer function magmaf_smalloc( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_dmalloc( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_cmalloc( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_zmalloc( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_free( ptr )
+        import int64
+        magma_devptr_t  :: ptr
+    end function
+
+    !! -------------------------------------------------------------------------
+    !! CPU regular (non-pinned) allocation
+    integer function magmaf_malloc_cpu( ptr, bytes )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: bytes
+    end function
+
+    integer function magmaf_smalloc_cpu( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_dmalloc_cpu( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_cmalloc_cpu( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_zmalloc_cpu( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_free_cpu( ptr )
+        import int64
+        magma_devptr_t  :: ptr
+    end function
+
+    !! -------------------------------------------------------------------------
+    !! CPU pinned allocation
+    integer function magmaf_malloc_pinned( ptr, bytes )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: bytes
+    end function
+
+    integer function magmaf_smalloc_pinned( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_dmalloc_pinned( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_cmalloc_pinned( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_zmalloc_pinned( ptr, n )
+        import int64
+        magma_devptr_t  :: ptr
+        integer         :: n
+    end function
+
+    integer function magmaf_free_pinned( ptr )
+        import int64
+        magma_devptr_t  :: ptr
+    end function
+
+    !! -------------------------------------------------------------------------
+    !! timing; see magma_timer.cpp
+    subroutine magmaf_wtime( time )
+        double precision :: time
+    end subroutine
+
+    end interface
+
+    !! -------------------------------------------------------------------------
+    ! parameter constants from magma_types.h
+    ! currently MAGMA's Fortran interface uses characters, not integers
+    character, parameter :: &
+        MagmaFalse         = 'n',  &
+        MagmaTrue          = 'y',  &
+        MagmaRowMajor      = 'r',  &
+        MagmaColMajor      = 'c',  &
+        MagmaNoTrans       = 'n',  &
+        MagmaTrans         = 't',  &
+        MagmaConjTrans     = 'c',  &
+        MagmaUpper         = 'u',  &
+        MagmaLower         = 'l',  &
+        MagmaFull          = 'f',  &
+        MagmaNonUnit       = 'n',  &
+        MagmaUnit          = 'u',  &
+        MagmaLeft          = 'l',  &
+        MagmaRight         = 'r',  &
+        MagmaBothSides     = 'b',  &
+        MagmaOneNorm       = '1',  &
+        MagmaTwoNorm       = '2',  &
+        MagmaFrobeniusNorm = 'f',  &
+        MagmaInfNorm       = 'i',  &
+        MagmaMaxNorm       = 'm',  &
+        MagmaDistUniform   = 'u',  &
+        MagmaDistSymmetric = 's',  &
+        MagmaDistNormal    = 'n',  &
+        MagmaHermGeev      = 'h',  &
+        MagmaHermPoev      = 'p',  &
+        MagmaNonsymPosv    = 'n',  &
+        MagmaSymPosv       = 's',  &
+        MagmaNoPacking     = 'n',  &
+        MagmaPackSubdiag   = 'u',  &
+        MagmaPackSupdiag   = 'l',  &
+        MagmaPackColumn    = 'c',  &
+        MagmaPackRow       = 'r',  &
+        MagmaPackLowerBand = 'b',  &
+        MagmaPackUpeprBand = 'q',  &
+        MagmaPackAll       = 'z',  &
+        MagmaNoVec         = 'n',  &
+        MagmaVec           = 'v',  &
+        MagmaIVec          = 'i',  &
+        MagmaAllVec        = 'a',  &
+        MagmaSomeVec       = 's',  &
+        MagmaOverwriteVec  = 'o',  &
+        MagmaBacktransVec  = 'b',  &
+        MagmaRangeAll      = 'a',  &
+        MagmaRangeV        = 'v',  &
+        MagmaRangeI        = 'i',  &
+        MagmaQ             = 'q',  &
+        MagmaP             = 'p',  &
+        MagmaForward       = 'f',  &
+        MagmaBackward      = 'b',  &
+        MagmaColumnwise    = 'c',  &
+        MagmaRowwise       = 'r'
+
+contains
+
+! --------------------
+!> Sets ptrNew = ptrOld( i ), with stride inc.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_soff1d( ptrNew, ptrOld, inc, i )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: inc, i
+
+    ptrNew = ptrOld + (i-1) * inc * sizeof_real
+end subroutine magmaf_soff1d
+
+!> Sets ptrNew = ptrOld( i, j ), with leading dimension lda.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_soff2d( ptrNew, ptrOld, lda, i, j )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: lda, i, j
+
+    ptrNew = ptrOld + ((j-1) * lda + (i-1)) * sizeof_real
+end subroutine magmaf_soff2d
+
+! --------------------
+!> Sets ptrNew = ptrOld( i ), with stride inc.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_doff1d( ptrNew, ptrOld, inc, i )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: inc, i
+
+    ptrNew = ptrOld + (i-1) * inc * sizeof_double
+end subroutine magmaf_doff1d
+
+!> Sets ptrNew = ptrOld( i, j ), with leading dimension lda.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_doff2d( ptrNew, ptrOld, lda, i, j )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: lda, i, j
+
+    ptrNew = ptrOld + ((j-1) * lda + (i-1)) * sizeof_double
+end subroutine magmaf_doff2d
+
+! --------------------
+!> Sets ptrNew = ptrOld( i ), with stride inc.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_coff1d( ptrNew, ptrOld, inc, i )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: inc, i
+
+    ptrNew = ptrOld + (i-1) * inc * sizeof_complex
+end subroutine magmaf_coff1d
+
+!> Sets ptrNew = ptrOld( i, j ), with leading dimension lda.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_coff2d( ptrNew, ptrOld, lda, i, j )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: lda, i, j
+
+    ptrNew = ptrOld + ((j-1) * lda + (i-1)) * sizeof_complex
+end subroutine magmaf_coff2d
+
+! --------------------
+!> Sets ptrNew = ptrOld( i ), with stride inc.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_zoff1d( ptrNew, ptrOld, inc, i )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: inc, i
+
+    ptrNew = ptrOld + (i-1) * inc * sizeof_complex_16
+end subroutine magmaf_zoff1d
+
+!> Sets ptrNew = ptrOld( i, j ), with leading dimension lda.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_zoff2d( ptrNew, ptrOld, lda, i, j )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: lda, i, j
+
+    ptrNew = ptrOld + ((j-1) * lda + (i-1)) * sizeof_complex_16
+end subroutine magmaf_zoff2d
+
+! --------------------
+!> Sets ptrNew = ptrOld( i ), with stride inc.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_ioff1d( ptrNew, ptrOld, inc, i )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: inc, i
+
+    ptrNew = ptrOld + (i-1) * inc * sizeof_integer
+end subroutine magmaf_ioff1d
+
+!> Sets ptrNew = ptrOld( i, j ), with leading dimension lda.
+!! Useful because CUDA pointers are opaque types in Fortran.
+subroutine magmaf_ioff2d( ptrNew, ptrOld, lda, i, j )
+    magma_devptr_t   :: ptrNew
+    magma_devptr_t   :: ptrOld
+    integer          :: lda, i, j
+
+    ptrNew = ptrOld + ((j-1) * lda + (i-1)) * sizeof_integer
+end subroutine magmaf_ioff2d
+
+end module magma
diff --git a/lib/magma_fmodules/magma_cfortran.F b/lib/magma_fmodules/magma_cfortran.F
new file mode 100644
index 0000000000..48d48b9c71
--- /dev/null
+++ b/lib/magma_fmodules/magma_cfortran.F
@@ -0,0 +1,2086 @@
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!  This file is AUTOMATICALLY GENERATED by:
+!!  tools/fortran_wrappers.pl include/magma_c.i
+!!  Do not edit.
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+module magma_cfortran
+
+use magma_param
+use iso_fortran_env, ONLY: int64
+
+#define magma_devptr_t integer(int64)
+
+implicit none
+
+!---- Fortran interfaces to MAGMA subroutines ----
+interface
+
+integer function magmaf_get_cpotrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_cgetrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cgetri_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_chetrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_chetrf_nopiv_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_chetrf_aasen_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_cgeqp3_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cgeqrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cgeqlf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cgelqf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cgehrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_chetrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_chegst_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_chegst_m_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_cgebrd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cgesvd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_cbulge_nb( n, nbthreads )
+    integer          :: n
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_cbulge_vblksiz( n, nb, nbthreads )
+    integer          :: n
+    integer          :: nb
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_cbulge_gcperf(  )
+end
+
+subroutine magmaf_cgebrd( m, n, A, lda, d, e, tauq, taup, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: tauq(*)
+    complex          :: taup(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeev( jobvl, jobvr, n, A, lda, w, VL, ldvl, VR, ldvr, work, lwork,  &
+        rwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: w(*)
+    complex          :: VL(*)
+    integer          :: ldvl
+    complex          :: VR(*)
+    integer          :: ldvr
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgeev_m( jobvl, jobvr, n, A, lda, w, VL, ldvl, VR, ldvr, work, lwork,  &
+        rwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: w(*)
+    complex          :: VL(*)
+    integer          :: ldvl
+    complex          :: VR(*)
+    integer          :: ldvr
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgegqr_gpu( ikind, m, n, dA, ldda, dwork, work, info )
+    import int64
+    integer          :: ikind
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dwork
+    complex          :: work(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgehrd( n, ilo, ihi, A, lda, tau, work, lwork, dT, info )
+    import int64
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_cgehrd_m( n, ilo, ihi, A, lda, tau, work, lwork, T, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    complex          :: T(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgehrd2( n, ilo, ihi, A, lda, tau, work, lwork, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgelqf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgelqf_gpu( m, n, dA, ldda, tau, work, lwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgels( trans, m, n, nrhs, A, lda, B, ldb, hwork, lwork, info )
+    import int64
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: A
+    integer          :: lda
+    magma_devptr_t   :: B
+    integer          :: ldb
+    complex          :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cggrqf( m, p, n, A, lda, taua, B, ldb, taub, work, lwork, info )
+    integer          :: m
+    integer          :: p
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: taua(*)
+    complex          :: B(*)
+    integer          :: ldb
+    complex          :: taub(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgglse( m, n, p, A, lda, B, ldb, c, d, x, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    integer          :: p
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    complex          :: c(*)
+    complex          :: d(*)
+    complex          :: x(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgels_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    import int64
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex          :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgels3_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    import int64
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex          :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqlf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqp3( m, n, A, lda, jpvt, tau, work, lwork, rwork, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: jpvt(*)
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgeqp3_gpu( m, n, dA, ldda, jpvt, tau, dwork, lwork, rwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: jpvt(*)
+    complex          :: tau(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgeqr2x_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqr2x2_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqr2x3_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqr2x4_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, queue, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrf_gpu( m, n, dA, ldda, tau, dT, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrf_m( ngpu, m, n, A, lda, tau, work, lwork, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrf_ooc( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrf2_gpu( m, n, dA, ldda, tau, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrf3_gpu( m, n, dA, ldda, tau, dT, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork, info  &
+        )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex          :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgeqrs3_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork,  &
+        info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex          :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgerbt_gpu( gen, n, nrhs, dA, ldda, dB, lddb, U, V, info )
+    import int64
+    character        :: gen
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex          :: U(*)
+    complex          :: V(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgerfs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dworkd,  &
+        dAF, iter, info )
+    import int64
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    magma_devptr_t   :: dX
+    integer          :: lddx
+    magma_devptr_t   :: dworkd
+    magma_devptr_t   :: dAF
+    integer          :: iter
+    integer          :: info
+end
+
+subroutine magmaf_cgesdd( jobz, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork, rwork,  &
+        iwork, info )
+    character        :: jobz
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: s(*)
+    complex          :: U(*)
+    integer          :: ldu
+    complex          :: VT(*)
+    integer          :: ldvt
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgesv( n, nrhs, A, lda, ipiv, B, ldb, info )
+    integer          :: n
+    integer          :: nrhs
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    complex          :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_cgesv_gpu( n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    import int64
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_cgesv_nopiv_gpu( n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_cgesv_rbt( ref, n, nrhs, A, lda, B, ldb, info )
+    character        :: ref
+    integer          :: n
+    integer          :: nrhs
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_cgesvd( jobu, jobvt, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork,  &
+        rwork, info )
+    character        :: jobu
+    character        :: jobvt
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: s(*)
+    complex          :: U(*)
+    integer          :: ldu
+    complex          :: VT(*)
+    integer          :: ldvt
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgetf2_gpu( m, n, dA, ldda, ipiv, queue, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_cgetf2_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_cgetrf( m, n, A, lda, ipiv, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgetrf_gpu( m, n, dA, ldda, ipiv, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgetrf_m( ngpu, m, n, A, lda, ipiv, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_cgetrf_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_cgetrf_nopiv_gpu( m, n, dA, ldda, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_cgetri_gpu( n, dA, ldda, ipiv, dwork, lwork, info )
+    import int64
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    import int64
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_cgetrs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_cheevd( jobz, uplo, n, A, lda, w, work, lwork, rwork, lrwork, iwork,  &
+        liwork, info )
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevd_gpu( jobz, uplo, n, dA, ldda, w, wA, ldwa, work, lwork, rwork,  &
+        lrwork, iwork, liwork, info )
+    import int64
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: w(*)
+    complex          :: wA(*)
+    integer          :: ldwa
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevd_m( ngpu, jobz, uplo, n, A, lda, w, work, lwork, rwork, lrwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevdx( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w, work,  &
+        lwork, rwork, lrwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevdx_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, mout, w,  &
+        wA, ldwa, work, lwork, rwork, lrwork, iwork, liwork, info )
+    import int64
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: wA(*)
+    integer          :: ldwa
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevdx_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevdx_2stage( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, rwork, lrwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevdx_2stage_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu,  &
+        mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevr( jobz, range, uplo, n, A, lda, vl, vu, il, iu, abstol, mout, w,  &
+        Z, ldz, isuppz, work, lwork, rwork, lrwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: abstol
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: Z(*)
+    integer          :: ldz
+    integer          :: isuppz(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevr_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, abstol,  &
+        mout, w, dZ, lddz, isuppz, wA, ldwa, wZ, ldwz, work, lwork, rwork, lrwork, iwork,  &
+        liwork, info )
+    import int64
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: abstol
+    integer          :: mout(*)
+    real             :: w(*)
+    magma_devptr_t   :: dZ
+    integer          :: lddz
+    integer          :: isuppz(*)
+    complex          :: wA(*)
+    integer          :: ldwa
+    complex          :: wZ(*)
+    integer          :: ldwz
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_cheevx( jobz, range, uplo, n, A, lda, vl, vu, il, iu, abstol, mout, w,  &
+        Z, ldz, work, lwork, rwork, iwork, ifail, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: abstol
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: Z(*)
+    integer          :: ldz
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: ifail(*)
+    integer          :: info
+end
+
+subroutine magmaf_cheevx_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, abstol,  &
+        mout, w, dZ, lddz, wA, ldwa, wZ, ldwz, work, lwork, rwork, iwork, ifail, info )
+    import int64
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: abstol
+    integer          :: mout(*)
+    real             :: w(*)
+    magma_devptr_t   :: dZ
+    integer          :: lddz
+    complex          :: wA(*)
+    integer          :: ldwa
+    complex          :: wZ(*)
+    integer          :: ldwz
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: ifail(*)
+    integer          :: info
+end
+
+subroutine magmaf_chegst( itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_chegst_gpu( itype, uplo, n, dA, ldda, dB, lddb, info )
+    import int64
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_chegst_m( ngpu, itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_chegvd( itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork, rwork,  &
+        lrwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvd_m( ngpu, itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork,  &
+        rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvdx( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvdx_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvdx_2stage( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvdx_2stage_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb,  &
+        vl, vu, il, iu, mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvr( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        abstol, mout, w, Z, ldz, isuppz, work, lwork, rwork, lrwork, iwork, liwork, info  &
+        )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: abstol
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: Z(*)
+    integer          :: ldz
+    integer          :: isuppz(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_chegvx( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        abstol, mout, w, Z, ldz, work, lwork, rwork, iwork, ifail, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: abstol
+    integer          :: mout(*)
+    real             :: w(*)
+    complex          :: Z(*)
+    integer          :: ldz
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: ifail(*)
+    integer          :: info
+end
+
+subroutine magmaf_chesv( uplo, n, nrhs, A, lda, ipiv, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    complex          :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_chesv_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_chetrd( uplo, n, A, lda, d, e, tau, work, lwork, info )
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_chetrd_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: tau(*)
+    complex          :: wA(*)
+    integer          :: ldwa
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_chetrd2_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork,  &
+        dwork, ldwork, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: tau(*)
+    complex          :: wA(*)
+    integer          :: ldwa
+    complex          :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    integer          :: info
+end
+
+integer function magmaf_chetrd_hb2st( uplo, n, nb, Vblksiz, A, lda, d, e, V, ldv, TAU,  &
+        compT, T, ldt )
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    integer          :: Vblksiz
+    complex          :: A(*)
+    integer          :: lda
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: V(*)
+    integer          :: ldv
+    complex          :: TAU(*)
+    integer          :: compT
+    complex          :: T(*)
+    integer          :: ldt
+end
+
+subroutine magmaf_chetrd_he2hb( uplo, n, nb, A, lda, tau, work, lwork, dT, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_chetrf( uplo, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_chetrf_aasen( uplo, cpu_panel, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: cpu_panel
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_chetrf_nopiv( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_chetrf_nopiv_cpu( uplo, n, ib, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: ib
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_chetrf_nopiv_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_chetrs_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+integer function magmaf_clarf_gpu( m, n, dv, dtau, dC, lddc, queue )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dv
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: queue
+end
+
+integer function magmaf_clarfb2_gpu( m, n, k, dV, lddv, dT, lddt, dC, lddc, dwork,  &
+        ldwork, queue )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dV
+    integer          :: lddv
+    magma_devptr_t   :: dT
+    integer          :: lddt
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_clatrsd( uplo, trans, diag, normin, n, A, lda, lambda, x, scale, cnorm,  &
+        info )
+    character        :: uplo
+    character        :: trans
+    character        :: diag
+    character        :: normin
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: lambda
+    complex          :: x(*)
+    real             :: scale(*)
+    real             :: cnorm(*)
+    integer          :: info
+end
+
+subroutine magmaf_clauum( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_clauum_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_cposv( uplo, n, nrhs, A, lda, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_cposv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_cpotf2_gpu( uplo, n, dA, ldda, queue, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_cpotrf( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_cpotrf_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_cpotrf_m( ngpu, uplo, n, A, lda, info )
+    integer          :: ngpu
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_cpotri( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_cpotri_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_cpotrs_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_csysv_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_csytrf_nopiv_cpu( uplo, n, ib, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: ib
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_csytrf_nopiv_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_csytrs_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_cstedx( range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork, iwork,  &
+        liwork, dwork, info )
+    import int64
+    character        :: range
+    integer          :: n
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: Z(*)
+    integer          :: ldz
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_cstedx_m( ngpu, range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    character        :: range
+    integer          :: n
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: d(*)
+    real             :: e(*)
+    complex          :: Z(*)
+    integer          :: ldz
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ctrevc3( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, rwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    complex          :: T(*)
+    integer          :: ldt
+    complex          :: VL(*)
+    integer          :: ldvl
+    complex          :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_ctrevc3_mt( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, rwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    complex          :: T(*)
+    integer          :: ldt
+    complex          :: VL(*)
+    integer          :: ldvl
+    complex          :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    complex          :: work(*)
+    integer          :: lwork
+    real             :: rwork(*)
+    integer          :: info
+end
+
+integer function magmaf_ctrsm_m( ngpu, side, uplo, transa, diag, m, n, alpha, A, lda, B,  &
+        ldb )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: transa
+    character        :: diag
+    integer          :: m
+    integer          :: n
+    complex          :: alpha
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: B(*)
+    integer          :: ldb
+end
+
+subroutine magmaf_ctrtri( uplo, diag, n, A, lda, info )
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_ctrtri_gpu( uplo, diag, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_cungbr( vect, m, n, k, A, lda, tau, work, lwork, info )
+    character        :: vect
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunghr( n, ilo, ihi, A, lda, tau, dT, nb, info )
+    import int64
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cunghr_m( n, ilo, ihi, A, lda, tau, T, nb, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cunglq( m, n, k, A, lda, tau, dT, nb, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cungqr( m, n, k, A, lda, tau, dT, nb, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cungqr_gpu( m, n, k, dA, ldda, tau, dT, nb, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cungqr_m( m, n, k, A, lda, tau, T, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cungqr2( m, n, k, A, lda, tau, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_cunmbr( vect, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    character        :: vect
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmlq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmrq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmql( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmql2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    import int64
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex          :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_cunmqr( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmqr_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, hwork,  &
+        lwork, dT, nb, info )
+    import int64
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex          :: hwork(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_cunmqr2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    import int64
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex          :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_cunmqr_m( ngpu, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    integer          :: ngpu
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmtr( side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info  &
+        )
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_cunmtr_gpu( side, uplo, trans, m, n, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    import int64
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex          :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex          :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_cunmtr_m( ngpu, side, uplo, trans, m, n, A, lda, tau, C, ldc, work,  &
+        lwork, info )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: tau(*)
+    complex          :: C(*)
+    integer          :: ldc
+    complex          :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+integer function magmaf_c_isnan( x )
+    complex          :: x
+end
+
+integer function magmaf_c_isinf( x )
+    complex          :: x
+end
+
+integer function magmaf_c_isnan_inf( x )
+    complex          :: x
+end
+
+integer function magmaf_cnan_inf( uplo, m, n, A, lda, cnt_nan, cnt_inf )
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+end
+
+integer function magmaf_cnan_inf_gpu( uplo, m, n, dA, ldda, cnt_nan, cnt_inf, queue )
+    import int64
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_cprint( m, n, A, lda )
+    integer          :: m
+    integer          :: n
+    complex          :: A(*)
+    integer          :: lda
+end
+
+subroutine magmaf_cprint_gpu( m, n, dA, ldda, queue )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_cpanel_to_q( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: work(*)
+end
+
+subroutine magmaf_cq_to_panel( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    complex          :: A(*)
+    integer          :: lda
+    complex          :: work(*)
+end
+
+end interface
+
+end module magma_cfortran
diff --git a/lib/magma_fmodules/magma_dfortran.F b/lib/magma_fmodules/magma_dfortran.F
new file mode 100644
index 0000000000..22b3dcc2cc
--- /dev/null
+++ b/lib/magma_fmodules/magma_dfortran.F
@@ -0,0 +1,1830 @@
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!  This file is AUTOMATICALLY GENERATED by:
+!!  tools/fortran_wrappers.pl include/magma_d.i
+!!  Do not edit.
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+module magma_dfortran
+
+use magma_param
+
+implicit none
+
+!---- Fortran interfaces to MAGMA subroutines ----
+interface
+
+integer function magmaf_get_dlaex3_m_nb(  )
+end
+
+integer function magmaf_get_dpotrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dgetrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dgetri_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dsytrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dsytrf_nopiv_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dsytrf_aasen_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dgeqp3_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dgeqrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dgeqlf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dgelqf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dgehrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dsytrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dsygst_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dsygst_m_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_dgebrd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dgesvd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_dbulge_nb( n, nbthreads )
+    integer          :: n
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_dbulge_vblksiz( n, nb, nbthreads )
+    integer          :: n
+    integer          :: nb
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_dbulge_gcperf(  )
+end
+
+subroutine magmaf_dmove_eig( range, n, w, il, iu, vl, vu, mout )
+    character        :: range
+    integer          :: n
+    double precision :: w(*)
+    integer          :: il(*)
+    integer          :: iu(*)
+    double precision :: vl
+    double precision :: vu
+    integer          :: mout(*)
+end
+
+subroutine magmaf_dvrange( k, d, il, iu, vl, vu )
+    integer          :: k
+    double precision :: d(*)
+    integer          :: il(*)
+    integer          :: iu(*)
+    double precision :: vl
+    double precision :: vu
+end
+
+subroutine magmaf_dirange( k, indxq, iil, iiu, il, iu )
+    integer          :: k
+    integer          :: indxq(*)
+    integer          :: iil(*)
+    integer          :: iiu(*)
+    integer          :: il
+    integer          :: iu
+end
+
+subroutine magmaf_dgebrd( m, n, A, lda, d, e, tauq, taup, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: tauq(*)
+    double precision :: taup(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeev( jobvl, jobvr, n, A, lda, wr, wi, VL, ldvl, VR, ldvr, work,  &
+        lwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: wr(*)
+    double precision :: wi(*)
+    double precision :: VL(*)
+    integer          :: ldvl
+    double precision :: VR(*)
+    integer          :: ldvr
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeev_m( jobvl, jobvr, n, A, lda, wr, wi, VL, ldvl, VR, ldvr, work,  &
+        lwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: wr(*)
+    double precision :: wi(*)
+    double precision :: VL(*)
+    integer          :: ldvl
+    double precision :: VR(*)
+    integer          :: ldvr
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgegqr_gpu( ikind, m, n, dA, ldda, dwork, work, info )
+    integer          :: ikind
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dwork
+    double precision :: work(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgehrd( n, ilo, ihi, A, lda, tau, work, lwork, dT, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_dgehrd_m( n, ilo, ihi, A, lda, tau, work, lwork, T, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    double precision :: T(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgehrd2( n, ilo, ihi, A, lda, tau, work, lwork, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgelqf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgelqf_gpu( m, n, dA, ldda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgels( trans, m, n, nrhs, A, lda, B, ldb, hwork, lwork, info )
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: A
+    integer          :: lda
+    magma_devptr_t   :: B
+    integer          :: ldb
+    double precision :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dggrqf( m, p, n, A, lda, taua, B, ldb, taub, work, lwork, info )
+    integer          :: m
+    integer          :: p
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: taua(*)
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: taub(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgglse( m, n, p, A, lda, B, ldb, c, d, x, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    integer          :: p
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: c(*)
+    double precision :: d(*)
+    double precision :: x(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgels_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    double precision :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgels3_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    double precision :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqlf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqp3( m, n, A, lda, jpvt, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: jpvt(*)
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqp3_gpu( m, n, dA, ldda, jpvt, tau, dwork, lwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: jpvt(*)
+    double precision :: tau(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqr2x_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqr2x2_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqr2x3_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqr2x4_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, queue, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrf_gpu( m, n, dA, ldda, tau, dT, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrf_m( ngpu, m, n, A, lda, tau, work, lwork, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrf_ooc( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrf2_gpu( m, n, dA, ldda, tau, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrf3_gpu( m, n, dA, ldda, tau, dT, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork, info  &
+        )
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    double precision :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgeqrs3_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork,  &
+        info )
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    double precision :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgerbt_gpu( gen, n, nrhs, dA, ldda, dB, lddb, U, V, info )
+    character        :: gen
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    double precision :: U(*)
+    double precision :: V(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgerfs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dworkd,  &
+        dAF, iter, info )
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    magma_devptr_t   :: dX
+    integer          :: lddx
+    magma_devptr_t   :: dworkd
+    magma_devptr_t   :: dAF
+    integer          :: iter
+    integer          :: info
+end
+
+subroutine magmaf_dgesdd( jobz, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork, iwork,  &
+        info )
+    character        :: jobz
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: s(*)
+    double precision :: U(*)
+    integer          :: ldu
+    double precision :: VT(*)
+    integer          :: ldvt
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgesv( n, nrhs, A, lda, ipiv, B, ldb, info )
+    integer          :: n
+    integer          :: nrhs
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    double precision :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_dgesv_gpu( n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dgesv_nopiv_gpu( n, nrhs, dA, ldda, dB, lddb, info )
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dgesv_rbt( ref, n, nrhs, A, lda, B, ldb, info )
+    character        :: ref
+    integer          :: n
+    integer          :: nrhs
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_dgesvd( jobu, jobvt, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork,  &
+        info )
+    character        :: jobu
+    character        :: jobvt
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: s(*)
+    double precision :: U(*)
+    integer          :: ldu
+    double precision :: VT(*)
+    integer          :: ldvt
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgetf2_gpu( m, n, dA, ldda, ipiv, queue, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_dgetf2_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dgetrf( m, n, A, lda, ipiv, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgetrf_gpu( m, n, dA, ldda, ipiv, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgetrf_m( ngpu, m, n, A, lda, ipiv, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_dgetrf_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dgetrf_nopiv_gpu( m, n, dA, ldda, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_dgetri_gpu( n, dA, ldda, ipiv, dwork, lwork, info )
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dgetrs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dsyevd( jobz, uplo, n, A, lda, w, work, lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevd_gpu( jobz, uplo, n, dA, ldda, w, wA, ldwa, work, lwork, iwork,  &
+        liwork, info )
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: w(*)
+    double precision :: wA(*)
+    integer          :: ldwa
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevd_m( ngpu, jobz, uplo, n, A, lda, w, work, lwork, iwork, liwork,  &
+        info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevdx( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w, work,  &
+        lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevdx_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, mout, w,  &
+        wA, ldwa, work, lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: wA(*)
+    integer          :: ldwa
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevdx_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevdx_2stage( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsyevdx_2stage_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu,  &
+        mout, w, work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsygst( itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_dsygst_gpu( itype, uplo, n, dA, ldda, dB, lddb, info )
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dsygst_m( ngpu, itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_dsygvd( itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork, iwork,  &
+        liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsygvd_m( ngpu, itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsygvdx( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        mout, w, work, lwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsygvdx_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsygvdx_2stage( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsygvdx_2stage_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb,  &
+        vl, vu, il, iu, mout, w, work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dsysv( uplo, n, nrhs, A, lda, ipiv, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    double precision :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_dsysv_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dsytrd( uplo, n, A, lda, d, e, tau, work, lwork, info )
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dsytrd_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: tau(*)
+    double precision :: wA(*)
+    integer          :: ldwa
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dsytrd2_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork,  &
+        dwork, ldwork, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: tau(*)
+    double precision :: wA(*)
+    integer          :: ldwa
+    double precision :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    integer          :: info
+end
+
+integer function magmaf_dsytrd_sb2st( uplo, n, nb, Vblksiz, A, lda, d, e, V, ldv, TAU,  &
+        compT, T, ldt )
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    integer          :: Vblksiz
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: V(*)
+    integer          :: ldv
+    double precision :: TAU(*)
+    integer          :: compT
+    double precision :: T(*)
+    integer          :: ldt
+end
+
+subroutine magmaf_dsytrd_sy2sb( uplo, n, nb, A, lda, tau, work, lwork, dT, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_dsytrf( uplo, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_dsytrf_aasen( uplo, cpu_panel, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: cpu_panel
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_dsytrf_nopiv( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dsytrf_nopiv_cpu( uplo, n, ib, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: ib
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dsytrf_nopiv_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_dsytrs_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dlaln2( trans, na, nw, smin, ca, A, lda, d1, d2, B, ldb, wr, wi, X,  &
+        ldx, scale, xnorm, info )
+    integer          :: trans
+    integer          :: na
+    integer          :: nw
+    double precision :: smin
+    double precision :: ca
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: d1
+    double precision :: d2
+    double precision :: B(*)
+    integer          :: ldb
+    double precision :: wr
+    double precision :: wi
+    double precision :: X(*)
+    integer          :: ldx
+    double precision :: scale(*)
+    double precision :: xnorm(*)
+    integer          :: info
+end
+
+subroutine magmaf_dlaqtrsd( trans, n, T, ldt, x, ldx, cnorm, info )
+    character        :: trans
+    integer          :: n
+    double precision :: T(*)
+    integer          :: ldt
+    double precision :: x(*)
+    integer          :: ldx
+    double precision :: cnorm(*)
+    integer          :: info
+end
+
+integer function magmaf_dlarf_gpu( m, n, dv, dtau, dC, lddc, queue )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dv
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: queue
+end
+
+integer function magmaf_dlarfb2_gpu( m, n, k, dV, lddv, dT, lddt, dC, lddc, dwork,  &
+        ldwork, queue )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dV
+    integer          :: lddv
+    magma_devptr_t   :: dT
+    integer          :: lddt
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_dlauum( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dlauum_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_dposv( uplo, n, nrhs, A, lda, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_dposv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dpotf2_gpu( uplo, n, dA, ldda, queue, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_dpotrf( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dpotrf_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_dpotrf_m( ngpu, uplo, n, A, lda, info )
+    integer          :: ngpu
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dpotri( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dpotri_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_dpotrs_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_dstedx( range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork, iwork,  &
+        liwork, dwork, info )
+    character        :: range
+    integer          :: n
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: Z(*)
+    integer          :: ldz
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_dstedx_m( ngpu, range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    character        :: range
+    integer          :: n
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: d(*)
+    double precision :: e(*)
+    double precision :: Z(*)
+    integer          :: ldz
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_dtrevc3( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    double precision :: T(*)
+    integer          :: ldt
+    double precision :: VL(*)
+    integer          :: ldvl
+    double precision :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dtrevc3_mt( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    double precision :: T(*)
+    integer          :: ldt
+    double precision :: VL(*)
+    integer          :: ldvl
+    double precision :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+integer function magmaf_dtrsm_m( ngpu, side, uplo, transa, diag, m, n, alpha, A, lda, B,  &
+        ldb )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: transa
+    character        :: diag
+    integer          :: m
+    integer          :: n
+    double precision :: alpha
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: B(*)
+    integer          :: ldb
+end
+
+subroutine magmaf_dtrtri( uplo, diag, n, A, lda, info )
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_dtrtri_gpu( uplo, diag, n, dA, ldda, info )
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_dorgbr( vect, m, n, k, A, lda, tau, work, lwork, info )
+    character        :: vect
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dorghr( n, ilo, ihi, A, lda, tau, dT, nb, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dorghr_m( n, ilo, ihi, A, lda, tau, T, nb, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dorglq( m, n, k, A, lda, tau, dT, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dorgqr( m, n, k, A, lda, tau, dT, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dorgqr_gpu( m, n, k, dA, ldda, tau, dT, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dorgqr_m( m, n, k, A, lda, tau, T, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dorgqr2( m, n, k, A, lda, tau, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_dormbr( vect, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    character        :: vect
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormlq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormrq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormql( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormql2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    double precision :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_dormqr( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormqr_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, hwork,  &
+        lwork, dT, nb, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    double precision :: hwork(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_dormqr2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    double precision :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_dormqr_m( ngpu, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    integer          :: ngpu
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormtr( side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info  &
+        )
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_dormtr_gpu( side, uplo, trans, m, n, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    double precision :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_dormtr_m( ngpu, side, uplo, trans, m, n, A, lda, tau, C, ldc, work,  &
+        lwork, info )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: tau(*)
+    double precision :: C(*)
+    integer          :: ldc
+    double precision :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+integer function magmaf_d_isnan( x )
+    double precision :: x
+end
+
+integer function magmaf_d_isinf( x )
+    double precision :: x
+end
+
+integer function magmaf_d_isnan_inf( x )
+    double precision :: x
+end
+
+double precision function magmaf_dmake_lwork( lwork )
+    integer          :: lwork
+end
+
+integer function magmaf_dnan_inf( uplo, m, n, A, lda, cnt_nan, cnt_inf )
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+end
+
+integer function magmaf_dnan_inf_gpu( uplo, m, n, dA, ldda, cnt_nan, cnt_inf, queue )
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_dprint( m, n, A, lda )
+    integer          :: m
+    integer          :: n
+    double precision :: A(*)
+    integer          :: lda
+end
+
+subroutine magmaf_dprint_gpu( m, n, dA, ldda, queue )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_dpanel_to_q( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: work(*)
+end
+
+subroutine magmaf_dq_to_panel( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    double precision :: A(*)
+    integer          :: lda
+    double precision :: work(*)
+end
+
+end interface
+
+end module magma_dfortran
diff --git a/lib/magma_fmodules/magma_param.F b/lib/magma_fmodules/magma_param.F
new file mode 100644
index 0000000000..8fd7c5f978
--- /dev/null
+++ b/lib/magma_fmodules/magma_param.F
@@ -0,0 +1,25 @@
+!
+!   -- MAGMA (version 2.8.0) --
+!      Univ. of Tennessee, Knoxville
+!      Univ. of California, Berkeley
+!      Univ. of Colorado, Denver
+!      @date March 2024
+!
+
+module magma_param
+
+    implicit none
+
+    ! could use STORAGE_SIZE in Fortran 2008
+    integer, parameter :: sizeof_complex_16 = 16
+    integer, parameter :: sizeof_complex    = 8
+    integer, parameter :: sizeof_double     = 8
+    integer, parameter :: sizeof_real       = 4
+    
+#if defined(MAGMA_ILP64) || defined(MKL_ILP64)
+    integer, parameter :: sizeof_integer    = 8
+#else
+    integer, parameter :: sizeof_integer    = 4
+#endif
+
+end module magma_param
diff --git a/lib/magma_fmodules/magma_sfortran.F b/lib/magma_fmodules/magma_sfortran.F
new file mode 100644
index 0000000000..6a50cb38ce
--- /dev/null
+++ b/lib/magma_fmodules/magma_sfortran.F
@@ -0,0 +1,1830 @@
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!  This file is AUTOMATICALLY GENERATED by:
+!!  tools/fortran_wrappers.pl include/magma_s.i
+!!  Do not edit.
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+module magma_sfortran
+
+use magma_param
+
+implicit none
+
+!---- Fortran interfaces to MAGMA subroutines ----
+interface
+
+integer function magmaf_get_slaex3_m_nb(  )
+end
+
+integer function magmaf_get_spotrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_sgetrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sgetri_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_ssytrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_ssytrf_nopiv_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_ssytrf_aasen_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_sgeqp3_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sgeqrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sgeqlf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sgelqf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sgehrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_ssytrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_ssygst_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_ssygst_m_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_sgebrd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sgesvd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_sbulge_nb( n, nbthreads )
+    integer          :: n
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_sbulge_vblksiz( n, nb, nbthreads )
+    integer          :: n
+    integer          :: nb
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_sbulge_gcperf(  )
+end
+
+subroutine magmaf_smove_eig( range, n, w, il, iu, vl, vu, mout )
+    character        :: range
+    integer          :: n
+    real             :: w(*)
+    integer          :: il(*)
+    integer          :: iu(*)
+    real             :: vl
+    real             :: vu
+    integer          :: mout(*)
+end
+
+subroutine magmaf_svrange( k, d, il, iu, vl, vu )
+    integer          :: k
+    real             :: d(*)
+    integer          :: il(*)
+    integer          :: iu(*)
+    real             :: vl
+    real             :: vu
+end
+
+subroutine magmaf_sirange( k, indxq, iil, iiu, il, iu )
+    integer          :: k
+    integer          :: indxq(*)
+    integer          :: iil(*)
+    integer          :: iiu(*)
+    integer          :: il
+    integer          :: iu
+end
+
+subroutine magmaf_sgebrd( m, n, A, lda, d, e, tauq, taup, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: d(*)
+    real             :: e(*)
+    real             :: tauq(*)
+    real             :: taup(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeev( jobvl, jobvr, n, A, lda, wr, wi, VL, ldvl, VR, ldvr, work,  &
+        lwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: wr(*)
+    real             :: wi(*)
+    real             :: VL(*)
+    integer          :: ldvl
+    real             :: VR(*)
+    integer          :: ldvr
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeev_m( jobvl, jobvr, n, A, lda, wr, wi, VL, ldvl, VR, ldvr, work,  &
+        lwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: wr(*)
+    real             :: wi(*)
+    real             :: VL(*)
+    integer          :: ldvl
+    real             :: VR(*)
+    integer          :: ldvr
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgegqr_gpu( ikind, m, n, dA, ldda, dwork, work, info )
+    integer          :: ikind
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dwork
+    real             :: work(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgehrd( n, ilo, ihi, A, lda, tau, work, lwork, dT, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_sgehrd_m( n, ilo, ihi, A, lda, tau, work, lwork, T, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    real             :: T(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgehrd2( n, ilo, ihi, A, lda, tau, work, lwork, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgelqf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgelqf_gpu( m, n, dA, ldda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgels( trans, m, n, nrhs, A, lda, B, ldb, hwork, lwork, info )
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: A
+    integer          :: lda
+    magma_devptr_t   :: B
+    integer          :: ldb
+    real             :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sggrqf( m, p, n, A, lda, taua, B, ldb, taub, work, lwork, info )
+    integer          :: m
+    integer          :: p
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: taua(*)
+    real             :: B(*)
+    integer          :: ldb
+    real             :: taub(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgglse( m, n, p, A, lda, B, ldb, c, d, x, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    integer          :: p
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: c(*)
+    real             :: d(*)
+    real             :: x(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgels_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    real             :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgels3_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    real             :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqlf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqp3( m, n, A, lda, jpvt, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: jpvt(*)
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqp3_gpu( m, n, dA, ldda, jpvt, tau, dwork, lwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: jpvt(*)
+    real             :: tau(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqr2x_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqr2x2_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqr2x3_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqr2x4_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, queue, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrf_gpu( m, n, dA, ldda, tau, dT, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrf_m( ngpu, m, n, A, lda, tau, work, lwork, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrf_ooc( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrf2_gpu( m, n, dA, ldda, tau, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrf3_gpu( m, n, dA, ldda, tau, dT, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork, info  &
+        )
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    real             :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgeqrs3_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork,  &
+        info )
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    real             :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgerbt_gpu( gen, n, nrhs, dA, ldda, dB, lddb, U, V, info )
+    character        :: gen
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    real             :: U(*)
+    real             :: V(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgerfs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dworkd,  &
+        dAF, iter, info )
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    magma_devptr_t   :: dX
+    integer          :: lddx
+    magma_devptr_t   :: dworkd
+    magma_devptr_t   :: dAF
+    integer          :: iter
+    integer          :: info
+end
+
+subroutine magmaf_sgesdd( jobz, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork, iwork,  &
+        info )
+    character        :: jobz
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: s(*)
+    real             :: U(*)
+    integer          :: ldu
+    real             :: VT(*)
+    integer          :: ldvt
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgesv( n, nrhs, A, lda, ipiv, B, ldb, info )
+    integer          :: n
+    integer          :: nrhs
+    real             :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    real             :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_sgesv_gpu( n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_sgesv_nopiv_gpu( n, nrhs, dA, ldda, dB, lddb, info )
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_sgesv_rbt( ref, n, nrhs, A, lda, B, ldb, info )
+    character        :: ref
+    integer          :: n
+    integer          :: nrhs
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_sgesvd( jobu, jobvt, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork,  &
+        info )
+    character        :: jobu
+    character        :: jobvt
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: s(*)
+    real             :: U(*)
+    integer          :: ldu
+    real             :: VT(*)
+    integer          :: ldvt
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgetf2_gpu( m, n, dA, ldda, ipiv, queue, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_sgetf2_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_sgetrf( m, n, A, lda, ipiv, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgetrf_gpu( m, n, dA, ldda, ipiv, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgetrf_m( ngpu, m, n, A, lda, ipiv, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_sgetrf_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_sgetrf_nopiv_gpu( m, n, dA, ldda, info )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_sgetri_gpu( n, dA, ldda, ipiv, dwork, lwork, info )
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_sgetrs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_ssyevd( jobz, uplo, n, A, lda, w, work, lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevd_gpu( jobz, uplo, n, dA, ldda, w, wA, ldwa, work, lwork, iwork,  &
+        liwork, info )
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: w(*)
+    real             :: wA(*)
+    integer          :: ldwa
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevd_m( ngpu, jobz, uplo, n, A, lda, w, work, lwork, iwork, liwork,  &
+        info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevdx( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w, work,  &
+        lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevdx_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, mout, w,  &
+        wA, ldwa, work, lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: wA(*)
+    integer          :: ldwa
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevdx_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevdx_2stage( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssyevdx_2stage_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu,  &
+        mout, w, work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssygst( itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_ssygst_gpu( itype, uplo, n, dA, ldda, dB, lddb, info )
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_ssygst_m( ngpu, itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_ssygvd( itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork, iwork,  &
+        liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssygvd_m( ngpu, itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssygvdx( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        mout, w, work, lwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssygvdx_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssygvdx_2stage( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssygvdx_2stage_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb,  &
+        vl, vu, il, iu, mout, w, work, lwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    real             :: w(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ssysv( uplo, n, nrhs, A, lda, ipiv, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    real             :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    real             :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_ssysv_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_ssytrd( uplo, n, A, lda, d, e, tau, work, lwork, info )
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: d(*)
+    real             :: e(*)
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_ssytrd_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: d(*)
+    real             :: e(*)
+    real             :: tau(*)
+    real             :: wA(*)
+    integer          :: ldwa
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_ssytrd2_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork,  &
+        dwork, ldwork, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: d(*)
+    real             :: e(*)
+    real             :: tau(*)
+    real             :: wA(*)
+    integer          :: ldwa
+    real             :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    integer          :: info
+end
+
+integer function magmaf_ssytrd_sb2st( uplo, n, nb, Vblksiz, A, lda, d, e, V, ldv, TAU,  &
+        compT, T, ldt )
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    integer          :: Vblksiz
+    real             :: A(*)
+    integer          :: lda
+    real             :: d(*)
+    real             :: e(*)
+    real             :: V(*)
+    integer          :: ldv
+    real             :: TAU(*)
+    integer          :: compT
+    real             :: T(*)
+    integer          :: ldt
+end
+
+subroutine magmaf_ssytrd_sy2sb( uplo, n, nb, A, lda, tau, work, lwork, dT, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_ssytrf( uplo, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_ssytrf_aasen( uplo, cpu_panel, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: cpu_panel
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_ssytrf_nopiv( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_ssytrf_nopiv_cpu( uplo, n, ib, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: ib
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_ssytrf_nopiv_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_ssytrs_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_slaln2( trans, na, nw, smin, ca, A, lda, d1, d2, B, ldb, wr, wi, X,  &
+        ldx, scale, xnorm, info )
+    integer          :: trans
+    integer          :: na
+    integer          :: nw
+    real             :: smin
+    real             :: ca
+    real             :: A(*)
+    integer          :: lda
+    real             :: d1
+    real             :: d2
+    real             :: B(*)
+    integer          :: ldb
+    real             :: wr
+    real             :: wi
+    real             :: X(*)
+    integer          :: ldx
+    real             :: scale(*)
+    real             :: xnorm(*)
+    integer          :: info
+end
+
+subroutine magmaf_slaqtrsd( trans, n, T, ldt, x, ldx, cnorm, info )
+    character        :: trans
+    integer          :: n
+    real             :: T(*)
+    integer          :: ldt
+    real             :: x(*)
+    integer          :: ldx
+    real             :: cnorm(*)
+    integer          :: info
+end
+
+integer function magmaf_slarf_gpu( m, n, dv, dtau, dC, lddc, queue )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dv
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: queue
+end
+
+integer function magmaf_slarfb2_gpu( m, n, k, dV, lddv, dT, lddt, dC, lddc, dwork,  &
+        ldwork, queue )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dV
+    integer          :: lddv
+    magma_devptr_t   :: dT
+    integer          :: lddt
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_slauum( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_slauum_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_sposv( uplo, n, nrhs, A, lda, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_sposv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_spotf2_gpu( uplo, n, dA, ldda, queue, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_spotrf( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_spotrf_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_spotrf_m( ngpu, uplo, n, A, lda, info )
+    integer          :: ngpu
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_spotri( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_spotri_gpu( uplo, n, dA, ldda, info )
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_spotrs_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_sstedx( range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork, iwork,  &
+        liwork, dwork, info )
+    character        :: range
+    integer          :: n
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: d(*)
+    real             :: e(*)
+    real             :: Z(*)
+    integer          :: ldz
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_sstedx_m( ngpu, range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    character        :: range
+    integer          :: n
+    real             :: vl
+    real             :: vu
+    integer          :: il
+    integer          :: iu
+    real             :: d(*)
+    real             :: e(*)
+    real             :: Z(*)
+    integer          :: ldz
+    real             :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_strevc3( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    real             :: T(*)
+    integer          :: ldt
+    real             :: VL(*)
+    integer          :: ldvl
+    real             :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_strevc3_mt( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    real             :: T(*)
+    integer          :: ldt
+    real             :: VL(*)
+    integer          :: ldvl
+    real             :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+integer function magmaf_strsm_m( ngpu, side, uplo, transa, diag, m, n, alpha, A, lda, B,  &
+        ldb )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: transa
+    character        :: diag
+    integer          :: m
+    integer          :: n
+    real             :: alpha
+    real             :: A(*)
+    integer          :: lda
+    real             :: B(*)
+    integer          :: ldb
+end
+
+subroutine magmaf_strtri( uplo, diag, n, A, lda, info )
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_strtri_gpu( uplo, diag, n, dA, ldda, info )
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_sorgbr( vect, m, n, k, A, lda, tau, work, lwork, info )
+    character        :: vect
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sorghr( n, ilo, ihi, A, lda, tau, dT, nb, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sorghr_m( n, ilo, ihi, A, lda, tau, T, nb, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sorglq( m, n, k, A, lda, tau, dT, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sorgqr( m, n, k, A, lda, tau, dT, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sorgqr_gpu( m, n, k, dA, ldda, tau, dT, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sorgqr_m( m, n, k, A, lda, tau, T, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sorgqr2( m, n, k, A, lda, tau, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_sormbr( vect, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    character        :: vect
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormlq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormrq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormql( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormql2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    real             :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_sormqr( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormqr_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, hwork,  &
+        lwork, dT, nb, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    real             :: hwork(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_sormqr2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    real             :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_sormqr_m( ngpu, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    integer          :: ngpu
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormtr( side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info  &
+        )
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_sormtr_gpu( side, uplo, trans, m, n, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    real             :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    real             :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_sormtr_m( ngpu, side, uplo, trans, m, n, A, lda, tau, C, ldc, work,  &
+        lwork, info )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    real             :: tau(*)
+    real             :: C(*)
+    integer          :: ldc
+    real             :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+integer function magmaf_s_isnan( x )
+    real             :: x
+end
+
+integer function magmaf_s_isinf( x )
+    real             :: x
+end
+
+integer function magmaf_s_isnan_inf( x )
+    real             :: x
+end
+
+real function magmaf_smake_lwork( lwork )
+    integer          :: lwork
+end
+
+integer function magmaf_snan_inf( uplo, m, n, A, lda, cnt_nan, cnt_inf )
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+end
+
+integer function magmaf_snan_inf_gpu( uplo, m, n, dA, ldda, cnt_nan, cnt_inf, queue )
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_sprint( m, n, A, lda )
+    integer          :: m
+    integer          :: n
+    real             :: A(*)
+    integer          :: lda
+end
+
+subroutine magmaf_sprint_gpu( m, n, dA, ldda, queue )
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_spanel_to_q( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    real             :: A(*)
+    integer          :: lda
+    real             :: work(*)
+end
+
+subroutine magmaf_sq_to_panel( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    real             :: A(*)
+    integer          :: lda
+    real             :: work(*)
+end
+
+end interface
+
+end module magma_sfortran
diff --git a/lib/magma_fmodules/magma_zfortran.F b/lib/magma_fmodules/magma_zfortran.F
new file mode 100644
index 0000000000..c5ef5cbf41
--- /dev/null
+++ b/lib/magma_fmodules/magma_zfortran.F
@@ -0,0 +1,2086 @@
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!  This file is AUTOMATICALLY GENERATED by:
+!!  tools/fortran_wrappers.pl include/magma_z.i
+!!  Do not edit.
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+module magma_zfortran
+
+use magma_param
+use iso_fortran_env, ONLY: int64
+
+#define magma_devptr_t integer(int64)
+
+implicit none
+
+!---- Fortran interfaces to MAGMA subroutines ----
+interface
+
+integer function magmaf_get_zpotrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zgetrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zgetri_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zhetrf_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zhetrf_nopiv_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zhetrf_aasen_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zgeqp3_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zgeqrf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zgeqlf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zgelqf_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zgehrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zhetrd_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zhegst_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zhegst_m_nb( n )
+    integer          :: n
+end
+
+integer function magmaf_get_zgebrd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zgesvd_nb( m, n )
+    integer          :: m
+    integer          :: n
+end
+
+integer function magmaf_get_zbulge_nb( n, nbthreads )
+    integer          :: n
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_zbulge_vblksiz( n, nb, nbthreads )
+    integer          :: n
+    integer          :: nb
+    integer          :: nbthreads
+end
+
+integer function magmaf_get_zbulge_gcperf(  )
+end
+
+subroutine magmaf_zgebrd( m, n, A, lda, d, e, tauq, taup, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: tauq(*)
+    complex*16       :: taup(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeev( jobvl, jobvr, n, A, lda, w, VL, ldvl, VR, ldvr, work, lwork,  &
+        rwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: w(*)
+    complex*16       :: VL(*)
+    integer          :: ldvl
+    complex*16       :: VR(*)
+    integer          :: ldvr
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgeev_m( jobvl, jobvr, n, A, lda, w, VL, ldvl, VR, ldvr, work, lwork,  &
+        rwork, info )
+    character        :: jobvl
+    character        :: jobvr
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: w(*)
+    complex*16       :: VL(*)
+    integer          :: ldvl
+    complex*16       :: VR(*)
+    integer          :: ldvr
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgegqr_gpu( ikind, m, n, dA, ldda, dwork, work, info )
+    import int64
+    integer          :: ikind
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dwork
+    complex*16       :: work(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgehrd( n, ilo, ihi, A, lda, tau, work, lwork, dT, info )
+    import int64
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_zgehrd_m( n, ilo, ihi, A, lda, tau, work, lwork, T, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    complex*16       :: T(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgehrd2( n, ilo, ihi, A, lda, tau, work, lwork, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgelqf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgelqf_gpu( m, n, dA, ldda, tau, work, lwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgels( trans, m, n, nrhs, A, lda, B, ldb, hwork, lwork, info )
+    import int64
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: A
+    integer          :: lda
+    magma_devptr_t   :: B
+    integer          :: ldb
+    complex*16       :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zggrqf( m, p, n, A, lda, taua, B, ldb, taub, work, lwork, info )
+    integer          :: m
+    integer          :: p
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: taua(*)
+    complex*16       :: B(*)
+    integer          :: ldb
+    complex*16       :: taub(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgglse( m, n, p, A, lda, B, ldb, c, d, x, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    integer          :: p
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    complex*16       :: c(*)
+    complex*16       :: d(*)
+    complex*16       :: x(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgels_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    import int64
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex*16       :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgels3_gpu( trans, m, n, nrhs, dA, ldda, dB, lddb, hwork, lwork, info )
+    import int64
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex*16       :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqlf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqp3( m, n, A, lda, jpvt, tau, work, lwork, rwork, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: jpvt(*)
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgeqp3_gpu( m, n, dA, ldda, jpvt, tau, dwork, lwork, rwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: jpvt(*)
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgeqr2x_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqr2x2_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqr2x3_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqr2x4_gpu( m, n, dA, ldda, dtau, dT, ddA, dwork, queue, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: ddA
+    magma_devptr_t   :: dwork
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrf( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrf_gpu( m, n, dA, ldda, tau, dT, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrf_m( ngpu, m, n, A, lda, tau, work, lwork, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrf_ooc( m, n, A, lda, tau, work, lwork, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrf2_gpu( m, n, dA, ldda, tau, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrf3_gpu( m, n, dA, ldda, tau, dT, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrs_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork, info  &
+        )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex*16       :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgeqrs3_gpu( m, n, nrhs, dA, ldda, tau, dT, dB, lddb, hwork, lwork,  &
+        info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex*16       :: hwork(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgerbt_gpu( gen, n, nrhs, dA, ldda, dB, lddb, U, V, info )
+    import int64
+    character        :: gen
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    complex*16       :: U(*)
+    complex*16       :: V(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgerfs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, dX, lddx, dworkd,  &
+        dAF, iter, info )
+    import int64
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    magma_devptr_t   :: dX
+    integer          :: lddx
+    magma_devptr_t   :: dworkd
+    magma_devptr_t   :: dAF
+    integer          :: iter
+    integer          :: info
+end
+
+subroutine magmaf_zgesdd( jobz, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork, rwork,  &
+        iwork, info )
+    character        :: jobz
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: s(*)
+    complex*16       :: U(*)
+    integer          :: ldu
+    complex*16       :: VT(*)
+    integer          :: ldvt
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgesv( n, nrhs, A, lda, ipiv, B, ldb, info )
+    integer          :: n
+    integer          :: nrhs
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    complex*16       :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_zgesv_gpu( n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    import int64
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zgesv_nopiv_gpu( n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zgesv_rbt( ref, n, nrhs, A, lda, B, ldb, info )
+    character        :: ref
+    integer          :: n
+    integer          :: nrhs
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_zgesvd( jobu, jobvt, m, n, A, lda, s, U, ldu, VT, ldvt, work, lwork,  &
+        rwork, info )
+    character        :: jobu
+    character        :: jobvt
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: s(*)
+    complex*16       :: U(*)
+    integer          :: ldu
+    complex*16       :: VT(*)
+    integer          :: ldvt
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgetf2_gpu( m, n, dA, ldda, ipiv, queue, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_zgetf2_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zgetrf( m, n, A, lda, ipiv, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgetrf_gpu( m, n, dA, ldda, ipiv, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgetrf_m( ngpu, m, n, A, lda, ipiv, info )
+    integer          :: ngpu
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_zgetrf_nopiv( m, n, A, lda, info )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zgetrf_nopiv_gpu( m, n, dA, ldda, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zgetri_gpu( n, dA, ldda, ipiv, dwork, lwork, info )
+    import int64
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dwork
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zgetrs_gpu( trans, n, nrhs, dA, ldda, ipiv, dB, lddb, info )
+    import int64
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: ipiv(*)
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zgetrs_nopiv_gpu( trans, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: trans
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zheevd( jobz, uplo, n, A, lda, w, work, lwork, rwork, lrwork, iwork,  &
+        liwork, info )
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevd_gpu( jobz, uplo, n, dA, ldda, w, wA, ldwa, work, lwork, rwork,  &
+        lrwork, iwork, liwork, info )
+    import int64
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: w(*)
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevd_m( ngpu, jobz, uplo, n, A, lda, w, work, lwork, rwork, lrwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevdx( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w, work,  &
+        lwork, rwork, lrwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevdx_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, mout, w,  &
+        wA, ldwa, work, lwork, rwork, lrwork, iwork, liwork, info )
+    import int64
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevdx_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevdx_2stage( jobz, range, uplo, n, A, lda, vl, vu, il, iu, mout, w,  &
+        work, lwork, rwork, lrwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevdx_2stage_m( ngpu, jobz, range, uplo, n, A, lda, vl, vu, il, iu,  &
+        mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevr( jobz, range, uplo, n, A, lda, vl, vu, il, iu, abstol, mout, w,  &
+        Z, ldz, isuppz, work, lwork, rwork, lrwork, iwork, liwork, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: abstol
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: Z(*)
+    integer          :: ldz
+    integer          :: isuppz(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevr_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, abstol,  &
+        mout, w, dZ, lddz, isuppz, wA, ldwa, wZ, ldwz, work, lwork, rwork, lrwork, iwork,  &
+        liwork, info )
+    import int64
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: abstol
+    integer          :: mout(*)
+    double precision :: w(*)
+    magma_devptr_t   :: dZ
+    integer          :: lddz
+    integer          :: isuppz(*)
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    complex*16       :: wZ(*)
+    integer          :: ldwz
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zheevx( jobz, range, uplo, n, A, lda, vl, vu, il, iu, abstol, mout, w,  &
+        Z, ldz, work, lwork, rwork, iwork, ifail, info )
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: abstol
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: Z(*)
+    integer          :: ldz
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: ifail(*)
+    integer          :: info
+end
+
+subroutine magmaf_zheevx_gpu( jobz, range, uplo, n, dA, ldda, vl, vu, il, iu, abstol,  &
+        mout, w, dZ, lddz, wA, ldwa, wZ, ldwz, work, lwork, rwork, iwork, ifail, info )
+    import int64
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: abstol
+    integer          :: mout(*)
+    double precision :: w(*)
+    magma_devptr_t   :: dZ
+    integer          :: lddz
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    complex*16       :: wZ(*)
+    integer          :: ldwz
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: ifail(*)
+    integer          :: info
+end
+
+subroutine magmaf_zhegst( itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_zhegst_gpu( itype, uplo, n, dA, ldda, dB, lddb, info )
+    import int64
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zhegst_m( ngpu, itype, uplo, n, A, lda, B, ldb, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_zhegvd( itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork, rwork,  &
+        lrwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvd_m( ngpu, itype, jobz, uplo, n, A, lda, B, ldb, w, work, lwork,  &
+        rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvdx( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvdx_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvdx_2stage( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu,  &
+        il, iu, mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvdx_2stage_m( ngpu, itype, jobz, range, uplo, n, A, lda, B, ldb,  &
+        vl, vu, il, iu, mout, w, work, lwork, rwork, lrwork, iwork, liwork, info )
+    integer          :: ngpu
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvr( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        abstol, mout, w, Z, ldz, isuppz, work, lwork, rwork, lrwork, iwork, liwork, info  &
+        )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: abstol
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: Z(*)
+    integer          :: ldz
+    integer          :: isuppz(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_zhegvx( itype, jobz, range, uplo, n, A, lda, B, ldb, vl, vu, il, iu,  &
+        abstol, mout, w, Z, ldz, work, lwork, rwork, iwork, ifail, info )
+    integer          :: itype
+    character        :: jobz
+    character        :: range
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: abstol
+    integer          :: mout(*)
+    double precision :: w(*)
+    complex*16       :: Z(*)
+    integer          :: ldz
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: iwork(*)
+    integer          :: ifail(*)
+    integer          :: info
+end
+
+subroutine magmaf_zhesv( uplo, n, nrhs, A, lda, ipiv, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    complex*16       :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_zhesv_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zhetrd( uplo, n, A, lda, d, e, tau, work, lwork, info )
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zhetrd_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: tau(*)
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zhetrd2_gpu( uplo, n, dA, ldda, d, e, tau, wA, ldwa, work, lwork,  &
+        dwork, ldwork, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: tau(*)
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    complex*16       :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    integer          :: info
+end
+
+integer function magmaf_zhetrd_hb2st( uplo, n, nb, Vblksiz, A, lda, d, e, V, ldv, TAU,  &
+        compT, T, ldt )
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    integer          :: Vblksiz
+    complex*16       :: A(*)
+    integer          :: lda
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: V(*)
+    integer          :: ldv
+    complex*16       :: TAU(*)
+    integer          :: compT
+    complex*16       :: T(*)
+    integer          :: ldt
+end
+
+subroutine magmaf_zhetrd_he2hb( uplo, n, nb, A, lda, tau, work, lwork, dT, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nb
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: info
+end
+
+subroutine magmaf_zhetrf( uplo, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_zhetrf_aasen( uplo, cpu_panel, n, A, lda, ipiv, info )
+    character        :: uplo
+    integer          :: cpu_panel
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: ipiv(*)
+    integer          :: info
+end
+
+subroutine magmaf_zhetrf_nopiv( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zhetrf_nopiv_cpu( uplo, n, ib, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: ib
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zhetrf_nopiv_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zhetrs_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+integer function magmaf_zlarf_gpu( m, n, dv, dtau, dC, lddc, queue )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dv
+    magma_devptr_t   :: dtau
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: queue
+end
+
+integer function magmaf_zlarfb2_gpu( m, n, k, dV, lddv, dT, lddt, dC, lddc, dwork,  &
+        ldwork, queue )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dV
+    integer          :: lddv
+    magma_devptr_t   :: dT
+    integer          :: lddt
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    magma_devptr_t   :: dwork
+    integer          :: ldwork
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_zlatrsd( uplo, trans, diag, normin, n, A, lda, lambda, x, scale, cnorm,  &
+        info )
+    character        :: uplo
+    character        :: trans
+    character        :: diag
+    character        :: normin
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: lambda
+    complex*16       :: x(*)
+    double precision :: scale(*)
+    double precision :: cnorm(*)
+    integer          :: info
+end
+
+subroutine magmaf_zlauum( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zlauum_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zposv( uplo, n, nrhs, A, lda, B, ldb, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+    integer          :: info
+end
+
+subroutine magmaf_zposv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zpotf2_gpu( uplo, n, dA, ldda, queue, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+    integer          :: info
+end
+
+subroutine magmaf_zpotrf( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zpotrf_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zpotrf_m( ngpu, uplo, n, A, lda, info )
+    integer          :: ngpu
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zpotri( uplo, n, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zpotri_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zpotrs_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zsysv_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zsytrf_nopiv_cpu( uplo, n, ib, A, lda, info )
+    character        :: uplo
+    integer          :: n
+    integer          :: ib
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_zsytrf_nopiv_gpu( uplo, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zsytrs_nopiv_gpu( uplo, n, nrhs, dA, ldda, dB, lddb, info )
+    import int64
+    character        :: uplo
+    integer          :: n
+    integer          :: nrhs
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: dB
+    integer          :: lddb
+    integer          :: info
+end
+
+subroutine magmaf_zstedx( range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork, iwork,  &
+        liwork, dwork, info )
+    import int64
+    character        :: range
+    integer          :: n
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: Z(*)
+    integer          :: ldz
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    magma_devptr_t   :: dwork
+    integer          :: info
+end
+
+subroutine magmaf_zstedx_m( ngpu, range, n, vl, vu, il, iu, d, e, Z, ldz, rwork, lrwork,  &
+        iwork, liwork, info )
+    integer          :: ngpu
+    character        :: range
+    integer          :: n
+    double precision :: vl
+    double precision :: vu
+    integer          :: il
+    integer          :: iu
+    double precision :: d(*)
+    double precision :: e(*)
+    complex*16       :: Z(*)
+    integer          :: ldz
+    double precision :: rwork(*)
+    integer          :: lrwork
+    integer          :: iwork(*)
+    integer          :: liwork
+    integer          :: info
+end
+
+subroutine magmaf_ztrevc3( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, rwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    complex*16       :: T(*)
+    integer          :: ldt
+    complex*16       :: VL(*)
+    integer          :: ldvl
+    complex*16       :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+subroutine magmaf_ztrevc3_mt( side, howmany, select, n, T, ldt, VL, ldvl, VR, ldvr, mm,  &
+        mout, work, lwork, rwork, info )
+    character        :: side
+    character        :: howmany
+    integer          :: select(*)
+    integer          :: n
+    complex*16       :: T(*)
+    integer          :: ldt
+    complex*16       :: VL(*)
+    integer          :: ldvl
+    complex*16       :: VR(*)
+    integer          :: ldvr
+    integer          :: mm
+    integer          :: mout(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    double precision :: rwork(*)
+    integer          :: info
+end
+
+integer function magmaf_ztrsm_m( ngpu, side, uplo, transa, diag, m, n, alpha, A, lda, B,  &
+        ldb )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: transa
+    character        :: diag
+    integer          :: m
+    integer          :: n
+    complex*16       :: alpha
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: B(*)
+    integer          :: ldb
+end
+
+subroutine magmaf_ztrtri( uplo, diag, n, A, lda, info )
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: info
+end
+
+subroutine magmaf_ztrtri_gpu( uplo, diag, n, dA, ldda, info )
+    import int64
+    character        :: uplo
+    character        :: diag
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: info
+end
+
+subroutine magmaf_zungbr( vect, m, n, k, A, lda, tau, work, lwork, info )
+    character        :: vect
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunghr( n, ilo, ihi, A, lda, tau, dT, nb, info )
+    import int64
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zunghr_m( n, ilo, ihi, A, lda, tau, T, nb, info )
+    integer          :: n
+    integer          :: ilo
+    integer          :: ihi
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zunglq( m, n, k, A, lda, tau, dT, nb, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zungqr( m, n, k, A, lda, tau, dT, nb, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zungqr_gpu( m, n, k, dA, ldda, tau, dT, nb, info )
+    import int64
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zungqr_m( m, n, k, A, lda, tau, T, nb, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: T(*)
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zungqr2( m, n, k, A, lda, tau, info )
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    integer          :: info
+end
+
+subroutine magmaf_zunmbr( vect, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    character        :: vect
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmlq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmrq( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmql( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmql2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    import int64
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_zunmqr( side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, info )
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmqr_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, hwork,  &
+        lwork, dT, nb, info )
+    import int64
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex*16       :: hwork(*)
+    integer          :: lwork
+    magma_devptr_t   :: dT
+    integer          :: nb
+    integer          :: info
+end
+
+subroutine magmaf_zunmqr2_gpu( side, trans, m, n, k, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    import int64
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_zunmqr_m( ngpu, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork,  &
+        info )
+    integer          :: ngpu
+    character        :: side
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    integer          :: k
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmtr( side, uplo, trans, m, n, A, lda, tau, C, ldc, work, lwork, info  &
+        )
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+subroutine magmaf_zunmtr_gpu( side, uplo, trans, m, n, dA, ldda, tau, dC, lddc, wA, ldwa,  &
+        info )
+    import int64
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    complex*16       :: tau(*)
+    magma_devptr_t   :: dC
+    integer          :: lddc
+    complex*16       :: wA(*)
+    integer          :: ldwa
+    integer          :: info
+end
+
+subroutine magmaf_zunmtr_m( ngpu, side, uplo, trans, m, n, A, lda, tau, C, ldc, work,  &
+        lwork, info )
+    integer          :: ngpu
+    character        :: side
+    character        :: uplo
+    character        :: trans
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: tau(*)
+    complex*16       :: C(*)
+    integer          :: ldc
+    complex*16       :: work(*)
+    integer          :: lwork
+    integer          :: info
+end
+
+integer function magmaf_z_isnan( x )
+    complex*16       :: x
+end
+
+integer function magmaf_z_isinf( x )
+    complex*16       :: x
+end
+
+integer function magmaf_z_isnan_inf( x )
+    complex*16       :: x
+end
+
+integer function magmaf_znan_inf( uplo, m, n, A, lda, cnt_nan, cnt_inf )
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+end
+
+integer function magmaf_znan_inf_gpu( uplo, m, n, dA, ldda, cnt_nan, cnt_inf, queue )
+    import int64
+    character        :: uplo
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    integer          :: cnt_nan(*)
+    integer          :: cnt_inf(*)
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_zprint( m, n, A, lda )
+    integer          :: m
+    integer          :: n
+    complex*16       :: A(*)
+    integer          :: lda
+end
+
+subroutine magmaf_zprint_gpu( m, n, dA, ldda, queue )
+    import int64
+    integer          :: m
+    integer          :: n
+    magma_devptr_t   :: dA
+    integer          :: ldda
+    magma_devptr_t   :: queue
+end
+
+subroutine magmaf_zpanel_to_q( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: work(*)
+end
+
+subroutine magmaf_zq_to_panel( uplo, ib, A, lda, work )
+    character        :: uplo
+    integer          :: ib
+    complex*16       :: A(*)
+    integer          :: lda
+    complex*16       :: work(*)
+end
+
+end interface
+
+end module magma_zfortran
diff --git a/lib/netcdf/Makefile.loc b/lib/netcdf/Makefile.loc
index 0818370133..99be560019 100644
--- a/lib/netcdf/Makefile.loc
+++ b/lib/netcdf/Makefile.loc
@@ -19,7 +19,7 @@ include ../config/external_libs_defs.mk
 # Configure flags
 #
 CONFFLAGS=--prefix=$(LIBPATH) --libdir=$(LIBPATH)/lib \
-          --without-pic --enable-static --disable-shared --disable-dap  $(netcdf_opt) \
+          --without-pic --enable-static --disable-shared --disable-dap  $(netcdf_opt) --disable-libxml2 \
           CC="$(cc)" \
 	  CPP="$(cpp)" \
 	  CFLAGS="$(cflags)" \
diff --git a/lib/petsc/Makefile.loc b/lib/petsc/Makefile.loc
index 14ccfab10c..7701df0e88 100644
--- a/lib/petsc/Makefile.loc
+++ b/lib/petsc/Makefile.loc
@@ -16,10 +16,12 @@ TARBALL=$(tarball_petsc)
 include ../config/external_libs_commons.mk
 include ../config/external_libs_defs.mk
 #         FOPTFLAGS="$(fcflags)" COPTFLAGS="$(cflags)" \
+#         FOPTFLAGS="-O1 -std=f2008 " COPTFLAGS="-O2" CXXOPTFLAGS="-O2" \
+#         CC=mpiicx CXX=mpiicx FC=mpiifx \
 #
 PETSC_ARCH=yambo_${yprecision}_complex
 CONFFLAGS=--prefix="$(LIBPATH)" $(petsc_flgs) \
-         FOPTFLAGS="-O1" COPTFLAGS="-O2" \
+         CC=$(cc) FC=$(fc) \
          --PETSC_ARCH=${PETSC_ARCH} \
          --with-ssl=0 \
          --with-x=0 \
@@ -28,8 +30,8 @@ CONFFLAGS=--prefix="$(LIBPATH)" $(petsc_flgs) \
          --with-shared-libraries=$(spetsc) \
          --with-blaslapack-lib="$(llapackpetsc) $(lblaspetsc)" \
          --with-scalar-type="complex" \
-         --with-precision="${yprecision}" \
-         --with-mpi-dir=$(mpipath)
+         --with-precision="${yprecision}"
+#	 --with-mpi-dir=$(mpipath)
 #
 # MAIN target
 #
@@ -63,7 +65,7 @@ configure: uncompress
 compile: uncompress configure
 	@if test -d $(PACKAGE) && ! test -f compiled.stamp ; then \
          echo "\t[$(PACKAGE)] compilation"; \
-         ( cd $(PACKAGE);  $(make) -j1 PETSC_DIR=$(PWD)/$(PACKAGE) PETSC_ARCH=$(PETSC_ARCH) >> ${compdir}/log/compile_$(PACKAGE).log 2>&1 ) ; \
+         ( cd $(PACKAGE);  $(make) $(MAKEFLAGS) PETSC_DIR=$(PWD)/$(PACKAGE) PETSC_ARCH=$(PETSC_ARCH) >> ${compdir}/log/compile_$(PACKAGE).log 2>&1 ) ; \
          touch compiled.stamp; \
         fi
 
diff --git a/lib/scalapack/SLmake.inc_lib b/lib/scalapack/SLmake.inc_lib
index b004170124..1cde9fb4ec 100644
--- a/lib/scalapack/SLmake.inc_lib
+++ b/lib/scalapack/SLmake.inc_lib
@@ -32,7 +32,7 @@ FC            = $(fc)
 CC            = $(cc) 
 NOOPT         = $(fcuflags)
 FCFLAGS       = $(fcflags) $(slkfcflags)
-CCFLAGS       = $(cflags)
+CCFLAGS       = $(cflags) -std=gnu90
 FCLOADER      = $(fc)
 CCLOADER      = $(cc)
 FCLOADFLAGS   = $(fcflags)
diff --git a/lib/slepc/Makefile.loc b/lib/slepc/Makefile.loc
index fd32d8ad8a..307af6a873 100644
--- a/lib/slepc/Makefile.loc
+++ b/lib/slepc/Makefile.loc
@@ -16,7 +16,7 @@ TARBALL=$(tarball_slepc)
 include ../config/external_libs_commons.mk
 include ../config/external_libs_defs.mk
 #
-CONFFLAGS=--prefix="$(LIBPATH)"
+CONFFLAGS=--prefix=$(LIBPATH)
 #
 # MAIN target
 #
@@ -50,14 +50,14 @@ configure: uncompress
 compile: uncompress configure
 	@if test -e $(PACKAGE) && ! test -f compiled.stamp ; then \
          echo "\t[$(PACKAGE)] compilation"; \
-         ( cd $(PACKAGE); $(make) -j1 SLEPC_DIR=$(PWD)/$(PACKAGE) PETSC_DIR=$(LIBPATH)  >> ${compdir}/log/compile_$(PACKAGE).log 2>&1 ) ; \
+         ( cd $(PACKAGE); $(make) $(MAKEFLAGS) SLEPC_DIR=$(PWD)/$(PACKAGE) PETSC_DIR=$(LIBPATH)  >> ${compdir}/log/compile_$(PACKAGE).log 2>&1 ) ; \
          touch compiled.stamp; \
         fi
 
 install: uncompress configure compile
 	@if ! test -f installed.stamp ; then \
          echo "\t[$(PACKAGE)] installation"; \
-         ( cd $(PACKAGE); $(make) -j1 SLEPC_DIR=$(PWD)/$(PACKAGE) PETSC_DIR=$(LIBPATH) install  >> ${compdir}/log/install_$(PACKAGE).log 2>&1 ); \
+         ( cd $(PACKAGE); $(make) $(MAKEFLAGS) SLEPC_DIR=$(PWD)/$(PACKAGE) PETSC_DIR=$(LIBPATH) install  >> ${compdir}/log/install_$(PACKAGE).log 2>&1 ); \
          touch installed.stamp; \
         fi
 
diff --git a/lib/ydiago/Makefile.loc b/lib/ydiago/Makefile.loc
deleted file mode 100644
index 76b7c4c023..0000000000
--- a/lib/ydiago/Makefile.loc
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-#===============================
-# Yambo package
-#===============================
-#
-include ../../config/setup
-include ../archive/package.list
-#
-LIBNAME=libydiago.a
-LIBPATH=$(libs_prefix)/$(fc_kind)/${fc}/$(ydiago_gpu_support)
-LIBRARY=$(LIBPATH)/lib/$(LIBNAME)
-#
-PACKAGE=$(pkgname_Ydiago)
-#
-include ../config/external_libs_commons.mk
-include ../config/external_libs_defs.mk
-#
-#
-# MAIN target
-#
-all: $(LIBRARY)
-#
-uncompress:
-	@$(uncompress)
-
-configure: uncompress
-	@if ! test -e configured.stamp; then \
-	 rm -f ${compdir}/log/config_$(PACKAGE).log; \
-	 echo "\t[$(PACKAGE)] configuration"; \
-	 if test -e ${compdir}/lib/ydiago/make_ydiago.inc ; then \
-	   echo "cp ${compdir}/lib/ydiago/make_ydiago.inc ${compdir}/lib/ydiago/$(PACKAGE)/src/make.inc" >> ${compdir}/log/config_$(PACKAGE).log ; \
-	   cp ${compdir}/lib/ydiago/make_ydiago.inc ${compdir}/lib/ydiago/$(PACKAGE)/src/make.inc ; \
-	 fi ; \
-	 touch configured.stamp;\
-	fi
-#
-compile: uncompress configure
-	@if ! test -e compiled.stamp ; then \
-	  cd ${compdir}/lib/ydiago/$(PACKAGE)/src ; make ; \
-	  cd ${compdir}/lib/ydiago/ ; touch compiled.stamp; \
-	fi
-
-install: uncompress configure compile
-	@if ! test -e installed.stamp ; then \
-	 echo "\t[$(PACKAGE)] installation"; \
-	 echo "cp ${compdir}/lib/ydiago/$(PACKAGE)/src/libdiago.a $(LIBRARY)" >> ${compdir}/log/install_$(PACKAGE).log ; \
-	 echo "cp ${compdir}/lib/ydiago/$(PACKAGE)/src/ydiago_interface.mod $(LIBPATH)/include/" >> ${compdir}/log/install_$(PACKAGE).log ; \
-	 echo "chmod +x $(LIBPATH)/lib/libydiago.a" >> ${compdir}/log/install_$(PACKAGE).log ; \
-	 cp ${compdir}/lib/ydiago/$(PACKAGE)/src/libdiago.a $(LIBRARY) ;\
-	 cp ${compdir}/lib/ydiago/$(PACKAGE)/src/ydiago_interface.mod $(LIBPATH)/include/ ;\
-	 chmod +x $(LIBRARY) ; \
-	 touch ${compdir}/config/stamps_and_lists/libydiago.a.stamp;\
-	 echo ydiago_interface >> ${compdir}/src/bse/modules.list;\
-	 touch installed.stamp;\
-	fi
-#
-$(LIBRARY): uncompress configure compile install
-#
-# cleaning
-#
-clean:
-	@$(clean_the_lib)
-#
-clean_all: clean
-	@$(rm_the_lib)
-#
diff --git a/local_files/PL_diago_residual_Pedro.F b/local_files/PL_diago_residual_Pedro.F
new file mode 100755
index 0000000000..cbd1df55be
--- /dev/null
+++ b/local_files/PL_diago_residual_Pedro.F
@@ -0,0 +1,236 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine PL_diago_herm_residual(BS_V_left,BS_V_right,BS_R_PL,BS_overlap) 
+ !
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim
+ use BS_solvers,     ONLY:BSS_eh_f,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BS__dim),BS_V_right(BS_H_dim,BS_H_dim)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ complex(SP),optional,intent(in)  :: BS_overlap(BS_H_dim,BS_H_dim)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(2,BS_K_dim(1)),B(BS_H_dim)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ write(*,*) "BS_H_dim", BS_H_dim
+ write(*,*) "BSS_n_eig", BSS_n_eig
+!
+! Hermitian residual
+!====================
+!
+do i_c=1,3
+  P_x_fZ(1:BS_H_dim,i_c,1)=BSS_dipoles_PL(i_c,1:BS_H_dim)*sqrt(BSS_eh_f(1:BS_H_dim)) ! Pi multiplied by (fc-fv) dipoles of
+  !current <(vk|gradCk)> C is conduction
+enddo
+!
+write(*,*) 'BS_K_dim(1)', BS_K_dim(1)
+write(*,*) 'shape(BSS_eh_f)', shape(BSS_eh_f)
+write(*,*) 'shape(BS_V_right)', shape(BS_V_right)
+write(*,*) 'shape(BSS_PL_f)', shape(BSS_PL_f)
+do i_l=1,BS_K_dim(1)
+  !
+  if (.not.px%element_1D(i_l)) cycle
+  !                       
+  ! R_over_R_x_A = R<_j/R_j A^l_j
+  !
+  do ik = 1, BS_K_dim(1)
+    if (BSS_eh_f(ik) .gt. rZERO) then
+      R_over_R_x_A(1,ik) =  BSS_PL_f(ik)*BS_V_right(ik,i_l)/BSS_eh_f(ik)
+      R_over_R_x_A(2,ik) =  BSS_PL_f(BS_K_dim(1)+ik)*BS_V_right(ik,i_l)/BSS_eh_f(ik)
+    endif
+  enddo
+  !
+  ! adA_x_R_over_R_x_A = (A^l_j)* R<_j/R_j A^l_j
+  !
+  adA_x_R_over_R_x_A(1)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(1:BS_K_dim(1),i_l)), R_over_R_x_A(1,1:BS_K_dim(1)))!resonant
+  adA_x_R_over_R_x_A(2)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(BS_K_dim(1)+1:2*BS_K_dim(1),i_l)),R_over_R_x_A(2,1:BS_K_dim(1)))
+  !antiresonant, no error if two arrays have different dimensions
+  !
+  do i_c=1,3
+    !                                   
+    ! Pi_x_R_x_adA_right =   Pi sqrt(R)  A^l_i
+    !
+    Pi_x_R_x_adA_right(i_c)=V_dot_V_omp(BS_K_dim(1),BS_V_right(1:BS_K_dim(1),i_l),P_x_fZ(1:BS_K_dim(1),i_c,1))
+    !
+  enddo
+  !
+  ! Note that the 2nd component (anti-resonant) should take a -1  from BSS_eh_f and a i^2 from sqrt(R)
+  ! that cancel out.
+  !
+  BS_R_PL(:,i_l)=real(dot_product(Pi_x_R_x_adA_right,Pi_x_R_x_adA_right)*adA_x_R_over_R_x_A(:))
+  !
+  call live_timing(steps=1)
+  !
+enddo
+!
+call live_timing()
+!
+call PP_redux_wait(BS_R_PL)
+!
+call PP_indexes_reset(px)
+!
+end subroutine PL_diago_herm_residual
+
+subroutine PL_diago_non_herm_residual(BS_V_left,BS_V_right,BS_R_PL,BS_overlap) 
+ !
+ ! Non-Hermitian residual
+ !========================
+ !
+  !
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim
+ use BS_solvers,     ONLY:BSS_eh_f,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BSS_n_eig),BS_V_right(BS_H_dim,BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ complex(SP),optional,intent(in)  :: BS_overlap(BS_n_eig,BS_n_eig)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(2,BS_K_dim(1)),B(BS_H_dim)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ write(*,*) "BS_H_dim", BS_H_dim
+ write(*,*) "BSS_n_eig", BSS_n_eig
+   !
+   ! 1=> left
+   ! 2=> right
+   !
+  do i_c=1,3
+    do i_K=1,BS_K_dim(1)
+      P_x_fZ(i_K,i_c,1)=conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(BSS_eh_f(i_K))
+      P_x_fZ(i_K,i_c,2)=      BSS_dipoles_PL(i_c,i_K) *sqrt(BSS_eh_f(i_K))
+    enddo
+    do i_K=BS_K_dim(1)+1,BS_H_dim
+      P_x_fZ(i_K,i_c,1)=cI*conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
+      P_x_fZ(i_K,i_c,2)=cI*      BSS_dipoles_PL(i_c,i_K) *sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
+    enddo
+  enddo
+  !
+  do i_lr=1,BS_H_dim
+    !
+    if (.not.px%element_1D(i_lr)) cycle
+    !
+    ! B_i = \sum_lp Overlap^*(lp,l) A^ll_i
+    !
+    B=cZERO
+    do i_ll=1,BS_H_dim
+      B(:)=B(:)+conjg(BS_overlap(i_ll,i_lr))*BS_V_left(:,i_ll)
+    enddo
+    !
+    ! R_over_R_x_A = R<_j/R_j B^l_j
+    !
+    R_over_R_x_A(1,:) = BSS_PL_f(:)*B(:)/BSS_eh_f(:)
+    !
+    ! adA_x_R_over_R_x_A = B^l*_j    R<_j/R_j B^l_j
+    !
+    adA_x_R_over_R_x_A(1)=Vstar_dot_V_omp(BS_H_dim,B,R_over_R_x_A(1,:))
+    !
+    do i_c=1,3
+      !
+      ! Pi_x_R_x_adA_left =   sqrt(R) Pi_i* A^lr_i
+      !
+      Pi_x_R_x_adA_left(i_c) =V_dot_V_omp(BS_H_dim,P_x_fZ(:,i_c,1),BS_V_right(:,i_lr))
+      !
+      ! Pi_x_R_x_adA_right=   A^lr*_i Pi_i sqrt(R)  
+      !
+      Pi_x_R_x_adA_right(i_c)=Vstar_dot_V_omp(BS_H_dim,BS_V_right(:,i_lr),P_x_fZ(:,i_c,2))
+      !
+    enddo
+    !
+    BS_R_PL(1,i_lr)=real((Pi_x_R_x_adA_left(1)*Pi_x_R_x_adA_right(1)+&
+&                          Pi_x_R_x_adA_left(2)*Pi_x_R_x_adA_right(2)+&
+&                          Pi_x_R_x_adA_left(3)*Pi_x_R_x_adA_right(3))*adA_x_R_over_R_x_A(1))
+    !
+    call live_timing(steps=1)
+    !
+  enddo
+ !
+ call live_timing()
+ !
+ call PP_redux_wait(BS_R_PL)
+ !
+ call PP_indexes_reset(px)
+ !
+
+ end subroutine PL_diago_non_herm_residual
diff --git a/local_files/PL_diago_residual_full_n_eig.F b/local_files/PL_diago_residual_full_n_eig.F
new file mode 100755
index 0000000000..6fae5abc59
--- /dev/null
+++ b/local_files/PL_diago_residual_full_n_eig.F
@@ -0,0 +1,248 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM PM
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine PL_diago_herm_residual(BS_V_left,BS_V_right,BS_R_PL) 
+ !
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim
+ use BS_solvers,     ONLY:BSS_eh_f,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BSS_n_eig,BSS_n_eig),BS_V_right(BSS_n_eig,BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ real(SP)                :: half_BSS_n_eig
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(2,BS_K_dim(1)),B(BSS_n_eig)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ write(*,*) "Hermitian run"
+ write(*,*) "BS_H_dim", BS_H_dim
+ write(*,*) "BSS_n_eig", BSS_n_eig
+ write(*,*) "BS_K_dim", BS_K_dim(2)
+ half_BSS_n_eig=BSS_n_eig/2.
+!
+! Hermitian residual
+!====================
+!
+do i_c=1,3
+  P_x_fZ(1:BSS_n_eig,i_c,1)=BSS_dipoles_PL(i_c,1:BSS_n_eig)*sqrt(BSS_eh_f(1:BSS_n_eig)) ! Pi multiplied by (fc-fv) dipoles of
+  !current <(vk|gradCk)> C is conduction
+enddo
+!
+write(*,*) 'BS_K_dim(1)', BS_K_dim(1)
+write(*,*) 'shape(BSS_eh_f)', shape(BSS_eh_f)
+write(*,*) 'shape(BS_V_right)', shape(BS_V_right)
+write(*,*) 'shape(BSS_PL_f)', shape(BSS_PL_f)
+!write(*,*) 'BSS_V_right(577,576)', BS_V_right(577,576)
+do i_l=1,BSS_n_eig
+  !
+  if (.not.px%element_1D(i_l)) cycle
+  !                       
+  ! R_over_R_x_A = R<_j/R_j A^l_j
+  !
+  do ik = 1, BS_K_dim(1)
+      if (BSS_eh_f(ik) .gt. rZERO) then
+      R_over_R_x_A(1,ik) =  BSS_PL_f(ik)*BS_V_right(ik,i_l)/BSS_eh_f(ik)
+      R_over_R_x_A(2,ik) =  BSS_PL_f(BS_K_dim(1)+ik)*BS_V_right(ik,i_l)/BSS_eh_f(ik)
+    endif
+  enddo
+  !
+  ! adA_x_R_over_R_x_A = (A^l_j)* R<_j/R_j A^l_j
+  !
+  adA_x_R_over_R_x_A(1)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(1:BSS_n_eig,i_l)),&
+          R_over_R_x_A(1,1:BSS_n_eig)) !resonant
+  adA_x_R_over_R_x_A(2)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(BSS_N_eig+1:BS_K_dim(1),i_l)),&
+          R_over_R_x_A(2,BSS_n_eig:BS_K_dim(1)))
+  !antiresonant, no error if two arrays have different dimensions
+  !
+  do i_c=1,3
+    !                                   
+    ! Pi_x_R_x_adA_right =   Pi sqrt(R)  A^l_i
+    !
+    Pi_x_R_x_adA_right(i_c)=V_dot_V_omp(BS_K_dim(1),BS_V_right(1:BS_K_dim(1),i_l),P_x_fZ(1:BS_K_dim(1),i_c,1))
+    !
+  enddo
+  !
+  ! Note that the 2nd component (anti-resonant) should take a -1  from BSS_eh_f and a i^2 from sqrt(R)
+  ! that cancel out.
+  !
+  BS_R_PL(:,i_l)=real(dot_product(Pi_x_R_x_adA_right,Pi_x_R_x_adA_right)*adA_x_R_over_R_x_A(:))
+  !
+  call live_timing(steps=1)
+  !
+enddo
+!
+call live_timing()
+!
+call PP_redux_wait(BS_R_PL)
+!
+call PP_indexes_reset(px)
+!
+end subroutine PL_diago_herm_residual
+
+subroutine PL_diago_non_herm_residual(BS_V_left,BS_V_right,BS_R_PL,BS_overlap) 
+ !
+ ! Non-Hermitian residual
+ !========================
+ !
+  !
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim
+ use BS_solvers,     ONLY:BSS_eh_f,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BSS_n_eig),BS_V_right(BS_H_dim,BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ complex(SP),optional,intent(in)  :: BS_overlap(BSS_n_eig,BSS_n_eig)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(2,BS_H_dim),B(BS_H_dim)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ write(*,*) "Non-Hermitian run"
+ write(*,*) "BS_H_dim", BS_H_dim
+ write(*,*) "BSS_n_eig", BSS_n_eig
+   !
+   ! 1=> left
+   ! 2=> right
+   !
+  do i_c=1,3
+    do i_K=1,BS_K_dim(1)
+      P_x_fZ(i_K,i_c,1)=conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(BSS_eh_f(i_K))
+      P_x_fZ(i_K,i_c,2)=      BSS_dipoles_PL(i_c,i_K) *sqrt(BSS_eh_f(i_K))
+    enddo
+    do i_K=BS_K_dim(1)+1,BS_H_dim
+      P_x_fZ(i_K,i_c,1)=cI*conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
+      P_x_fZ(i_K,i_c,2)=cI*      BSS_dipoles_PL(i_c,i_K) *sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
+    enddo
+  enddo
+  !
+  do i_lr=1,BS_H_dim
+    !
+    if (.not.px%element_1D(i_lr)) cycle
+    !
+    ! B_i = \sum_lp Overlap^*(lp,l) A^ll_i
+    !
+    B=cZERO
+    do i_ll=1,BS_H_dim
+      B(:)=B(:)+conjg(BS_overlap(i_ll,i_lr))*BS_V_left(:,i_ll)
+    enddo
+    !
+    ! R_over_R_x_A = R<_j/R_j B^l_j
+    !
+    write (*,*) "shape(BSS_PL_f)", shape(BSS_PL_f)
+    R_over_R_x_A(1,:) = BSS_PL_f(:)*B(:)/BSS_eh_f(:)
+    !
+    ! adA_x_R_over_R_x_A = B^l*_j    R<_j/R_j B^l_j
+    !
+    adA_x_R_over_R_x_A(1)=Vstar_dot_V_omp(BS_H_dim,B,R_over_R_x_A(1,:))
+    !
+    do i_c=1,3
+      !
+      ! Pi_x_R_x_adA_left =   sqrt(R) Pi_i* A^lr_i
+      !
+      Pi_x_R_x_adA_left(i_c) =V_dot_V_omp(BS_H_dim,P_x_fZ(:,i_c,1),BS_V_right(:,i_lr))
+      !
+      ! Pi_x_R_x_adA_right=   A^lr*_i Pi_i sqrt(R)  
+      !
+      Pi_x_R_x_adA_right(i_c)=Vstar_dot_V_omp(BS_H_dim,BS_V_right(:,i_lr),P_x_fZ(:,i_c,2))
+      !
+    enddo
+    !
+    BS_R_PL(1,i_lr)=real((Pi_x_R_x_adA_left(1)*Pi_x_R_x_adA_right(1)+&
+&                          Pi_x_R_x_adA_left(2)*Pi_x_R_x_adA_right(2)+&
+&                          Pi_x_R_x_adA_left(3)*Pi_x_R_x_adA_right(3))*adA_x_R_over_R_x_A(1))
+    !
+    call live_timing(steps=1)
+    !
+     !
+     call live_timing(steps=1)
+     !
+   enddo
+   !
+ !
+ call live_timing()
+ !
+ call PP_redux_wait(BS_R_PL)
+ !
+ call PP_indexes_reset(px)
+ !
+
+ end subroutine PL_diago_non_herm_residual
diff --git a/local_files/PL_diago_residual_halfBSE.F b/local_files/PL_diago_residual_halfBSE.F
new file mode 100755
index 0000000000..85d02484bb
--- /dev/null
+++ b/local_files/PL_diago_residual_halfBSE.F
@@ -0,0 +1,244 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine PL_diago_herm_residual(BS_V_left,BS_V_right,BS_R_PL) 
+ !
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim
+ use BS_solvers,     ONLY:BSS_eh_f,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BSS_n_eig,BSS_n_eig),BS_V_right(BSS_n_eig,BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ real(SP)                :: half_BSS_n_eig
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(2,BS_K_dim(1)),B(BSS_n_eig)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ write(*,*) "Hermitian run"
+ write(*,*) "BS_H_dim", BS_H_dim
+ write(*,*) "BSS_n_eig", BSS_n_eig
+ write(*,*) "BS_K_dim", BS_K_dim(2)
+ half_BSS_n_eig=BSS_n_eig/2.
+!
+! Hermitian residual
+!====================
+!
+do i_c=1,3
+  P_x_fZ(1:BSS_n_eig,i_c,1)=BSS_dipoles_PL(i_c,1:BSS_n_eig)*sqrt(BSS_eh_f(1:BSS_n_eig)) ! Pi multiplied by (fc-fv) dipoles of
+  !current <(vk|gradCk)> C is conduction
+enddo
+!
+write(*,*) 'BS_K_dim(1)', BS_K_dim(1)
+write(*,*) 'shape(BSS_eh_f)', shape(BSS_eh_f)
+write(*,*) 'shape(BS_V_right)', shape(BS_V_right)
+write(*,*) 'shape(BSS_PL_f)', shape(BSS_PL_f)
+!write(*,*) 'BSS_V_right(577,576)', BS_V_right(577,576)
+do i_l=1,BSS_n_eig
+  !
+  if (.not.px%element_1D(i_l)) cycle
+  !                       
+  ! R_over_R_x_A = R<_j/R_j A^l_j
+  !
+  do ik = 1, BS_K_dim(1)
+      if (BSS_eh_f(ik) .gt. rZERO) then
+      R_over_R_x_A(1,ik) =  BSS_PL_f(ik)*BS_V_right(ik,i_l)/BSS_eh_f(ik)
+      R_over_R_x_A(2,ik) =  BSS_PL_f(BS_K_dim(1)+ik)*BS_V_right(ik,i_l)/BSS_eh_f(ik)
+    endif
+  enddo
+  !
+  ! adA_x_R_over_R_x_A = (A^l_j)* R<_j/R_j A^l_j
+  !
+  adA_x_R_over_R_x_A(1)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(1:half_BSS_n_eig,i_l)),&
+          R_over_R_x_A(1,1:half_BSS_n_eig)) !resonant
+  adA_x_R_over_R_x_A(2)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(half_BSS_N_eig+1:BS_K_dim(1),i_l)),&
+          R_over_R_x_A(2,half_BSS_n_eig:BS_K_dim(1)))
+  !antiresonant, no error if two arrays have different dimensions
+  !
+  do i_c=1,3
+    !                                   
+    ! Pi_x_R_x_adA_right =   Pi sqrt(R)  A^l_i
+    !
+    Pi_x_R_x_adA_right(i_c)=V_dot_V_omp(BS_K_dim(1),BS_V_right(1:BS_K_dim(1),i_l),P_x_fZ(1:BS_K_dim(1),i_c,1))
+    !
+  enddo
+  !
+  ! Note that the 2nd component (anti-resonant) should take a -1  from BSS_eh_f and a i^2 from sqrt(R)
+  ! that cancel out.
+  !
+  BS_R_PL(:,i_l)=real(dot_product(Pi_x_R_x_adA_right,Pi_x_R_x_adA_right)*adA_x_R_over_R_x_A(:))
+  !
+  call live_timing(steps=1)
+  !
+enddo
+!
+call live_timing()
+!
+call PP_redux_wait(BS_R_PL)
+!
+call PP_indexes_reset(px)
+!
+end subroutine PL_diago_herm_residual
+
+subroutine PL_diago_non_herm_residual(BS_V_left,BS_V_right,BS_R_PL,BS_overlap) 
+ !
+ ! Non-Hermitian residual
+ !========================
+ !
+  !
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim
+ use BS_solvers,     ONLY:BSS_eh_f,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BSS_n_eig),BS_V_right(BS_H_dim,BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ complex(SP),optional,intent(in)  :: BS_overlap(BSS_n_eig,BSS_n_eig)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(2,BS_H_dim),B(BS_H_dim)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ write(*,*) "Non-Hermitian run"
+ write(*,*) "BS_H_dim", BS_H_dim
+ write(*,*) "BSS_n_eig", BSS_n_eig
+   !
+   ! 1=> left
+   ! 2=> right
+   !
+  do i_c=1,3
+    do i_K=1,BS_K_dim(1)
+      P_x_fZ(i_K,i_c,1)=conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(BSS_eh_f(i_K))
+      P_x_fZ(i_K,i_c,2)=      BSS_dipoles_PL(i_c,i_K) *sqrt(BSS_eh_f(i_K))
+    enddo
+    do i_K=BS_K_dim(1)+1,BS_H_dim
+      P_x_fZ(i_K,i_c,1)=cI*conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
+      P_x_fZ(i_K,i_c,2)=cI*      BSS_dipoles_PL(i_c,i_K) *sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
+    enddo
+  enddo
+  !
+  do i_lr=1,BS_H_dim
+    !
+    if (.not.px%element_1D(i_lr)) cycle
+    !
+    ! B_i = \sum_lp Overlap^*(lp,l) A^ll_i
+    !
+    B=cZERO
+    do i_ll=1,BS_H_dim
+      B(:)=B(:)+conjg(BS_overlap(i_ll,i_lr))*BS_V_left(:,i_ll)
+    enddo
+    !
+    ! R_over_R_x_A = R<_j/R_j B^l_j
+    !
+    write (*,*) "shape(BSS_PL_f)", shape(BSS_PL_f)
+    R_over_R_x_A(1,:) = BSS_PL_f(:)*B(:)/BSS_eh_f(:)
+    !
+    ! adA_x_R_over_R_x_A = B^l*_j    R<_j/R_j B^l_j
+    !
+    adA_x_R_over_R_x_A(1)=Vstar_dot_V_omp(BS_H_dim,B,R_over_R_x_A(1,:))
+    !
+    do i_c=1,3
+      !
+      ! Pi_x_R_x_adA_left =   sqrt(R) Pi_i* A^lr_i
+      !
+      Pi_x_R_x_adA_left(i_c) =V_dot_V_omp(BS_H_dim,P_x_fZ(:,i_c,1),BS_V_right(:,i_lr))
+      !
+      ! Pi_x_R_x_adA_right=   A^lr*_i Pi_i sqrt(R)  
+      !
+      Pi_x_R_x_adA_right(i_c)=Vstar_dot_V_omp(BS_H_dim,BS_V_right(:,i_lr),P_x_fZ(:,i_c,2))
+      !
+    enddo
+    !
+    BS_R_PL(1,i_lr)=real((Pi_x_R_x_adA_left(1)*Pi_x_R_x_adA_right(1)+&
+&                          Pi_x_R_x_adA_left(2)*Pi_x_R_x_adA_right(2)+&
+&                          Pi_x_R_x_adA_left(3)*Pi_x_R_x_adA_right(3))*adA_x_R_over_R_x_A(1))
+    !
+    call live_timing(steps=1)
+    !
+  enddo
+ !
+ call live_timing()
+ !
+ call PP_redux_wait(BS_R_PL)
+ !
+ call PP_indexes_reset(px)
+ !
+
+ end subroutine PL_diago_non_herm_residual
diff --git a/sbin/.clang-format b/sbin/.clang-format
new file mode 100644
index 0000000000..7a41564904
--- /dev/null
+++ b/sbin/.clang-format
@@ -0,0 +1,6 @@
+BasedOnStyle: Google 
+IndentWidth: 4
+BreakBeforeBraces: Allman
+AlignAfterOpenBracket: Align
+InsertNewlineAtEOF : true
+InsertBraces : true
diff --git a/sbin/clang_format.sh b/sbin/clang_format.sh
new file mode 100755
index 0000000000..49773492c3
--- /dev/null
+++ b/sbin/clang_format.sh
@@ -0,0 +1,3 @@
+CLANG_FMT=clang-format
+$CLANG_FMT -i -style=file \
+      ../src/*/*.c ../include/driver/*.h ../include/headers/parser/*.h 
diff --git a/sbin/compilation/check_updated_locks.sh b/sbin/compilation/check_updated_locks.sh
index 7c9ac7c8d7..6de5f680e3 100755
--- a/sbin/compilation/check_updated_locks.sh
+++ b/sbin/compilation/check_updated_locks.sh
@@ -63,15 +63,24 @@ fi
 if [ "$DIR_restored" == "yes" ] ; then
  source ./sbin/compilation/verbosity.sh "check_updated_locks.sh: $dir has been restored"
 fi
+
+
+if [[ "$unmatched" == *"DOUBLE"* ]]; then
+ # in this case I already recompile the whole
+ # source, no need to check for other locks
+ unmatched="DOUBLE";
+fi
 #
 # tag new objects to be compiled
 #
+files_done=""
 for lock in $unmatched
 do
  #
  if test -f "$dir/${lock}_project.dep"; then
   deps=`cat $dir/${lock}_project.dep`
   for dep_file in $deps; do
+   if [[ "$files_done" == *"$dep_file"* ]]; then continue; fi
    source ./sbin/compilation/verbosity.sh "check_updated_locks.sh: $dep_file must be recompiled"
    source ./sbin/compilation/name_me.sh $dir/$dep_file "no_search"
    if [ ! "$DIR_restored" == "yes" ] ; then
@@ -81,6 +90,7 @@ do
     source ./sbin/compilation/object_remove.sh "remove" "locks"
     continue;
    fi
+   files_done="$file $files_done"
    source ./sbin/compilation/check_object_childs.sh "locks"
   done
  fi
diff --git a/sbin/compilation/dependencies_project.sh b/sbin/compilation/dependencies_project.sh
index 0ebb517cbe..b6b585fd56 100755
--- a/sbin/compilation/dependencies_project.sh
+++ b/sbin/compilation/dependencies_project.sh
@@ -32,8 +32,15 @@ if [ ${#sources} -eq 2 ]; then
   continue 
 fi
 #
+if [ !  -f .objects ] ; then
+  #this is needed to avoid stc/tools where .objects.in exist in place of .objects
+  cd $BASE
+  continue 
+fi
+
 cp .objects objects.c
 $cpp $cppflags $precomp_flags objects.c  > no_pj.mk
+
 #
 # Projects 
 #==========
diff --git a/sbin/compilation/libraries.sh b/sbin/compilation/libraries.sh
index 2dbb4a3721..c68584aa79 100755
--- a/sbin/compilation/libraries.sh
+++ b/sbin/compilation/libraries.sh
@@ -17,7 +17,7 @@ do
 done
 #
 llocal="-lqe_pseudo -lmath77 -lslatec -llocal"
-lPLA="\$(lydiago) \$(lelpa) \$(lscalapack) \$(lblacs) \$(llapack) \$(lblas)"
+lPLA="\$(lldiago) \$(lelpa) \$(lscalapack) \$(lblacs) \$(lmagma) \$(llapack) \$(lblas)"
 lSL="\$(lslepc) \$(lpetsc)"
 lIO="\$(liotk) \$(lnetcdff) \$(lnetcdf) \$(lhdf5)"
 lextlibs="\$(llibxc) \$(lfft) \$(lfutile) \$(lyaml)  \$(ldevxlib) \$(llapack) \$(lblas) \$(lcudalib)"
diff --git a/sbin/developer_tools/move_include_files.sh b/sbin/developer_tools/move_include_files.sh
new file mode 100755
index 0000000000..d43658e032
--- /dev/null
+++ b/sbin/developer_tools/move_include_files.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+for file in */*/*.F; do
+
+ if [ "$file" == "interfaces/p2y/num_interpolation_module.F" ]; then continue; fi
+ if [ "$file" == "interfaces/p2y/pw_pseudo_module.F" ]; then continue; fi
+ if [ "$file" == "src/Ymodules/mod_cufft.F" ]; then continue; fi
+ if [ "$file" == "src/Ymodules/mod_cusolverdn_y.F" ]; then continue; fi
+ if [ "$file" == "src/Ymodules/mod_interfaces.F" ]; then continue; fi
+ if [ "$file" == "src/bse/K_diago_driver.F" ]; then continue; fi
+ if [[ "$file" == *"_incl.F"* ]]; then continue; fi
+
+ if grep -q "! headers" "$file"; then continue; fi
+
+ echo "Checking file $file"
+
+awk '
+{
+    lines[NR] = $0;  # Store each line in an array
+}
+END {
+    for (i = 1; i <= NR; i++) {
+        if (lines[i] ~ /! Authors/) {
+            print lines[i];  # Print the line with ! Authors
+	    k = 0;
+            for (j = 1; j <= NR; j++) {
+                if (lines[j] ~ /#include/) {
+		    k++ ;
+		    if (k == 1) {
+		      print "!";
+		      print "! headers";
+		      print "!";
+                      print lines[j];  # Print #include lines
+		      line_printed[k++]=lines[j]
+		    } else {
+		      print_line = 1;
+		      for (kp = 1; kp < k; kp++) {
+		      if ( lines[j] == line_printed[kp] ){ print_line = 0 }
+		      }
+		      if (print_line == 1){
+			print lines[j]
+		        line_printed[k++]=lines[j]
+  		      }
+		    }
+                }
+            }
+        }
+        if (lines[i] !~ /! Authors/ && lines[i] !~ /#include/) {
+            print lines[i];  # Print other lines
+        }
+        if (lines[i] ~ /#include<y_memory.h>/ || lines[i] ~ /#include <y_memory.h>/) {
+            print " use y_memory_alloc"   ;
+	    print " !";
+            print " implicit none";  # Resume implicit none
+        }
+    }
+}
+' $file > $file.tmp
+mv $file.tmp $file
+done
+
diff --git a/src/Yio/io_fragment.F b/src/Yio/io_fragment.F
index 50ae930f15..ccf1ddf2a0 100644
--- a/src/Yio/io_fragment.F
+++ b/src/Yio/io_fragment.F
@@ -44,7 +44,7 @@ subroutine io_fragment(ID,ID_frag,i_fragment,j_fragment,extension,ierr)
  select case (trim(io_raw_extension(ID)))
    case("Vnl","wf","kb_pp","kb_pp_pwscf")
      if (.not.frag_WF) return
-   case("kindx")
+   case("kindx","kindx_pq")
      if (.not.frag_QINDX) return
    case("dip_iR_and_P","Overlaps")
      if (.not.frag_DIP) return
diff --git a/src/Yio/io_fragment_disconnect.F b/src/Yio/io_fragment_disconnect.F
index 8242ca1458..2d92ccca8b 100644
--- a/src/Yio/io_fragment_disconnect.F
+++ b/src/Yio/io_fragment_disconnect.F
@@ -22,7 +22,7 @@ subroutine io_fragment_disconnect(ID,ID_frag)
  select case (trim(io_raw_extension(ID)))
    case("Vnl","wf","kb_pp","kb_pp_pwscf")
      if (.not.frag_WF) return
-   case("kindx")
+   case("kindx","kindx_pq")
      if (.not.frag_QINDX) return
    case("dip_iR_and_P","Overlaps")
      if (.not.frag_DIP) return
diff --git a/src/acfdt/acfdt_tot_energy.F b/src/acfdt/acfdt_tot_energy.F
index 1011de7705..c262898f04 100644
--- a/src/acfdt/acfdt_tot_energy.F
+++ b/src/acfdt/acfdt_tot_energy.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine acfdt_tot_energy(Xen,Xk,En,k,q,X,Dip)
  !
  use pars,           ONLY:SP,schlen,pi,DP
@@ -32,8 +36,9 @@ subroutine acfdt_tot_energy(Xen,Xk,En,k,q,X,Dip)
  use xc_functionals, ONLY:E_xc,XC_EXCHANGE_CORRELATION,XC_LDA_C_KP,XC_EXCHANGE,&
 &                         XC_LDA_X,XC_CORRELATION,XC_potential_driver
  use global_XC,      ONLY:WF_xc_functional
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::Xen,En
  type(bz_samp) ::Xk,q,k
@@ -222,7 +227,7 @@ subroutine acfdt_tot_energy(Xen,Xk,En,k,q,X,Dip)
          enddo
          call LINEAR_ALGEBRA_driver(INV,M=tddftk,lib_in=USE_LK)
          X_s=matmul(tddftk,X_mat(:,:,iw))
-         forall(i1=1:X%ng) dX(i1)=(X_s(i1,i1)-X_mat(i1,i1,iw))*4.*pi*isc%gamp(i1,1)
+	 forall (i1=1:X%ng) dX(i1)=(X_s(i1,i1)-X_mat(i1,i1,iw))*4.*pi*isc%gamp(i1,1)
          ACFDT_Ec=ACFDT_Ec-q%nstar(iqibz)*lambda_weight(il)*freq_weight(iw)*sum(dX)
          call live_timing(steps=1)
        enddo
diff --git a/src/allocations/DIPOLE_ALLOC_elemental.F b/src/allocations/DIPOLE_ALLOC_elemental.F
index 76213ed5d7..48b071a77d 100644
--- a/src/allocations/DIPOLE_ALLOC_elemental.F
+++ b/src/allocations/DIPOLE_ALLOC_elemental.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include <dev_defs.h>
+#include <y_memory.h>
+!
 !
 subroutine DIPOLE_ALLOC_elemental(what,d)
  !
@@ -14,9 +18,9 @@ subroutine DIPOLE_ALLOC_elemental(what,d)
  use electrons,      ONLY:n_sp_pol,n_spinor
  use pars,           ONLY:cZERO
  use gpu_m,          ONLY:have_gpu
+ use y_memory_alloc
  !
-#include <dev_defs.h>
-#include <y_memory.h>
+ implicit none
  !
  character(*)                :: what
  integer, optional           :: d(:)
diff --git a/src/allocations/DIPOLE_ALLOC_global.F b/src/allocations/DIPOLE_ALLOC_global.F
index ac78ab08be..6a1072081a 100644
--- a/src/allocations/DIPOLE_ALLOC_global.F
+++ b/src/allocations/DIPOLE_ALLOC_global.F
@@ -25,7 +25,6 @@ subroutine DIPOLE_ALLOC_global(D,NK)
    call DIPOLE_ALLOC_elemental('DIP_orbital')
    call DIPOLE_ALLOC_elemental('DIP_spin')
    call DIPOLE_ALLOC_elemental('DIP_P_spinor')
-   call DIPOLE_ALLOC_elemental('DIP_S')
    call DIPOLE_ALLOC_elemental('P_square') 
    return
  endif
@@ -34,12 +33,11 @@ subroutine DIPOLE_ALLOC_global(D,NK)
  call DIPOLE_ALLOC_elemental('DIP_v' ,(/3,D%ib_lim(2),D%ib(2),D%ib(1),D%ib_lim(1),NK/))
  call DIPOLE_ALLOC_elemental('DIP_P' ,(/3,D%ib_lim(2),D%ib(2),D%ib(1),D%ib_lim(1),NK/))
  !
- if (STRING_match(D%computed,"M_orb").and.STRING_match(D%computed,"M_it")) then
+ if (STRING_match(D%computed,"M_orbRT").and.STRING_match(D%computed,"M_it")) then
    call DIPOLE_ALLOC_elemental('DIP_orbital',(/3,D%ib_lim(2),D%ib(2),D%ib(1),D%ib_lim(1),NK,2/))
  endif
  !
- if ((STRING_match(D%computed,"M_orb").or.STRING_match(D%computed,"M_CD_orb")).and.&
-&     .not.STRING_match(D%computed,"M_it")) then
+ if ((STRING_match(D%computed,"M_orbCD"))) then
    call DIPOLE_ALLOC_elemental('DIP_orbital',(/3,D%ib_lim(2),D%ib(2),D%ib(1),D%ib_lim(1),NK,1/))
  endif
  !
diff --git a/src/allocations/ELPH_alloc.F b/src/allocations/ELPH_alloc.F
index 27bb436030..7179d12b84 100644
--- a/src/allocations/ELPH_alloc.F
+++ b/src/allocations/ELPH_alloc.F
@@ -5,21 +5,26 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_alloc(what,GKKP,Nb,Nm,Nk_mem,Nq_mem)
  !
- use pars,          ONLY:rZERO,cZERO,schlen
+ use pars,          ONLY:SP,rZERO,cZERO,schlen
  use D_lattice,     ONLY:n_atoms
  use R_lattice,     ONLY:nkbz,nqbz
  use electrons,     ONLY:E_reset
  use QP_m,          ONLY:QP_n_states
  use ELPH,          ONLY:GKKP_me,PH_pol_vector,ph_modes,PH_freqs_sq,&
 &                        PH_acoustic_speed,PH_acoustic_branch,gsqF_life_f,gsqF_life_bose,gsqF_fan,&
-&                        gsqF_ca_corr,gsqF_dw,PH_Q_modulus,PH_qpt,elph_nb,elph_nQ,eval_G_using_KK,&
+&                        gsqF_ca_corr,gsqF_dw,PH_Q_modulus,PH_qpt,elph_bands,elph_nQ,eval_G_using_KK,&
 &                        gsqF_energy_steps,PH_kpt_bz,elph_nk_bz,elph_use_q_grid,FineGd_E_kpq_components_reset
  use parallel_m,    ONLY:PAR_nQ_bz
  use stderr,        ONLY:STRING_split
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)            ::what
  type(GKKP_me), optional ::GKKP
@@ -34,7 +39,7 @@ subroutine ELPH_alloc(what,GKKP,Nb,Nm,Nk_mem,Nq_mem)
  !
  if (present(GKKP)) then
    GKKP%Nq_mem=1
-   GKKP%Nb=(/1,elph_nb/)
+   GKKP%Nb=elph_bands
    GKKP%Nmodes=(/1,ph_modes/)
    GKKP%Nk_mem=nkbz
    if (present(Nq_mem)) then
@@ -73,8 +78,9 @@ subroutine ELPH_alloc(what,GKKP,Nb,Nm,Nk_mem,Nq_mem)
        if (.not.allocated(GKKP%E_kpq)) then
          allocate(GKKP%E_kpq(elph_nQ))
          do iq=1,elph_nQ
-           YAMBO_ALLOC(GKKP%E_kpq(iq)%E,(elph_nb,nkbz,1))
-           YAMBO_ALLOC(GKKP%E_kpq(iq)%f,(elph_nb,nkbz,1))
+           YAMBO_ALLOC(GKKP%E_kpq(iq)%E,(GKKP%Nb(1):GKKP%Nb(2),nkbz,1))
+           YAMBO_ALLOC(GKKP%E_kpq(iq)%f,(GKKP%Nb(1):GKKP%Nb(2),nkbz,1))
+           GKKP%E_kpq(iq)%E=0._SP
          enddo
        endif
      case ('DGRID')
diff --git a/src/allocations/MPA_ALLOC_parallel.F b/src/allocations/MPA_ALLOC_parallel.F
index 42356b539b..3ee3336d79 100644
--- a/src/allocations/MPA_ALLOC_parallel.F
+++ b/src/allocations/MPA_ALLOC_parallel.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): MB, AF
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine MPA_ALLOC_parallel(MPA_E_par,MPA_R_par,NG,NW,mode)
   !
   use pars,          ONLY:SP,DP,cZERO,cONE,cI
   use matrix,        ONLY:MATRIX_reset,PAR_matrix
   use parallel_int,  ONLY:PARALLEL_live_message
   use gpu_m,         ONLY:have_gpu
+  use y_memory_alloc
   !
-#include <y_memory.h>
+  implicit none
   !
   type(PAR_matrix) :: MPA_E_par,MPA_R_par
   integer          :: NG,NW
@@ -30,7 +35,6 @@ subroutine MPA_ALLOC_parallel(MPA_E_par,MPA_R_par,NG,NW,mode)
   !
   ! Type of allocation....
   !
-  !l_XUP      = index(mode,"XUP")>0
   l_CPU_ONLY = index(mode,"CPU_ONLY")>0
   !
   ! Initialization
diff --git a/src/allocations/X_ALLOC_elemental.F b/src/allocations/X_ALLOC_elemental.F
index 993978d8d9..b1fac2e70f 100644
--- a/src/allocations/X_ALLOC_elemental.F
+++ b/src/allocations/X_ALLOC_elemental.F
@@ -5,18 +5,23 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine X_ALLOC_elemental(WHAT,DIM)
  !
  use pars,          ONLY:rZERO,cZERO
  use X_m,           ONLY:Resp_ii,Resp_ij,Epsilon_ii,Joint_DOS,BS_E_sorted,BS_E_sorted_indx, &
-&                        Epsilon_ij,X_magnons,X_dichroism,                  &
+&                        Epsilon_ij,X_magnons,X_dichroism,X_mespin,X_meorb,&
 &                        X_fxc,EEL,Alpha,X_mat,DEV_VAR(X_mat),X_drude_term,X_RLcomp_ig
  use PHOTOLUM,      ONLY:PL
  use stderr,        ONLY:STRING_match
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)      :: WHAT
  integer, optional :: DIM(:)
@@ -73,6 +78,14 @@ subroutine X_ALLOC_elemental(WHAT,DIM)
 #ifdef _GPU
      YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(X_mat),X_mat)
 #endif
+   case('MEspin')
+     if (allocated(X_mespin)) return
+     YAMBO_ALLOC(X_mespin,(DIM(1),DIM(2)))
+     X_mespin = cZERO
+   case('MEorb')
+     if (allocated(X_meorb)) return
+     YAMBO_ALLOC(X_meorb,(DIM(1),DIM(2)))
+     X_meorb = cZERO
    end select
    return
  endif
@@ -103,6 +116,12 @@ subroutine X_ALLOC_elemental(WHAT,DIM)
  if (STRING_match(WHAT,"JDOS").or.STRING_match(WHAT,"ALL")) then
    YAMBO_FREE(Joint_DOS)
  endif
+ if (STRING_match(WHAT,"MEspin").or.STRING_match(WHAT,"ALL")) then
+   YAMBO_FREE(X_mespin)
+ endif
+ if (STRING_match(WHAT,"MEorb").or.STRING_match(WHAT,"ALL")) then
+   YAMBO_FREE(X_meorb)
+ endif
  if (STRING_match(WHAT,"BS_E_SORTED").or.STRING_match(WHAT,"ALL")) then
    YAMBO_FREE(BS_E_sorted)
    YAMBO_FREE(BS_E_sorted_indx)
diff --git a/src/allocations/X_ALLOC_global.F b/src/allocations/X_ALLOC_global.F
index 3a385c4bbe..85fffef618 100644
--- a/src/allocations/X_ALLOC_global.F
+++ b/src/allocations/X_ALLOC_global.F
@@ -5,18 +5,24 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine X_ALLOC_global(WHAT,W)
  !
  use pars,          ONLY:cZERO
  use ALLOC,         ONLY:X_ALLOC_elemental
  use X_m,           ONLY:Resp_ii,Resp_ij,Epsilon_ii,Epsilon_ij,&
-&                        Alpha,Joint_DOS,X_magnons,X_dichroism,EEL,X_do_obs
+&                        Alpha,Joint_DOS,X_magnons,X_dichroism,EEL,X_do_obs,&
+&                        X_mespin,X_meorb
  use frequency,     ONLY:w_samp
  use drivers,       ONLY:l_optics
  use BS_solvers,    ONLY:B_Hall
  use PHOTOLUM,      ONLY:PL,PL_init
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)   :: WHAT
  type(w_samp)   :: W
@@ -75,6 +81,14 @@ subroutine X_ALLOC_global(WHAT,W)
    if (X_do_obs("Esort")) call X_ALLOC_elemental("BS_E_SORTED",(/1000,9,2/))
    if (X_do_obs("fxc")) call X_ALLOC_elemental("FXC",(/W%n_freqs/))
    !
+   if (X_do_obs("MEspin")) then
+     call X_ALLOC_elemental("MEspin",(/W%n_freqs,4/))
+     X_mespin(:,1)=W%p(:)
+   endif
+   if (X_do_obs("MEorb")) then
+     call X_ALLOC_elemental("MEorb",(/W%n_freqs,4/))
+     X_meorb(:,1)=W%p(:)
+   endif
  else if (WHAT=="FREE") then
    call X_ALLOC_elemental("FREE ALL")
  endif
diff --git a/src/allocations/X_ALLOC_parallel.F b/src/allocations/X_ALLOC_parallel.F
index 6b9849761d..1c5df7bf05 100644
--- a/src/allocations/X_ALLOC_parallel.F
+++ b/src/allocations/X_ALLOC_parallel.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 subroutine X_ALLOC_parallel(X_par,NG,NW,mode)
  !
  use pars,          ONLY:cZERO
@@ -14,9 +19,9 @@ subroutine X_ALLOC_parallel(X_par,NG,NW,mode)
  use parallel_int,  ONLY:PARALLEL_live_message
  use gpu_m,         ONLY:have_gpu
  use devxlib,       ONLY:devxlib_memset_d
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type(PAR_matrix) :: X_par
  integer          :: NG,NW
@@ -24,24 +29,18 @@ subroutine X_ALLOC_parallel(X_par,NG,NW,mode)
  !
  ! Work Space
  !
- logical :: l_XUP,l_PAR,l_CPU_ONLY
- !
- ! Reset...
- !
- call MATRIX_reset(X_par)
+ logical :: l_PAR,l_CPU_ONLY,l_INI_ONLY
  !
  ! Type of allocation....
  !
- l_XUP      = index(mode,"XUP")>0
- l_PAR      = index(mode,"X")>0.and.index(mode,"XUP")==0
+ l_PAR      = index(mode,"X")>0
  l_CPU_ONLY = index(mode,"CPU_ONLY")>0
+ l_INI_ONLY = index(mode,"INIT_ONLY")>0
  !
  ! Initialization
  !
- if (l_XUP) then
-   call MATRIX_init( "XUP", X_par, NG, NW )
- else if (l_PAR) then
-   call MATRIX_init( "X"  , X_par, NG, NW )
+ if (l_PAR) then
+   call MATRIX_init( "X", X_par, NG, NW )
  endif
  !
  X_rows = X_par%rows
@@ -49,6 +48,8 @@ subroutine X_ALLOC_parallel(X_par,NG,NW,mode)
  X_cols = X_par%cols
  !DEV_ACC update device(X_cols)
  !
+ if (l_INI_ONLY) return
+ !
  YAMBO_ALLOC(X_par%blc,(X_par%rows(1):X_par%rows(2),X_par%cols(1):X_par%cols(2),NW))
  X_par%blc=cZERO
  if (have_gpu.and..not.l_CPU_ONLY) then
diff --git a/src/bse/.objects b/src/bse/.objects
index 93e2b9c477..9a29d99f58 100644
--- a/src/bse/.objects
+++ b/src/bse/.objects
@@ -6,7 +6,7 @@ SLEPC_objects = K_stored_in_a_slepc_matrix.o K_shell_matrix.o K_stored_in_a_nest
 PARIO_objects = K_compress.o
 #endif
 #if defined _SCALAPACK
-SLK_objects = K_inversion_do_it_SLK.o K_fill_block_cyclic.o YDIAGO_driver.o
+SLK_objects = K_inversion_do_it_SLK.o LDIAGO_driver.o  K_fill_block_cyclic.o
 #endif
 objs =	K_blocks.o K_driver_init.o K_dipoles.o K_IP.o K_IP_sort.o \
         K_Transitions_setup.o K_Transition_check.o \
@@ -15,7 +15,7 @@ objs =	K_blocks.o K_driver_init.o K_dipoles.o K_IP.o K_IP_sort.o \
         K_correlation_kernel_dir.o K_correlation_kernel_std.o K_exchange_kernel.o \
         K_restart.o K_solvers.o K_Haydock.o K_Haydock_response.o K_Haydock_gather.o K_Haydock_scatter.o K_screened_interaction.o \
         K_inversion_do_it_full.o EPS_via_perturbative_inversion.o \
-        K_inversion_driver.o K_diagonal.o K_inversion_Lo.o  K_inversion_restart.o K_inversion_engine.o \
+        K_diagonal.o K_inversion_driver.o K_inversion_Lo.o  K_inversion_restart.o K_inversion_engine.o \
         K_diago_driver.o K_diago_left_residuals.o K_diago_right_residuals.o K_diago_overlap_matrix.o \
         K_diago_perturbative.o K_diago_response_functions.o K_eps_interpolate.o \
         K_observables.o K_multiply_by_V.o K_multiply_by_V_transpose.o K_dot_product.o K_components_folded_in_serial_arrays.o \
diff --git a/src/bse/BSE_utilities.F b/src/bse/BSE_utilities.F
index a9fa0d7756..b3e171c4b6 100644
--- a/src/bse/BSE_utilities.F
+++ b/src/bse/BSE_utilities.F
@@ -5,37 +5,116 @@
 !
 ! Authors (see AUTHORS file for details): AM MG DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
+!
+subroutine build_inverse_BS_eh_table_from_BSS_table(nk)
+ !
+ use electrons,       ONLY:n_sp_pol
+ use BS,              ONLY:BS_H_dim,BS_bands
+ use BS_solvers,      ONLY:BSS_eh_table,BSS_eh_table_m1
+ !
+ implicit none
+ !
+ integer, intent(in) :: nk
+ !
+ integer :: iv,ic,i_H,ik_bz,i_sp_c,i_sp_v
+ !
+ if (.not.allocated(BSS_eh_table_m1)) then
+   allocate(BSS_eh_table_m1(nk,BS_bands(1):BS_bands(2),BS_bands(1):BS_bands(2),n_sp_pol,n_sp_pol))
+   BSS_eh_table_m1=0
+ endif 
+ !call live_timing('BSE inverse eh-table',BS_H_dim)
+ !
+ do i_H=1,BS_H_dim
+   !
+   ik_bz = BSS_eh_table(i_H,1)
+   iv    = BSS_eh_table(i_H,2)
+   ic    = BSS_eh_table(i_H,3)
+   i_sp_c= BSS_eh_table(i_H,4)
+   i_sp_v= BSS_eh_table(i_H,5)
+   !
+   BSS_eh_table_m1(ik_bz,iv,ic,i_sp_c,i_sp_v)=i_H
+   !call live_timing(steps=1)
+   !
+ enddo
+ !call live_timing( )
+ !
+end subroutine build_inverse_BS_eh_table_from_BSS_table
+!
+!
+subroutine build_inverse_BS_eh_table_from_BS_kernel(nk)
+ !
+ use electrons,       ONLY:n_sp_pol
+ use BS,              ONLY:BS_bands,BS_K_dim,BS_blk
+ use BS_solvers,      ONLY:BSS_eh_table_m1
+ !
+ implicit none
+ !
+ integer, intent(in) :: nk
+ !
+ integer :: i_H,ik_bz,iv,ic,i_sp_c,i_sp_v
+ !
+ allocate(BSS_eh_table_m1(nk,BS_bands(1):BS_bands(2),BS_bands(1):BS_bands(2),n_sp_pol,n_sp_pol))
+ BSS_eh_table_m1=0
+ !call live_timing('BSE inverse eh-table',BS_H_dim)
+ !
+ do i_H=1,2*BS_K_dim(1)
+   !
+   ik_bz    = BS_blk(i_H)%table(1,1)
+   iv       = BS_blk(i_H)%table(2,1)
+   ic       = BS_blk(i_H)%table(3,1)
+   i_sp_c   = BS_blk(i_H)%table(4,1)
+   i_sp_v   = BS_blk(i_H)%table(5,1)
+   !
+   BSS_eh_table_m1(ik_bz,iv,ic,i_sp_c,i_sp_v)=i_H
+   !
+   !call live_timing(steps=1)
+   !
+ enddo
+ !call live_timing( )
+ !
+end subroutine build_inverse_BS_eh_table_from_BS_kernel
+!
 !
 subroutine BS_exchange_oscillators_alloc(iG)
  use BS,        ONLY:BS_T_grp,BS_n_g_exch
  use gpu_m,     ONLY:have_gpu
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iG
  if (BS_T_grp(iG)%size==0) return
  !DEV_ACC enter data copyin(BS_T_grp(iG))
  YAMBO_ALLOC(BS_T_grp(iG)%O_x,(BS_n_g_exch,BS_T_grp(iG)%size))
  if (have_gpu) then
    YAMBO_ALLOC_GPU(DEV_VAR(BS_T_grp(iG)%O_x),(BS_n_g_exch,BS_T_grp(iG)%size))
- endif   
+ endif
 end subroutine BS_exchange_oscillators_alloc
 !
 subroutine BS_correlation_oscillators_alloc(iB)
  use BS,        ONLY:BS_blk,O_ng
  use gpu_m,     ONLY:have_gpu
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iB
  if (BS_blk(iB)%N_oscillators==0) return
  !DEV_ACC enter data copyin(BS_blk(iB))
  YAMBO_ALLOC(BS_blk(iB)%O_c,(O_ng,BS_blk(iB)%N_oscillators))
  if (have_gpu) then
    YAMBO_ALLOC_GPU(DEV_VAR(BS_blk(iB)%O_c),(O_ng,BS_blk(iB)%N_oscillators))
- endif   
+ endif
 end subroutine BS_correlation_oscillators_alloc
 !
 subroutine BS_exchange_oscillators_free(iG_ref)
  use BS,        ONLY:BS_T_grp
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iG_ref
  integer :: iG
  do iG=iG_ref,1,-1
@@ -47,7 +126,9 @@ end subroutine BS_exchange_oscillators_free
 !
 subroutine BS_correlation_oscillators_free(iB_ref,l_std_alg)
  use BS,        ONLY:BS_T_grp,l_BSE_minimize_memory,BS_blk,n_BS_blks
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iB_ref
  logical, intent(in) :: l_std_alg
  integer :: iB,ik_loop,ip_loop,ik_now,ip_now
@@ -76,8 +157,11 @@ subroutine TDDFT_oscillators_alloc_L(iGL)
  use electrons, ONLY:n_spin,n_spinor
  use BS,        ONLY:BS_T_grp,BS_n_g_fxc
  use gpu_m,     ONLY:have_gpu
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iGL
+ !
  if (BS_T_grp(iGL)%size>0) then
    !DEV_ACC enter data copyin(BS_T_grp(iGL))
    YAMBO_ALLOC(BS_T_grp(iGL)%O_tddft_L,(BS_n_g_fxc,BS_T_grp(iGL)%size,n_spinor,n_spinor))
@@ -91,7 +175,9 @@ subroutine TDDFT_oscillators_alloc_R(iGR)
  use electrons, ONLY:n_spin,n_spinor
  use BS,        ONLY:BS_T_grp,BS_n_g_fxc
  use gpu_m,     ONLY:have_gpu
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iGR
  if (BS_T_grp(iGR)%size>0) then
    !DEV_ACC enter data copyin(BS_T_grp(iGR))
@@ -104,7 +190,9 @@ end subroutine TDDFT_oscillators_alloc_R
 !
 subroutine TDDFT_oscillators_free_L(iG_ref)
  use BS,        ONLY:BS_T_grp
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iG_ref
  integer :: iGL
  do iGL=iG_ref,1,-1
@@ -116,7 +204,9 @@ end subroutine TDDFT_oscillators_free_L
 !
 subroutine TDDFT_oscillators_free_R(iG_ref)
  use BS,        ONLY:BS_T_grp
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer, intent(in) :: iG_ref
  integer :: iGR
  do iGR=iG_ref,1,-1
diff --git a/src/bse/DOUBLE_project.dep b/src/bse/DOUBLE_project.dep
index fca1371b0b..274f365e98 100644
--- a/src/bse/DOUBLE_project.dep
+++ b/src/bse/DOUBLE_project.dep
@@ -51,7 +51,7 @@
  K_stored_in_a_big_matrix.o
  K_stored_in_a_nest_matrix.o
  K_stored_in_a_slepc_matrix.o
+ LDIAGO_driver.o
  PL_diago_residual.o
  PL_via_perturbative_inversion.o
- YDIAGO_driver.o
 
diff --git a/src/bse/EPS_via_perturbative_inversion.F b/src/bse/EPS_via_perturbative_inversion.F
index dee4c421cb..1d0e200047 100644
--- a/src/bse/EPS_via_perturbative_inversion.F
+++ b/src/bse/EPS_via_perturbative_inversion.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-integer function EPS_via_perturbative_inversion(n,m,n_loop,D,C,f,Lo)
+integer function EPS_via_perturbative_inversion(n,m,D,C,f,Lo)
  !
  ! Here I  wante to calculate
  !
@@ -33,7 +33,7 @@ integer function EPS_via_perturbative_inversion(n,m,n_loop,D,C,f,Lo)
  !
  implicit none
  !
- integer,    intent(in)  ::n,m,n_loop
+ integer,    intent(in)  ::n,m
  complex(SP),intent(in)  ::C,D(n),Lo(n,m)
  complex(SP),intent(out) ::f
  !
@@ -51,7 +51,7 @@ integer function EPS_via_perturbative_inversion(n,m,n_loop,D,C,f,Lo)
  !
  R_x_D_nm1(:,1)=Lo(:,1)*D(:)
  fo=C*Vstar_dot_V_omp(n,D,R_x_D_nm1(:,1))
- if(n_loop==2) then
+ if(m==2) then
    R_x_D_nm1(:,2)=Lo(:,2)*conjg(D(:))
    fo=fo+C*V_dot_V_omp(n,D,R_x_D_nm1(:,2))
  endif
@@ -69,17 +69,17 @@ integer function EPS_via_perturbative_inversion(n,m,n_loop,D,C,f,Lo)
    !
    if (.not.K_slk%kind=="SLK") then
      call M_by_V_omp('n',n,BS_mat,R_x_D_nm1(:,1),R_x_D_n(:,1))
-     if (n_loop==2) call M_by_V_omp('n',n,conjg(BS_mat),R_x_D_nm1(:,2),R_x_D_n(:,2))
+     if (m==2) call M_by_V_omp('n',n,conjg(BS_mat),R_x_D_nm1(:,2),R_x_D_n(:,2))
    else
 #if defined _SCALAPACK
      call PARALLEL_M_by_V('n',n,K_slk,R_x_D_nm1(:,1),R_x_D_n(:,1))
-     if (n_loop==2) call PARALLEL_M_by_V('C',n,K_slk,R_x_D_nm1(:,2),R_x_D_n(:,2))
+     if (m==2) call PARALLEL_M_by_V('C',n,K_slk,R_x_D_nm1(:,2),R_x_D_n(:,2))
 #endif
    endif
    !
    R_x_D_n(:,1)=Lo(:,1)*R_x_D_n(:,1)
    delta_f=C*Vstar_dot_V_omp(n,D,R_x_D_n(1:n,1))
-   if (n_loop==2) then 
+   if (m==2) then 
      R_x_D_n(:,2)=Lo(:,2)*R_x_D_n(:,2)
      delta_f=delta_f+C*V_dot_V_omp(n,D,R_x_D_n(:,2))
    endif
diff --git a/src/bse/K_Haydock.F b/src/bse/K_Haydock.F
index 3a55538fe8..7a227eb2df 100644
--- a/src/bse/K_Haydock.F
+++ b/src/bse/K_Haydock.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM MG DS IMA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_Haydock(iq,W,OBS,Xk)
  !
  use pars,           ONLY:SP,schlen,cI,cZERO,cONE
@@ -30,8 +34,9 @@ subroutine K_Haydock(iq,W,OBS,Xk)
  use timing_m,       ONLY:timing
  use descriptors,    ONLY:IO_desc_add
  use R_lattice,      ONLY:bz_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(w_samp), intent(in) :: W
  integer,      intent(in) :: iq
@@ -52,7 +57,7 @@ subroutine K_Haydock(iq,W,OBS,Xk)
  !
  real(SP)           ::reached_threshold
  real(SP)           ::Bf(Haydock_iterMAX+1),Af(Haydock_iterMAX)
- integer            ::ItParity,Cf_size,Haydock_n_converged,Haydock_required_converged
+ integer            ::ItParity,Cf_size,Ares_fac,Haydock_n_converged,Haydock_required_converged
  complex(SP)        ::v0_mod_
  character(schlen)  ::dumb_ch
  logical            ::impose_pseudo_herm_scheme,use_left_and_right,velocity_correction,tmp_converged
@@ -85,15 +90,21 @@ subroutine K_Haydock(iq,W,OBS,Xk)
  use_left_and_right=.false.
  scheme="hermitian"
  if(BS_K_Coupling.or.impose_pseudo_herm_scheme) scheme="pseudo-herm"
- use_left_and_right=trim(OBS)=="kerr".or.trim(OBS)=="dichroism"
+ use_left_and_right=trim(OBS)=="kerr".or.&
+&                   trim(OBS)=="dichroism".or.&
+&                   trim(OBS)=="mespin".or.&
+&                   trim(OBS)=="meorb"
  velocity_correction= trim(global_gauge)=="velocity" .and. (trim(OBS)=="optics" .or. trim(OBS)=="kerr")
  !
  if (isec(2)/=0) sec="="
  if (isec(2)==0) sec="+"
  call section(sec,'Haydock Solver in the '//trim(OBS)//' basis @q'//trim(intc(iq))//" using the "//trim(scheme)//" scheme")
  !
- if (     l_BS_ares_from_res) HAYVEC_size = BS_K_dim(1)
- if (.not.l_BS_ares_from_res) then
+ if (l_BS_ares_from_res) then
+   Ares_fac=2
+   HAYVEC_size = BS_K_dim(1)
+ else
+   Ares_fac=1
    if(.not.BS_K_coupling) HAYVEC_size = BS_K_dim(1)
    if(     BS_K_coupling) HAYVEC_size = BS_H_dim
  endif
@@ -128,6 +139,10 @@ subroutine K_Haydock(iq,W,OBS,Xk)
    call BS_HAYVEC_alloc(W0,BS_nkFGbz_in_Tgrp)
  endif
  !
+ ! For the coupling case if A = -R* (l_BS_ares_from_res=.TRUE.), V0 = (u0, -iu0*)
+ ! At each iteration however the relation changes V1= (u1, iu1*),
+ ! this is why the ItParity is needed 
+ !
  ItParity=1
  it_on_disk=0
  reached_threshold=100._SP
@@ -227,9 +242,12 @@ subroutine K_Haydock(iq,W,OBS,Xk)
  do i_g=1,BS_nT_grps
    if (.not.PAR_IND_T_groups%element_1D(i_g)) cycle
    do i_ColFG=1,BS_nkFGbz_in_Tgrp(i_g)
+     if (trim(OBS)=="jdos")        V0(i_g)%fragment(:,i_ColFG) = 1._SP
      if (trim(OBS)=="optics")      V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)
      if (trim(OBS)=="kerr")        V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(2,:,1)
-     if (trim(OBS)=="dichroism")   V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_dic(1,:,1)
+     if (trim(OBS)=="dichroism")   V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_orb(1,:,1)
+     if (trim(OBS)=="mespin")      V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_spin(1,:,1)
+     if (trim(OBS)=="meorb")       V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_orb(1,:,1)
      if (trim(OBS)=="magnons")     V0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_mag(1,:,1)
      V0(i_g)%fragment(:,i_ColFG) = V0(i_g)%fragment(:,i_ColFG)*sqrt(cmplx(BS_T_grp(i_g)%f_RES,kind=SP))
    enddo
@@ -268,7 +286,11 @@ subroutine K_Haydock(iq,W,OBS,Xk)
      if (.not.PAR_IND_T_groups%element_1D(i_g)) cycle
      do i_ColFG=1,BS_nkFGbz_in_Tgrp(i_g)
        if (trim(OBS)=="kerr") W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)
-       if (trim(OBS)=="dichroism") W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)/q0_def_norm
+       if (trim(OBS)=="dichroism".or.&
+&          trim(OBS)=="mespin".or.&
+&          trim(OBS)=="meorb") then 
+         W0(i_g)%fragment(:,i_ColFG) = BS_T_grp(i_g)%dipoles_opt(1,:,1)/q0_def_norm
+       endif
        W0(i_g)%fragment(:,i_ColFG) = W0(i_g)%fragment(:,i_ColFG)*(sqrt(cmplx(BS_T_grp(i_g)%f_RES,kind=SP)))
      enddo
      if(velocity_correction) V_tmp(i_g)%fragment = W0(i_g)%fragment/BS_T_grp(i_g)%E
@@ -302,15 +324,12 @@ subroutine K_Haydock(iq,W,OBS,Xk)
      !
      Cf= cZERO
      !
+     ! First K_multiply_by_V is with parity=-1 in pseudo-hermitian case
+     !
      call K_multiply_by_V(V0,Vnp1,-1,iq,"all")
      call K_dot_product(v0_mod_,V0,Vnp1,1)
      !
-     ! For the coupling case if A = -R* (l_BS_ares_from_res=.FALSE.), V0 = (u0, -iu0*)
-     ! At each iteration however the relation changes V1= (u1, iu1*),
-     ! this is why the ItParity is needed 
-     !
-     if(     l_BS_ares_from_res) Haydock_v0_mod=sqrt(2*real(v0_mod_))
-     if(.not.l_BS_ares_from_res) Haydock_v0_mod=sqrt(  real(v0_mod_))
+     Haydock_v0_mod=sqrt(Ares_fac*real(v0_mod_,SP))
      !
      do i_g=1,BS_nT_grps
        if (.not.PAR_IND_T_Haydock%element_1D(i_g)) cycle
@@ -461,8 +480,7 @@ subroutine K_Haydock(iq,W,OBS,Xk)
      call K_dot_product(v0_mod_,Vn,Vnp1,1) 
      !       = (2*Re(<Vn|Vnp1>))^(1/2) by symmetry,
      !          where the dot_product is just on eh pair space
-     if (     l_BS_ares_from_res) Bf(it+1)=sqrt(2*real(v0_mod_,SP))
-     if (.not.l_BS_ares_from_res) Bf(it+1)=sqrt(  real(v0_mod_,SP))
+     Bf(it+1)=sqrt(Ares_fac*real(v0_mod_,SP))
      ! Simple test for INF
      if( Bf(it+1)==(Bf(it+1)+1._SP) ) call error('Bf=INF likely because some eigenvalue of the BSE is negative.')
      ! Tests for NaN
diff --git a/src/bse/K_Haydock_response.F b/src/bse/K_Haydock_response.F
index 8d59e1c6a0..9e1eb18dcb 100644
--- a/src/bse/K_Haydock_response.F
+++ b/src/bse/K_Haydock_response.F
@@ -3,14 +3,14 @@
 !
 ! Copyright (C) 2020 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM MG DS IMA
+! Authors (see AUTHORS file for details): AM MG DS IMA TG
 !
 subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_threshold,FG_factor)
  !
  use pars,           ONLY:SP,cZERO,cONE,cI,pi
  use units,          ONLY:SPEED_OF_LIGHT
  use X_m,            ONLY:Resp_ii,Joint_DOS,global_gauge, &
-&                         X_magnons,Resp_ij,X_dichroism,i_G_shift
+&                         X_magnons,Resp_ij,X_dichroism,X_mespin,X_meorb,i_G_shift
  use R_lattice,      ONLY:bare_qpg
  use frequency,      ONLY:w_samp
  use BS,             ONLY:BSE_mode
@@ -226,6 +226,10 @@ subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_thresh
      X_magnons(iw,1,i_resp(:))=-X_o(1:2,1)*Co/(4._SP*pi)
    case("dichroism")
      X_dichroism(iw,i_resp(:))=-X_o(1:2,1)*Co/(4._SP*pi)*(cI/SPEED_OF_LIGHT)
+   case("mespin")
+     X_mespin(iw,i_resp(:))=-X_o(1:2,1)*(-1._SP*Co)/(8._SP*pi)
+   case("meorb")
+     X_mespin(iw,i_resp(:))=-X_o(1:2,1)*(-1._SP*Co)/(8._SP*pi)
    end select
    !
  enddo
@@ -248,6 +252,12 @@ subroutine K_Haydock_response(iq,it,Cf_size,OBS,scheme,W,Af,Bf,Cf,reached_thresh
    case("dichroism")
      sums=(X_dichroism(iw,2)+X_dichroism(iw,4))/2._SP
      diff= X_dichroism(iw,2)-X_dichroism(iw,4)
+   case("mespin")
+     sums=(X_mespin(iw,2)+X_mespin(iw,4))/2._SP
+     diff= X_mespin(iw,2)-X_mespin(iw,4)
+   case("meorb")
+     sums=(X_meorb(iw,2)+X_meorb(iw,4))/2._SP
+     diff= X_meorb(iw,2)-X_meorb(iw,4)
    end select
    !
    update_thresh = abs(real(sums))>0._SP .and. abs(aimag(sums))>0._SP
diff --git a/src/bse/K_IP.F b/src/bse/K_IP.F
index d424a93297..6884cf8efb 100644
--- a/src/bse/K_IP.F
+++ b/src/bse/K_IP.F
@@ -3,7 +3,11 @@
 !
 ! Copyright (C) 2012 The Yambo Team
 !
-! Authors (see AUTHORS file for details): DS AM IMA
+! Authors (see AUTHORS file for details): DS AM IMA TG
+!
+! headers
+!
+#include<y_memory.h>
 !
 subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  !
@@ -18,12 +22,12 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  use electrons,     ONLY:levels,spin_occ,spin,nel,n_sp_pol,n_spinor
  use X_m,           ONLY:Resp_ii,Joint_DOS,alpha_dim,eps_2_alpha,&
 &                        X_drude_term,global_gauge,N_BS_E_sorted,i_G_shift,&
-&                        skip_cond_sum_rule,X_magnons,Resp_ij,X_dichroism
+&                        skip_cond_sum_rule,X_magnons,Resp_ij,X_dichroism,X_mespin,X_meorb
  use BS_solvers,    ONLY:Co_factor,diam_term_exact,para_term_w0,BSS_Wd,BSS_mode,BSS_uses_DbGd,FG_factor
  use BS,            ONLY:BS_T_grp,BS_nkFGbz_in_Tgrp,BS_nT_grps,BS_perturbative_SOC,&
 &                        BS_dip_size,l_BS_jdos,l_BS_esort,l_BS_trace,BS_H_dim,l_BS_kerr_asymm,&
 &                        l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,&
-&                        BS_K_anti_resonant,l_BS_ares_from_res
+&                        BS_K_anti_resonant,l_BS_ares_from_res,l_BS_mespin,l_BS_meorb
  use parallel_int,  ONLY:PP_redux_wait
  use parallel_m,    ONLY:PAR_BS_nT_col_grps,PAR_COM_eh_INDEX,PAR_IND_T_groups,&
 &                        PAR_COM_Xk_ibz_INDEX
@@ -32,8 +36,9 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  use BS_solvers,    ONLY:B_Hall
  use PHOTOLUM,      ONLY:PL,PL_prefactor,PL_weights
  use units,         ONLY:SPEED_OF_LIGHT
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer       ::iq
  type(levels)  ::Ken
@@ -44,14 +49,14 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
  ! Work space
  !
  integer           ::i_T_g,i_T,n_SOC,i_pert_SOC,i_res_ares,i_sort,nVAR,Indexes(5)
- real(SP)          ::f_eh,f_eh_PL,res_PL
+ real(SP)          ::f_eh,f_eh_PL(2),res_PL(2)
  logical           ::dip_v,dip_r
  complex(SP)       ::res_abs,E_plus_W0,E_plus_W(2/n_sp_pol),Z_eh,E_eh,g_fac
  !
  integer       :: i_ColFG
  complex(SP), allocatable :: E_DbGd(:)
  !
- complex(SP)       ::P_weighted(3),res_kerr,res_magn(2),res_dich
+ complex(SP)       ::P_weighted(3),res_kerr,res_magn(2),res_dich,res_mespin,res_meorb
  integer,        allocatable :: vtmpI(:,:)
  complex(SP),    allocatable :: vtmpC(:,:)
  !
@@ -123,6 +128,8 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
    !
    i_res_ares=BS_T_grp(i_T_g)%i_res_ares
    !
+   if (l_BS_ares_from_res .and. i_res_ares==2) cycle
+   !
    do i_T=1,BS_T_grp(i_T_g)%size
      !
      i_sort =sum(BS_T_grp(BS_T_grp(i_T_g)%i_T_ref:i_T_g-1)%size)+i_T 
@@ -195,21 +202,37 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
        !
        ! Note that res_PL is always positive as f_eh_PL>0
        !
-       f_eh_PL=BS_T_grp(i_T_g)%f_PL(i_T,1)
-       P_weighted(:)=BS_T_grp(i_T_g)%dipoles_opt(:,i_T,1)*PL_weights(:)
-       res_PL=real(dot_product(P_weighted,P_weighted)*f_eh_PL*Z_eh*E_eh)
+       f_eh_PL=BS_T_grp(i_T_g)%f_PL(i_T,:)
+       res_PL(1)=abs(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))**2
+       if(l_BS_trace) then
+         res_PL(1)=abs(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1))**2+ &
+                  +abs(BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1))**2+ &
+                  +abs(BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1))**2
+       endif
+       res_PL(1)=res_PL(1)*f_eh_PL(1)*Z_eh*E_eh
+       res_PL(2)=res_PL(1)*f_eh_PL(2)*Z_eh*E_eh
      endif
      !
      ! Dichroism
      !
      if(l_BS_dichroism) then
-       res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))+     &
-       &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(2,i_T,1)))/2._SP
+       ! The dicroism for an oriented sample (DD contribution only) should be
+       ! 1/2 (beta_xx + beta_yy) = 1/2 (x*m_x + y*m_y)
+       ! with
+       ! m_x = r_y*p_z-r_z*p_y
+       ! This follows from PhD thesis ...
+       res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(1,i_T,1))+     &
+       &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(2,i_T,1)))/2._SP
+       ! The quadripolar term would be similar, but involving terms of the form
+       ! Q_yz = r_y*p_z+r_z*p_y
+       ! See eq. (45) of PRB 107, 224430 (2025)
+       ! To check why (i) the equations are "hermitianized", and
+       !              (ii) with "mixed gauge" r*p in place of r*r
        if(l_BS_trace) then
          ! x*L = DIP_x*DIP_orbital;
-         res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(1,i_T,1))+     &
-         &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(2,i_T,1))+     &
-         &         BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_dic(3,i_T,1)))/3._SP
+         res_dich=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(1,i_T,1))+     &
+         &         BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(2,i_T,1))+     &
+         &         BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(3,i_T,1)))/3._SP
        endif
        res_dich=res_dich*f_eh*Z_eh/(q0_def_norm)
        if (l_BS_esort.and.i_res_ares==1) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_dich,vtmpC)
@@ -225,11 +248,44 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
        res_magn=res_magn*f_eh*Z_eh
        if (l_BS_esort.and.i_res_ares==1) then
          call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(1),vtmpC)
-	 if(n_spinor==2) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(2),vtmpC)
+         if(n_spinor==2) call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_magn(2),vtmpC)
+       endif
+     endif
+     !
+     if (l_BS_mespin) then
+
+       ! Magnetoelectric spin part
+       res_mespin=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_spin(1,i_T,1))
+       if(l_BS_trace) then
+         ! x*sigma = DIP_x*DIP_spin;
+         res_mespin=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_spin(1,i_T,1))+     &
+         &           BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_spin(2,i_T,1))+     &
+         &           BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_spin(3,i_T,1)))/3._SP
+       endif
+       res_mespin=res_mespin*f_eh*Z_eh/(q0_def_norm)
+       if (l_BS_esort.and.i_res_ares==1) then 
+         call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_mespin,vtmpC)
+       endif
+     endif
+
+     if (l_BS_meorb) then
+       ! Orbital part
+       res_meorb=BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(1,i_T,1))
+       if(l_BS_trace) then
+         ! x*L = DIP_x*DIP_orbital;
+         res_meorb=(BS_T_grp(i_T_g)%dipoles_opt(1,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(1,i_T,1))+     &
+         &          BS_T_grp(i_T_g)%dipoles_opt(2,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(2,i_T,1))+     &
+         &          BS_T_grp(i_T_g)%dipoles_opt(3,i_T,1)*conjg(BS_T_grp(i_T_g)%dipoles_orb(3,i_T,1)))/3._SP
        endif
+       res_meorb=res_meorb*f_eh*Z_eh/(q0_def_norm)
+
+       if (l_BS_esort.and.i_res_ares==1) then 
+         call K_IP_sort("add",i_sort,nVAR,Indexes,vtmpI,res_meorb,vtmpC)
+       endif
+
      endif
      !
-     if ( abs(real(E_plus_W0)) < Dip%Energy_treshold .and. iq==1 .and. dip_r ) then
+     if ( abs(real(E_plus_W0)) < Dip%Energy_threshold .and. iq==1 .and. dip_r ) then
        res_abs =0._SP
        res_kerr=0._SP
        res_PL  =0._SP
@@ -279,9 +335,12 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
        endif
        !
        if (l_BS_photolum) then
-         PL(:,3)=PL(:,3) +PL_prefactor*res_PL*aimag(-1._SP/(W_bss%p(:)-E_plus_W(i_pert_SOC)))/pi
+         !
+         ! I^PL = L<
+         !  
+         PL(:,3)=PL(:,3) + PL_prefactor*res_PL(1)*aimag(-1._SP/(W_bss%p(:)-E_plus_W(i_pert_SOC)))/pi
          if (BS_K_anti_resonant.and.l_BS_ares_from_res) then
-           PL(:,3)=PL(:,3)-PL_prefactor*res_PL*aimag(-1._SP/(W_bss%p(:)+conjg(E_plus_W(i_pert_SOC))))/pi
+           PL(:,3)=PL(:,3) + PL_prefactor*res_PL(2)*aimag(-1._SP/(W_bss%p(:)+conjg(E_plus_W(i_pert_SOC))))/pi
          endif
        endif
        !
@@ -301,6 +360,19 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
          endif
        endif
        !
+       if (l_BS_mespin) then
+           X_mespin(:,3)=X_mespin(:,3)-res_mespin/(W_bss%p(:)-E_plus_W(i_pert_SOC))
+           if (BS_K_anti_resonant.and.l_BS_ares_from_res) then
+             X_mespin(:,3)=X_mespin(:,3)+conjg(res_mespin)/(W_bss%p(:)+conjg(E_plus_W(i_pert_SOC)))
+           endif
+       endif
+       !
+       if (l_BS_meorb) then
+           X_meorb(:,3)=X_meorb(:,3)-res_meorb/(W_bss%p(:)-E_plus_W(i_pert_SOC))
+           if (BS_K_anti_resonant.and.l_BS_ares_from_res) then
+             X_meorb(:,3)=X_meorb(:,3)+conjg(res_meorb)/(W_bss%p(:)+conjg(E_plus_W(i_pert_SOC)))
+           endif
+       endif
      enddo
      !
      deallocate(E_DbGd)
@@ -354,6 +426,14 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
    call PP_redux_wait( X_magnons(:,:,3) ,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
    call PP_redux_wait( X_magnons(:,:,3) ,COMM=PAR_COM_eh_INDEX%COMM )
  endif
+ !
+ if(l_BS_mespin) then
+   call PP_redux_wait( X_mespin(:,3) ,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
+ endif
+ if(l_BS_meorb) then
+   call PP_redux_wait( X_meorb(:,3) ,COMM=PAR_COM_eh_INDEX%COMM )
+ endif
+
  !
  ! Factors and more
  !==================
@@ -374,15 +454,25 @@ subroutine K_IP(iq,Ken,Xk,Dip,W_bss)
    !  beta_ij=-((i*hbar*me)/(omega*qe^2))*G_ij;
    !  DeltaN=((8*pi*N*omega/(3*c))*Tr(beta_ij);
    !  N = molecular density = ??
+   !
+   !  Yambo atomic units
    !  SPEED_OF_LIGHT =  c in a.u.
+   !  hbar=1 ; me=1 ; qe=1 => beta=-i/omega*G_{ij}
+   !
+   !  this is (omega/c)*beta=-i/c*G_{ij}
    !
-   X_dichroism(:,3)=X_dichroism(:,3)*(cI/SPEED_OF_LIGHT)*Co_factor/(4._SP*pi)   !  this is (omega/c)*beta, with beta=-((i*hbar*me)/(omega*qe^2))*G_ij
+   X_dichroism(:,3)=X_dichroism(:,3)*(cI/SPEED_OF_LIGHT)*Co_factor/(4._SP*pi)
    !
  endif
  !
  if (l_BS_magnons) X_magnons(:,:,3)=X_magnons(:,:,3)*Co_factor/(4._SP*pi)
  !
- if (     l_rpa_IP) call K_OUTPUT(iq,W_bss,"open dump IP close reset","optics kerr magnons PL dichroism")
+ if (l_BS_mespin) X_mespin(:,3)=X_mespin(:,3)*(-1._SP*Co_factor/(8._SP*pi))
+ !
+ if (l_BS_meorb) X_meorb(:,3)=X_meorb(:,3)*(-1._SP*Co_factor/(8._SP*pi))
+ !
+ if (     l_rpa_IP) call K_OUTPUT(iq,W_bss,"open dump IP close reset","optics kerr magnons PL dichroism &
+&                                                                      mespin meorb")
  call K_OUTPUT(iq,W_bss,"open dump close reset bare","E_IP E_INDX_IP")
  !
  ! Clean
diff --git a/src/bse/K_IP_sort.F b/src/bse/K_IP_sort.F
index e67771a6d3..1ac252e070 100644
--- a/src/bse/K_IP_sort.F
+++ b/src/bse/K_IP_sort.F
@@ -5,17 +5,22 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_IP_sort(what,I,N,Indexes,vI,El,vC)
  !
  use pars,          ONLY:SP,cZERO
  use parallel_m,    ONLY:PAR_COM_eh_INDEX,PAR_COM_Xk_ibz_INDEX
  use X_m,           ONLY:BS_E_sorted,N_BS_E_sorted,BS_E_sorted_indx
  use BS,            ONLY:BS_H_dim,l_BS_esort_indx
+ use BS_solvers,    ONLY:deg_exc_thrshld
  use vec_operate,   ONLY:sort,degeneration_finder
- use units,         ONLY:HA2EV
  use parallel_int,  ONLY:PP_redux_wait
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)  ::what
  integer       ::I,N,Indexes(5),vI(BS_H_dim,5)
@@ -55,7 +60,10 @@ subroutine K_IP_sort(what,I,N,Indexes,vI,El,vC)
  allocate(n_of_el(EDIM))
  !
  ! Threshold is 1meV
- call degeneration_finder(vR(:EDIM),EDIM,first_el,n_of_el,n_deg_grp,0.001_SP/HA2EV,Include_single_values=.TRUE.)
+ call degeneration_finder(EDIM,first_el,n_of_el,n_deg_grp,&
+ &    Er=vR(:EDIM),deg_accuracy=deg_exc_thrshld,Include_single_values=.TRUE.)
+ !
+ if (any(n_of_el(:)>18)) call warning("More than 18 degenerate transitions detected. Printing only first 18")
  !
  l_print_warning=.false.
  !
diff --git a/src/bse/K_Transition_check.F b/src/bse/K_Transition_check.F
index 68828e57a1..054d54d797 100644
--- a/src/bse/K_Transition_check.F
+++ b/src/bse/K_Transition_check.F
@@ -12,10 +12,11 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
  use X_m,          ONLY:global_gauge
  use DIPOLES,      ONLY:DIPOLE_t
  use electrons,    ONLY:levels,n_sp_pol,filled_tresh,spin_occ
- use BS,           ONLY:BS_not_const_eh_f,BS_perturbative_SOC,BS_eh_en,l_BS_magnons,l_BS_photolum
+ use BS,           ONLY:BS_not_const_eh_f,BS_perturbative_SOC,BS_eh_en,&
+                        l_BS_magnons,l_BS_photolum,l_BS_res_from_E
 #if defined _RT
- use drivers,      ONLY:l_rt_carriers_in_use
- use RT_control,   ONLY:NEQ_Residuals,NEQ_Kernel,EQ_Transitions,EQ_NoOcc
+ use drivers,      ONLY:l_rt_carriers_in_use,l_rpa_IP
+ use RT_control,   ONLY:NEQ_Residuals,NEQ_Kernel,EQ_Transitions,ALL_NoOcc,EQ_NoOcc
 #endif
  !
  implicit none
@@ -23,7 +24,7 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
  type(levels),   intent(in) :: Ken
  type(DIPOLE_t), intent(in) :: Dip
  integer,        intent(in) :: iq,ik,ikp,iv,ic,i_sp_c,i_res_ares
- real(SP),       intent(out):: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL
+ real(SP),       intent(out):: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL(2)
  !
  ! Work Space
  !
@@ -41,7 +42,7 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
  !
  ! Small energy transitions. Dipoles in length gauge are not defined
  if ((.not.l_BS_magnons).and.iq==1) then
-   if ( abs(Eo_eh) < Dip%Energy_treshold .and. trim(global_gauge)=="length") K_Transition_check=.FALSE.
+   if ( abs(Eo_eh) < Dip%Energy_threshold .and. trim(global_gauge)=="length") K_Transition_check=.FALSE.
  endif
  ! Null energy transitions are always removed
  if ( abs(E_eh) == 0._SP )  K_Transition_check=.FALSE.
@@ -61,6 +62,7 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
  ! Occupations Factors
  !=====================
  !
+ ! f_res = (fv-fc) [ f_ares = (fc-fv) ]
  f_eh_REF =(Ken%f(iv,ikp,i_sp_v)-Ken%f(ic,ik ,i_sp_c))/spin_occ
  f_eh     =f_eh_REF
  f_eh_RES =f_eh_REF
@@ -76,6 +78,7 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
    !
    if (     EQ_Transitions) f_eh_REF=f_eh_EQ
    if (.not.NEQ_Kernel    ) f_eh    =f_eh_EQ
+   ! NEQ_Residuals means we have in input ndb.RT_carriers
    if (.not.NEQ_Residuals ) f_eh_RES=f_eh_EQ
    !
  endif
@@ -89,38 +92,58 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
    f_h = (spin_occ-Ken%f(iv,ikp,i_sp_v))/spin_occ
  endif
  !
- if ( E_eh>=0._SP ) f_eh_PL =  Ken%f(ic,ikp,i_sp_c)*(spin_occ-Ken%f(iv,ik ,i_sp_v))/spin_occ/spin_occ
- if ( E_eh< 0._SP ) f_eh_PL = -Ken%f(iv,ik ,i_sp_v)*(spin_occ-Ken%f(ic,ikp,i_sp_c))/spin_occ/spin_occ
+ !  f< =  fc(1-fv)
+ f_eh_PL(1) =  Ken%f(ic,ikp,i_sp_c)*(spin_occ-Ken%f(iv,ik ,i_sp_v))/spin_occ/spin_occ
+ !  f> =  fv(1-fc)
+ f_eh_PL(2) =  Ken%f(iv,ik ,i_sp_v)*(spin_occ-Ken%f(iv,ikp,i_sp_c))/spin_occ/spin_occ
+ !
+ !!  f< =  fc(1-fv)
+ !if ( E_eh>=0._SP ) f_eh_PL =  Ken%f(ic,ikp,i_sp_c)*(spin_occ-Ken%f(iv,ik ,i_sp_v))/spin_occ/spin_occ
+ !! -f> = -fv(1-fc)
+ !! Here we put a minus to mimic what happens in the retarded functions
+ !if ( E_eh< 0._SP ) f_eh_PL = -Ken%f(iv,ik ,i_sp_v)*(spin_occ-Ken%f(ic,ikp,i_sp_c))/spin_occ/spin_occ
  !
 #if defined _RT
- if(EQ_NoOcc) then
-   f_eh_REF=1._SP
-   f_eh    =1._SP
-   f_eh_RES=1._SP
-   f_eh_EQ =1._SP
-   f_e     =1._SP
-   f_h     =1._SP
+ if(EQ_NoOcc.or.ALL_NoOcc) then
+   f_eh_REF=spin_occ
+   f_eh    =spin_occ
+   f_eh_RES=spin_occ
+   f_eh_EQ =spin_occ
+   f_e     =spin_occ
+   f_h     =spin_occ
  endif
 #endif
  !
  ! A 0.001 % fractional occupation swiches on the particular treatment of finite temp. BSE 
  if ( abs(abs(f_eh_REF)-1._SP)> filled_tresh ) BS_not_const_eh_f=.TRUE.
  !
- ! f_eh<0 transitions are included in the anti-resonant part
- if ( f_eh_REF*(-1._SP)**(i_res_ares-1)< 0._SP ) K_Transition_check=.FALSE.
+ if (l_BS_res_from_E) then
+   !
+   ! E_eh<0 transitions are included in the anti-resonant part
+   if ( E_eh*(-1._SP)**(i_res_ares-1)< 0._SP ) K_Transition_check=.FALSE.
+   !
+   if ( E_eh==0._SP .and. f_eh_REF*(-1._SP)**(i_res_ares-1)<0._SP ) K_Transition_check=.FALSE.
+   !
+ else
+   !
+   ! f_eh<0 transitions are included in the anti-resonant part
+   if ( f_eh_REF*(-1._SP)**(i_res_ares-1)< 0._SP ) K_Transition_check=.FALSE.
+   !
+   !   f_eh=0 transitions give no contribution to absorption at the IP level
+   ! and they do not mix with others neither since the kernel is weighted by f_eh
+   !   However they contribute to the JDOS. We need to take only the E>0 component,
+   ! since the E<0 component is then accounted for by the anti-resonant part
+   if ( f_eh_REF==0._SP .and. E_eh*(-1._SP)**(i_res_ares-1)<0._SP ) K_Transition_check=.FALSE.
+   !
+ endif
  !
- !   f_eh=0 transitions give no contribution to absorption at the IP level
- ! and they do not mix with others neither since the kernel is weighted by f_eh
- !   However they contribute to the JDOS. We need to take only the E>0 component,
- ! since the E<0 component is then accounted for by the anti-resonant part
- if ( f_eh_REF==0._SP .and. E_eh*(-1._SP)**(i_res_ares-1)<0._SP ) K_Transition_check=.FALSE.
  !   f_eh_RES=0 transitions give issue in PL. Removed in this case
  if (l_BS_photolum .and. f_eh_RES==0._SP) K_Transition_check=.FALSE.
  !
 #if defined _RT
  !
  ! In case of population inversion, it may be problematic if f_eh>0. and f_eh_RES<0.
- if ( f_eh_REF*f_eh<0._SP .or. f_eh_REF*f_eh_RES<0._SP ) then
+ if ( (f_eh_REF*f_eh<0._SP .or. f_eh_REF*f_eh_RES<0._SP) .and. .not.l_rpa_IP) then
     call error(' I cannot split the R and K BSE contribution.'//&
 &              ' ACTION: Use the same occupations in K and R.')
  endif
@@ -143,4 +166,8 @@ logical function K_Transition_check(Ken,Dip,iq,ik,ikp,iv,ic,i_sp_c,i_res_ares,&
  if (any((/abs(E_eh)<BS_eh_en(1).and.BS_eh_en(1)>0._SP,&
 &          abs(E_eh)>BS_eh_en(2).and.BS_eh_en(2)>0._SP/))) K_Transition_check=.FALSE.
  !
+#if defined _RT
+ if(ALL_NoOcc) K_Transition_check=All_NoOcc
+#endif
+ !
 end function
diff --git a/src/bse/K_Transitions_setup.F b/src/bse/K_Transitions_setup.F
index b4651a6a20..72c8121368 100644
--- a/src/bse/K_Transitions_setup.F
+++ b/src/bse/K_Transitions_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS IMA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
  !
  ! Filling of Transition groups
@@ -25,11 +29,11 @@ subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
  use BS_solvers,   ONLY:BSS_mode,BSS_uses_DbGd
  use BS,           ONLY:T_group,BS_bands,BS_T_grp,BS_nT_grps,BS_n_eh_spaces,&
 &                       BS_Blocks_and_Transitions_alloc,BS_perturbative_SOC,&
-&                       BS_nkFGbz_in_Tgrp
+&                       BS_nkFGbz_in_Tgrp,BS_bands_frozen
  use DIPOLES,      ONLY:DIPOLE_t
  use parallel_m,   ONLY:PAR_IND_WF_b_and_k,PAR_IND_T_ordered,PAR_IND_T_groups,&
 &                       PAR_IND_T_Haydock,PP_indexes_reset
- use electrons,    ONLY:levels,n_sp_pol
+ use electrons,    ONLY:levels,n_sp_pol,i_spin_majority
  use com,          ONLY:msg
  use R_lattice,    ONLY:qindx_X,bz_samp
  use BS,           ONLY:l_BS_magnons,l_BS_photolum
@@ -38,8 +42,9 @@ subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
  use drivers,      ONLY:l_rt_carriers_in_use
  use RT_control,   ONLY:EQ_Transitions,EQ_NoOcc
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),    intent(in)   :: Ken
  type(bz_samp),   intent(in) :: Xk
@@ -49,7 +54,7 @@ subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
  !
  ! Work Space
  !
- real(SP)     :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL
+ real(SP)     :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL(2)
  logical      :: l_trans_in_group,l_grp_to_grp
  integer      :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2,  &
 &                i_T_now,i_T_group_now,i_T_group_last,i_T_group_start,i_g,n_loops,i_loop,i_T_in_the_grp,i_res_ares,&
@@ -83,9 +88,6 @@ subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
    !
    nColFG_in_T_group_now=1
    !
-   ! DS: WARNING
-   ! I cannot revert ik and i_sp_pol loop here ...
-   !
    do i_res_ares=1,BS_n_eh_spaces
      !
      i_T_group_start=i_T_group_now+1
@@ -108,13 +110,18 @@ subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
        ! since i_k_bz and i_p_bz change at each iteration of the loop
        !
        do iv=V_bands(1),V_bands(2)
+         !
+	 if (BS_bands_frozen(iv)==1) cycle
+	 !
          do ic=C_bands(1),C_bands(2)
+	   !
+	   if (BS_bands_frozen(ic)==1) cycle
            !
            do i_sp_c=1,n_sp_pol
              i_sp_v=i_sp_c
              !
              if (l_BS_magnons.and.n_sp_pol==2) then
-               if(i_sp_c==1) cycle
+               if(i_sp_c==i_spin_majority) cycle
                i_sp_v=mod(i_sp_c,n_sp_pol)+1
              endif
              !
@@ -192,8 +199,9 @@ subroutine K_Transitions_setup(iq,Ken,Xk,Dip,V_bands,C_bands)
                  endif
                end if
                !
-               ! To be fixed
-               if (l_BS_photolum) BS_T_grp(i_T_group_now)%f_PL(i_T_in_the_grp,i_res_ares) =f_eh_PL
+               if (l_BS_photolum) then 
+                 BS_T_grp(i_T_group_now)%f_PL(i_T_in_the_grp,:) =f_eh_PL
+               endif
                !
                BS_T_grp(i_T_group_now)%f_RES(i_T_in_the_grp)=f_eh_RES
                !
@@ -230,23 +238,25 @@ subroutine K_Transitions_dimensions(iq,Ken,Xk,Dip,V_bands,C_bands)
  !   BS_H_dim
  !   BS_nT_at_k
  !
- use pars,         ONLY:SP
- use stderr,       ONLY:intc
+ use pars,         ONLY:SP,schlen
+ use stderr,       ONLY:intc,STRING_split
+ use parser_m,     ONLY:parser
  use BS,           ONLY:BS_K_dim,BS_H_dim,BS_nT_at_k,BS_K_coupling,   &
 &                       l_BS_ares_from_res,BS_res_ares_n_mat,BS_bands,&
-&                       BS_n_eh_spaces
+&                       BS_n_eh_spaces,BS_bands_frozen
  use DIPOLES,      ONLY:DIPOLE_t
- use electrons,    ONLY:levels,n_sp_pol
+ use electrons,    ONLY:levels,n_sp_pol,i_spin_majority
  use com,          ONLY:msg
  use R_lattice,    ONLY:qindx_X,bz_samp
  use BS,           ONLY:l_BS_magnons
 #if defined _RT
  use electrons,    ONLY:n_full_bands,n_met_bands
  use drivers,      ONLY:l_rt_carriers_in_use
- use RT_control,   ONLY:EQ_Transitions,EQ_NoOcc
+ use RT_control,   ONLY:EQ_Transitions,ALL_NoOcc
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),    intent(in) :: Ken
  type(bz_samp),   intent(in) :: Xk
@@ -256,13 +266,12 @@ subroutine K_Transitions_dimensions(iq,Ken,Xk,Dip,V_bands,C_bands)
  !
  ! Work Space
  !
- real(SP)     :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL
+ real(SP)     :: E_eh,E_eh_SOC_corr(2/n_sp_pol),f_eh,f_eh_RES,f_eh_PL(2)
  integer      :: ikbz,ikpbz,iv,ic,ik,ikp,i_sp_v,i_sp_c,i_star,ib1,ib2,  &
 &                i_res_ares,BS_nT_at_k_tmp(Xk%nibz*2)
  !
  logical, external :: K_Transition_check
  !
- !
  call section("+P",'Transition pre-analysis @q'//trim(intc(iq)))
  !
  call k_build_up_BZ_tables(Xk)
@@ -272,17 +281,29 @@ subroutine K_Transitions_dimensions(iq,Ken,Xk,Dip,V_bands,C_bands)
  !
  V_bands=(/BS_bands(1),maxval(Ken%nbm)/)
  C_bands=(/minval(Ken%nbf)+1,BS_bands(2)/)
+ !
 #if defined _RT
  ! Bands range. In the _RT case %nbf and %nbm turn metallic when carriers are used.
  if (l_rt_carriers_in_use.and.EQ_Transitions) then
    V_bands=(/BS_bands(1),maxval(n_met_bands)/)
    C_bands=(/minval(n_full_bands)+1,BS_bands(2)/)
  endif
- if (EQ_NoOcc) then
-   V_bands=BS_bands
-   C_bands=BS_bands
- endif
+ !if (ALL_NoOcc) then
+ !  l_skip_unoccupied=.false.
+ !  V_bands=BS_bands
+ !  C_bands=BS_bands
+ !endif
 #endif
+ !
+ if (BS_bands(2) < V_bands(2) ) then
+   call exp_user_warning("BS_bands(2) lower then number of metallic bands")
+   V_bands(2)=BS_bands(2)
+ endif
+ !
+ if (BS_bands(1) > C_bands(1) ) then
+   call exp_user_warning("BS_bands(1) greater then number of filled bands")
+   C_bands(1)=BS_bands(1)
+ endif
  !
  do i_res_ares=1,BS_n_eh_spaces
    !
@@ -291,7 +312,7 @@ subroutine K_Transitions_dimensions(iq,Ken,Xk,Dip,V_bands,C_bands)
      i_sp_v=i_sp_c
      !
      if (l_BS_magnons.and.n_sp_pol==2) then
-       if(i_sp_c==1) cycle
+       if(i_sp_c==i_spin_majority) cycle
        i_sp_v=mod(i_sp_c,n_sp_pol)+1
      endif
      !
@@ -303,7 +324,12 @@ subroutine K_Transitions_dimensions(iq,Ken,Xk,Dip,V_bands,C_bands)
          ikp  =Xk%sstar(ikpbz,1)
          !
          do iv=V_bands(1),V_bands(2)
+           !
+	   if (BS_bands_frozen(iv)==1) cycle
+	   !
            do ic=C_bands(1),C_bands(2)
+	     !
+	     if (BS_bands_frozen(ic)==1) cycle
              !
              if (i_res_ares==1) then; ib1=iv; ib2=ic; endif
              if (i_res_ares==2) then; ib1=ic; ib2=iv; endif
diff --git a/src/bse/K_WF_phases.F b/src/bse/K_WF_phases.F
index 852a0c3f2d..44bda05e80 100644
--- a/src/bse/K_WF_phases.F
+++ b/src/bse/K_WF_phases.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_WF_phases(Xk)
  !
  ! This routine evaluates the WF phases needed to evaluate the correlation
@@ -35,8 +39,9 @@ subroutine K_WF_phases(Xk)
  use electrons,     ONLY:n_sp_pol,n_spinor
  use interfaces,    ONLY:WF_apply_symm
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
diff --git a/src/bse/K_blocks.F b/src/bse/K_blocks.F
index 816e06da17..ec5593a868 100644
--- a/src/bse/K_blocks.F
+++ b/src/bse/K_blocks.F
@@ -5,7 +5,12 @@
 !
 ! Authors (see AUTHORS file for details): AM HM DS
 !
-subroutine K_blocks( )
+! headers
+!
+#include<y_memory.h>
+#include<parallel_memory.h>
+!
+subroutine K_blocks(iq)
  !
  ! Create and Fill the BSK blocks
  !
@@ -26,16 +31,20 @@ subroutine K_blocks( )
 &                       BS_K_dim,BS_Block_size,BS_LiveTiming_steps,BS_H_dim,n_BS_blks_min,&
 &                       BS_n_eh_spaces,BS_res_ares_n_mat,BS_IO_dim,l_BS_ares_from_res
  use com,          ONLY:msg
+ use y_memory_alloc
+ use y_memory_alloc_par
  !
-#include<parallel_memory.h>
+ implicit none
+ !
+ integer, intent(in)      :: iq
  !
  complex(SP), allocatable :: BS_MAT(:)
  !
  ! Work Space
  !
- integer(IPL) :: TMP_SIZE
+ integer(IPL) :: TMP_SIZE(1)
  integer      :: i_T,j_T,i_T_start,j_T_start,i_block,diag_size,iB
- integer      :: i_res_ares,j_res_ares,n_new_blocks,n_BS_blks_CPUs(ncpu)
+ integer      :: i_res_ares,j_res_ares,n_blocks_per_grp,n_BS_blks_CPUs(ncpu)
  integer(IPL) :: local_steps
  real(SP)     :: N_Ts_total,N_Ts_local
  !
@@ -43,8 +52,8 @@ subroutine K_blocks( )
  ! and the anti-res block can be derived from the res. In this case
  ! only the resonant transitions are included and we need for each pair 2 blocks
  !
- n_new_blocks=1
- if (BS_K_coupling.and.l_BS_ares_from_res) n_new_blocks=2
+ n_blocks_per_grp=1
+ if (BS_K_coupling.and.l_BS_ares_from_res.and.iq==1) n_blocks_per_grp=2
  !
  ! Count the blocks
  n_BS_blks=0
@@ -53,10 +62,15 @@ subroutine K_blocks( )
      !
      i_res_ares=BS_T_grp(i_T)%i_res_ares
      j_res_ares=BS_T_grp(j_T)%i_res_ares
+     !
+     ! Without coupling only R (and A if not l_BS_ares_from_res) are computed
      if((.not.BS_K_coupling) .and. i_res_ares/=j_res_ares) cycle
      !
+     ! With coupling and l_BS_ares_from_res only A and C are computed
+     if (l_BS_ares_from_res.and.i_res_ares>1) cycle
+     !
      if (.not.PAR_IND_T_ordered%element_2D(i_T,j_T)) cycle
-     n_BS_blks=n_BS_blks+n_new_blocks
+     n_BS_blks=n_BS_blks+n_blocks_per_grp
      !
    enddo
  enddo
@@ -78,18 +92,23 @@ subroutine K_blocks( )
      !
      i_res_ares=BS_T_grp(i_T)%i_res_ares
      j_res_ares=BS_T_grp(j_T)%i_res_ares
+     !
+     ! Without coupling only R (and A if not l_BS_ares_from_res) are computed
      if((.not.BS_K_coupling) .and. i_res_ares/=j_res_ares) cycle
      !
+     ! With l_BS_ares_from_res only R (and C if coupling) are computed
+     if (l_BS_ares_from_res.and.i_res_ares>1) cycle
+     !
      i_T_start=BS_T_grp(i_T)%i_T_ref
      j_T_start=BS_T_grp(j_T)%i_T_ref
      !
      if (.not.PAR_IND_T_ordered%element_2D(i_T,j_T)) cycle
      !
-     do i_block=1,n_new_blocks
+     do i_block=1,n_blocks_per_grp
        iB = n_BS_blks + i_block
        BS_blk(iB)%size=(/BS_T_grp(i_T)%size,BS_T_grp(j_T)%size/)
        TMP_SIZE=TMP_SIZE+int(BS_T_grp(i_T)%size*BS_T_grp(j_T)%size,IPL)
-       if (l_BS_ares_from_res) then
+       if (l_BS_ares_from_res.and.BS_n_eh_spaces==1) then
          if (i_block==1) BS_blk(iB)%mode="R"
          if (i_block==2) BS_blk(iB)%mode="C"
        else
@@ -114,7 +133,7 @@ subroutine K_blocks( )
        BS_blk(iB)%coordinate=(/sum(BS_T_grp(i_T_start:i_T-1)%size)+1,sum(BS_T_grp(j_T_start:j_T-1)%size)+1/)
      enddo
      !
-     n_BS_blks=n_BS_blks+n_new_blocks
+     n_BS_blks=n_BS_blks+n_blocks_per_grp
      !
    enddo
  enddo
@@ -126,7 +145,7 @@ subroutine K_blocks( )
  !
  ! Allocate the blocks
  !
- YAMBO_PAR_ALLOC_CHECK1(BS_MAT,(/TMP_SIZE/))
+ YAMBO_PAR_ALLOC_CHECK1(BS_MAT,TMP_SIZE)
  !
  ! Live Timing Blocks
  !====================
@@ -154,7 +173,7 @@ subroutine K_blocks( )
  !
  call PARALLEL_live_message("Kernel matrix elements",ENVIRONMENT="Response_T_space",LOADED_r=N_Ts_local,TOTAL_r=N_Ts_total)
  !
-end subroutine
+end subroutine K_blocks
 !
 !
 subroutine K_block_alloc( iB, mode )
@@ -162,8 +181,9 @@ subroutine K_block_alloc( iB, mode )
  use pars,         ONLY:SP,cZERO
  use BS,           ONLY:BS_blk,l_BSE_kernel_complete
  use BS_solvers,   ONLY:BSS_kernel_IO_on_the_fly
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(in) :: iB
  character(*), intent(in) :: mode
@@ -193,8 +213,9 @@ subroutine K_block_free( iB, mode )
  !
  use BS,           ONLY:BS_blk,l_BSE_kernel_complete
  use BS_solvers,   ONLY:BSS_kernel_IO_on_the_fly
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(in) :: iB
  character(*), intent(in) :: mode
diff --git a/src/bse/K_components_folded_in_serial_arrays.F b/src/bse/K_components_folded_in_serial_arrays.F
index 47cd16f8c4..148b5dfcbf 100644
--- a/src/bse/K_components_folded_in_serial_arrays.F
+++ b/src/bse/K_components_folded_in_serial_arrays.F
@@ -17,11 +17,12 @@ subroutine K_components_folded_in_serial_arrays(iq)
  use BS_solvers,     ONLY:BSS_eh_E_SOC_corr,&
 &                         BSS_dipoles_opt,BSS_eh_table,BSS_eh_E,BSS_eh_f,BSS_eh_f_RES,BSS_eh_W,BSS_eh_Z,BSS_alloc
  use BS,             ONLY:BS_K_dim,BS_nT_grps,BS_T_grp,BS_K_coupling,BS_perturbative_SOC,&
-&                         BS_n_eh_spaces,l_BS_optics,l_BS_magnons,l_BS_dichroism,l_BS_photolum
+&                         BS_n_eh_spaces,l_BS_optics,l_BS_magnons,l_BS_dichroism,l_BS_mespin,l_BS_meorb,&
+&                         l_BS_photolum,BS_dip_size,BS_H_dim,l_BS_ares_from_res
  use timing_m,       ONLY:timing
  use LIVE_t,         ONLY:live_timing
  use MAGNONS,        ONLY:BSS_dipoles_magn,BSS_MAGN_alloc
- use DICHROISM,      ONLY:BSS_dipoles_dich,BSS_DICH_alloc
+ use DICHROISM,      ONLY:BSS_dipoles_dich,BSS_DICH_alloc,BSS_dipoles_mespin,BSS_MEspin_alloc,BSS_dipoles_meorb,BSS_MEorb_alloc
  use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_alloc,BSS_PL_f,PL_weights
  use timing_m,       ONLY:timing
  !
@@ -32,7 +33,7 @@ subroutine K_components_folded_in_serial_arrays(iq)
  ! Work Space
  !
  integer     :: i_Tr,H_pos,i_g,i_g_start,i_k_bz,i_v,i_c,i_sp_c,i_sp_v,&
-&               BS_n_eh_spaces_solver,i_res_ares,i_res_ares_solver,ares_fac
+&               n_blocks_per_grp,i_res_ares,i_res_ares_solver,ares_fac
  !
  !========================
  ! BSS COMPONENTS SECTION
@@ -48,12 +49,17 @@ subroutine K_components_folded_in_serial_arrays(iq)
  call BSS_alloc( )
  call BSS_MAGN_alloc( )
  call BSS_DICH_alloc( )
+ call BSS_MEspin_alloc( )
+ call BSS_MEorb_alloc( )
  call BSS_PL_alloc( )
  !
  if (BS_nT_grps>0) call live_timing("Unfolding BSE components",PAR_IND_T_groups%n_of_elements(PAR_COM_eh_INDEX%CPU_id+1))
  !
- BS_n_eh_spaces_solver=1
- if(BS_n_eh_spaces==1.and.BS_K_coupling) BS_n_eh_spaces_solver=2
+ ! In the coulping case at q=1 when there are 2 blocks per group
+ ! See also src/bse/K_blocks.F and K_dipoles.F
+ !
+ n_blocks_per_grp=1
+ if (BS_K_coupling.and.l_BS_ares_from_res.and.iq==1) n_blocks_per_grp=2
  !
  ! FILLING BSS_eh_* and BSS_dipoles
  !==================================
@@ -64,7 +70,7 @@ subroutine K_components_folded_in_serial_arrays(iq)
    !
    if (.not.PAR_IND_T_groups%element_1D(i_g)) cycle
    !
-   if(BS_n_eh_spaces==2) then
+   if(n_blocks_per_grp==1) then
      i_res_ares=BS_T_grp(i_g)%i_res_ares
      i_g_start =BS_T_grp(i_g)%i_T_ref
    endif
@@ -77,12 +83,7 @@ subroutine K_components_folded_in_serial_arrays(iq)
      i_sp_c=BS_T_grp(i_g)%table(i_Tr,4)
      i_sp_v=BS_T_grp(i_g)%table(i_Tr,5)
      !
-     !  This loop is for the case
-     !  R/A symm    RES/RET/CPL   BSS_f/BSS_E/BSS_dip   res_ares_dim    BS_mats_to_solve     BS_mat_size    
-     !    S          CPL             BS_H_dim                 1              1                 BS_H_dim
-     !  where the antiresonant transitions are not in the groups
-     !
-     do i_res_ares_solver=1,BS_n_eh_spaces_solver
+     do i_res_ares_solver=1,n_blocks_per_grp
        !
        H_pos=sum(BS_T_grp(i_g_start:I_g-1)%size)+i_Tr+(max(i_res_ares_solver,i_res_ares)-1)*BS_K_dim(1)
        ares_fac=(-1)**(i_res_ares_solver-1)
@@ -92,23 +93,27 @@ subroutine K_components_folded_in_serial_arrays(iq)
        BSS_eh_E(H_pos)=BS_T_grp(i_g)%E(i_Tr,1)*ares_fac
        BSS_eh_f(H_pos)=BS_T_grp(i_g)%f(i_Tr)*ares_fac
        BSS_eh_f_RES(H_pos)=BS_T_grp(i_g)%f_RES(i_Tr)*ares_fac
+       if (l_BS_photolum) then
+         BSS_PL_f(H_pos,i_res_ares_solver)=BS_T_grp(i_g)%f_PL(i_Tr,i_res_ares_solver)
+         if (BS_n_eh_spaces==1.and.n_blocks_per_grp==1) &
+         & BSS_PL_f(H_pos,2)=BS_T_grp(i_g)%f_PL(i_Tr,2)
+       endif
+       !
        if (allocated(BS_T_grp(i_g)%Z)) BSS_eh_Z(H_pos)=BS_T_grp(i_g)%Z(i_Tr)*ares_fac
        if (allocated(BS_T_grp(i_g)%W)) BSS_eh_W(H_pos)=BS_T_grp(i_g)%W(i_Tr)*ares_fac
-       !
        if (BS_perturbative_SOC) BSS_eh_E_SOC_corr(:,H_pos)=BS_T_grp(i_g)%E_SOC_corr(:,i_Tr)*ares_fac
        !
-       if (l_BS_photolum) then
-         BSS_PL_f(H_pos)=BS_T_grp(i_g)%f_PL(i_Tr,i_res_ares_solver)
-         if (BS_n_eh_spaces==1.and.BS_n_eh_spaces_solver==1) BSS_PL_f(H_pos+BS_K_dim(1))=BS_T_grp(i_g)%f_PL(i_Tr,2)
-       endif
-       !
        if (l_BS_optics) BSS_dipoles_opt(:,H_pos)=BS_T_grp(i_g)%dipoles_opt(:,i_Tr,i_res_ares_solver)
        !
        if (l_BS_magnons) BSS_dipoles_magn(:,H_pos)=BS_T_grp(i_g)%dipoles_mag(:,i_Tr,i_res_ares_solver)
        !
-       if (l_BS_dichroism) BSS_dipoles_dich(:,H_pos)=BS_T_grp(i_g)%dipoles_dic(:,i_Tr,i_res_ares_solver)
+       if (l_BS_dichroism) BSS_dipoles_dich(:,H_pos)=BS_T_grp(i_g)%dipoles_orb(:,i_Tr,i_res_ares_solver)
+       !
+       if (l_BS_mespin) BSS_dipoles_mespin(:,H_pos)=BS_T_grp(i_g)%dipoles_spin(:,i_Tr,i_res_ares_solver)
        !
-       if (l_BS_photolum) BSS_dipoles_PL(:,H_pos)=BS_T_grp(i_g)%dipoles_opt(:,i_Tr,i_res_ares_solver)*PL_weights(:)
+       if (l_BS_meorb)  BSS_dipoles_meorb(:,H_pos)=BS_T_grp(i_g)%dipoles_orb(:,i_Tr,i_res_ares_solver)
+       !
+       if (l_BS_photolum) BSS_dipoles_PL(:,H_pos)=BS_T_grp(i_g)%dipoles_opt(:,i_Tr,i_res_ares_solver)*PL_weights(:BS_dip_size)
        !
      enddo
      !
@@ -157,6 +162,16 @@ subroutine K_components_folded_in_serial_arrays(iq)
    call PP_redux_wait(BSS_dipoles_dich,COMM=PAR_COM_eh_INDEX%COMM)
  endif
  !
+ if (l_BS_mespin) then
+   call PP_redux_wait(BSS_dipoles_mespin,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
+   call PP_redux_wait(BSS_dipoles_mespin,COMM=PAR_COM_eh_INDEX%COMM)
+ endif
+ !
+ if (l_BS_meorb) then
+   call PP_redux_wait(BSS_dipoles_meorb,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
+   call PP_redux_wait(BSS_dipoles_meorb,COMM=PAR_COM_eh_INDEX%COMM)
+ endif
+ !
  if (l_BS_photolum) then
    call PP_redux_wait(BSS_dipoles_PL,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
    call PP_redux_wait(BSS_dipoles_PL,COMM=PAR_COM_eh_INDEX%COMM)
diff --git a/src/bse/K_correlation_collisions_dir.F b/src/bse/K_correlation_collisions_dir.F
index a209061a96..6ee05f8770 100644
--- a/src/bse/K_correlation_collisions_dir.F
+++ b/src/bse/K_correlation_collisions_dir.F
@@ -4,8 +4,13 @@
 ! Copyright (C) 2022 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): AM, DS
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
 ! 
-subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
+subroutine K_correlation_collisions_dir(iq,i_block,win1_ID,win2_ID,Xk,q)
  !
  ! This routine evaluates the correlation collisions:
  !
@@ -20,11 +25,13 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
  use collision_el,  ONLY:elemental_collision_free,elemental_collision_alloc
  use openmp,        ONLY:OPENMP_update,master_thread
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
- integer,       intent(in)  :: iq,i_block,qindx_ID_frag
+ integer,       intent(in)  :: iq,i_block
+ integer,       intent(in)  :: win1_ID
+ integer,       intent(in)  :: win2_ID
  type(bz_samp), intent(in)  :: Xk,q
  !
  ! Work Space
@@ -33,7 +40,7 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
  !
  integer ::     i_k_bz,i_p_bz,i_k,i_p,i_kp_s,& 
 &               i_kmq_bz,i_pmq_bz,i_kmq,i_pmq,i_pmq_s,i_kp_mq_s,i_p_s,i_kmq_s,i_k_s,& 
-&               i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,I_Tgrp_k_st,I_Tgrp_p_st,H_pos(2),& 
+&               i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,I_Tgrp_k_st,I_Tgrp_p_st,H_pos(2),H_pos_start(2),& 
 &               i_v_k,i_v_p,i_c_k,i_c_p,i_k_sp_pol_c,i_p_sp_pol_c,i_k_sp_pol_v,i_p_sp_pol_v,iq_W,& 
 &               i_kmq_t,i_pmq_t 
  ! OMP LAST DEF
@@ -107,16 +114,19 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
  mode_now=BS_blk(i_block)%mode
  if (.not.l_BS_ares_from_res) mode_now="F"
  !
+ H_pos_start(1)=sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+&
+ &             (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1)
+ H_pos_start(2)=sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+&
+ &             (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1)
+ !
  select case (mode_now)
  case("R","A","F")
    !
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk 
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
        if (H_pos(1)>H_pos(2)) cycle
        !
@@ -145,11 +155,8 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk 
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
        if (H_pos(1)>H_pos(2)) cycle
        !
@@ -198,10 +205,8 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk 
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
        if (H_pos(1)>H_pos(2)) cycle
        !
@@ -234,10 +239,8 @@ subroutine K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk 
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
        if (H_pos(1)>H_pos(2)) cycle
        !
@@ -281,9 +284,13 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        &                        j_v_k,j_c_k,j_k_sp_pol_v,j_k_sp_pol_c,j_k_s,   &
        &                        j_p_bz_last,j_k_bz_last,j_q_W,j_q_W_bz_last,j_g_W_last)
        !
-       use R_lattice,     ONLY:qindx_B,qindx_B_load
+       use R_lattice,     ONLY:qindx_B
        use D_lattice,     ONLY:sop_inv,sop_tab
        use parallel_m,    ONLY:PAR_K_scheme
+       use mpi
+       ! Using the following line instead of the general use mpi
+       ! Gives compilation error with impi due to MPI_Get
+       !use mpi,           ONLY:MPI_INTEGER,MPI_ADDRESS_KIND,MPI_Get
        !
        implicit none
        !
@@ -300,6 +307,12 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        &                        j_kmq_s_m1,j_k_bz_mem,qindx_tmp(2),    &
        &                        j_q_W_bz,j_q_W_s,j_g_W
        !
+       ! MPI window
+       !
+       integer(kind=MPI_ADDRESS_KIND) :: get_index
+       integer  :: mpi_err
+       integer  :: remote_id
+       !
        j_k_bz  = BS_T_grp(j_Tgrp_k)%table(j_Tk,1) 
        j_p_bz  = BS_T_grp(j_Tgrp_p)%table(j_Tp,1) 
        ! 
@@ -328,12 +341,13 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        if (j_p_bz_last/=j_p_bz.or.j_k_bz_last/=j_k_bz) then 
          j_p_bz_last=j_p_bz 
          j_k_bz_last=j_k_bz 
-         if (j_k_bz_mem==0) then 
-           !DEV_OMP critical 
-           qindx_tmp=qindx_B_load(j_p_bz,j_k_bz,qindx_ID_frag) 
+         if (j_k_bz_mem<0) then 
+           get_index = (-j_k_bz_mem-1)*Xk%nbz+j_p_bz-1
+           remote_id=PAR_K_scheme%bz_id(i_k_bz)
+           call MPI_Get(qindx_tmp(1), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win1_ID, mpi_err)
+           call MPI_Get(qindx_tmp(2), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win2_ID, mpi_err)
            j_q_W_bz=qindx_tmp(1) 
            j_g_W   =qindx_tmp(2) 
-           !DEV_OMP end critical 
          else 
            j_q_W_bz=qindx_B(j_p_bz,j_k_bz_mem,1) 
            j_g_W   =qindx_B(j_p_bz,j_k_bz_mem,2) 
diff --git a/src/bse/K_correlation_collisions_std.F b/src/bse/K_correlation_collisions_std.F
index 9aa18494dc..cc088fe55c 100644
--- a/src/bse/K_correlation_collisions_std.F
+++ b/src/bse/K_correlation_collisions_std.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS HM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 !************************* WARNING **************************
 !* In this subroutine there is a bug for finite q (iq/=1)   *
 !* plus spin-orbit that we were not able to solve yet.      * 
@@ -14,7 +19,7 @@
 !* iq/=1 and spin_orbit is present.                         *
 !************************************************************
 !
-subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
+subroutine K_correlation_collisions_std(iq,i_block,win1_ID,win2_ID,Xk,q)
  !
  ! This routine evaluates the correlation collisions:
  !
@@ -37,16 +42,18 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
  use R_lattice,     ONLY:qindx_X,bz_samp
  use BS,            ONLY:O_ng,BS_bands,BS_res_K_corr,BS_blk,&
 &                        K_CORR_collision,BS_T_grp,&
-&                        BS_K_dim,l_BS_ares_from_res
+&                        BS_K_dim,l_BS_ares_from_res,l_BSE_kernel_full
  use electrons,     ONLY:n_sp_pol
  use collision_el,  ONLY:elemental_collision_free,elemental_collision_alloc
  use openmp,        ONLY:OPENMP_update,master_thread
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
- integer,       intent(in)  :: iq,i_block,qindx_ID_frag
+ integer,       intent(in)  :: iq,i_block
+ integer,       intent(in)  :: win1_ID
+ integer,       intent(in)  :: win2_ID
  type(bz_samp), intent(in)  :: Xk,q
  !
  ! Work Space
@@ -55,7 +62,7 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
  !
  integer ::     i_k_bz,i_p_bz,i_k,i_p,i_kp_s,& 
 &               i_kmq_bz,i_pmq_bz,i_kmq,i_pmq,i_kp_mq_s,& 
-&               i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,I_Tgrp_k_st,I_Tgrp_p_st,H_pos(2),& 
+&               i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,I_Tgrp_k_st,I_Tgrp_p_st,H_pos(2),H_pos_start(2),& 
 &               i_v_k,i_v_p,i_c_k,i_c_p,i_k_sp_pol_c,i_p_sp_pol_c,i_k_sp_pol_v,i_p_sp_pol_v,iq_W,& 
 &               i_kmq_t,i_pmq_t 
  ! OMP LAST DEF
@@ -129,18 +136,21 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
  mode_now=BS_blk(i_block)%mode
  if (.not.l_BS_ares_from_res) mode_now="F"
  !
+ H_pos_start(1)=sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+&
+ &             (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1)
+ H_pos_start(2)=sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+&
+ &             (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1)
+ !
  select case (mode_now)
  case("R","A","F")
    !
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk 
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
-       if (H_pos(1)>H_pos(2)) cycle
+       if (H_pos(1)>H_pos(2) .and. .not. l_BSE_kernel_full) cycle
        !
        call fill_indexes(i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,i_block,iq,               &
        &                        i_kp_s,i_s_star,i_kp_mq_s,i_s_mq_star,         &
@@ -170,12 +180,10 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
-       if (H_pos(1)>H_pos(2)) cycle
+       if (H_pos(1)>H_pos(2) .and. .not. l_BSE_kernel_full) cycle
        !
        call fill_indexes(i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,i_block,iq,               &
        &                        i_kp_s,i_s_star,i_kp_mq_s,i_s_mq_star,         &
@@ -224,12 +232,10 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
-       if (H_pos(1)>H_pos(2)) cycle
+       if (H_pos(1)>H_pos(2) .and. .not. l_BSE_kernel_full) cycle
        !
        call fill_indexes(i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,i_block,iq,               &
        &                        i_kp_s,i_s_star,i_kp_mq_s,i_s_mq_star,         &
@@ -262,12 +268,10 @@ subroutine K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
    do i_Tk=1,BS_T_grp(i_Tgrp_k)%size
      do i_Tp=1,BS_T_grp(i_Tgrp_p)%size
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+& 
-&              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-&              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1) 
+       H_pos(1) = H_pos_start(1)+i_Tk
+       H_pos(2) = H_pos_start(2)+i_Tp
        !
-       if (H_pos(1)>H_pos(2)) cycle
+       if (H_pos(1)>H_pos(2) .and. .not. l_BSE_kernel_full) cycle
        !
        call fill_indexes(i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,i_block,iq,               &
        &                        i_kp_s,i_s_star,i_kp_mq_s,i_s_mq_star,         &
@@ -321,9 +325,13 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        &                        j_v_k,j_c_k,j_k_sp_pol_v,j_k_sp_pol_c,         &
        &                        j_p_bz_last,j_k_bz_last,j_q_W,j_q_W_bz_last,j_g_W_last)
        !
-       use R_lattice,     ONLY:qindx_B,qindx_B_load
+       use R_lattice,     ONLY:qindx_B
        use D_lattice,     ONLY:sop_inv,sop_tab
        use parallel_m,    ONLY:PAR_K_scheme
+       use mpi
+       ! Using the following line instead of the general use mpi
+       ! Gives compilation error with impi due to MPI_Get
+       !use mpi,           ONLY:MPI_INTEGER,MPI_ADDRESS_KIND,MPI_Get
        !
        implicit none
        !
@@ -340,6 +348,12 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        &                        j_kmq_s,j_pmq_s,j_kmq_s_m1,j_k_bz_mem,qindx_tmp(2),    &
        &                        j_q_W_bz,j_q_W_s,j_g_W
        !
+       ! MPI window
+       !
+       integer(kind=MPI_ADDRESS_KIND) :: get_index
+       integer  :: mpi_err
+       integer  :: remote_id
+       !
        j_k_bz  = BS_T_grp(j_Tgrp_k)%table(j_Tk,1) 
        j_p_bz  = BS_T_grp(j_Tgrp_p)%table(j_Tp,1) 
        ! 
@@ -368,12 +382,13 @@ subroutine fill_indexes(j_Tk,j_Tp,j_Tgrp_k,j_Tgrp_p,j_block,j_q,            &
        if (j_p_bz_last/=j_p_bz.or.j_k_bz_last/=j_k_bz) then 
          j_p_bz_last=j_p_bz 
          j_k_bz_last=j_k_bz 
-         if (j_k_bz_mem==0) then 
-           !DEV_OMP critical 
-           qindx_tmp=qindx_B_load(j_p_bz,j_k_bz,qindx_ID_frag) 
+         if (j_k_bz_mem<0) then 
+           get_index = (-j_k_bz_mem-1)*Xk%nbz+j_p_bz-1
+           remote_id=PAR_K_scheme%bz_id(i_k_bz)
+           call MPI_Get(qindx_tmp(1), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win1_ID, mpi_err)
+           call MPI_Get(qindx_tmp(2), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win2_ID, mpi_err)
            j_q_W_bz=qindx_tmp(1) 
            j_g_W   =qindx_tmp(2) 
-           !DEV_OMP end critical 
          else 
            j_q_W_bz=qindx_B(j_p_bz,j_k_bz_mem,1) 
            j_g_W   =qindx_B(j_p_bz,j_k_bz_mem,2) 
diff --git a/src/bse/K_correlation_kernel_dir.F b/src/bse/K_correlation_kernel_dir.F
index b800ca05ed..121a010648 100644
--- a/src/bse/K_correlation_kernel_dir.F
+++ b/src/bse/K_correlation_kernel_dir.F
@@ -5,7 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
 #include<dev_defs.h>
+!
 ! 
 function K_correlation_kernel_dir(i_block,i_p,i_pmq,                               &
 &                                 i_k_s,i_p_s,i_n_k,i_n_p,i_kmq_s,i_pmq_s,i_m_k,i_m_p,  &
diff --git a/src/bse/K_correlation_kernel_std.F b/src/bse/K_correlation_kernel_std.F
index 6d39cd92c1..fd0bbf7bad 100644
--- a/src/bse/K_correlation_kernel_std.F
+++ b/src/bse/K_correlation_kernel_std.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM, DS
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 !************************* WARNING **************************
 !* In this subroutine there is a bug for finite q (iq/=1)   *
 !* plus spin-orbit that we were not able to solve yet.      * 
@@ -14,7 +19,6 @@
 !* iq/=1 and spin_orbit is present.                         *
 !************************************************************
 !
-#include<dev_defs.h>
 !
 function K_correlation_kernel_std(i_block,i_p,i_pmq,                               &
 &                                 i_k_s,i_kp_s,i_n_k,i_n_p,i_kmq_s,i_kp_mq_s,i_m_k,i_m_p,  &
@@ -28,8 +32,9 @@ function K_correlation_kernel_std(i_block,i_p,i_pmq,
  use R_lattice,    ONLY:DEV_VAR(G_m_G),DEV_VAR(g_rot)
  use wrapper,      ONLY:V_by_V_pwise_gpu,Vstar_dot_V_gpu
  use devxlib,      ONLY:devxlib_conjg_d,devxlib_xgemv_gpu
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,     intent(in) :: i_block,i_p,i_pmq,                                            &
 &                                 i_k_s,i_kp_s,i_n_k,i_n_p,i_kmq_s,i_kp_mq_s,i_m_k,i_m_p, &
diff --git a/src/bse/K_diago_driver.F b/src/bse/K_diago_driver.F
index fcc082c73b..6f4a8419a3 100644
--- a/src/bse/K_diago_driver.F
+++ b/src/bse/K_diago_driver.F
@@ -7,6 +7,8 @@
 !
 ! headers
 !
+#include<y_memory.h>
+!
 #if defined _SLEPC && !defined _NL
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
@@ -19,64 +21,58 @@ subroutine K_diago_driver(iq,W,X_static)
  !
  ! Generic diagonalization method to solve resonant and non resonant Hamiltonians.
  !
- ! 10/06/2015 Added by HM: Generic slepc method to solve 
- !
  use pars,           ONLY:SP
  use com,            ONLY:msg
  use stderr,         ONLY:intc
  use units,          ONLY:HA2EV
  use BS,             ONLY:BS_K_coupling,BS_H_dim,BS_K_dim,BS_perturbative_SOC,&
-&                         BS_res_ares_n_mat,l_BS_optics,l_BS_dichroism,l_BS_ares_from_res !,BS_Blocks_symmetrize_K,&
- use BS_solvers,     ONLY:io_BSS_diago,BSS_perturbative_width,BSS_mode, BSS_resize,BSS_n_eig_Input, &
+&                         BS_res_ares_n_mat,l_BS_ares_from_res,BS_nT_grps,&
+&                         l_BS_abs,l_BS_kerr,l_BS_kerr_asymm,l_BS_magnons,l_BS_photolum,l_BS_dichroism,&
+&                         l_BS_mespin,l_BS_meorb
+ use BS_solvers,     ONLY:io_BSS_diago,BSS_perturbative_width,BSS_mode,BSS_resize_only,BSS_resize_and_save,BSS_n_eig_Input, &
 &                         BSS_write_eig_2_db,BSS_eh_W,BSS_n_eig,BS_E,BSS_slepc_pseudo_herm,write_bs_ev_par,&
-&                         BSS_ydiago_solver,BSS_first_eig,BSS_target_E,BSS_trange_E,BSS_er,BSS_slepc_precondition
+&                         BSS_ldiago_solver,BSS_first_eig,BSS_target_E,BSS_trange_E,BSS_er,BSS_slepc_precondition,&
+&                         load_Diago
  use X_m,            ONLY:X_t,X_OUTPUT_driver
  use electrons,      ONLY:n_sp_pol,n_spinor
  use frequency,      ONLY:w_samp
  use com,            ONLY:isec
  use LIVE_t,         ONLY:live_timing
  use linear_algebra, ONLY:DIAGO
- use interfaces,     ONLY:K_diago_response_functions,LINEAR_ALGEBRA_driver,YDIAGO_driver,&
+ use interfaces,     ONLY:K_diago_response_functions,LINEAR_ALGEBRA_driver,LDIAGO_driver,&
  &                        PL_diago_residual,K_diago_L_res,K_diago_R_res,&
  &                        K_diago_perturbative, K_stored_in_a_big_matrix
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:REP,VERIFY,OP_WR_CL,OP_RD_CL
  use parser_m,       ONLY:parser
  use timing_m,       ONLY:timing
+ use parallel_m,     ONLY:CPU_structure
  use parallel_int,   ONLY:PP_wait
 #if defined _SLEPC && !defined _NL
- use interfaces,     ONLY:MATRIX_slepc
- use petscmat
- use slepceps
- use slepcepsdef
- use petscmatdef
- !
- use BS_solvers,     ONLY:BSS_slepc_approach,BSS_slepc_matrix_format,&
-&                         Slepc_v,BS_HAYVEC_free
-#endif
- use BS,             ONLY:l_BS_abs,l_BS_kerr,l_BS_kerr_asymm,l_BS_magnons,l_BS_photolum,l_BS_dichroism
- use MAGNONS,        ONLY:BSS_MAGN_free
- use PHOTOLUM,       ONLY:BSS_PL_free
- !
-#if defined _SLEPC && !defined _NL
+ use petscsys
  use slepceps,       ONLY:SlepcInitialize
  use petscmat,       ONLY:PETSC_NULL_CHARACTER
  use petscmatdef,    ONLY:tMat
+ !
+ use interfaces,     ONLY:MATRIX_slepc
+ use BS_solvers,     ONLY:BSS_slepc_approach,BSS_slepc_matrix_format,&
+&                         Slepc_v,BS_HAYVEC_free,BS_HAYVEC_alloc
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  ! 
  type(w_samp)  :: W
  integer       :: iq
  !
  ! Work Space
  !
- integer          :: i_BS_mat,Input_n_eig, BS_mat_dim, neigs_this_cpu, neig_shift, elpa_solver = 2
+ integer          :: i_BS_mat,BSS_n_eig_before_solver, BS_mat_dim, neigs_this_cpu, neig_shift, elpa_solver = 2
  logical          :: K_is_not_hermitian,l_diago,l_slepc,l_dip_more,l_target_erange,l_neig_input, &
- &                   l_ydiago,l_kerr_lft,l_kerr_rgt,l_abs_res
+ &                   l_ldiago,l_kerr_lft,l_kerr_rgt,l_abs_res
  character(1)     :: sec
  character(10)    :: Solver
- ! Ydiago varibles
+ ! Ldiago varibles
  integer, target  :: neigs_range(2)
  real(SP), target :: eigvals_range(2)
  !
@@ -91,66 +87,66 @@ subroutine K_diago_driver(iq,W,X_static)
  ! Excitonic hamiltonian, residuals independent
  !==============================================
  real(SP),allocatable    :: BS_E_real(:)
- real(SP)   ,pointer     :: BS_E_SOC_corr(:,:) !=> null()
+ real(SP)   ,pointer     :: BS_E_SOC_corr(:,:)
  complex(SP),allocatable :: BS_corrections(:,:)
  !
  ! Coupling
  complex(SP),allocatable         :: BS_mat(:,:)
  complex(SP),allocatable, target :: BS_V_left(:,:)
  complex(SP),allocatable, target :: BS_V_right(:,:)
- complex(SP),pointer             :: BS_overlap(:,:)   ! => null()
+ complex(SP),pointer             :: BS_overlap(:,:)
  !
  ! Residuals, different for each response function
  !=================================================
- complex(SP),pointer     :: BS_R_left_abs(:)     !=> null()
- complex(SP),pointer     :: BS_R_right_abs(:)    !=> null()
+ complex(SP),pointer     :: BS_R_left_abs(:)
+ complex(SP),pointer     :: BS_R_right_abs(:)
  !
- real(SP),   pointer     :: BS_R_PL(:,:)         !=> null()
+ real(SP),   pointer     :: BS_R_PL(:,:)
  !
- complex(SP),pointer     :: BS_R_left_kerr(:)    !=> null()
- complex(SP),pointer     :: BS_R_right_kerr(:)   !=> null()
- complex(SP),pointer     :: BS_R_right_dich(:,:) !=> null()
+ complex(SP),pointer     :: BS_R_left_kerr(:)
+ complex(SP),pointer     :: BS_R_right_kerr(:)
+ complex(SP),pointer     :: BS_R_right_dich(:,:)
+ complex(SP),pointer     :: BS_R_right_mespin(:,:)
+ complex(SP),pointer     :: BS_R_right_meorb(:,:)
  !
- complex(SP),pointer     :: BS_R_left_magn(:,:)  !=> null()
- complex(SP),pointer     :: BS_R_right_magn(:,:) !=> null()
+ complex(SP),pointer     :: BS_R_left_magn(:,:)
+ complex(SP),pointer     :: BS_R_right_magn(:,:)
  !
  ! Temporary pointer 
- complex(SP),pointer     :: tmp_res_ptrR(:)     !=> null()
+ complex(SP),pointer     :: tmp_res_ptr(:)
  !
  ! I/O
  integer                 :: io_err,ID
  type(X_t),intent(in)    :: X_static
  !
- !
- l_abs_res =l_BS_abs.or.l_BS_kerr.or.l_BS_dichroism
+ l_abs_res =l_BS_abs.or.l_BS_kerr.or.l_BS_dichroism.or.l_BS_mespin.or.l_BS_meorb
  l_kerr_lft=l_BS_kerr_asymm.or.l_BS_dichroism
  l_kerr_rgt=l_BS_kerr      .or.l_BS_dichroism
  !
+ l_slepc     = index(BSS_mode,'s')/=0
 #if defined _SCALAPACK
  l_diago     = index(BSS_mode,'o')/=0.or. (     l_BS_photolum.and.(index(BSS_mode,'d')/=0))
- l_ydiago    = index(BSS_mode,'d')/=0.and. .not.l_BS_photolum
+ l_ldiago    = index(BSS_mode,'d')/=0.and. .not.l_BS_photolum
 #else
  l_diago     = index(BSS_mode,'o')/=0 .or. index(BSS_mode,'d')/=0
- l_ydiago    = .false. ! This must be set to false else u.b
 #endif
- l_slepc     = index(BSS_mode,'s')/=0
  !
 #if defined _NL
- if (l_ydiago) then
+ if (l_ldiago) then
    ! The issue is present if yambo was configured in single precision
-   ! If the coufingure was done in double precision, the issue is not present
+   ! If the confingure was done in double precision, the issue is not present
    ! However, at this stage, it is not possible to know how the user configured yambo
-   call warning("non-linear optics forces DP compilation and breaks Ydiago library interface in SP")
-   l_ydiago = .false.
+   call warning("non-linear optics forces DP compilation and breaks Ldiago library interface in SP")
+   l_ldiago = .false.
    l_diago  = .true.
  endif
 #endif
  !
  if(l_diago) Solver="Diago"
+ if(l_ldiago) Solver="Ldiago" 
 #if defined _SLEPC && !defined _NL
  if(l_slepc) Solver="Slepc"
 #endif
- if(l_ydiago) Solver="Ydiago" 
  !
  call timing(trim(Solver)//' Solver',OPR='start')
  !
@@ -171,23 +167,11 @@ subroutine K_diago_driver(iq,W,X_static)
  if (.not.BSS_write_eig_2_db) call parser('WRbsWF',BSS_write_eig_2_db)
  call parser('BSSPertWidth',BSS_perturbative_width)
  !
- do i_BS_mat=1,BS_res_ares_n_mat
+ do i_BS_mat=1, BS_res_ares_n_mat
    !
    if(.not.BS_K_coupling) BS_mat_dim = BS_K_dim(i_BS_mat)
    if(     BS_K_coupling) BS_mat_dim = BS_H_dim
    !
-   NULLIFY(BS_overlap)
-   NULLIFY(BS_E_SOC_corr)
-   NULLIFY(BS_R_right_abs)
-   NULLIFY(BS_R_left_abs)
-   NULLIFY(BS_R_right_kerr)
-   NULLIFY(BS_R_left_kerr)
-   NULLIFY(BS_R_right_dich)
-   NULLIFY(BS_R_right_magn)
-   NULLIFY(BS_R_left_magn)
-   NULLIFY(BS_R_PL)
-   NULLIFY(tmp_res_ptrR)
-   !
    call BSS_n_eig_init()
    !
    ! Allocation (BS_R and BS_E)
@@ -199,11 +183,14 @@ subroutine K_diago_driver(iq,W,X_static)
    !========================
    call io_control(ACTION=OP_RD_CL,COM=REP,MODE=VERIFY,SEC=(/1,2/),ID=ID)
    io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
-  &                    BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
+  &                    BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,&
+  &                    BS_R_right_mespin,BS_R_right_meorb,BS_R_PL)
    !
    ! Kernel and/or BSS components loading
    !======================================
    if (io_err<0) then
+     !
+     if (load_Diago) call error("Loading of ndb.BS_diago failed in load diago mode") 
      !
      call K_components_folded_in_serial_arrays(iq)
      !
@@ -231,6 +218,8 @@ subroutine K_diago_driver(iq,W,X_static)
      !
 #if defined _SLEPC && !defined _NL
      if(l_slepc) then
+       !
+       call K_slepc_local_init()
        !
        if (index(BSS_slepc_matrix_format,"shell")>0) then
          ! 1. Here we will define a PetscShell matrix and define the matrix-vector multiplication
@@ -241,13 +230,17 @@ subroutine K_diago_driver(iq,W,X_static)
          ! 2. Here we create a distributed PETSC matrix from the BS_blks
          !    There are 2 cases
          ! control of hermitian / pseudo-hermitian / non hermitian is in part explicit here
+#if PETSC_VERSION_GE(3,22,0)
          if (BSS_slepc_pseudo_herm) then
            ! 2.1. Nest matrix uses pseudo-hermitian structure of BSE, only for the coupling case
            call K_stored_in_a_nest_matrix(i_BS_mat,iq,slepc_mat)
          else
+#endif
            ! 2.2. Single PETSc matrix, for resonant case or non-hermitian coupling
            call K_stored_in_a_slepc_matrix(i_BS_mat,iq,slepc_mat)
+#if PETSC_VERSION_GE(3,22,0)
          endif
+#endif
        endif        
      endif
 #endif
@@ -256,26 +249,26 @@ subroutine K_diago_driver(iq,W,X_static)
    !
    ! Allocation (BS_E_real or BS_V)
    !===============================
-   call local_alloc("V")
+   if (.not. l_ldiago) call local_alloc("V")
    !
    if (io_err<0) then
      !
      ! Diagonalization of the excitonic hamiltonian
      !==============================================
      if(l_diago) call live_timing('BSK diagonalize',1)
-     !
-     if (K_is_not_hermitian) then
+     ! 
+     if (K_is_not_hermitian) then !coupling
        !
 #if defined _SCALAPACK
-       if (l_ydiago) then 
+       if (l_ldiago) then 
          ! BS_V_left are only referenced in non-hermitian case
          ! In coupling case, if the user requested n values,
          ! we compute the first min(n/2,BS_H_dim/2) positive and 
          ! min(n/2,BS_ham/2) negative eigvals and eigvectors, making it a total of min(n,BS_ham)
-         call YDIAGO_driver(i_BS_mat, BS_E, BS_V_right, neigs_this_cpu, neig_shift, &
+         call LDIAGO_driver(i_BS_mat, BS_E, BS_V_right, neigs_this_cpu, neig_shift, &
          &                   neigs_range=neigs_range, eigvals_range=eigvals_range,  &
          &                   BS_VL=BS_V_left,BS_overlap=BS_overlap,                 &
-         &                   solver_type=BSS_ydiago_solver, elpasolver=elpa_solver)
+         &                   solver_type=BSS_ldiago_solver, elpasolver=elpa_solver)
          BSS_n_eig = size(BS_E)
       endif
 #endif
@@ -295,13 +288,13 @@ subroutine K_diago_driver(iq,W,X_static)
        endif
 #endif
        !
-     else
+     else !no coupling
        !
 #if defined _SCALAPACK
-       if (l_ydiago) then
-         call YDIAGO_driver(i_BS_mat, BS_E, BS_V_right, neigs_this_cpu, neig_shift, &
+       if (l_ldiago) then
+         call LDIAGO_driver(i_BS_mat, BS_E, BS_V_right, neigs_this_cpu, neig_shift, &
 &                           neigs_range=neigs_range, eigvals_range=eigvals_range,   &
-&                           solver_type=BSS_ydiago_solver, elpasolver=elpa_solver)
+&                           solver_type=BSS_ldiago_solver, elpasolver=elpa_solver)
          BSS_n_eig = size(BS_E)
        endif
 #endif
@@ -322,13 +315,15 @@ subroutine K_diago_driver(iq,W,X_static)
        !
      endif
      !
+     ! Here we are resizing all the residuals which have not been used yet.
+     ! We could just allocated them here.
+     !
 #if defined _SLEPC && !defined _NL
      !
      if (l_slepc) then
+       call K_slepc_local_free()
        ! BSS_n_eig could be lowered by MATRIX_slepc
-       if (Input_n_eig>BSS_n_eig) call K_resize_variables()
-       ! Destroy the matrix
-       call MatDestroy(slepc_mat,ierr)
+       if (BSS_n_eig_before_solver>BSS_n_eig) call K_slepc_resize_variables()
      endif
      !
 #endif
@@ -336,14 +331,15 @@ subroutine K_diago_driver(iq,W,X_static)
      if (l_diago) call live_timing( )
      !
 #if defined _SCALAPACK
-     if (l_ydiago) then
-       if (Input_n_eig /= BSS_n_eig) call K_resize_variables()
+     if (l_ldiago) then
+       ! BSS_n_eig could be lowered by LDIAGO_driver
+       if (BSS_n_eig_before_solver/=BSS_n_eig) call K_ldiago_resize_variables()
      endif
 #endif
      !
      ! Compute residuals
      !
-     YAMBO_ALLOC_P(tmp_res_ptrR,(BSS_n_eig))
+     YAMBO_ALLOC_P(tmp_res_ptr,(BSS_n_eig))
      !
      ! Construct the residuals of epsilon
      !====================================
@@ -355,26 +351,40 @@ subroutine K_diago_driver(iq,W,X_static)
      endif
      !
      if(l_BS_dichroism) then
-       call K_diago_R_res("dic1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptrR,BS_V_right)
-       BS_R_right_dich(:,1) = tmp_res_ptrR
-       call K_diago_R_res("dic2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptrR,BS_V_right)
-       BS_R_right_dich(:,2) = tmp_res_ptrR
+       call K_diago_R_res("dic1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_dich(:,1) = tmp_res_ptr
+       call K_diago_R_res("dic2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_dich(:,2) = tmp_res_ptr
+     endif
+     !
+     if(l_BS_mespin) then
+       call K_diago_R_res("mespin1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_mespin(:,1) = tmp_res_ptr
+       call K_diago_R_res("mespin2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_mespin(:,2) = tmp_res_ptr
+     endif
+     !
+     if(l_BS_meorb) then
+       call K_diago_R_res("meorb1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_meorb(:,1) = tmp_res_ptr
+       call K_diago_R_res("meorb2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_meorb(:,2) = tmp_res_ptr
      endif
      !
      if(l_BS_magnons) then
-       call K_diago_R_res("mag1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptrR,BS_V_right)
-       BS_R_right_magn(:,1) = tmp_res_ptrR
+       call K_diago_R_res("mag1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+       BS_R_right_magn(:,1) = tmp_res_ptr
        if(n_spinor==2) then
-         call K_diago_R_res("mag2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptrR,BS_V_right)
-         BS_R_right_magn(:,2) = tmp_res_ptrR
+         call K_diago_R_res("mag2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_right)
+         BS_R_right_magn(:,2) = tmp_res_ptr
        endif
      endif
      !
      ! Left residuals needs to be computed only if the kernel is not hermitian
      if (K_is_not_hermitian) then
        !
-       ! In the ydiago solver, the BS_overlap is an identity matrix.
-       if (.not.l_ydiago) call K_diago_overlap_matrix(BS_V_left,BS_V_right,BS_overlap)
+       ! In the ldiago solver, the BS_overlap is an identity matrix.
+       if (.not.l_ldiago) call K_diago_overlap_matrix(BS_V_left,BS_V_right,BS_overlap)
        !
        ! Left residuals
        if(l_abs_res) then
@@ -383,11 +393,11 @@ subroutine K_diago_driver(iq,W,X_static)
        endif
        !
        if(l_BS_magnons) then
-         call K_diago_L_res("mag1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptrR,BS_V_left,BS_overlap)
-         BS_R_left_magn(:,1) = tmp_res_ptrR
+         call K_diago_L_res("mag1",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_left,BS_overlap)
+         BS_R_left_magn(:,1) = tmp_res_ptr
          if(n_spinor==2) then
-           call K_diago_L_res("mag2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptrR,BS_V_left,BS_overlap)
-           BS_R_left_magn(:,2) = tmp_res_ptrR
+           call K_diago_L_res("mag2",i_BS_mat,BS_E,neigs_this_cpu,neig_shift,tmp_res_ptr,BS_V_left,BS_overlap)
+           BS_R_left_magn(:,2) = tmp_res_ptr
          endif
        endif
        !
@@ -400,15 +410,15 @@ subroutine K_diago_driver(iq,W,X_static)
        !
      endif
      !
-     YAMBO_FREE_P(tmp_res_ptrR)
+     YAMBO_FREE_P(tmp_res_ptr)
      !
      ! DS: this also should be changed according to the general structure
      if(l_BS_photolum) then
-       if (l_ydiago) call error("PL not implemented in Ydiago solver")
+       if (l_ldiago) call error("PL not implemented in Ldiago solver")
        if (K_is_not_hermitian) then
-         call PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap)
+         call PL_diago_non_herm_residual(BS_V_left,BS_V_right,BS_R_PL,BS_overlap)
        else
-         call PL_diago_residual(BS_V_right,BS_V_right,BS_R_PL,K_is_not_hermitian)
+         call PL_diago_herm_residual(BS_V_right,BS_R_PL)
        endif
      endif
      !
@@ -431,7 +441,8 @@ subroutine K_diago_driver(iq,W,X_static)
    ! Now I calculate the physical quantities
    !=========================================
    call K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
-   &               BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
+   &               BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,&
+   &               BS_R_right_mespin,BS_R_right_meorb,BS_R_PL)
    !
    ! Diagonalization DB (OUT)
    !==========================
@@ -439,33 +450,31 @@ subroutine K_diago_driver(iq,W,X_static)
      !
      call io_control(ACTION=OP_WR_CL,COM=REP,MODE=VERIFY,SEC=(/1,2,3/),ID=ID)
      io_err=io_BSS_diago(iq,i_BS_mat,ID,X_static,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
-     &     BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL,&
-     &                                                   BS_mat=BS_V_right,write_ev=.not.l_ydiago)
+     &     BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,&
+     &                                                   BS_R_right_mespin,BS_R_right_meorb,BS_R_PL,&
+     &                                                   BS_mat=BS_V_right,write_ev=.not.l_ldiago)
      !
      call PP_wait()
 #if defined _SCALAPACK
-     if (l_ydiago .and. BSS_write_eig_2_db) call write_bs_ev_par(iq, i_BS_mat, &
+     if (l_ldiago .and. BSS_write_eig_2_db) call write_bs_ev_par(iq, i_BS_mat, &
 &                             neigs_this_cpu, neig_shift, BS_V_right, BS_V_left)
 #endif
    endif
    !
+   ! CLEAN
+   !
    YAMBO_FREE(BS_E)
    call local_free( )
    !
  enddo
  !
- ! Initialize & write the output file 
- !====================================
- call K_OUTPUT(iq,W,"open dump close "//trim(Solver),"ALL")
- !
- ! CLEAN
- !
 #if defined _SLEPC && !defined _NL
- if(l_slepc) call K_slepc_local_free()
+ if (l_slepc) call SlepcFinalize(ierr)
 #endif
  !
- call BSS_PL_free( )
- call BSS_MAGN_free( )
+ ! Initialize & write the output file 
+ !====================================
+ call K_OUTPUT(iq,W,"open dump close "//trim(Solver),"ALL")
  !
  call timing(trim(Solver)//' Solver',OPR='stop')
  !
@@ -495,6 +504,12 @@ subroutine local_alloc(what)
        if (l_BS_dichroism) then
          YAMBO_ALLOC_P(BS_R_right_dich,(BSS_n_eig,2))
        endif
+       if (l_BS_mespin) then
+         YAMBO_ALLOC_P(BS_R_right_mespin,(BSS_n_eig,2))
+       endif
+       if (l_BS_meorb) then
+         YAMBO_ALLOC_P(BS_R_right_meorb,(BSS_n_eig,2))
+       endif
        if (l_BS_magnons) then
          YAMBO_ALLOC_P(BS_R_left_magn,(BSS_n_eig,2))
          YAMBO_ALLOC_P(BS_R_right_magn,(BSS_n_eig,2))
@@ -504,8 +519,6 @@ subroutine local_alloc(what)
        endif
        !
      case("V")
-       !
-       if (l_ydiago)  return
        !
        if(K_is_not_hermitian) then
          YAMBO_ALLOC(BS_V_left,(BS_H_dim,BSS_n_eig))
@@ -541,6 +554,8 @@ subroutine local_free()
      YAMBO_FREE_P(BS_R_right_kerr)
      YAMBO_FREE_P(BS_R_left_kerr)
      YAMBO_FREE_P(BS_R_right_dich)
+     YAMBO_FREE_P(BS_R_right_mespin)
+     YAMBO_FREE_P(BS_R_right_meorb)
      YAMBO_FREE_P(BS_R_right_magn)
      YAMBO_FREE_P(BS_R_left_magn)
      YAMBO_FREE_P(BS_R_PL)
@@ -548,36 +563,62 @@ subroutine local_free()
    end subroutine local_free
    !
    !
-   subroutine K_resize_variables()
-     !
-     ! We need to realloc as sizes can change.
-     if(allocated(BS_corrections)) call BSS_resize(BS_corrections)
-     if(associated(BS_E_SOC_corr)) call BSS_resize(BS_E_SOC_corr)
+   subroutine K_ldiago_resize_variables()
+    !
+    ! We need to realloc as sizes can change.
+    if(allocated(BS_corrections)) call BSS_resize_only(BS_corrections)
+    if(associated(BS_E_SOC_corr)) call BSS_resize_only(BS_E_SOC_corr)
+    !
+    if(associated(BS_R_left_abs) )  call BSS_resize_only(BS_R_left_abs)
+    if(associated(BS_R_right_abs))  call BSS_resize_only(BS_R_right_abs)
+    !
+    if(associated(BS_R_left_kerr) )   call BSS_resize_only(BS_R_left_kerr)
+    if(associated(BS_R_right_kerr))   call BSS_resize_only(BS_R_right_kerr)
+    if(associated(BS_R_right_dich))   call BSS_resize_only(BS_R_right_dich)
+    if(associated(BS_R_right_mespin)) call BSS_resize_only(BS_R_right_mespin)
+    if(associated(BS_R_right_meorb))  call BSS_resize_only(BS_R_right_meorb)
+    if(associated(BS_R_left_magn) ) call BSS_resize_only(BS_R_left_magn)
+    if(associated(BS_R_right_magn)) call BSS_resize_only(BS_R_right_magn)
+    if(associated(BS_R_PL)        ) call BSS_resize_only(BS_R_PL)
+    !
+   end subroutine K_ldiago_resize_variables
+   !
+#if defined _SLEPC && !defined _NL
+   !
+   subroutine K_slepc_resize_variables()
      !
-     if(associated(BS_R_left_abs) )  call BSS_resize(BS_R_left_abs)
-     if(associated(BS_R_right_abs))  call BSS_resize(BS_R_right_abs)
+     call warning(" resizing alla variables whose size depends on BSS_n_eig ")
+     if(allocated(BS_E))          call BSS_resize_and_save(BS_E)
      !
-     if(associated(BS_R_left_kerr) ) call BSS_resize(BS_R_left_kerr)
-     if(associated(BS_R_right_kerr)) call BSS_resize(BS_R_right_kerr)
-     if(associated(BS_R_right_dich)) call BSS_resize(BS_R_right_dich)
-     if(associated(BS_R_left_magn) ) call BSS_resize(BS_R_left_magn)
-     if(associated(BS_R_right_magn)) call BSS_resize(BS_R_right_magn)
-     if(associated(BS_R_PL)        ) call BSS_resize(BS_R_PL)
+     if(allocated(BS_corrections)) call BSS_resize_only(BS_corrections)
+     if(associated(BS_E_SOC_corr)) call BSS_resize_only(BS_E_SOC_corr)
      !
-     if (l_ydiago) return
+     if(associated(BS_R_left_abs) )  call BSS_resize_only(BS_R_left_abs)
+     if(associated(BS_R_right_abs))  call BSS_resize_only(BS_R_right_abs)
      !
-     if(allocated(BS_E))          call BSS_resize(BS_E)
+     if(associated(BS_R_left_kerr) )   call BSS_resize_only(BS_R_left_kerr)
+     if(associated(BS_R_right_kerr))   call BSS_resize_only(BS_R_right_kerr)
+     if(associated(BS_R_right_dich))   call BSS_resize_only(BS_R_right_dich)
+     if(associated(BS_R_right_mespin)) call BSS_resize_only(BS_R_right_mespin)
+     if(associated(BS_R_right_meorb))  call BSS_resize_only(BS_R_right_meorb)
+     if(associated(BS_R_left_magn) )   call BSS_resize_only(BS_R_left_magn)
+     if(associated(BS_R_right_magn))   call BSS_resize_only(BS_R_right_magn)
+     if(associated(BS_R_PL)        )   call BSS_resize_only(BS_R_PL)
      !
-     if(allocated(BS_V_left))   call BSS_resize(BS_V_left)
-     if(allocated(BS_V_right))  call BSS_resize(BS_V_right)
-     if(associated(BS_overlap)) call BSS_resize(BS_overlap,.true.)
+     if(allocated(BS_V_left))   call BSS_resize_and_save(BS_V_left)
+     if(allocated(BS_V_right))  call BSS_resize_and_save(BS_V_right)
+     if(associated(BS_overlap)) call BSS_resize_only(BS_overlap,.true.)
      !
-     if(allocated(BS_E_real))   call BSS_resize(BS_E_real)
+     if(allocated(BS_E_real))   call BSS_resize_and_save(BS_E_real)
      !
-   end subroutine K_resize_variables
+   end subroutine
    !
+#endif
    !
    subroutine K_local_init()
+     ! 
+     BSS_n_eig=BSS_n_eig_Input
+     !
      !
      K_is_not_hermitian=BS_K_coupling.or.(allocated(BSS_eh_W).and..not.BSS_perturbative_width)
      !
@@ -604,31 +645,30 @@ subroutine K_local_init()
        BSS_slepc_pseudo_herm = .false. 
        call warning('Preconditioning not supported in pseudo-hermitian case. Switching to non-hermitian diagonalization')
      endif
-
      !
 #if defined _SCALAPACK
-     if (l_ydiago) then
+     if (l_ldiago) then
        !
-       if (BSS_ydiago_solver/="s" .and. BSS_ydiago_solver/="e") then
-         call warning("Invalid ydiago solver. Switching to default")
+       if (BSS_ldiago_solver/="s" .and. BSS_ldiago_solver/="e") then
+         call warning("Invalid ldiago solver. Switching to default")
 #if defined _ELPA
-         BSS_ydiago_solver="e"
+         BSS_ldiago_solver="e"
 #else
-         BSS_ydiago_solver="s"
+         BSS_ldiago_solver="s"
 #endif
        endif
        !
 #if defined _GPU && defined _ELPA
-       if (BSS_ydiago_solver=="s") then
+       if (BSS_ldiago_solver=="s") then
          call warning("gpu ported code. Switching to Elpa solver")
-         BSS_ydiago_solver="e"
+         BSS_ldiago_solver="e"
        endif
 #endif
        !
 #if !defined _ELPA
-       if (BSS_ydiago_solver=="e") then
+       if (BSS_ldiago_solver=="e") then
          call warning("Elpa solver selected, but elpa not linked. Switching to scalapack")
-         BSS_ydiago_solver="s"
+         BSS_ldiago_solver="s"
        endif
 #endif
        !
@@ -637,39 +677,42 @@ subroutine K_local_init()
        ! Here I double the number of n_eig to get correct allocations
        if (l_neig_input .and. K_is_not_hermitian) BSS_n_eig=BSS_n_eig*2
        !
-       if (l_target_erange .and. BSS_ydiago_solver=="e") then
+       if (l_target_erange .and. BSS_ldiago_solver=="e") then
 #if defined _GPU && defined _ELPA
          call warning(' energy range not compatible with elpa. Switching it off on gpu')
          l_target_erange=.false.
 #else
          call warning(' energy range not compatible with elpa. Switching to scalapack on cpu')
-         BSS_ydiago_solver="s"
+         BSS_ldiago_solver="s"
 #endif
        endif
        !
-       if (BSS_first_eig>1 .and. BSS_ydiago_solver=="e") then
+       if (BSS_first_eig>1 .and. BSS_ldiago_solver=="e") then
 #if defined _GPU && defined _ELPA
          call warning(' eigenvalues range not compatible with elpa. Switching it off on gpu')
          BSS_first_eig=1
 #else
          call warning(' eigenvalues range not compatible with elpa. Switching to scalapack on cpu')
-         BSS_ydiago_solver="s"
+         BSS_ldiago_solver="s"
 #endif
        endif
        !
-       if (l_neig_input .and. BSS_ydiago_solver=="e" .and. K_is_not_hermitian) then
+       if (l_neig_input .and. BSS_ldiago_solver=="e" .and. K_is_not_hermitian) then
 #if defined _GPU && defined _ELPA
          call warning(' Few eigenvalues not compatible with non hermitian elpa. Switching it off on gpu')
          l_neig_input=.false.
 #else
          call warning(' Few eigenvalues not compatible with non hermitian elpa. Switching to scalapack on cpu')
-         BSS_ydiago_solver="s"
+         BSS_ldiago_solver="s"
 #endif
        endif
        !
      endif
 #endif
      !
+     ! LA for old diago solver
+     if (l_diago) call PARALLEL_assign_LA_COMMs("Response_T_space",DIAGO,CPU_structure(4)%nCPU_lin_algebra_DIAGO)
+     !
    end subroutine K_local_init
    !
    subroutine BSS_n_eig_init()
@@ -679,6 +722,20 @@ subroutine BSS_n_eig_init()
      !
      BSS_n_eig=BSS_n_eig_Input
      !
+     nullify(BS_overlap)
+     nullify(BS_E_SOC_corr)
+     nullify(BS_R_right_abs)
+     nullify(BS_R_left_abs)
+     nullify(BS_R_right_kerr)
+     nullify(BS_R_left_kerr)
+     nullify(BS_R_right_dich)
+     nullify(BS_R_right_mespin)
+     nullify(BS_R_right_meorb)
+     nullify(BS_R_right_magn)
+     nullify(BS_R_left_magn)
+     nullify(BS_R_PL)
+     nullify(tmp_res_ptr)
+     !
      if (l_slepc) then
        !
        if (BSS_n_eig<1) then
@@ -687,14 +744,9 @@ subroutine BSS_n_eig_init()
          call warning('N_eigen < 1. Calculating only 1% of the eigenvalues.')
        endif
        !
-       if (BSS_n_eig>BS_H_dim) then
-         call warning('N_eigen > BS_H_dim. Calculating 10% of the eigenvalues.')
-         BSS_n_eig = nint(BS_H_dim*0.1)
-       endif
-       !
-       if (BSS_n_eig>BS_K_dim(1) .and. BSS_slepc_pseudo_herm) then
-         call warning('N_eigen > BS_K_dim and pseudo-hermitian case. Calculating 10% of the eigenvalues.')
-         BSS_n_eig = nint(BS_K_dim(1)*0.1)
+       if (BSS_n_eig>BS_mat_dim) then
+         call warning('N_eigen > BS_mat_dim. Calculating 10% of the eigenvalues.')
+         BSS_n_eig = nint(BS_mat_dim*0.1)
        endif
        !
      endif
@@ -702,10 +754,10 @@ subroutine BSS_n_eig_init()
      if(l_diago) BSS_n_eig = BS_mat_dim
      !
 #if defined _SCALAPACK
-     ! set a Ydiago defaults.
-     if(l_ydiago) then
+     ! set a Ldiago defaults.
+     if(l_ldiago) then
        !
-       ! In case of ydiago, BSS_n_eig will be overwritten after the solver is called 
+       ! In case of ldiago, BSS_n_eig will be overwritten after the solver is called 
        if( BSS_n_eig==0 ) BSS_n_eig = BS_mat_dim
        !
        neigs_range  = 0
@@ -725,7 +777,7 @@ subroutine BSS_n_eig_init()
          eigvals_range= BSS_trange_E
        ! The following is a good idea but it is the source of many fails in the
        ! test-suite. Commenting it for now
-       !else if (BSS_ydiago_solver=="s") then
+       !else if (BSS_ldiago_solver=="s") then
        !  ! with scalapack use the energy range needed by default
        !  neigs_range  = 0
        !  eigvals_range= BSS_er
@@ -734,18 +786,32 @@ subroutine BSS_n_eig_init()
      endif
 #endif
      !
-     Input_n_eig=BSS_n_eig
+     BSS_n_eig_before_solver=BSS_n_eig
      !
      ! For backward compatibitly I set  
      neig_shift = 0
      neigs_this_cpu = BSS_n_eig
      !
-     ! These will be changed by the YDIAGO_driver routine
+     ! These will be changed by the LDIAGO_driver routine
      ! each cpu contains eigen-vectors from [neig_shift + 1, neigs_this_cpu + neig_shift]
      !
    end subroutine BSS_n_eig_init
    !
 #if defined _SLEPC && !defined _NL
+   !
+   subroutine K_slepc_local_init()
+     integer          :: domk(BS_nT_grps)
+     !
+     if (BSS_slepc_matrix_format=='shell') then
+       call PARALLEL_Haydock_VEC_COMMs('assign')
+       allocate(Slepc_v%Vi(BS_nT_grps))
+       allocate(Slepc_v%Vo(BS_nT_grps))
+       domk=1
+       call BS_HAYVEC_alloc(Slepc_v%Vi,domk)
+       call BS_HAYVEC_alloc(Slepc_v%Vo,domk)
+     endif
+     !
+   end subroutine K_slepc_local_init
    !
    subroutine K_slepc_local_free()
      !
@@ -757,7 +823,9 @@ subroutine K_slepc_local_free()
        call PARALLEL_Haydock_VEC_COMMs('reset')
      endif
      !
-     call SlepcFinalize(ierr)
+     ! Destroy the matrix
+     call MatDestroy(slepc_mat,ierr)
+     !
    end subroutine K_slepc_local_free
 #endif
    !
diff --git a/src/bse/K_diago_left_residuals.F b/src/bse/K_diago_left_residuals.F
index 629ae5dbcd..38825837f8 100644
--- a/src/bse/K_diago_left_residuals.F
+++ b/src/bse/K_diago_left_residuals.F
@@ -23,22 +23,23 @@ subroutine K_diago_L_res(mode, i_BS_mat, BS_E, &
  use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
  use LIVE_t,         ONLY:live_timing
  use X_m,            ONLY:global_gauge
- use BS,             ONLY:BS_H_dim,BS_K_dim,BS_K_coupling
+ use BS,             ONLY:BS_H_dim,BS_K_dim,BS_K_coupling,&
+ &                        l_BS_res_from_E,BS_not_const_eh_f
  use BS_solvers,     ONLY:BSS_dipoles_opt,BSS_eh_E,BSS_eh_Z,BSS_eh_f
  use MAGNONS,        ONLY:BSS_dipoles_magn
- use DICHROISM,      ONLY:BSS_dipoles_dich
+ use DICHROISM,      ONLY:BSS_dipoles_dich,BSS_dipoles_mespin,BSS_dipoles_meorb
  !
  implicit none
  !
  character(*),intent(in)  :: mode
  integer,     intent(in)  :: i_BS_mat, neigs_this_cpu, neig_shift
  complex(SP), allocatable,         intent(in)  :: BS_E(:)
- complex(SP), pointer,             intent(out) :: BS_R_left(:)
+ complex(SP), pointer,             intent(inout) :: BS_R_left(:)
  complex(SP), target, allocatable, intent(in)  :: BS_V_left(:,:)
  complex(SP), pointer, optional,   intent(in)  :: BS_Overlap(:,:)
  !
  ! If BS_Overlap is null(), then it is treated as a identity matrix
- ! Incase Ydiago solver is used, BS_Overlap is always null()
+ ! Incase Ldiago solver is used, BS_Overlap is always null()
  !
  ! Workspace
  !
@@ -51,6 +52,9 @@ subroutine K_diago_L_res(mode, i_BS_mat, BS_E, &
  complex(SP),allocatable :: tmp_res(:)
  complex(SP),pointer     :: BS_R_tmp(:) => null()
  !
+ if (l_BS_res_from_E.and.BS_not_const_eh_f) &
+ & call error(" To be fixed, since sqrt of BSS_eh_f takes a conjg and it should not")
+ !
  ! Sanity checks
  if (.not. allocated(BS_E))      return 
  if (.not. allocated(BS_V_left)) return 
@@ -108,6 +112,10 @@ subroutine K_diago_L_res(mode, i_BS_mat, BS_E, &
  if(trim(mode)=="opt2") tmp_res=BSS_dipoles_opt(2,res_range(1):res_range(2))
  if(trim(mode)=="dic1") tmp_res=BSS_dipoles_dich(1,res_range(1):res_range(2)) 
  if(trim(mode)=="dic2") tmp_res=BSS_dipoles_dich(2,res_range(1):res_range(2)) 
+ if(trim(mode)=="mespin1") tmp_res=BSS_dipoles_mespin(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="mespin2") tmp_res=BSS_dipoles_mespin(2,res_range(1):res_range(2)) 
+ if(trim(mode)=="meorb1") tmp_res=BSS_dipoles_meorb(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="meorb2") tmp_res=BSS_dipoles_meorb(2,res_range(1):res_range(2)) 
  if(trim(mode)=="mag1") tmp_res=BSS_dipoles_magn(1,res_range(1):res_range(2))
  if(trim(mode)=="mag2") tmp_res=BSS_dipoles_magn(2,res_range(1):res_range(2))
  !
@@ -130,7 +138,7 @@ subroutine K_diago_L_res(mode, i_BS_mat, BS_E, &
  !
  call live_timing()
  !
- ! Incase of Ydiago solver Overlap is always identity.
+ ! Incase of Ldiago solver Overlap is always identity.
  !
  if (trim(scheme)=="hermitian")  return
  !
diff --git a/src/bse/K_diago_perturbative.F b/src/bse/K_diago_perturbative.F
index e2f524a991..77f3c9ae36 100644
--- a/src/bse/K_diago_perturbative.F
+++ b/src/bse/K_diago_perturbative.F
@@ -104,7 +104,7 @@ subroutine K_diago_perturbative(pert_dim, what, i_BS_mat, &
  !
  if (BS_K_coupling .and. associated(BS_Overlap)) then 
     !
-    ! NM : If you use Ydiago solver, you should never see this because, Ydiago outputs 
+    ! NM : If you use Ldiago solver, you should never see this because, Ldiago outputs 
     ! left and right eigenvectors with overlap = null() i.e identity.
     call error(" BS pertubative residuals not implemented in case of Overlap matrix")
  endif
diff --git a/src/bse/K_diago_response_functions.F b/src/bse/K_diago_response_functions.F
index e7fed70cf5..421ac07e27 100644
--- a/src/bse/K_diago_response_functions.F
+++ b/src/bse/K_diago_response_functions.F
@@ -3,10 +3,11 @@
 !
 ! Copyright (C) 2015 The Yambo Team
 !
-! Authors (see AUTHORS file for details): DS AM MG
+! Authors (see AUTHORS file for details): DS AM MG TG
 !
 subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
- &                    BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
+ &                    BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,&
+ &                    BS_R_right_mespin,BS_R_right_meorb,BS_R_PL)
  !
  ! eps2(iw)= 1 - Sum  [BS_R_left(i)*BS_R_right(i)] / [w+i*eta - E(i)]  
  !
@@ -21,14 +22,15 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
  use LIVE_t,         ONLY:live_timing
  use BS,             ONLY:BS_perturbative_SOC,BS_res_ares_n_mat,l_BS_abs,l_BS_jdos,l_BS_esort,&
 &                         BS_K_anti_resonant,l_BS_ares_from_res,BS_K_coupling
- use BS_solvers,     ONLY:BSS_n_eig,Co_factor
+ use BS_solvers,     ONLY:BSS_n_eig,Co_factor,deg_exc_thrshld
  use R_lattice,      ONLY:bare_qpg,q0_def_norm
  use units,          ONLY:HA2EV
  use vec_operate,    ONLY:degeneration_finder,sort
  use BS_solvers,     ONLY:B_Hall,para_term_w0
- use X_m,            ONLY:Resp_ij,X_magnons,X_dichroism
+ use X_m,            ONLY:Resp_ij,X_magnons,X_dichroism,X_mespin,X_meorb
  use PHOTOLUM,       ONLY:PL,PL_prefactor
- use BS,             ONLY:l_BS_kerr,l_BS_kerr_asymm,l_BS_magnons,l_BS_photolum,l_BS_dichroism
+ use BS,             ONLY:l_BS_kerr,l_BS_kerr_asymm,l_BS_magnons,l_BS_photolum,&
+&                         l_BS_dichroism,l_BS_mespin,l_BS_meorb
  !
  implicit none
  !
@@ -38,7 +40,9 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
  complex(SP), pointer, intent(in) :: BS_R_left_abs(:),BS_R_right_abs(:)
  real(SP),    pointer, intent(in) :: BS_E_SOC_corr(:,:)
  complex(SP), pointer, intent(in) :: BS_R_left_magn(:,:),BS_R_right_magn(:,:)
- complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:)
+ complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),&
+                                   & BS_R_right_kerr(:),BS_R_right_dich(:,:),&
+                                   & BS_R_right_mespin(:,:),BS_R_right_meorb(:,:)
  real(SP),    pointer, intent(in) :: BS_R_PL(:,:)
  !
  ! Work space
@@ -49,7 +53,7 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
  real(SP), allocatable     :: vtmpR(:)
  complex(SP), allocatable  :: BS_E_sorted_tmp(:,:)
  integer, allocatable      :: sort_indx(:),first_exc(:),n_of_exc(:)
- complex(SP)               :: g_fac,res_kerr,res_dich,res_magn(2)
+ complex(SP)               :: g_fac,res_kerr,res_dich,res_mespin,res_meorb,res_magn(2),res_pl(2)
  logical                   :: l_ADD_the_ARES_using_the_RES
  !
  n_SOC=1
@@ -68,6 +72,8 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
    if(l_BS_kerr)      nVAR=nVAR+1
    if(l_BS_dichroism) nVAR=nVAR+1
    if(l_BS_magnons)   nVAR=nVAR+n_spinor
+   if(l_BS_mespin)    nVAR=nVAR+1
+   if(l_BS_meorb)     nVAR=nVAR+1
    allocate(BS_E_sorted_tmp(EDIM,nVAR))
    BS_E_sorted_tmp=cZERO
  endif
@@ -98,6 +104,12 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
      res_dich=(BS_R_left_abs(i1)*BS_R_right_dich(i1,1)+BS_R_left_kerr(i1)*BS_R_right_dich(i1,2))
      res_dich=res_dich*ares_fac/q0_def_norm/2._SP
    endif
+   if(l_BS_mespin) then
+     res_mespin=BS_R_left_abs(i1)*BS_R_right_mespin(i1,1)*ares_fac/q0_def_norm
+   endif
+   if(l_BS_meorb) then
+     res_meorb=BS_R_left_abs(i1)*BS_R_right_meorb(i1,1)*ares_fac/q0_def_norm
+   endif
    if(l_BS_magnons) then
      res_magn(1)=BS_R_left_magn(i1,1)*BS_R_right_magn(i1,1)*ares_fac
      res_magn(2)=BS_R_left_magn(i1,2)*BS_R_right_magn(i1,2)*ares_fac
@@ -158,6 +170,28 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
        endif
      endif
      !
+     if(l_BS_mespin) then
+       if(l_BS_esort.and.i_BS_mat==1) then
+         BS_E_sorted_tmp(i1,nVAR+2:nVAR+2)=res_mespin
+         nVAR=nVAR+1
+       endif
+       X_mespin(:,2)=X_mespin(:,2)-res_mespin/(W%p(:)-EXC_E(i_pert_SOC))
+       if (l_ADD_the_ARES_using_the_RES) then
+         X_mespin(:,2)=X_mespin(:,2)+conjg(res_mespin)/(W%p(:)+conjg(EXC_E(i_pert_SOC)))
+       endif
+     endif
+     !
+     if(l_BS_meorb) then
+       if(l_BS_esort.and.i_BS_mat==1) then
+         BS_E_sorted_tmp(i1,nVAR+2:nVAR+2)=res_mespin
+         nVAR=nVAR+1
+       endif
+       X_meorb(:,2)=X_meorb(:,2)-res_meorb/(W%p(:)-EXC_E(i_pert_SOC))
+       if (l_ADD_the_ARES_using_the_RES) then
+         X_meorb(:,2)=X_meorb(:,2)+conjg(res_meorb)/(W%p(:)+conjg(EXC_E(i_pert_SOC)))
+       endif
+     endif
+     !
      if(l_BS_magnons) then
        if(l_BS_esort.and.i_BS_mat==1) then
          BS_E_sorted_tmp(i1,nVAR+1:nVAR+n_spinor)=res_magn(:n_spinor)
@@ -172,7 +206,7 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
      endif
      !
      if(l_BS_photolum) then
-       PL(:,2)=PL(:,2)+PL_prefactor*ares_fac*BS_R_PL(1,i1)*aimag(-1._SP/(W%p(:)-EXC_E(i_pert_SOC)))/pi
+       PL(:,2)=PL(:,2)+PL_prefactor*BS_R_PL(1,i1)*aimag(-1._SP/(W%p(:)-EXC_E(i_pert_SOC)))/pi
        if (l_ADD_the_ARES_using_the_RES) then
          PL(:,2)=PL(:,2)+PL_prefactor*BS_R_PL(2,i1)*aimag(-1._SP/(W%p(:)+conjg(EXC_E(i_pert_SOC))))/pi
        endif
@@ -200,7 +234,8 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
    allocate(first_exc(EDIM)) 
    allocate(n_of_exc(EDIM))
    ! Threshold is 1meV
-   call degeneration_finder(vtmpR(i1:i1-1+EDIM),EDIM,first_exc,n_of_exc,n_deg_grp,0.001_SP/HA2EV,Include_single_values=.TRUE.)
+   call degeneration_finder(EDIM,first_exc,n_of_exc,n_deg_grp,&
+   &      Er=vtmpR(i1:i1-1+EDIM),deg_accuracy=deg_exc_thrshld,Include_single_values=.TRUE.)
    EDIM=min(BSS_n_eig-i1+1,1000)
    do i_VAR=1,nVAR
      BS_E_sorted(:,i_VAR,2)=cZERO
@@ -241,6 +276,14 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
    call PP_redux_wait(X_dichroism(:,2))
  endif
  !
+ if(l_BS_mespin) then
+   call PP_redux_wait(X_mespin(:,2))
+ endif
+ !
+ if(l_BS_meorb) then
+   call PP_redux_wait(X_meorb(:,2))
+ endif
+ !
  if(l_BS_magnons) then
    call PP_redux_wait(X_magnons(:,:,2))
  endif
@@ -264,6 +307,10 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
  ! Factors to be fixed
  if (l_BS_dichroism) X_dichroism(:,2)=X_dichroism(:,2)*(cI/SPEED_OF_LIGHT)*Co_factor/(4._SP*pi)
  !
+ if (l_BS_mespin) X_mespin(:,2)=X_mespin(:,2)*(-1._SP*Co_factor/(8._SP*pi))
+ !
+ if (l_BS_meorb) X_meorb(:,2)=X_meorb(:,2)*(-1._SP*Co_factor/(8._SP*pi))
+ !
  if (l_BS_magnons) X_magnons(:,:,2)=X_magnons(:,:,2)*Co_factor/(4._SP*pi)
  !
 end subroutine K_diago_response_functions
diff --git a/src/bse/K_diago_right_residuals.F b/src/bse/K_diago_right_residuals.F
index 133c46c44b..eb19ca6bc1 100644
--- a/src/bse/K_diago_right_residuals.F
+++ b/src/bse/K_diago_right_residuals.F
@@ -16,41 +16,53 @@ subroutine K_diago_R_res(mode, i_BS_mat, BS_E, &
  !                 =conjg(BSS_dipoles(j))*BS_V_right(j,i)
  !
  use pars,           ONLY:SP,cZERO
+ use stderr,         ONLY:intc
  use wrapper_omp,    ONLY:V_dot_V_omp
  use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset,ncpu
  use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
  use LIVE_t,         ONLY:live_timing
  use X_m,            ONLY:global_gauge
  use BS_solvers,     ONLY:BSS_eh_E,BSS_eh_Z,BSS_eh_f,BSS_dipoles_opt
- use BS,             ONLY:BS_K_dim,BS_H_dim,BS_K_coupling
+ use BS,             ONLY:BS_H_dim,BS_K_dim,BS_K_coupling,&
+ &                        l_BS_res_from_E,BS_not_const_eh_f
  use MAGNONS,        ONLY:BSS_dipoles_magn
- use DICHROISM,      ONLY:BSS_dipoles_dich
+ use DICHROISM,      ONLY:BSS_dipoles_dich,BSS_dipoles_mespin,BSS_dipoles_meorb
  !
  implicit none
  !
  character(*),intent(in)  :: mode
  integer,     intent(in)  :: i_BS_mat, neigs_this_cpu, neig_shift
  complex(SP), allocatable,         intent(in)  :: BS_E(:)
- complex(SP), pointer,             intent(out) :: BS_R_right(:)
+ complex(SP), pointer,             intent(inout) :: BS_R_right(:)
  complex(SP), target, allocatable, intent(in)  :: BS_V_right(:,:)
  !
  ! Workspace
  !
  type(PP_indexes)     :: px
  integer                  :: i1, i_lambda, neig_total, SL_H_dim, res_range(2), &
- &                           n_steps, step_shift, res_shift, i_res
+ &                           n_steps, step_shift, res_shift, i_res, n_size
  logical                  :: velocity_correction,l_duplicate_mem_par
  !
  complex(SP), allocatable :: tmp_res(:), BS_R_tmp(:) 
  !
+ if (l_BS_res_from_E.and.BS_not_const_eh_f) &
+ & call error(" To be fixed, since sqrt of BSS_eh_f takes a conjg and it should not")
+ !
  ! Sanity checks
  if (.not. allocated(BS_E)) return 
  if (.not. associated(BS_R_right) .or. .not. allocated(BS_V_right)) return 
  !
  neig_total = size(BS_E)
  !
- if (size(BS_R_right)       /= neig_total)     call error("Size mismatch between eigvals and R.residuals.")
- if (size(BS_V_right,dim=2) /= neigs_this_cpu) call error("Size mismatch between eigvecs and R.residuals.")
+ n_size=size(BS_R_right)
+ if (n_size /= neig_total) &
+ &  call error("Different size for energ. ("//trim(intc(neig_total))//")"// &
+ &             " and Rres. ("//trim(intc(n_size))//"), mode= "//trim(mode))
+ !
+ n_size=size(BS_V_right,dim=2)
+ if (n_size /= neigs_this_cpu) &
+ &  call error("Different size for eigen. ("//trim(intc(neig_total))//")"// &
+ &             " and Rres. ("//trim(intc(n_size))//"), mode= "//trim(mode))
  !
  velocity_correction=(index(mode,"opt")>0).and.(trim(global_gauge)=="velocity")
  !
@@ -91,6 +103,10 @@ subroutine K_diago_R_res(mode, i_BS_mat, BS_E, &
  if(trim(mode)=="dic2") tmp_res=BSS_dipoles_dich(2,res_range(1):res_range(2)) 
  if(trim(mode)=="mag1") tmp_res=BSS_dipoles_magn(1,res_range(1):res_range(2))
  if(trim(mode)=="mag2") tmp_res=BSS_dipoles_magn(2,res_range(1):res_range(2))
+ if(trim(mode)=="mespin1") tmp_res=BSS_dipoles_mespin(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="mespin2") tmp_res=BSS_dipoles_mespin(2,res_range(1):res_range(2)) 
+ if(trim(mode)=="meorb1") tmp_res=BSS_dipoles_meorb(1,res_range(1):res_range(2)) 
+ if(trim(mode)=="meorb2") tmp_res=BSS_dipoles_meorb(2,res_range(1):res_range(2)) 
  !
  tmp_res = conjg(tmp_res)*sqrt(cmplx(BSS_eh_f(res_range(1):res_range(2)),kind=SP)) 
  if (allocated(BSS_eh_Z)) tmp_res=tmp_res*sqrt(BSS_eh_Z(res_range(1):res_range(2)))
diff --git a/src/bse/K_diagonal.F b/src/bse/K_diagonal.F
index 6ba26e2b30..8b7410a7c8 100644
--- a/src/bse/K_diagonal.F
+++ b/src/bse/K_diagonal.F
@@ -5,16 +5,21 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_diagonal(iq,what)
  !
  use pars,           ONLY:SP,cZERO,cI
  use parser_m,       ONLY:parser
  use BS_solvers,     ONLY:BS_mat,BSS_eh_f,BSS_eh_E,K_diago_in_BS_mat,BSS_eh_W,BS_diagonal,K_slk
- use BS,             ONLY:BS_K_dim,BSE_mode,BS_H_dim
+ use BS,             ONLY:BS_K_dim,BSE_mode,BS_H_dim,BS_n_eh_spaces
  use SLK_m,          ONLY:SLK_POOL
  use parallel_int,   ONLY:PP_redux_wait
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in) :: iq
  character(*)              :: what
@@ -113,17 +118,22 @@ subroutine K_diagonal(iq,what)
    !
    if (.not.K_diago_in_BS_mat) then
      !
-     YAMBO_ALLOC(BS_diagonal,(BS_K_dim(1)))
-     BS_diagonal=cZERO
+     if (BS_n_eh_spaces==1) then
+       YAMBO_ALLOC(BS_diagonal,(BS_K_dim(1)))
+       BS_diagonal=cZERO
+     else
+       YAMBO_ALLOC(BS_diagonal,(BS_H_dim))
+       BS_diagonal=cZERO
+     endif
      !
      if (rows(1)<=rows(2)) then
        do i_r=rows(1),rows(2)
          !
          if (K_slk%kind=="SLK") then
-           if (.not.cpl(i_r).and.SLK_POOL%ID==0) BS_diagonal(i_r)=K_slk%blc(i_r,i_r,1)
+           if ( ((.not.cpl(i_r)).or.BS_n_eh_spaces==1).and.SLK_POOL%ID==0) BS_diagonal(i_r)=K_slk%blc(i_r,i_r,1)             
            K_slk%blc(i_r,i_r,1)=cZERO
          else
-           if (.not.cpl(i_r)) BS_diagonal(i_r)=BS_mat(i_r,i_r)
+           if ( ((.not.cpl(i_r)).or.BS_n_eh_spaces==1).and.SLK_POOL%ID==0) BS_diagonal(i_r)=BS_mat(i_r,i_r)
            BS_mat(i_r,i_r)=cZERO
          endif
        enddo
diff --git a/src/bse/K_dipoles.F b/src/bse/K_dipoles.F
index 9680d24773..473721e1f4 100644
--- a/src/bse/K_dipoles.F
+++ b/src/bse/K_dipoles.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
  !
  use pars,          ONLY:SP,IP,pi,cZERO,cI
@@ -18,22 +22,24 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
  use ALLOC,         ONLY:DIPOLE_ALLOC_global
  use stderr,        ONLY:STRING_match
  use interfaces,    ONLY:WF_load,WF_free
- use electrons,     ONLY:levels,spin
+ use electrons,     ONLY:levels,spin,i_spin_majority,i_spin_minority
  use X_m,           ONLY:X_t,global_gauge,i_G_shift
  use DIPOLES,       ONLY:DIPOLE_t
  use BS_solvers,    ONLY:BSS_Vnl_included,BSS_Q_dir,BSS_E_dir,BSS_P_dir
  use BS,            ONLY:BS_T_grp,BS_K_coupling,BS_nT_grps,BSE_L_kind,BS_bands,&
 &                        BS_dip_size,l_BS_trace,BS_n_eh_spaces,&
 &                        l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_photolum,&
-&                        BSE_dipole_geometry,BS_field_direction,l_BS_ares_from_res
+&                        BSE_dipole_geometry,BS_field_direction,l_BS_ares_from_res,&
+&                        l_BS_mespin,l_BS_meorb
  use parallel_int,  ONLY:PP_redux_wait
  use parallel_m,    ONLY:PAR_BS_nT_col_grps,PAR_COM_eh_INDEX,PAR_IND_T_groups,&
-&                        PAR_COM_Xk_ibz_INDEX
+&                        PAR_COM_Xk_ibz_INDEX,PARALLEL_default_mode
  use collision_el,  ONLY:elemental_collision,elemental_collision_free,elemental_collision_alloc
  use timing_m,      ONLY:timing
  use vec_operate,   ONLY:v_rotate,cross_product
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer       ::iq
  type(levels)  ::Ken
@@ -151,8 +157,9 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
      ! *length*   : DIP(c,v,k) is q_ver*<v|r|c> while I need q_ver*<c|r|v> = conjg(q_ver*<v|r|c>)
      ! *velocity* : DIP(c,v,k) is q_ver*<v|p|c> while I need q_ver*<c|r|v> = conjg(q_ver*<v|r|c>)
      !
-     ! Absorption, KERR, PL, Dichroism
-     if(l_BS_abs.or.l_BS_kerr.or.l_BS_photolum.or.l_BS_dichroism) then
+     ! Absorption, KERR, PL, Dichroism, Magnetoelectric
+     if(l_BS_abs.or.l_BS_kerr.or.l_BS_photolum.or.&
+&       l_BS_dichroism.or.l_BS_meorb.or.l_BS_mespin) then
        !
        if(iq==1.and.i_G_shift==1) then
          call DIPOLE_rotate(ic,iv,ik_bz,i_sp_pol_c,trim(DIP_kind),Xk,DIP_bare)
@@ -169,30 +176,34 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
        endif
        !
        BS_T_grp(i_T_g)%dipoles_opt(:BS_dip_size,i_T,1)=conjg(DIP_projected(:BS_dip_size))
-       if (BS_K_coupling.and.l_BS_ares_from_res) then
+       if (BS_K_coupling.and.l_BS_ares_from_res.and.iq==1) then
+         ! In the coulping case at q=1 when there are 2 blocks per group
+         ! See also src/bse/K_blocks.F and K_components_folded_in_serial_arrays.F
+         ! Possibly to be extended also to i_G_shift==1
          BS_T_grp(i_T_g)%dipoles_opt(:BS_dip_size,i_T,2)=DIP_projected(:BS_dip_size)
        endif
        !
      endif
      !
-     ! Dichroism
-     if (l_BS_dichroism) then
+     ! Dichroism or orbital magnetoelectric part
+     if (l_BS_dichroism.or.l_BS_meorb) then
        if(iq==1.and.i_G_shift==1) then  
          call DIPOLE_rotate(ic,iv,ik_bz,i_sp_pol_c,"DIP_L",Xk,DIP_expanded)
          do i_dip=1,BS_dip_size
            DIP_projected(i_dip)=dot_product(BS_field_direction(:,i_dip),DIP_expanded)
          enddo
        else
-         call error(' finite q not implemented for dichroism')
+         if (l_BS_dichroism) call error(' finite q not implemented for dichroism effect')
+         if (l_BS_meorb)     call error(' finite q not implemented for magnetoelectric effect')
        endif
-       BS_T_grp(i_T_g)%dipoles_dic(:BS_dip_size,i_T,1)=conjg(DIP_projected(:BS_dip_size))
+       BS_T_grp(i_T_g)%dipoles_orb(:BS_dip_size,i_T,1)=conjg(DIP_projected(:BS_dip_size))
        if (BS_K_coupling.and.l_BS_ares_from_res) then
-         BS_T_grp(i_T_g)%dipoles_dic(:BS_dip_size,i_T,2)=DIP_projected(:BS_dip_size)
+         BS_T_grp(i_T_g)%dipoles_orb(:BS_dip_size,i_T,2)=DIP_projected(:BS_dip_size)
        endif
      endif
      !
-     ! Magnons
-     if (l_BS_magnons) then
+     ! Magnons and Magnetoelectric spin part
+     if (l_BS_magnons.or.l_BS_mespin) then
        !
        ! notice:
        ! In optics q=0 requires a special treatment due to the non analytic behaviour
@@ -203,6 +214,7 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
        if(iq==1.and.i_G_shift==1) then
          call DIPOLE_rotate(ic,iv,ik_bz,i_sp_pol_c,"DIP_S",Xk,DIP_expanded)
        else
+         if (l_BS_mespin) call error(' finite q not implemented for magnetoelectric effect')
          call scatter_Bamp_spin(BSE_scatt,'x')
          DIP_expanded(1)=-conjg(BSE_scatt%rhotw(1))
          call scatter_Bamp_spin(BSE_scatt,'y')
@@ -212,20 +224,37 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
          DIP_expanded(3)=-conjg(BSE_scatt%rhotw(1))
        endif
        !
-       ! DIP_Smins(c,v,k) is  <v|S-|c>(i_sp_pol_c), to get <c|S-|v>(i_sp_pol_v) = conjg(<v|S+|c>(i_sp_pol_c))
-       ! DIP_Splus(c,v,k) is  <v|S+|c>(i_sp_pol_c), to get <c|S+|v>(i_sp_pol_v) = conjg(<v|S-|c>(i_sp_pol_c))
+       ! Magnons
+       if (l_BS_magnons) then
+         !
+         ! DIP_Smins(c,v,k) is  <v|S-|c>(i_sp_pol_c), to get <c|S-|v>(i_sp_pol_v) = conjg(<v|S+|c>(i_sp_pol_c))
+         ! DIP_Splus(c,v,k) is  <v|S+|c>(i_sp_pol_c), to get <c|S+|v>(i_sp_pol_v) = conjg(<v|S-|c>(i_sp_pol_c))
+         !
+         DIP_Splus = 0.5_SP*(DIP_expanded(1)+cI*DIP_expanded(2))  ! S+ for c\dn> transitions: <v\up|c\dn>
+         DIP_Smins = 0.5_SP*(DIP_expanded(1)-cI*DIP_expanded(2))  ! S- for c\up> transitions: <v\dn|c\up>
+         !
+         BS_T_grp(i_T_g)%dipoles_mag(i_spin_majority,i_T,1)=conjg(DIP_Splus)   ! S- for |c\dn> trans: <c\dn|S-|v\up>(i_sp_pol_v) = conjg(<v\up|S+|c\dn>(i_sp_pol_c))
+         BS_T_grp(i_T_g)%dipoles_mag(i_spin_minority,i_T,1)=conjg(DIP_Smins)   ! S+ for |c\up> trans: <c\dn|S+|v\up>(i_sp_pol_v) = conjg(<v\dn|S-|c\up>(i_sp_pol_c))
+                                                                 !      or the right R residual associated to (eps_c\dn-eps_v\up)
+                                                                 ! S- for |v\dn> trans: <v\dn|S-|c\up>(i_sp_pol_c) = conjg(<c\up|S+|v\dn>(i_sp_pol_v))
+                                                                 ! S+ for |v\up> trans: <v\up|S+|c\dn>(i_sp_pol_c) = conjg(<c\dn|S-|v\up>(i_sp_pol_v))
+         if (BS_K_coupling.and.l_BS_ares_from_res) then          ! This is the right A residual associated to (eps_v\up-eps_c\dn)
+           BS_T_grp(i_T_g)%dipoles_mag(i_spin_majority,i_T,2)=DIP_Smins        ! S- for |v\dn> trans: <v\dn|S-|c\up>(i_sp_pol_c)
+           BS_T_grp(i_T_g)%dipoles_mag(i_spin_minority,i_T,2)=DIP_Splus        ! S+ for |v\up> trans: <v\up|S+|c\dn>(i_sp_pol_c)
+         endif
+         !
+       endif
        !
-       DIP_Splus = 0.5_SP*(DIP_expanded(1)+cI*DIP_expanded(2))  ! S+ for c\dn> transitions: <v\up|c\dn>
-       DIP_Smins = 0.5_SP*(DIP_expanded(1)-cI*DIP_expanded(2))  ! S- for c\up> transitions: <v\dn|c\up>
        !
-       BS_T_grp(i_T_g)%dipoles_mag(1,i_T,1)=conjg(DIP_Splus)   ! S- for |c\dn> trans: <c\dn|S-|v\up>(i_sp_pol_v) = conjg(<v\up|S+|c\dn>(i_sp_pol_c))
-       BS_T_grp(i_T_g)%dipoles_mag(2,i_T,1)=conjg(DIP_Smins)   ! S+ for |c\up> trans: <c\dn|S+|v\up>(i_sp_pol_v) = conjg(<v\dn|S-|c\up>(i_sp_pol_c))
-                                                               !      or the right R residual associated to (eps_c\dn-eps_v\up)
-                                                               ! S- for |v\dn> trans: <v\dn|S-|c\up>(i_sp_pol_c) = conjg(<c\up|S+|v\dn>(i_sp_pol_v))
-                                                               ! S+ for |v\up> trans: <v\up|S+|c\dn>(i_sp_pol_c) = conjg(<c\dn|S-|v\up>(i_sp_pol_v))
-       if (BS_K_coupling.and.l_BS_ares_from_res) then          ! This is the right A residual associated to (eps_v\up-eps_c\dn)
-         BS_T_grp(i_T_g)%dipoles_mag(1,i_T,2)=DIP_Smins        ! S- for |v\dn> trans: <v\dn|S-|c\up>(i_sp_pol_c)
-         BS_T_grp(i_T_g)%dipoles_mag(2,i_T,2)=DIP_Splus        ! S+ for |v\up> trans: <v\up|S+|c\dn>(i_sp_pol_c)
+       ! Magnetoelectric spin part
+       if (l_BS_mespin) then
+         do i_dip=1,BS_dip_size
+           DIP_projected(i_dip)=dot_product(BS_field_direction(:,i_dip),DIP_expanded)
+         enddo
+         BS_T_grp(i_T_g)%dipoles_spin(:BS_dip_size,i_T,1)=conjg(DIP_projected(:BS_dip_size))
+         if (BS_K_coupling.and.l_BS_ares_from_res) then
+           BS_T_grp(i_T_g)%dipoles_spin(:BS_dip_size,i_T,2)=DIP_projected(:BS_dip_size)
+         endif
        endif
        !
      endif
@@ -255,8 +284,10 @@ subroutine K_dipoles(iq,Ken,Xk,q,X_oscillators,Dip)
    !
    call DIPOLE_ALLOC_global( )
    !
-   ! Here I need the distribution over k
-   call PARALLEL_SETUP_K_scheme("Kdef")
+   ! The call to DIPOLE_IO might have changed the PAR_K_scheme.
+   ! Here I reset it to the previous value
+   if(     trim(PARALLEL_default_mode)=="KQmemory") call PARALLEL_SETUP_K_scheme("K")
+   if(.not.trim(PARALLEL_default_mode)=="KQmemory") call PARALLEL_SETUP_K_scheme("Kdef")
    !
  endif
  !
diff --git a/src/bse/K_driver.F b/src/bse/K_driver.F
index 02d897c18a..ecc893f33a 100644
--- a/src/bse/K_driver.F
+++ b/src/bse/K_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM IMA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  !
  use pars,           ONLY:IP
@@ -19,7 +23,8 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  use BS_solvers,     ONLY:BSS_n_freqs,BSS_er,BSS_dr,BSS_mode,BSS_damp_reference,BSS_q0,&
 &                         BSS_Q_dir,BSS_E_dir
  use BS,             ONLY:BS_bands,BS_q,BS_n_g_exch,BS_res_K_corr,BS_n_g_W,BS_cpl_K_corr,&
-&                         BS_Blocks_and_Transitions_free,BSqpts,O_ng,BS_iq_now
+&                         BS_Blocks_and_Transitions_free,BSqpts,O_ng,BSE_L_kind,BS_iq_now,&
+&                         BS_kpt_bz,BS_kpt_ibz
  use TDDFT,          ONLY:FXC_per_memstps,FXC_n_mem_freqs,FXC_is_retarded
  use drivers,        ONLY:l_rpa_IP,l_bs_fxc,l_bss
  use parallel_m,     ONLY:PAR_IND_WF_b_and_k,PAR_K_scheme,PARALLEL_default_mode
@@ -27,8 +32,9 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  use interfaces,     ONLY:eval_G_minus_G
  use vec_operate,    ONLY:v_norm
  use R_lattice,      ONLY:bz_samp,qindx_B,qindx_X,qindx_B_max,qindx_free,G_m_G_maxval,q0_def_norm
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::Xen,Ken
  type(bz_samp) ::Xk,q
@@ -45,7 +51,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  ! I/O
  !
  integer, allocatable :: qindx_B_max_CPU(:)
- integer              :: io_err,ID
+ integer              :: io_err,ID,ik
  integer, external    :: io_QINDX
  !
  call section('*','Bethe Salpeter Equation driver')
@@ -59,6 +65,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  !=========================
  call K_driver_init("init",1,Ken,Xk)
  !
+ call msg('s','[BSE] Response kind is: ',trim(BSE_L_kind))
  ! q0 renormalization and field direction
  !
  BSS_E_dir(:) = BSS_E_dir(:)/v_norm(BSS_E_dir)
@@ -105,7 +112,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
    !
    ! here qindx_B is allocated
    call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/5/),ID=ID)
-   io_err=io_QINDX(Xk,q,ID)
+   io_err=io_QINDX(Xk,q,ID,'minus_q')
    !
    YAMBO_ALLOC(qindx_B_max_CPU,(PAR_K_scheme%COM_ibz_INDEX%n_CPU))
    qindx_B_max_CPU=0
@@ -116,6 +123,17 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
    !
  endif
  !
+ YAMBO_ALLOC(BS_kpt_ibz,(3,Xk%nibz))
+ do ik=1,Xk%nibz
+   BS_kpt_ibz(:,ik)=Xk%pt(ik,:)
+ enddo
+ call k_ibz2bz(Xk,'i',.false.)
+ YAMBO_ALLOC(BS_kpt_bz,(3,Xk%nbz))
+ do ik=1,Xk%nbz
+   BS_kpt_bz(:,ik)=Xk%ptbz(ik,:)
+ enddo
+ call k_ibz2bz(Xk,'d',.false.)
+ !
  ! Tranferred momentum
  !======================
  YAMBO_ALLOC(BSqpts,(3,BS_q(1):BS_q(2)))
@@ -180,7 +198,7 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
    call K_Transitions_setup(iq,Ken,Xk,Dip,VB,CB)
    !
    !... Blocks build-up
-   if (.not.l_rpa_IP) call K_blocks()
+   if (.not.l_rpa_IP) call K_blocks(iq)
    !
    ! Wave Functions distribution 
    !=============================
@@ -239,6 +257,8 @@ subroutine K_driver(Xen,Ken,Xk,q,X_static_pp,Xw,Dip)
  call PARALLEL_global_indexes(Ken,Xk,q,"Response_T_space",X=X_oscillators,RESET=.TRUE.)
  !
  YAMBO_FREE(BSqpts)
+ YAMBO_FREE(BS_kpt_bz)
+ YAMBO_FREE(BS_kpt_ibz)
  call qindx_free('B')
  !
 end subroutine
diff --git a/src/bse/K_driver_init.F b/src/bse/K_driver_init.F
index 0ace8836ab..1f482cd88d 100644
--- a/src/bse/K_driver_init.F
+++ b/src/bse/K_driver_init.F
@@ -3,7 +3,13 @@
 !
 ! Copyright (C) 2021 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM
+! Authors (see AUTHORS file for details): AM DS
+!
+! headers
+!
+#if defined _SLEPC && !defined _NL
+#include <petsc/finclude/petscsys.h>
+#endif
 !
 subroutine K_driver_init(what,iq,Ken,Xk)
  !
@@ -13,20 +19,24 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  use parser_m,     ONLY:parser
  use parallel_m,   ONLY:PARALLEL_default_mode
  use stderr,       ONLY:STRING_match,STRING_split
- use BS_solvers,   ONLY:BSS_mode,BSS_slepc_matrix_format,BSS_uses_DbGd,BSS_uses_FKE,BSS_ydiago_solver,&
+ use BS_solvers,   ONLY:BSS_mode,BSS_slepc_matrix_format,BSS_uses_DbGd,BSS_uses_FKE,BSS_ldiago_solver,&
 &                       BSS_slepc_pseudo_herm,BSS_slepc_double_grp,run_Slepc,BSS_mode,BSS_first_eig,&
 &                       l_abs_prop_chi_bse,l_eels_can_be_computed,l_eels_from_inversion,&
 &                       BSS_kernel_IO_on_the_fly,BSS_kernel_stored_size,BSS_slepc_approach
  use BS,           ONLY:BSE_L_kind,BSE_mode,BS_K_is_ALDA,BS_dip_size,l_BSE_minimize_memory,BS_perturbative_SOC,&
-&                       BS_perturbative_SOC,l_BS_magnons,l_BS_photolum,BSK_IO_mode,&
+&                       BS_perturbative_SOC,l_BS_magnons,l_BS_photolum,BSK_IO_mode,l_BSE_kernel_full,&
 &                       BS_cpl_K_exchange,BS_n_g_exch,BS_res_K_exchange,BS_K_coupling,BS_res_ares_n_mat,&
-&                       BS_n_eh_spaces,l_BS_ares_from_res,BS_bands,BS_K_anti_resonant
+&                       BS_cpl_K_corr,BS_res_K_corr,&
+&                       BS_n_eh_spaces,l_BS_ares_from_res,BS_bands,BS_K_anti_resonant,l_BS_res_from_E
  use TDDFT,        ONLY:FXC_is_retarded,FXC_n_g_corr,l_Fxc_from_Vxc
  use drivers,      ONLY:l_col_cut,l_rpa_IP,l_alda_fxc,l_bs_fxc
  use D_lattice,    ONLY:i_time_rev,i_space_inv,l_3D
 #if defined _RT
  use drivers,      ONLY:l_rt_carriers_in_use
- use RT_control,   ONLY:NEQ_Kernel,EQ_Transitions,EQ_NoOcc,NEQ_Residuals,RT_BSE_Occ_Mode
+ use RT_control,   ONLY:NEQ_Kernel,EQ_Transitions,EQ_NoOcc,ALL_NoOcc,NEQ_Residuals,RT_BSE_Occ_Mode
+#endif
+#if defined _CUDA
+ use TDDFT,        ONLY:FXC_mode
 #endif
 #if defined _CUDA
  use TDDFT,        ONLY:FXC_mode
@@ -139,20 +149,32 @@ subroutine K_driver_init(what,iq,Ken,Xk)
 #if defined _RT 
  if (l_rt_carriers_in_use) BS_K_anti_resonant=.TRUE.
 #endif
+
+ ! In the resonat only case l_BSE_ares_from_res=.true.
+ ! This is needed to switch off all the lines related to the excplicit calculation of the ARES part
+ ! (see io_BS_PAR_init for example)
+ !
+ ! Developer options. Without the code works in default mode
+ ! With the user can switch to experimental coding
+ call parser('SelectResFromE',l_BS_res_from_E)
+ if (l_BS_photolum) l_BS_res_from_E=.TRUE.
+ call parser('SelectResFromF',l_flag)
+ if (l_flag)        l_BS_res_from_E=.FALSE.
  !
  ! Is ARES derivable from RES?
  !-----------------------------
  ! Developer options. Without the code works in default mode
  ! With the user can switch to experimental coding
- call parser('ImposeAsym',l_flag)
- if (l_flag)        l_BS_ares_from_res=.FALSE.
- !
- if (iq/=1.and.i_time_rev==0.and.i_space_inv==0 )  l_BS_ares_from_res=.FALSE.
- if (l_BS_photolum)                                l_BS_ares_from_res=.FALSE.
- if (l_BS_magnons.and.n_sp_pol==2)                 l_BS_ares_from_res=.FALSE.
- ! The next line is to switch off sections of the code due to the calculation
- ! of the ARES part without checking the BSE_mode (see io_BS_PAR_init for example)
- if (trim(BSE_mode)=="resonant")                   l_BS_ares_from_res=.TRUE.
+ call parser('ImposeFullBSE',l_BSE_kernel_full)
+ call parser('ImposeAsymBSE',l_flag)
+ !
+ ! In all other cases the code checks what to do
+ if (.not.trim(BSE_mode)=="resonant") then
+   if (l_flag)                                       l_BS_ares_from_res=.FALSE.
+   if (iq/=1.and.i_time_rev==0.and.i_space_inv==0 )  l_BS_ares_from_res=.FALSE.
+   !if (l_BS_photolum)                                l_BS_ares_from_res=.FALSE.
+   if (l_BS_magnons.and.n_sp_pol==2)                 l_BS_ares_from_res=.FALSE.
+ endif
  !
  ! I need ARES and I cannot get it from RES
  ! 
@@ -161,12 +183,15 @@ subroutine K_driver_init(what,iq,Ken,Xk)
    if (.not.BS_K_coupling) BS_res_ares_n_mat=2
  endif
  !
+ ! In this case the anti-resonant space is used to build the coupling matrix
+ if (BS_K_coupling.and.iq/=1) BS_n_eh_spaces=2
+ !
  if (what=="loop-init") return
  !
  ! BSE_L_kind check
  !==============
- if (.not.STRING_match(BSE_L_kind,"bar").and..not.STRING_match(BSE_L_kind,"full")) &
- & call error('set Lkind = DEFAULT or BAR or FULL')
+ if (.not.STRING_match(BSE_L_kind,"bar").and..not.STRING_match(BSE_L_kind,"full").and..not.STRING_match(BSE_L_kind,"tilde")) &
+ &   call error('set Lkind = DEFAULT or BAR or FULL or TILDE')
  !
  BS_K_is_ALDA=l_alda_fxc
  if (l_alda_fxc) then
@@ -233,6 +258,14 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  !===============
  if (l_rpa_IP.and.STRING_match(BSE_mode,'coupling')) BSE_mode='retarded'
  !
+ if (any((/BS_res_K_corr,BS_cpl_K_corr,BS_K_is_ALDA/)) ) then
+   if(l_BS_magnons.and.n_sp_pol==2) then
+     BS_res_K_exchange=.FALSE.
+     BS_cpl_K_exchange=.FALSE.
+     BS_n_g_exch=0
+   endif
+ endif
+ !
  ! TR-ABS logicals
  !-----------------
 #if defined _RT
@@ -240,6 +273,7 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  NEQ_Residuals=STRING_match(RT_BSE_Occ_Mode,"r").and.allocated(Ken%fo)
  call parser('ForceEqTrans',EQ_Transitions)
  call parser('ForceEqNoOcc',EQ_NoOcc)
+ call parser('ForceAllNoOcc',ALL_NoOcc)
 #endif
  !
  ! SLEPC
@@ -259,6 +293,12 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  !
  BSS_slepc_pseudo_herm=BS_K_coupling .and. l_BS_ares_from_res .and. &
  &                     (.not.BSS_slepc_double_grp) .and. index(BSS_slepc_approach,"NonHerm")==0
+#if PETSC_VERSION_LT(3,22,0)
+ if (BSS_slepc_pseudo_herm) then
+   call warning(" Pseudo hermitian case. Link slepc >= 3.22 to improve solver efficiency")
+   BSS_slepc_pseudo_herm=.false.
+ endif 
+#endif
  !
 #endif
  !
@@ -269,6 +309,8 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  !
  call parser('BSS_FKE',BSS_uses_FKE)
  !
+ ! I/O structure
+ !===============
  BSS_kernel_IO_on_the_fly=index(BSK_IO_mode,"read_on_the_fly")>0 .and. (index(BSS_mode,'h')/=0 .or. index(BSS_mode,'s')/=0)
  if (BSS_kernel_IO_on_the_fly) then
    call STRING_split(BSK_IO_mode,BSK_IO_strings,n_non_empty_strings=BSK_IO_n_strings)
@@ -280,7 +322,7 @@ subroutine K_driver_init(what,iq,Ken,Xk)
  endif
  !
 #if defined _ELPA
- if ( index(BSS_mode,'d')/=0 .and. BSS_ydiago_solver=='e' .and. BSS_first_eig>1) then
+ if ( index(BSS_mode,'d')/=0 .and. BSS_ldiago_solver=='e' .and. BSS_first_eig>1) then
    call warning('ELPA does not support BSS_First_eig>1')
    BSS_first_eig=1
  endif
diff --git a/src/bse/K_exchange_collisions.F b/src/bse/K_exchange_collisions.F
index ef1bc31cbc..fbba10d5ba 100644
--- a/src/bse/K_exchange_collisions.F
+++ b/src/bse/K_exchange_collisions.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+!
 subroutine K_exchange_collisions(iq,Xk,i_T_grp,NG,l_bs_exch_wf_in_loop)
  !
  use pars,           ONLY:SP,cZERO
@@ -22,7 +26,6 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp,NG,l_bs_exch_wf_in_loop)
  use devxlib,        ONLY:devxlib_memcpy_d2d,devxlib_memcpy_h2d,devxlib_memcpy_d2h,&
 &                         devxlib_conjg_d,devxlib_memset_d
  !
-#include<dev_defs.h>
  !
  implicit none
  !
diff --git a/src/bse/K_exchange_kernel.F b/src/bse/K_exchange_kernel.F
index fd7ab4d4ea..8517bd400f 100644
--- a/src/bse/K_exchange_kernel.F
+++ b/src/bse/K_exchange_kernel.F
@@ -5,9 +5,12 @@
 !
 ! Authors (see AUTHORS file for details): AF DS
 !
+! headers
+!
 #include<dev_defs.h>
 !
-function K_exchange_kernel_resonant(iq, BS_n_g_exch, BS_T_grp_ip, i_Tp, BS_T_grp_ik, i_Tk) result(H_x)
+!
+function K_exchange_kernel(iq, BS_n_g_exch, BS_T_grp_ip, i_Tp, BS_T_grp_ik, i_Tk) result(H_x)
   !==============================
   !
   ! :::EXCHANGE    (resonant):::
@@ -62,7 +65,7 @@ function K_exchange_kernel_resonant(iq, BS_n_g_exch, BS_T_grp_ip, i_Tp, BS_T_grp
                                    BS_T_grp_ik%O_x(:,i_Tk)/bare_qpg(iq,:BS_n_g_exch)**2)
 #endif
   !
-end function K_exchange_kernel_resonant
+end function K_exchange_kernel
 !
 !
 !
diff --git a/src/bse/K_fill_block_cyclic.F b/src/bse/K_fill_block_cyclic.F
index 2dd87b25bf..155c368145 100644
--- a/src/bse/K_fill_block_cyclic.F
+++ b/src/bse/K_fill_block_cyclic.F
@@ -11,7 +11,7 @@ subroutine K_fill_block_cyclic(i_BS_mat, diago_mat)
   !  K = |                       |
   !      | (-cI*K_c^*)  (-K_r^*) |
   !
-  use pars,           ONLY:cI,cONE
+  use pars,           ONLY:cI,cONE,cZERO
   use BS,             ONLY:BS_K_dim,BS_H_dim,BS_blk,n_BS_blks,BS_K_coupling,&
   &                        BS_res_ares_n_mat,l_BS_ares_from_res
   use BS_solvers,     ONLY:BSS_eh_E,BSS_eh_W,BSS_perturbative_width, &
@@ -19,12 +19,16 @@ subroutine K_fill_block_cyclic(i_BS_mat, diago_mat)
   use  gpu_m,         ONLY:have_cuda
   !
   !use, intrinsic :: iso_c_binding, only: c_ptr
-  use ydiago_interface
+  use ldiago_interface
   !
   implicit none
   !
   integer, intent(in)               :: i_BS_mat
   type(c_ptr), intent(in)           :: diago_mat
+  !
+  ! Workspace
+  !
+  logical                           :: l_mat_allocated
   integer                           :: i_c,i_r,i_Tk,i_Tp,i_B,H_shift(2)
   complex(YDIAGO_CMPLX)             :: Mij,Mij_star
   integer(YDIAGO_INT)               :: H_pos(2),SL_K_dim(2),SL_H_dim
@@ -97,6 +101,7 @@ subroutine K_fill_block_cyclic(i_BS_mat, diago_mat)
   error_diago = initiateSetQueue(diago_mat, nelements)
   if (error_diago /= 0) call error("Initiating set Queue failed")
   !
+  !
   ! // fill the elements
   do i_B=1,n_BS_blks
     !
@@ -105,6 +110,8 @@ subroutine K_fill_block_cyclic(i_BS_mat, diago_mat)
     !
     if (i_BS_mat/=BS_blk(i_B)%ira_k .and. BS_res_ares_n_mat==2) cycle
     !
+    l_mat_allocated = allocated(BS_blk(i_B)%mat)
+    !
     H_shift=0
     if(BS_blk(i_B)%mode=="C") H_shift(2)=BS_K_dim(1)
     if(BS_blk(i_B)%mode=="A") H_shift(:)=BS_K_dim(1)
@@ -122,8 +129,13 @@ subroutine K_fill_block_cyclic(i_BS_mat, diago_mat)
         if (H_pos(1)+H_shift(1)>H_pos(2)+H_shift(2)) cycle
         if (l_BS_ares_from_res.and.H_pos(1)>H_pos(2)) cycle
         !
-        Mij     =      BS_blk(i_B)%mat(i_r,i_c)
-        Mij_star= real(BS_blk(i_B)%mat(i_r,i_c))-cI*aimag(BS_blk(i_B)%mat(i_r,i_c))
+        if (l_mat_allocated) then
+          Mij     =      BS_blk(i_B)%mat(i_r,i_c)
+          Mij_star= real(BS_blk(i_B)%mat(i_r,i_c))-cI*aimag(BS_blk(i_B)%mat(i_r,i_c))
+        else
+          Mij     =cZERO
+          Mij_star=cZERO
+        endif
         !
         ! Add energies to the diagonal
         !
diff --git a/src/bse/K_inversion_Lo.F b/src/bse/K_inversion_Lo.F
index 7f472010d4..73c2f1c724 100644
--- a/src/bse/K_inversion_Lo.F
+++ b/src/bse/K_inversion_Lo.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
-subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo) 
+! headers
+!
+#include<y_memory.h>
+!
+subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo) 
  !
  use pars,           ONLY:SP,cZERO,cI,pi,rZERO,schlen
  use R_lattice,      ONLY:bz_samp
@@ -28,20 +32,21 @@ subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo)
 #endif
  use BS,             ONLY:l_BS_photolum
  use PHOTOLUM,       ONLY:BSS_PL_f,BSS_dipoles_PL,PL,PL_prefactor
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(w_samp),  intent(inout) :: W
  type(levels),  intent(in)    :: E
  type(bz_samp), intent(in)    :: k
- integer,       intent(in)    :: iq,Lo_dim,Lo_n_loop
+ integer,       intent(in)    :: iq,Lo_dim
  complex(SP),   intent(out)   :: Lo(BS_H_dim,W%n_freqs,Lo_dim)
  !
  ! Work Space
  !
  type(w_samp)     :: W_mins,W_plus
  integer          :: iw_ref,i_bsk,i_res,i_cpl,i_Lo_trans,ik_bz,iv,ic,iw,i_sp_c,i_sp_v,ik_ibz,&
- &                   i_res_ares,i_para,i_loop,n_loop,i_eps,i_Lo,transition(4)
+ &                   i_res_ares,i_para,i_loop,n_loop,i_eps,i_Lo,transition(5)
  complex(SP)      :: E_plus_W,residual
  complex(SP)      :: SF(W%n_freqs,2)
  character(schlen):: LT_title
@@ -120,7 +125,7 @@ subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo)
        !
        i_Lo_trans=i_res
        i_Lo      =1
-       transition=(/ik_bz,iv,ic,i_sp_c/)
+       transition=(/ik_bz,iv,ic,i_sp_c,i_sp_v/)
        !
        if (BS_n_eh_spaces==2) then
          if ( i_res_ares==1) i_Lo      =1
@@ -139,8 +144,8 @@ subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo)
        if (     allocated(BSS_eh_W)) E_plus_W=BSS_eh_E(i_res)-cI*BSS_eh_W(i_res)
        !
        if(i_loop==2) then
-         forall(iw=1:W%n_freqs) W_mins%p(iw)=W%p(iw)-BS_diagonal(i_res)*BSS_eh_f(i_res)
-         forall(iw=1:W%n_freqs) W_plus%p(iw)=W%p(iw)+BS_diagonal(i_res)*BSS_eh_f(i_res)
+         forall (iw=1:W%n_freqs) W_mins%p(iw)=W%p(iw)-BS_diagonal(i_res)*BSS_eh_f(i_res)
+         forall (iw=1:W%n_freqs) W_plus%p(iw)=W%p(iw)+BS_diagonal(i_res)*BSS_eh_f(i_res)
        endif
        !
        if ( .not.allocated(E%GreenF) ) then
@@ -158,7 +163,7 @@ subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo)
            if (BS_K_anti_resonant) then
              call X_GreenF_analytical(iq,transition,W,E,k,SF(:,2),"Ra","eh",.TRUE.,.FALSE.)
            endif
-           Lo(i_Lo_trans,:,i_Lo+Lo_n_loop)=BSS_PL_f(i_res)/BSS_eh_f(i_res)**2*aimag(W%p(1))/pi
+           Lo(i_Lo_trans,:,i_Lo)=BSS_PL_f(i_res,i_Lo)/BSS_eh_f(i_res)**2*aimag(W%p(1))/pi
          endif
          !
        else 
@@ -188,9 +193,9 @@ subroutine K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo)
        !
        if (l_BS_photolum.and.i_loop==1) then
          residual=dot_product(BSS_dipoles_PL(:,i_res),BSS_dipoles_PL(:,i_res))
-         PL(:,i_eps)=PL(:,i_eps)-PL_prefactor*residual*aimag(SF(:,1))/pi*BSS_PL_f(i_res)
+         PL(:,i_eps)=PL(:,i_eps)-PL_prefactor*residual*aimag(SF(:,1))/pi*BSS_PL_f(i_res,1)
          if (BS_K_anti_resonant.and.l_BS_ares_from_res) then
-           PL(:,i_eps)=PL(:,i_eps)+PL_prefactor*residual*aimag(SF(:,2))/pi*BSS_PL_f(i_cpl)
+           PL(:,i_eps)=PL(:,i_eps)-PL_prefactor*residual*aimag(SF(:,2))/pi*BSS_PL_f(i_cpl,2)
          endif
        endif
        !
diff --git a/src/bse/K_inversion_driver.F b/src/bse/K_inversion_driver.F
index 26b0c68510..f1c8fa270f 100644
--- a/src/bse/K_inversion_driver.F
+++ b/src/bse/K_inversion_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_inversion_driver(iq,W,E,k,q) 
  !
  use pars,           ONLY:SP
@@ -19,7 +23,9 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  use com,            ONLY:isec,msg
  use LIVE_t,         ONLY:live_timing
  use parallel_int,   ONLY:PP_wait,PP_redux_wait,PARALLEL_index,PARALLEL_live_message
- use parallel_m,     ONLY:PP_indexes,PP_indexes_reset,PAR_COM_SLK_INDEX_local,PAR_COM_SLK_INDEX_local
+ use parallel_m,     ONLY:PP_indexes,PP_indexes_reset,PAR_COM_SLK_INDEX_local,PAR_COM_SLK_INDEX_local,&
+&                         CPU_structure
+ use linear_algebra, ONLY:INV
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:REP,NONE,OP_WR_CL
  use matrix,         ONLY:MATRIX_reset
@@ -36,8 +42,9 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  use stderr,         ONLY:intc
  use descriptors,    ONLY:IO_desc_add
  use PHOTOLUM,       ONLY:BSS_PL_free,PL
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(w_samp) :: W
  integer      :: iq
@@ -62,7 +69,7 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  !
  ! I/O
  !
- integer           :: i_err,ID,Lo_dim,Lo_n_loop
+ integer           :: i_err,ID,Lo_dim
  integer, external :: io_BSS_invert
  !
  if (BS_res_ares_n_mat>1) then
@@ -72,6 +79,8 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  !
  call timing('Inversion Solver',OPR='start')
  !
+ call PARALLEL_assign_LA_COMMs("Response_T_space",INV,CPU_structure(4)%nCPU_lin_algebra_INV)
+ !
  ! Sectioning
  !============
  if (isec(2)/=0) sec="="
@@ -155,7 +164,6 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  !===========================================================================
  Lo_dim=1
  if (BS_K_anti_resonant.and.l_BS_ares_from_res) Lo_dim=2
- Lo_n_loop=Lo_dim
  !
  ! In the PL case we need, in addition to the standard GF, also some additional L_k^(r)(w) A_k(w) L_k^(a)(w), where
  ! L^(a/r) are deduced from the standard components.
@@ -167,7 +175,7 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  !
  YAMBO_ALLOC(Lo,(BS_H_dim,W%n_freqs,Lo_dim))
  !
- call K_inversion_Lo(iq,W,E,k,Lo_dim,Lo_n_loop,Lo)
+ call K_inversion_Lo(iq,W,E,k,Lo_dim,Lo)
  !
  ! Initialize & write the output file 
  !====================================
@@ -186,7 +194,7 @@ subroutine K_inversion_driver(iq,W,E,k,q)
 &                              TOTAL=W%n_freqs,NCPU=PAR_COM_SLK_INDEX_local%n_CPU)
    call live_timing('Perturbative inversion ',PAR_IND_freqs%n_of_elements(PAR_IND_FREQ_ID))
    !
-   call K_inversion_engine("any",1,W%n_freqs,PAR_IND_freqs,W,Lo,Lo_dim,Lo_n_loop,.FALSE.)
+   call K_inversion_engine("any",1,W%n_freqs,PAR_IND_freqs,W,Lo,Lo_dim,.FALSE.)
    !
    call PP_redux_wait(K_INV_EPS%err,COMM=PAR_COM_SLK_INDEX_local%COMM)
    call PP_redux_wait(Resp_ii(:,2),COMM=PAR_COM_SLK_INDEX_local%COMM)
@@ -199,7 +207,7 @@ subroutine K_inversion_driver(iq,W,E,k,q)
    !
    K_INV_EPS%spectra(:,1)=Resp_ii(:,2)
    if (l_BS_photolum) then
-     K_INV_PL%spectra(:,1)=PL(:,2)
+     K_INV_PL%spectra(:,1)=cmplx(PL(:,2),0._SP,kind=SP)
      !
      call io_control(ACTION=OP_WR_CL,COM=NONE,SEC=(/1,2,3,4/),ID=ID)
      i_err=io_BSS_invert(iq,W,ID)
@@ -251,7 +259,7 @@ subroutine K_inversion_driver(iq,W,E,k,q)
  ! I/O [after full inversion]
  !=====
  K_INV_EPS%spectra(:,1)=Resp_ii(:,2)
- if (l_BS_photolum) K_INV_PL%spectra(:,1)=PL(:,2)
+ if (l_BS_photolum) K_INV_PL%spectra(:,1)=cmplx(PL(:,2),0._SP,kind=SP)
  !
  if (l_BS_photolum) then
    call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1,2,3,4/),ID=ID)
@@ -294,7 +302,7 @@ subroutine do_it_FULL(TYP)
        !
        iw=TYP%iw_full(i1)
        !
-       call K_inversion_engine(TYP%what,iw,1,PAR_IND_freqs,W,Lo,Lo_dim,Lo_n_loop,.TRUE.)
+       call K_inversion_engine(TYP%what,iw,1,PAR_IND_freqs,W,Lo,Lo_dim,.TRUE.)
        !
        call live_timing(steps=1)
        !
diff --git a/src/bse/K_inversion_engine.F b/src/bse/K_inversion_engine.F
index df1c856344..f56611705a 100644
--- a/src/bse/K_inversion_engine.F
+++ b/src/bse/K_inversion_engine.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
-subroutine K_inversion_engine(what,iw_2start,iw_2do,px,W,Lo,Lo_dim,Lo_n_loop,do_it_full)
+! headers
+!
+#include<y_memory.h>
+!
+subroutine K_inversion_engine(what,iw_2start,iw_2do,px,W,Lo,Lo_dim,do_it_full)
  !
  use pars,           ONLY:SP,cZERO
  use parallel_m,     ONLY:PP_indexes
@@ -17,13 +21,14 @@ subroutine K_inversion_engine(what,iw_2start,iw_2do,px,W,Lo,Lo_dim,Lo_n_loop,do_
  use BS,             ONLY:BS_H_dim,l_BS_photolum
  use BS_solvers,     ONLY:Co_factor,K_INV_EPS,BSS_dipoles_opt,Co_factor,K_inv_slk,K_slk,K_INV_PL
  use PHOTOLUM,       ONLY:PL,PL_prefactor,BSS_dipoles_PL
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(w_samp),intent(in)  :: W
  complex(SP), intent(in)  :: Lo(BS_H_dim,W%n_freqs,Lo_dim)
  logical,     intent(in)  :: do_it_full
- integer,     intent(in)  :: Lo_dim,iw_2start,iw_2do,Lo_n_loop
+ integer,     intent(in)  :: Lo_dim,iw_2start,iw_2do
  character(3),intent(in)  :: what
  type(PP_indexes) :: px
  !
@@ -56,20 +61,20 @@ subroutine K_inversion_engine(what,iw_2start,iw_2do,px,W,Lo,Lo_dim,Lo_n_loop,do_
      endif
      !
      if (K_INV_EPS%err(iw)/=0) then
-       K_INV_EPS%err(iw)=EPS_via_perturbative_inversion(BS_H_dim,Lo_dim,Lo_n_loop,BSS_dipoles_opt(1,:),-Co_factor,&
+       K_INV_EPS%err(iw)=EPS_via_perturbative_inversion(BS_H_dim,Lo_dim,BSS_dipoles_opt(1,:),-Co_factor,&
 &                                                       Resp_ii(iw,2),Lo(:,iw,:))
      endif
      !
      if(l_BS_photolum) then
        if (K_INV_PL%err(iw)/=0) then
-         K_INV_PL%err(iw)=PL_via_perturbative_inversion(BS_H_dim,Lo_dim,Lo_n_loop,BSS_dipoles_PL,PL_prefactor,&
+         K_INV_PL%err(iw)=PL_via_perturbative_inversion(BS_H_dim,Lo_dim,BSS_dipoles_PL,PL_prefactor,&
 &                                                       PL(iw,2),Lo(:,iw,:))
        endif
      endif
      !
    else
      !
-     do i_L=1,Lo_n_loop
+     do i_L=1,Lo_dim
        !
        if (i_L==1) Gr="r"
        if (i_L==2) Gr="a"
@@ -113,18 +118,18 @@ subroutine PL_compose(i_L_)
      do i_c=1,3
        if (K_slk%kind=="SLK") then
 #if defined _SCALAPACK
-         if(i_L_==1) call PARALLEL_M_by_V('c',BS_H_dim,K_inv_slk,BSS_dipoles_PL(:,i_c),V)
-         if(i_L_==2) call PARALLEL_M_by_V('n',BS_H_dim,K_inv_slk,conjg(BSS_dipoles_PL(:,i_c)),V)
+         if(i_L_==1) call PARALLEL_M_by_V('c',BS_H_dim,K_inv_slk,BSS_dipoles_PL(i_c,:),V)
+         if(i_L_==2) call PARALLEL_M_by_V('n',BS_H_dim,K_inv_slk,conjg(BSS_dipoles_PL(i_c,:)),V)
 #endif
        else
-         if(i_L_==1) call M_by_V('c',BS_H_dim,Mm1,BSS_dipoles_PL(:,i_c),V)
-         if(i_L_==2) call M_by_V('n',BS_H_dim,Mm1,conjg(BSS_dipoles_PL(:,i_c)),V)
+         if(i_L_==1) call M_by_V('c',BS_H_dim,Mm1,BSS_dipoles_PL(i_c,:),V)
+         if(i_L_==2) call M_by_V('n',BS_H_dim,Mm1,conjg(BSS_dipoles_PL(i_c,:)),V)
        endif
        do i_cv=1,BS_H_dim
-         Vp(i_cv)=V(i_cv)*Lo(i_cv,iw,i_L_+Lo_n_loop)
+         Vp(i_cv)=V(i_cv)*Lo(i_cv,iw,i_L_)
        enddo
        PL(iw,2)=PL(iw,2)+PL_prefactor*Vstar_dot_V(BS_H_dim,V,Vp)
-    enddo
+     enddo
    end subroutine
    !
    subroutine EPS_compose(i_L_)
diff --git a/src/bse/K_inversion_restart.F b/src/bse/K_inversion_restart.F
index 0186dcaee8..e82677e2a9 100644
--- a/src/bse/K_inversion_restart.F
+++ b/src/bse/K_inversion_restart.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_inversion_restart(iq,W)
  !
  use pars,           ONLY:SP,cZERO
@@ -19,8 +23,9 @@ subroutine K_inversion_restart(iq,W)
  use BS,             ONLY:l_BS_photolum
  use BS_solvers,     ONLY:K_INV_PL
  use PHOTOLUM,       ONLY:PL
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,             intent(inout) :: iq
  type(w_samp)        ,intent(inout) :: W
diff --git a/src/bse/K_kernel.F b/src/bse/K_kernel.F
index 0a66333988..926fcf97b3 100644
--- a/src/bse/K_kernel.F
+++ b/src/bse/K_kernel.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS AF
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  !
  ! K = <2V-W> for n_spin=1; K=<V-W> for n_spin>1
@@ -18,7 +23,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  use interfaces,    ONLY:WF_load,WF_free,eval_G_minus_G,TDDFT_ALDA_eh_space_R_kernel
  use D_lattice,     ONLY:nsym,DL_vol,i_time_rev,i_space_inv,sop_inv,sop_tab
  use R_lattice,     ONLY:qindx_X,bz_samp,&
-&                        qindx_B,qindx_B_max,qindx_B_load,RIM_W,RIM_W_d,&
+&                        qindx_B,qindx_B_max,RIM_W,RIM_W_d,&
 &                        RIM_ng,RIM_W_is_diagonal,RIM_W_ng
  use com,           ONLY:msg
  use stderr,        ONLY:intc
@@ -26,16 +31,17 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  use X_m,           ONLY:X_t
  use interfaces,    ONLY:el_density_and_current
  use QP_m,          ONLY:QP_ng_Vxc
- use BS_solvers,    ONLY:BSS_kernel_last_stored_block
+ use BS_solvers,    ONLY:BSS_kernel_last_stored_block,load_Diago
  use BS,            ONLY:BS_bands,BS_res_K_corr,BS_W,BS_W_is_diagonal,BS_ID,&
-&                        BS_res_K_exchange,BS_Block_size,BS_n_g_W,&
+&                        BS_res_K_exchange,BS_Block_size,l_BSE_kernel_full,BS_n_g_W,&
 &                        O_ng,BS_n_g_exch,BS_n_g_fxc,BS_identifier,BS_LiveTiming_steps,&
-&                        BS_K_dim,BS_K_is_ALDA,BS_cpl_K_exchange,&
+&                        BS_K_dim,BS_K_is_ALDA,BS_K_coupling,BS_cpl_K_exchange,&
 &                        BS_cpl_K_corr,K_EXCH_collision,K_CORR_collision,&
 &                        WF_phase,n_BS_blks,BS_blk,BS_T_grp,BSK_IO_mode,BSK_IO_sum_value,&
-&                        BS_nT_grps,l_BS_ares_from_res,BS_K_has_been_calculated_loaded,&
+&                        BS_nT_grps,BS_n_eh_spaces,BS_K_has_been_calculated_loaded,&
 &                        l_BSE_minimize_memory,l_BSE_restart,l_BSE_kernel_complete,&
-&                        BS_perturbative_SOC,BS_K_cutoff,BS_max_val,l_BS_magnons
+&                        BS_perturbative_SOC,BS_K_cutoff,BS_max_val,l_BS_magnons,&
+&                        l_BS_ares_from_res
  use collision_el,  ONLY:elemental_collision_free
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:REP,OP_WR,RD,WR,WR_CL,OP_APP,io_BS_K,deliver_IO_error_message
@@ -43,17 +49,23 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
 &                        FXC_mode,tddft_wf_t,l_Fxc_from_Vxc
  use xc_functionals,ONLY:V_xc,F_xc,F_xc_mat,magn,XC_potential_driver
  use global_XC,     ONLY:WF_xc_functional,WF_kind,WF_exx_fraction
- use wave_func,     ONLY:WF
+ use wave_func,     ONLY:WF,rho_map,rho_map_thresh,rho_map_size
  use openmp,        ONLY:OPENMP_update,master_thread
  use timing_m,      ONLY:timing
- use parallel_m,    ONLY:myid,ncpu,PAR_K_scheme,PARALLEL_default_mode,master_cpu
+ use parallel_m,    ONLY:myid,ncpu,PAR_K_scheme,PARALLEL_default_mode,&
+ &                       master_cpu,mpi_comm_world
  use parallel_int,  ONLY:PP_redux_wait
  use MAGNONS,       ONLY:BSS_MAGN_free
  use PHOTOLUM,      ONLY:BSS_PL_free
  use parser_m,      ONLY:parser
+ use mpi
+ ! Using the following line instead of the general use mpi
+ ! Gives compilation error with impi due to MPI_Get and MPI_Win_create
+ !use mpi,           ONLY:MPI_INTEGER,MPI_ADDRESS_KIND,MPI_Win_create,MPI_Win_fence,MPI_Get,&
+ !&                       MPI_Info_create,MPI_Info_set,MPI_info_free
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::Ken 
  type(bz_samp) ::Xk,q
@@ -73,7 +85,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  !
  integer ::     i_k_s,i_k_s_m1,i_p_s,i_k_bz,i_p_bz,i_k,i_p,i_kp_s,& 
 &               i_kmq_s,i_kmq_s_m1,i_pmq_s,i_kmq_bz,i_pmq_bz,i_kmq,i_pmq,i_kp_mq_s,& 
-&               i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,I_Tgrp_k_st,I_Tgrp_p_st,H_pos(2),& 
+&               i_Tk,i_Tp,i_Tgrp_k,i_Tgrp_p,I_Tgrp_k_st,I_Tgrp_p_st,H_pos(2),H_pos_start(2),& 
 &               i_v_k,i_v_p,i_c_k,i_c_p,i_k_sp_pol_c,i_p_sp_pol_c,i_k_sp_pol_v,i_p_sp_pol_v,iq_W,iq_W_bz,iq_W_s,ig_W,& 
 &               i_kmq_t,i_pmq_t,i_k_bz_mem,i_kmq_bz_mem,qindx_tmp(2)
  ! OMP LAST DEF
@@ -91,19 +103,28 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  complex(SP), allocatable DEV_ATTR :: O1(:),O2(:),O_times_W(:)
  !
  complex(SP),   external :: K_correlation_kernel_std,K_correlation_kernel_dir
- complex(SP),   external :: TDDFT_ALDA_eh_space_G_kernel
- complex(SP),   external :: K_exchange_kernel_resonant,K_exchange_kernel_coupling
+ complex(SP),   external :: TDDFT_ALDA_eh_space_G_kernel,TDDFT_ALDA_eh_space_G_kernel_coupling
+ complex(SP),   external :: K_exchange_kernel,K_exchange_kernel_coupling
  !
  logical    :: l_bs_exch,l_bs_corr,l_bs_exch_wf_in_loop,l_bs_tddft_wf_in_loop,l_matrix_init,BS_blk_todo,l_tddft_comp,&
  &             l_load_kernel,l_write_kernel,l_write_kernel_step,l_skip_phases,l_std_alg,l_dir_alg,l_tddft_gsum,l_tddft_rsum
  !
  ! I/O
  !
- integer           ::io_X_err,io_BS_Fxc_err,io_BS_err,qindx_ID,qindx_ID_frag,ID,ID_head,ID_compr,io_QINDX_err
+ integer           ::io_X_err,io_BS_Fxc_err,io_BS_err,ID,ID_head,ID_compr,io_QINDX_err
  integer, external ::io_BS,io_BS_header
- integer, external ::qindx_B_init,qindx_B_close
  integer, external ::io_BS_PAR_init
  !
+ ! MPI window
+ !
+ integer(kind=MPI_ADDRESS_KIND) :: mpi_size
+ integer(kind=MPI_ADDRESS_KIND) :: get_index
+ integer  :: mpi_info_nolocks
+ integer  :: mpi_err
+ integer  :: win1_ID,win2_ID
+ integer  :: N2
+ integer  :: remote_id
+ !
  ! Timing and restart
  !
  logical :: BS_blk_done(n_BS_blks),l_partial_kernel_loaded
@@ -124,6 +145,11 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  !
  call k_build_up_BZ_tables(Xk)
  !
+ if (load_Diago) then
+   call msg('s','[BSE] Kernel calculation skipped, trying to load ndb.BS_diago directly ')
+   return
+ endif
+ !
  ! Titles
  !========
  !
@@ -188,7 +214,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    endif
    l_bs_tddft_wf_in_loop= ((iq==1) .and. (Xk%nbz>=1).and.(.not.trim(PARALLEL_default_mode)=="workload") ) .or. &
    &                      ((iq> 1) .and.                       trim(PARALLEL_default_mode)=="KQmemory" )
-   call msg('r','[BSE] ALDA        components ',BS_n_g_fxc)
+   call msg('r','[BSE] ALDA     components ',BS_n_g_fxc)
  endif
  !
  if (l_bs_corr) then
@@ -211,11 +237,7 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  ID      =BS_ID(2)
  ID_compr=BS_ID(3)
  !
- if( l_BSE_kernel_complete ) return
- !
- io_QINDX_err=0
- if (l_bs_corr) io_QINDX_err=qindx_B_init(qindx_ID,qindx_ID_frag)
- if(io_QINDX_err/=0) call error('Error reading qindx_B database ')
+ if( l_BSE_kernel_complete) return
  !
  ! Screened interaction
  !======================
@@ -300,7 +322,9 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  l_matrix_init=.true.
  l_load_kernel=.true.
  l_write_kernel=.false.
+#if defined _MPI
  l_write_kernel_step= index(BSK_IO_mode,"write_all_steps")/=0
+#endif
  !
  if (l_write_kernel_step) then
    call warning("Experimental mode, IO at every step. Restart not possible")
@@ -331,7 +355,14 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    !
    call WF_load(WF,NG(1),NG(2),(/1,Ken%nbm/),(/1,Xk%nibz/),space='R',title="-TDDFT",keep_states_to_load=.true.)
    !
-   QP_ng_Vxc=BS_n_g_fxc
+   ! This part could be done just by the master bypassing the PAR_IND_WF_linear ...
+   !
+   if(rho_map_thresh>0._SP) then
+     YAMBO_ALLOC(rho,(fft_size))
+     call el_density_and_current(Ken,Xk,rho=rho)
+     call compute_rho_map(rho)
+     YAMBO_FREE(rho)
+   endif
    !
    YAMBO_ALLOC(F_xc,(fft_size,n_spin,n_spin))
    if(l_BS_magnons .or. n_spinor==2 .or. l_Fxc_from_Vxc) then
@@ -342,7 +373,15 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    if(n_spin>1) then
      YAMBO_ALLOC(magn,(fft_size,3))
    endif
+   !
+   ! In principle here there is a factor 4 between the energy cutoff on the potential and the one of the kernel
+   ! The following line, or something similar, should be used here and before
+   !QP_nG=G_index_energy_factor(BS_n_g_fxc,0.25_SP)
+   ! However in practice the one on the kernel is never set 4 the potential, i.e. 16 times the one onf the WFs
+   !
+   QP_ng_Vxc=BS_n_g_fxc
    call XC_potential_driver(Ken,Xk,WF_kind,WF_xc_functional,2)
+   !
    YAMBO_ALLOC(F_xc_mat,(fft_size,n_spin,n_spin,n_spin,n_spin))
    call Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
    YAMBO_FREE(F_xc)
@@ -359,6 +398,28 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    !
  endif
  !
+ if(iHxc==3 .and. trim(PARALLEL_default_mode)=="KQmemory") then
+   !
+   call msg("s","Buffering qindx_B in memory via MPI_Win_create ")
+   !
+   call MPI_Info_create(mpi_info_nolocks, mpi_err)
+   call MPI_Info_set(mpi_info_nolocks, "no_locks",  "true", mpi_err)  ! Disable locking
+   call MPI_Info_set(mpi_info_nolocks, "read_only", "true", mpi_err)  ! Read_only_mode
+   !
+   ! MPI_Win_create will duplicate in memory qindx_B
+   ! If I want to avoid this, I need to use MPI_Win_Alloc ...
+   mpi_size=size(qindx_B(:,:,1))
+   N2=sizeof(qindx_B(1,1,1))
+   call MPI_Win_create(qindx_B(:,:,1), mpi_size*N2, N2, mpi_info_nolocks, mpi_comm_world, win1_ID, mpi_err)
+   call MPI_Win_create(qindx_B(:,:,2), mpi_size*N2, N2, mpi_info_nolocks, mpi_comm_world, win2_ID, mpi_err)
+   call MPI_Win_fence(0, win1_ID, mpi_err)
+   call MPI_Win_fence(0, win2_ID, mpi_err)
+   !
+   ! Free the MPI_Info object as it's no longer needed
+   call MPI_Info_free(mpi_info_nolocks, mpi_err)
+   !
+ endif
+ !
  ! Timing
  !========
  !
@@ -454,9 +515,22 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
  !
  block_loop:&
  do i_block=1,n_BS_blks
+   !
+   ! BS_blk(i_block)%mode can have 4 values: "R", "C", "A", "Q"
+   ! - The resonant block "R" is always computed
+   ! - The coupling block "C":
+   !   (i)  It is computed using the indexes of the resonant block when BS_n_eh_space=1
+   !        In this case some relations are used, which holds at q=0 only and 
+   !        if l_BS_ares_from_res=.true.
+   !   (ii) It is directly computed using the indexes of the coupling block BS_n_eh_space=2
+   !        In this case the code switches to mode_now="F"(ull) and standard relations are used
+   ! - The anti-resonant "A" and anti-coupling "Q" blocks are expliciltly needed only when l_BS_ares_from_res=.false.
+   !   also in this case the code switches to mode_now="F"(ull) and standard relations are used
+   !
+   call timing('X T_space Kernel more',OPR='start')
    !
    mode_now=BS_blk(i_block)%mode
-   if (.not.l_BS_ares_from_res) mode_now="F"
+   if (BS_n_eh_spaces==2) mode_now="F"
    !
    if (iHxc==1) l_write_kernel=.not.(BS_K_is_ALDA.or.l_bs_corr)
    if (iHxc==2) l_write_kernel=.not. l_bs_corr
@@ -500,6 +574,14 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    i_k        =BS_blk(i_block)%ik
    i_p        =BS_blk(i_block)%ip
    !
+   H_pos_start(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)
+   H_pos_start(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)
+   !
+   if (.not.l_BS_ares_from_res) then
+     H_pos_start(1) = H_pos_start(1)+(BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
+     H_pos_start(2) = H_pos_start(2)+(BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1)
+   endif
+   !
    ! Exchange oscillators
    !----------------------
    if(iHxc==1) then
@@ -520,8 +602,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
      l_tddft_comp = .not.(allocated(BS_T_grp(i_Tgrp_k)%O_tddft_L).and.allocated(BS_T_grp(i_Tgrp_p)%O_tddft_R))
      !
      if (l_tddft_comp) then
-       YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor))
-       YAMBO_ALLOC(tddft_wf%WF_symm2,(fft_size,n_spinor))
+       YAMBO_ALLOC(tddft_wf%WF_symm1,(rho_map_size,n_spinor))
+       YAMBO_ALLOC(tddft_wf%WF_symm2,(rho_map_size,n_spinor))
        YAMBO_ALLOC(tddft_wf%rhotwr_DP,(fft_size))
      endif
      !
@@ -542,11 +624,13 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
      !
    endif
    !
+   call timing('X T_space Kernel more',OPR='stop')
+   !
    ! Correlation oscillators
    !----------------------
    if(iHxc==3) then
-     if (l_std_alg) call K_correlation_collisions_std(iq,i_block,qindx_ID_frag,Xk,q)
-     if (l_dir_alg) call K_correlation_collisions_dir(iq,i_block,qindx_ID_frag,Xk,q)
+     if (l_std_alg) call K_correlation_collisions_std(iq,i_block,win1_ID,win2_ID,Xk,q)
+     if (l_dir_alg) call K_correlation_collisions_dir(iq,i_block,win1_ID,win2_ID,Xk,q)
    endif
    !
    call timing('X T_space Kernel sum',OPR='start')
@@ -555,8 +639,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    !------------------
    !
    !DEV_OMP parallel default(shared), &
-   !DEV_OMP private(i_Tk,i_Tp, O1,O2,O_times_W, &
-   !DEV_OMP &       sqrt_f_itk,i_k_bz,i_v_k,i_c_k,i_k_sp_pol_c,i_k_sp_pol_v,i_k_s,is_k,os_k,i_k_s_m1,H_pos, &
+   !DEV_OMP private(i_Tk,i_Tp, O1,O2,O_times_W,H_pos, &
+   !DEV_OMP &       sqrt_f_itk,i_k_bz,i_v_k,i_c_k,i_k_sp_pol_c,i_k_sp_pol_v,i_k_s,is_k,os_k,i_k_s_m1,&
    !DEV_OMP &       sqrt_f_itp,i_p_bz,i_v_p,i_c_p,i_p_sp_pol_c,i_p_sp_pol_v,i_p_s,is_p,os_p,i_kp_s, &
    !DEV_OMP &       i_kmq_bz,ig_kmq,i_kmq,i_kmq_s,i_kmq_t,i_kmq_s_m1,i_kp_mq_s, &
    !DEV_OMP &       i_pmq_bz,ig_pmq,i_pmq,i_pmq_s,i_pmq_t,ig_W_final, &
@@ -568,12 +652,12 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    call OPENMP_update(master_thread) 
    !
    !
-   ! Allocate tddft_wf variables neeeded within the OMP loop
+   ! Allocate variables tddft_wf variables neeeded within the OMP loop
    if (l_tddft_rsum.and.iHxc==2) then
-     YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor))
-     YAMBO_ALLOC(tddft_wf%WF_symm2,(fft_size,n_spinor))
-     YAMBO_ALLOC(tddft_wf%rhotwr1,(fft_size*n_spinor*n_spinor))
-     YAMBO_ALLOC(tddft_wf%rhotwr2,(fft_size*n_spinor*n_spinor))
+     YAMBO_ALLOC(tddft_wf%WF_symm1,(rho_map_size,n_spinor))
+     YAMBO_ALLOC(tddft_wf%WF_symm2,(rho_map_size,n_spinor))
+     YAMBO_ALLOC(tddft_wf%rhotwr1,(rho_map_size*n_spinor*n_spinor))
+     YAMBO_ALLOC(tddft_wf%rhotwr2,(rho_map_size*n_spinor*n_spinor))
    endif
    !
    i_p_bz_last=0   ; i_k_bz_last=0
@@ -598,12 +682,18 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
        !
        if (BS_blk(i_block)%done(i_Tk,i_Tp)=="t".and.l_BSE_restart) cycle
        !
-       H_pos(1) = sum(BS_T_grp(I_Tgrp_k_st:I_Tgrp_k-1)%size)+i_Tk+&
-       &              (BS_T_grp(I_Tgrp_k)%i_res_ares-1)*BS_K_dim(1) 
-       H_pos(2) = sum(BS_T_grp(I_Tgrp_p_st:I_Tgrp_p-1)%size)+i_Tp+& 
-       &              (BS_T_grp(I_Tgrp_p)%i_res_ares-1)*BS_K_dim(1)
+       ! H_pos defines the position inside the full bse matrix when
+       ! l_BS_ares_from_res.  Otherwise it defines the position inside the R/C blocks
        !
-       if (H_pos(1)>H_pos(2)) cycle
+       H_pos(1) = H_pos_start(1)+i_Tk
+       H_pos(2) = H_pos_start(2)+i_Tp
+       !
+       ! When l_BS_ares_from_res=.false., the full coupling block, and
+       ! the upper part of the anti-resonant blocks are also computed
+       !
+       ! When l_BSE_kernel_full the full blocks are computed
+       !
+       if (H_pos(1)>H_pos(2) .and. .not. l_BSE_kernel_full) cycle
        !
        i_k_bz  = BS_T_grp(i_Tgrp_k)%table(i_Tk,1) 
        i_p_bz  = BS_T_grp(i_Tgrp_p)%table(i_Tp,1) 
@@ -636,12 +726,13 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
          if (i_p_bz_last/=i_p_bz.or.i_k_bz_last/=i_k_bz) then 
            i_p_bz_last=i_p_bz 
            i_k_bz_last=i_k_bz 
-           if (i_k_bz_mem==0) then 
-             !DEV_OMP critical 
-             qindx_tmp=qindx_B_load(i_p_bz,i_k_bz,qindx_ID_frag) 
+           if (i_k_bz_mem<0) then
+             get_index = (-i_k_bz_mem-1)*Xk%nbz+i_p_bz-1
+             remote_id=PAR_K_scheme%bz_id(i_k_bz)
+             call MPI_Get(qindx_tmp(1), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win1_ID, mpi_err)
+             call MPI_Get(qindx_tmp(2), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win2_ID, mpi_err)
              iq_W_bz=qindx_tmp(1) 
              ig_W   =qindx_tmp(2) 
-             !DEV_OMP end critical 
            else 
              iq_W_bz=qindx_B(i_p_bz,i_k_bz_mem,1) 
              ig_W   =qindx_B(i_p_bz,i_k_bz_mem,2) 
@@ -675,12 +766,13 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
          if (i_pmq_bz_last/=i_pmq_bz.or.i_kmq_bz_last/=i_kmq_bz) then 
            i_pmq_bz_last=i_pmq_bz 
            i_kmq_bz_last=i_kmq_bz 
-           if (i_kmq_bz_mem==0) then 
-             !DEV_OMP critical 
-             qindx_tmp=qindx_B_load(i_pmq_bz,i_kmq_bz,qindx_ID_frag) 
+           if (i_kmq_bz_mem<0) then 
+             get_index = (-i_kmq_bz_mem-1)*Xk%nbz+i_pmq_bz-1
+             remote_id=PAR_K_scheme%bz_id(i_kmq_bz)
+             call MPI_Get(qindx_tmp(1), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win1_ID, mpi_err)
+             call MPI_Get(qindx_tmp(2), 1, MPI_INTEGER, remote_id, get_index, 1, MPI_INTEGER, win2_ID, mpi_err)
              iq_W_bz_mq=qindx_tmp(1) 
              ig_W_mq   =qindx_tmp(2) 
-             !DEV_OMP end critical 
            else 
              iq_W_bz_mq=qindx_B(i_pmq_bz,i_kmq_bz_mem,1) 
              ig_W_mq   =qindx_B(i_pmq_bz,i_kmq_bz_mem,2) 
@@ -722,13 +814,13 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
        !
        select case (mode_now)
        !
-       case ("R","A","F")
+       case ("R","F")
          !
          ! :::RESONANT/ANTI-RESONANT TERM:::
          !===================================
          !
          ! Exchange
-         if (iHxc==1) H_x=K_exchange_kernel_resonant(iq, BS_n_g_exch,               &
+         if (iHxc==1) H_x=K_exchange_kernel(iq, BS_n_g_exch,               &
 &                                   BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk)
          !
          ! ALDA
@@ -752,10 +844,16 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
 &                                   BS_n_g_W,O1,O2,O_times_W)
          endif
          !         
-       case ("C","Q")
+       case ("C")
          !
          ! :::COUPLING TERM:::
          !=============================
+         ! 
+         ! This implementation is used only at iq=1 when l_BS_ares_from_res=.true.
+         ! It uses only hald of the BS_T_grps, saving memory & cpu time for the oscillators
+         ! Otherwise the exchange and tddft implementation are not corret
+         ! The correlation imlpementation instead should be correct (since iv,ic
+         ! indexes are properly inverted).
          !
          ! Exchange
          if (iHxc==1) H_x=K_exchange_kernel_coupling(iq, BS_n_g_exch,              &
@@ -764,12 +862,15 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
          ! ALDA
          if (iHxc==2) then
            if(l_tddft_rsum)     H_x=H_x+TDDFT_ALDA_eh_space_R_kernel(is_k,os_k,is_p,os_p,tddft_wf,'CPL')
-           if(l_tddft_gsum)     H_x=H_x+TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc,&
+           if(l_tddft_gsum)     H_x=H_x+TDDFT_ALDA_eh_space_G_kernel_coupling(BS_n_g_fxc,&
 &                                 BS_T_grp(i_Tgrp_p),i_Tp, BS_T_grp(i_Tgrp_k),i_Tk)
          endif
          !
          ! Correlations
          if (iHxc==3) then
+           ! To check for magnons
+           ! When doing correlation i invert i_c_p and i_v_p
+           ! Here also i_p_sp_pol_c and i_p_sp_pol_v are inverted ...
            if (l_std_alg)       H_c=K_correlation_kernel_std(i_block,i_p,i_pmq, &
 &                                 i_k_s,i_kp_s,i_c_k,i_v_p,i_kmq_s,i_kp_mq_s,i_v_k,i_c_p,    &
 &                                 i_kmq_t,i_pmq_t,i_k_sp_pol_c,i_p_sp_pol_v,i_k_sp_pol_v,i_p_sp_pol_c,  &
@@ -809,12 +910,12 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
        i_p_sp_pol_v=BS_T_grp(i_Tgrp_p)%table(i_Tp,5) 
 
        if (H_pos(1)==H_pos(2)) then
-         BS_blk(i_block)%table(:,i_Tk)=BS_T_grp(i_Tgrp_k)%table(i_Tk,:)
-         BS_blk(i_block)%E(i_Tk)      =BS_T_grp(i_Tgrp_k)%E(i_Tk,1)
+         BS_blk(i_block)%table(:,i_Tp)=BS_T_grp(i_Tgrp_p)%table(i_Tp,:)
+         BS_blk(i_block)%E(i_Tp)      =BS_T_grp(i_Tgrp_p)%E(i_Tp,1)
          if (mode_now=="C") then
            ! Swap c and v
-           BS_blk(i_block)%table(2,i_Tk)=BS_T_grp(i_Tgrp_k)%table(i_Tk,3)
-           BS_blk(i_block)%table(3,i_Tk)=BS_T_grp(i_Tgrp_k)%table(i_Tk,2)
+           BS_blk(i_block)%table(2,i_Tp)=BS_T_grp(i_Tgrp_p)%table(i_Tp,3)
+           BS_blk(i_block)%table(3,i_Tp)=BS_T_grp(i_Tgrp_p)%table(i_Tp,2)
          endif
        endif
        !
@@ -847,6 +948,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    !
    call timing('X T_space Kernel sum',OPR='stop')
    !
+   call timing('X T_space Kernel more',OPR='start')
+   !
    if(BS_K_cutoff>0._SP.and.l_write_kernel) &
 &     BS_max_val_CPU(myid+1)=maxval( (/BS_max_val_CPU(myid+1),abs(BS_blk(i_block)%mat(:,:))/) )
    !
@@ -875,6 +978,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    !
    call live_timing(steps=i_steps)
    !
+   call timing('X T_space Kernel more',OPR='stop')
+   !
  enddo block_loop
  !
  if (m_steps>0) call live_timing( )
@@ -891,6 +996,8 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
      call TDDFT_oscillators_free_R(BS_nT_grps)
    endif
    !
+   if(rho_map_thresh>0._SP) deallocate(rho_map)
+   !
    if (l_bs_fxc) then
      YAMBO_FREE(F_xc_gspace)
      YAMBO_FREE(FXC_K_diagonal)
@@ -904,7 +1011,13 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
    if (l_std_alg) then
      YAMBO_FREE(WF_phase)
    endif
-   qindx_tmp=qindx_B_close(qindx_ID,qindx_ID_frag)
+   !
+   if(trim(PARALLEL_default_mode)=="KQmemory") then
+     ! Free the window
+     call MPI_Win_free(win1_ID, mpi_err)
+     call MPI_Win_free(win2_ID, mpi_err)
+   endif
+   !
    YAMBO_FREE_GPU(BS_W)
    YAMBO_FREE(BS_W)
    !
diff --git a/src/bse/K_multiply_by_V.F b/src/bse/K_multiply_by_V.F
index f1a205aa12..957f19120a 100644
--- a/src/bse/K_multiply_by_V.F
+++ b/src/bse/K_multiply_by_V.F
@@ -46,7 +46,7 @@ subroutine K_multiply_by_V(Vi,Vo,iter_par,iq,mode)
  !
  if(BSS_uses_FKE) FG_factor_c = cmplx(FG_factor,0._SP)
  !
- forall(i_g=1:BS_nT_grps,PAR_IND_T_Haydock%element_1D(i_g)) Vo(i_g)%fragment = cZERO
+ forall (i_g=1:BS_nT_grps,PAR_IND_T_Haydock%element_1D(i_g)) Vo(i_g)%fragment = cZERO
  !
  do i_B=1,n_BS_blks
    !
@@ -55,11 +55,13 @@ subroutine K_multiply_by_V(Vi,Vo,iter_par,iq,mode)
    ColsFG_Tk = size(Vi(i_Tk)%fragment,2)
    ColsFG_Tp = size(Vi(i_Tp)%fragment,2)
    !
+#if defined _PAR_IO
    if (i_B>BSS_kernel_last_stored_block) then
      call io_control(ACTION=RD,ID=BS_ID(2))
      call io_BS_PAR_block(iq,i_B,BS_ID(2),"matonly")
      call K_block_symmetrize(i_B)
    endif
+#endif
    !
    select case(BS_blk(i_B)%mode)
    case("R")
diff --git a/src/bse/K_multiply_by_V_slepc.F b/src/bse/K_multiply_by_V_slepc.F
index 4cd287e44b..314dc88b82 100644
--- a/src/bse/K_multiply_by_V_slepc.F
+++ b/src/bse/K_multiply_by_V_slepc.F
@@ -5,10 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): HM DS IMA
 !
+! headers
+!
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
 #include <petsc/finclude/petscmat.h>
-#include <petsc/finclude/petscvec.h>
 #include <slepc/finclude/slepcsys.h>
 #include <slepc/finclude/slepceps.h>
 !
diff --git a/src/bse/K_multiply_by_V_slepc_nest.F b/src/bse/K_multiply_by_V_slepc_nest.F
index 7981f3c659..f69f834ae9 100644
--- a/src/bse/K_multiply_by_V_slepc_nest.F
+++ b/src/bse/K_multiply_by_V_slepc_nest.F
@@ -5,6 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): HM DS IMA
 !
+! headers
 !
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
@@ -189,4 +190,4 @@ subroutine K_multiply_by_V_slepc_C(slepc_mat,vi,vo,ierr)
   call VecAssemblyEnd(vo,ierr)
   !
  end subroutine K_multiply_by_V_slepc_C
- 
\ No newline at end of file
+ 
diff --git a/src/bse/K_multiply_by_V_transpose.F b/src/bse/K_multiply_by_V_transpose.F
index f4ea5ff9ac..1ade25480c 100644
--- a/src/bse/K_multiply_by_V_transpose.F
+++ b/src/bse/K_multiply_by_V_transpose.F
@@ -40,18 +40,20 @@ subroutine K_multiply_by_V_transpose(Vi,Vo,iter_par,iq)
  if(.not.BSS_Slepc_double_grp) alpha=-real(iter_par,SP)*cI
  if(     BSS_Slepc_double_grp) alpha=-real(iter_par,SP)
  !
- forall(i_g=1:BS_nT_grps,PAR_IND_T_Haydock%element_1D(i_g)) Vo(i_g)%fragment = cZERO
+ forall (i_g=1:BS_nT_grps,PAR_IND_T_Haydock%element_1D(i_g)) Vo(i_g)%fragment = cZERO
  !
  do i_B=1,n_BS_blks
    !
    i_Tk  =BS_blk(i_B)%iT_k
    i_Tp  =BS_blk(i_B)%iT_p
    !
+#if defined _PAR_IO
    if (i_B>BSS_kernel_last_stored_block) then
      call io_control(ACTION=RD,ID=BS_ID(2))
      call io_BS_PAR_block(iq,i_B,BS_ID(2),"matonly")
      call K_block_symmetrize(i_B)
    endif
+#endif
    !
    select case(BS_blk(i_B)%mode)
    case("R")
diff --git a/src/bse/K_multiply_by_V_transpose_slepc.F b/src/bse/K_multiply_by_V_transpose_slepc.F
index 36c9c4a8da..05ad4b0666 100644
--- a/src/bse/K_multiply_by_V_transpose_slepc.F
+++ b/src/bse/K_multiply_by_V_transpose_slepc.F
@@ -5,10 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): HM DS IMA
 !
+! headers
+!
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
 #include <petsc/finclude/petscmat.h>
-#include <petsc/finclude/petscvec.h>
 #include <slepc/finclude/slepcsys.h>
 #include <slepc/finclude/slepceps.h>
 !
diff --git a/src/bse/K_observables.F b/src/bse/K_observables.F
index 636e7bbc4a..0888cd0903 100644
--- a/src/bse/K_observables.F
+++ b/src/bse/K_observables.F
@@ -14,11 +14,11 @@ subroutine K_observables(W,Xk)
  use frequency,    ONLY:w_samp
  use X_m,          ONLY:global_gauge
  use R_lattice,    ONLY:bz_samp
- use com,          ONLY:exp_user
- use BS_solvers,   ONLY:run_Haydock,run_inversion,run_Diago,BSS_mode
+ use BS_solvers,   ONLY:run_Haydock,run_inversion,run_Diago,load_Diago,BSS_mode
  use BS,           ONLY:BS_dip_size,l_BS_anomalous_Hall,l_BS_abs,l_BS_jdos,l_BS_trace,BSE_prop,&
 &                       l_BS_esort,l_BS_kerr,l_BS_magnons,l_BS_dichroism,l_BS_anomalous_Hall,l_BS_photolum,&
-&                       l_BS_optics,BS_dip_size,l_BS_magnons,l_BS_photolum,l_BS_kerr_asymm,l_BS_esort_indx
+&                       l_BS_optics,BS_dip_size,l_BS_magnons,l_BS_photolum,l_BS_kerr_asymm,l_BS_esort_indx,&
+&                       l_BS_mespin,l_BS_meorb
  use parser_m,     ONLY:parser
  use stderr,       ONLY:STRING_match
  use electrons,    ONLY:levels,n_sp_pol,n_spin
@@ -39,9 +39,11 @@ subroutine K_observables(W,Xk)
  l_BS_dichroism = STRING_match(BSE_prop,'dich')
  l_BS_kerr      = STRING_match(BSE_prop,'kerr')
  l_BS_photolum  = STRING_match(BSE_prop,'photolum')
- l_BS_magnons   = STRING_match(BSE_prop,'magn') .and.n_spin>1
- l_BS_kerr_asymm= STRING_match(BSE_prop,'asymm') .and. l_BS_kerr
+ l_BS_kerr_asymm= STRING_match(BSE_prop,'asymm')  .and. l_BS_kerr
  l_BS_anomalous_Hall = STRING_match(BSE_prop,'anHall') .and. l_BS_kerr
+ l_BS_magnons = STRING_match(BSE_prop,'magn')   .and.n_spin>1
+ l_BS_MEspin = STRING_match(BSE_prop,'MEspin') .and.n_spin>1
+ l_BS_MEorb = STRING_match(BSE_prop,'MEorb')  .and.n_spin>1
  !
  ! How ...
  !---------
@@ -72,8 +74,7 @@ subroutine K_observables(W,Xk)
  endif
  if (l_BS_dichroism) then
    if (.not.(l_col_cut.and.Xk%nbz==1) ) then
-     if(.not.exp_user) call error(" Periodic system detected. Natural dichroism coded only for isolated systems")
-     if(     exp_user) call warning(" Periodic system detected. Natural dichroism coded only for isolated systems")
+     call exp_user_warning(" Periodic system detected. Natural dichroism coded only for isolated systems")
    endif
    if (.not.l_BS_trace) then
      !  NB: in the case without trace the quadripolar terms should be taken into account,
@@ -82,26 +83,36 @@ subroutine K_observables(W,Xk)
      call warning(" Natural dichroism without trace. Quadripolar terms are neglected")
    endif
  endif
+ if (l_BS_meorb) then
+   if (.not.(l_col_cut.and.Xk%nbz==1) ) then
+     call warning(" Periodic system detected. Orbital ME tensor not fully implemented.")
+   else
+     call warning(" Non-periodic system detected. Orbital ME tensor assumes periodic system.")
+   endif
+   if (.not.l_BS_trace) BS_dip_size=2
+ endif
  !
  if (l_BS_trace   ) BS_dip_size = 3
  if (l_BS_trace .and. .not.l_rpa_IP) call error(" trace coded only at the IP level")
  !
- l_BS_optics=l_BS_abs.or.l_BS_kerr.or.l_BS_dichroism.or.l_BS_photolum
+ l_BS_optics=l_BS_abs.or.l_BS_kerr.or.l_BS_dichroism.or.l_BS_photolum.or.l_BS_mespin.or.l_BS_meorb
  !
  ! Now fix BSE_prop string
  !
  BSE_prop=""
- if(l_BS_abs)            BSE_prop=trim(BSE_prop)//' abs'
- if(l_BS_jdos)           BSE_prop=trim(BSE_prop)//' jdos'
- if(l_BS_dichroism)      BSE_prop=trim(BSE_prop)//' dich'
- if(l_BS_kerr)           BSE_prop=trim(BSE_prop)//' kerr'
- if(l_BS_kerr_asymm)     BSE_prop=trim(BSE_prop)//' asymm'
- if(l_BS_anomalous_Hall) BSE_prop=trim(BSE_prop)//' anHall'
- if(l_BS_photolum)       BSE_prop=trim(BSE_prop)//' photolum'
- if(l_BS_magnons)        BSE_prop=trim(BSE_prop)//' magn'
- if(l_BS_esort)          BSE_prop=trim(BSE_prop)//' esrt'
- if(l_BS_esort)          BSE_prop=trim(BSE_prop)//' indx'
- if(l_BS_trace)          BSE_prop=trim(BSE_prop)//' trace'
+ if(l_BS_abs)             BSE_prop=trim(BSE_prop)//' abs'
+ if(l_BS_jdos)            BSE_prop=trim(BSE_prop)//' jdos'
+ if(l_BS_dichroism)       BSE_prop=trim(BSE_prop)//' dich'
+ if(l_BS_kerr)            BSE_prop=trim(BSE_prop)//' kerr'
+ if(l_BS_kerr_asymm)      BSE_prop=trim(BSE_prop)//' asymm'
+ if(l_BS_anomalous_Hall)  BSE_prop=trim(BSE_prop)//' anHall'
+ if(l_BS_photolum)        BSE_prop=trim(BSE_prop)//' photolum'
+ if(l_BS_magnons)         BSE_prop=trim(BSE_prop)//' magn'
+ if(l_BS_esort)           BSE_prop=trim(BSE_prop)//' esrt'
+ if(l_BS_esort)           BSE_prop=trim(BSE_prop)//' indx'
+ if(l_BS_trace)           BSE_prop=trim(BSE_prop)//' trace'
+ if(l_BS_mespin)          BSE_prop=trim(BSE_prop)//' MEspin'
+ if(l_BS_meorb)           BSE_prop=trim(BSE_prop)//' MEorb'
  !
  ! Solver Logicals
  !=================
@@ -111,6 +122,9 @@ subroutine K_observables(W,Xk)
 #if defined _SLEPC && !defined _NL
  run_Slepc     = STRING_match(BSS_mode,'s')
 #endif
+ !
+ load_Diago = STRING_match(BSS_mode,'l').and.run_Diago
+ !
  if (l_BS_photolum.and.run_Haydock) then
    call warning('PL currently implemented only using the diagonalization/inversion solver')
    run_Haydock  =.false.
diff --git a/src/bse/K_screened_interaction.F b/src/bse/K_screened_interaction.F
index 4dba870e9b..6ddcd94b38 100644
--- a/src/bse/K_screened_interaction.F
+++ b/src/bse/K_screened_interaction.F
@@ -5,13 +5,16 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine K_screened_interaction(X,Xw,q,io_X_err)
  !
  use pars,         ONLY:SP
  use stderr,       ONLY:intc
- use com,          ONLY:exp_user
  use frequency,    ONLY:w_samp
  use drivers,      ONLY:l_td_hf,l_tddft,l_rim_w
  use collision_el, ONLY:elemental_collision,elemental_collision_free
@@ -26,8 +29,9 @@ subroutine K_screened_interaction(X,Xw,q,io_X_err)
 &                       OP_RD,RD,RD_CL,RD_CL_IF_END
  use devxlib,      ONLY:devxlib_memcpy_d2d,devxlib_memcpy_h2d,devxlib_memcpy_d2h
  use gpu_m,        ONLY:have_gpu
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::q
  type(X_t)     ::X
@@ -65,10 +69,7 @@ subroutine K_screened_interaction(X,Xw,q,io_X_err)
    if(     l_rim_w) call section('=','Screened interaction header I/O')
    !===================================================
    !
-   if (X%ng<BS_n_g_W) then
-     if (     exp_user) call warning("Using screening with less G than Weh interaction")
-     if (.not.exp_user) call   error("Using screening with less G than Weh interaction")
-   endif
+   if (X%ng<BS_n_g_W) call exp_user_warning("Using screening with less G than Weh interaction")
    !
    call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1,2/),MODE=VERIFY,ID=XID)
    io_X_err=io_X(X,Xw,XID) 
@@ -142,7 +143,7 @@ subroutine K_screened_interaction(X,Xw,q,io_X_err)
        return
      endif
      !
-     forall(i_g2=1:X%ng) X_mat(i_g2,i_g2,1)=X_mat(i_g2,i_g2,1)+1._SP
+     forall (i_g2=1:X%ng) X_mat(i_g2,i_g2,1)=X_mat(i_g2,i_g2,1)+1._SP
      !
      ! device <- host transfer
      if (have_gpu) call devxlib_memcpy_h2d(X_mat_p,X_mat)
diff --git a/src/bse/K_shell_matrix.F b/src/bse/K_shell_matrix.F
index b368a44a02..0fec7d9b8e 100644
--- a/src/bse/K_shell_matrix.F
+++ b/src/bse/K_shell_matrix.F
@@ -5,40 +5,41 @@
 !
 ! Authors (see AUTHORS file for details): HM IMA
 !
+! headers
+!
+#include <petsc/finclude/petscsys.h>
+#include <petsc/finclude/petscvec.h>
+#include <petsc/finclude/petscmat.h>
+#include <slepc/finclude/slepcsys.h>
+#include <slepc/finclude/slepceps.h>
+#include <y_memory.h>
+!
 subroutine K_shell_matrix(i_BS_mat,slepc_mat)
  !
  ! Create a PETSC ShellMatrix and define the matrix-vector product
  ! function using the routine K_multiply_by_V 
  !
- use BS,             ONLY:BS_H_dim, BS_nT_grps,BS_res_ares_n_mat,BS_K_coupling,&
-&                         BS_Blocks_symmetrize_K,l_BS_ares_from_res
- use BS_solvers,     ONLY:BSS_kernel_IO_on_the_fly,BSS_slepc_pseudo_herm
- !
  !      | (K_r)     (cI*K_c)    |  
  !  K = |                       |
  !      | (-cI*K_c^*)  (-K_r^*) |
  !
- use BS_solvers,     ONLY:Slepc_v,BS_HAYVEC_alloc
- !
-#include <petsc/finclude/petscsys.h>
-#include <petsc/finclude/petscvec.h>
-#include <petsc/finclude/petscmat.h>
-#include <petsc/finclude/petscvec.h>
-#include <slepc/finclude/slepcsys.h>
-#include <slepc/finclude/slepceps.h>
- !
+ use BS,             ONLY:BS_H_dim,BS_res_ares_n_mat,BS_K_coupling,&
+&                         BS_Blocks_symmetrize_K,l_BS_ares_from_res
+ use BS_solvers,     ONLY:BSS_kernel_IO_on_the_fly,BSS_slepc_pseudo_herm
+ use BS_solvers,     ONLY:Slepc_v
  use petscmat
  use slepceps
  use petscmatdef
  use slepcepsdef
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  integer, intent(in)  :: i_BS_mat
  Mat,     intent(out) :: slepc_mat
  Mat                  :: slepc_mat_R,slepc_mat_C
  !
- integer          :: i_B,domk(BS_nT_grps)
+ integer          :: i_B
  PetscInt         :: SL_H_dim,SL_K_dim
  PetscErrorCode   :: ierr
  external K_multiply_by_V_slepc
@@ -52,13 +53,7 @@ subroutine K_shell_matrix(i_BS_mat,slepc_mat)
  !
  ! Allocate haydock vectors
  ! 
- call PARALLEL_Haydock_VEC_COMMs('assign')
  Slepc_v%it=1
- allocate(Slepc_v%Vi(BS_nT_grps))
- allocate(Slepc_v%Vo(BS_nT_grps))
- domk=1
- call BS_HAYVEC_alloc(Slepc_v%Vi,domk)
- call BS_HAYVEC_alloc(Slepc_v%Vo,domk)
  !
  ! Allocate slepc shell matrix
  !
@@ -68,6 +63,7 @@ subroutine K_shell_matrix(i_BS_mat,slepc_mat)
  !
  if(BS_K_coupling) then
    if (BSS_slepc_pseudo_herm) then
+#if PETSC_VERSION_GE(3,22,0)
      call MatCreateShell(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,SL_H_dim/2,SL_H_dim/2,0,slepc_mat_R,ierr)
      call MatSetOption(slepc_mat_R,MAT_HERMITIAN,PETSC_TRUE,ierr)
      call MatCreateShell(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,SL_H_dim/2,SL_H_dim/2,0,slepc_mat_C,ierr)
@@ -77,6 +73,7 @@ subroutine K_shell_matrix(i_BS_mat,slepc_mat)
      call MatCreateBSE(slepc_mat_R,slepc_mat_C,slepc_mat,ierr)
      call MatDestroy(slepc_mat_R,ierr)
      call MatDestroy(slepc_mat_C,ierr)
+#endif
    else
      call MatCreateShell(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,SL_H_dim,SL_H_dim,0,slepc_mat,ierr)
      call MatShellSetOperation(slepc_mat,MATOP_MULT,K_multiply_by_V_slepc,ierr)
diff --git a/src/bse/K_solvers.F b/src/bse/K_solvers.F
index bf4e33c874..374217ea55 100644
--- a/src/bse/K_solvers.F
+++ b/src/bse/K_solvers.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM IMA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
  !
  use stderr,         ONLY:intc
@@ -12,18 +16,19 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
  use electrons,      ONLY:levels
  use R_lattice,      ONLY:bz_samp
  use X_m,            ONLY:X_t
- use BS_solvers,     ONLY:BSS_desc,BSS_free,BS_mat,run_Haydock,&
+ use BS_solvers,     ONLY:BSS_desc,BSS_free,BS_mat,run_Haydock,load_Diago,&
 &                         run_inversion,run_Diago,run_Slepc,BSS_kernel_IO_on_the_fly
- use BS,             ONLY:BS_K_has_been_calculated_loaded,BS_ID,&
-&                         l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism
+ use BS,             ONLY:BS_K_has_been_calculated_loaded,BS_ID,BS_perturbative_SOC,&
+&                         l_BS_abs,l_BS_kerr,l_BS_magnons,l_BS_dichroism,&
+&                         l_BS_mespin,l_BS_meorb,l_BS_jdos
  use parallel_m,     ONLY:master_cpu,CPU_structure
  use IO_int,         ONLY:IO_and_Messaging_switch
- use linear_algebra, ONLY:INV,DIAGO
  use PHOTOLUM,       ONLY:BSS_PL_free
  use MAGNONS,        ONLY:BSS_MAGN_free
- use DICHROISM,      ONLY:BSS_DICH_free
+ use DICHROISM,      ONLY:BSS_DICH_free,BSS_MEspin_free,BSS_MEorb_free
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::Ken
  type(bz_samp)::Xk,q
@@ -33,11 +38,18 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
  !
  logical :: l_partial_kernel_loaded
  !
- ! If the BS database was not loaded in memory, call K_restart
- !
- if (.not. BS_K_has_been_calculated_loaded ) call K_restart(iq,X_static,BS_ID(1),BS_ID(2),BS_ID(3),l_partial_kernel_loaded)
+ if ( (run_Haydock.or.run_inversion) .and. BS_perturbative_SOC) &
+ &  call error("Perturbative SOC requires eigenvectors. Use diago or slec solver")
  !
- if (.not. BS_K_has_been_calculated_loaded ) call error("Something went wrong in K_restart, cannot proceed with solver")
+ if (.not. load_Diago) then
+   !
+   ! If the BS database was not loaded in memory, call K_restart
+   !
+   if (.not. BS_K_has_been_calculated_loaded ) call K_restart(iq,X_static,BS_ID(1),BS_ID(2),BS_ID(3),l_partial_kernel_loaded)
+   !
+   if (.not. BS_K_has_been_calculated_loaded ) call error("Something went wrong in K_restart, cannot proceed with solver")
+   !
+ endif
  !
  call section('-','BSE solver(s) @q'//trim(intc(iq)))
  !
@@ -56,25 +68,11 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
  !
  ! Diagonalization
  !
- if (run_Diago) then 
-   !
-   call PARALLEL_assign_LA_COMMs("Response_T_space",DIAGO,CPU_structure(4)%nCPU_lin_algebra_DIAGO)
-   !
-   call K_diago_driver(iq,W_bss,X_static)
-   !
- endif
- !
- if (run_Slepc) call K_diago_driver(iq,W_bss,X_static)
+ if (run_Diago .or. run_Slepc) call K_diago_driver(iq,W_bss,X_static)
  !
  ! Inversion
  !
- if (run_inversion) then 
-   !
-   call PARALLEL_assign_LA_COMMs("Response_T_space",INV,CPU_structure(4)%nCPU_lin_algebra_INV)
-   !
-   call K_inversion_driver(iq,W_bss,Ken,Xk,q)
-   !
- endif
+ if (run_inversion) call K_inversion_driver(iq,W_bss,Ken,Xk,q)
  !
  ! CLEAN: diago/invertion workspace
  !
@@ -82,6 +80,8 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
  call BSS_MAGN_free( )
  call BSS_DICH_free( )
  call BSS_PL_free( )
+ call BSS_MEspin_free()
+ call BSS_MEorb_free()
  !
  ! CLEAN: BS_mat
  !
@@ -103,10 +103,13 @@ subroutine K_solvers(iq,Ken,Xk,q,X_static,W_bss)
    ! A possible alternative could be to use the algorithm with Cf
    ! from both the left and the right hand side to get from a single Haydock loop
    !
+   !if (l_BS_jdos)    call K_Haydock(iq,W_bss,"jdos",Xk)
    if (l_BS_abs)     call K_Haydock(iq,W_bss,"optics",Xk)
    if (l_BS_kerr)    call K_Haydock(iq,W_bss,"kerr",Xk)
    if (l_BS_magnons) call K_Haydock(iq,W_bss,"magnons",Xk)
    if (l_BS_dichroism) call K_Haydock(iq,W_bss,"dichroism",Xk)
+   if (l_BS_mespin) call K_Haydock(iq,W_bss,"mespin",Xk)
+   if (l_BS_meorb) call K_Haydock(iq,W_bss,"meorb",Xk)
    !
  endif
  !
diff --git a/src/bse/K_stored_in_a_big_matrix.F b/src/bse/K_stored_in_a_big_matrix.F
index 9da817778e..0fc933d7eb 100644
--- a/src/bse/K_stored_in_a_big_matrix.F
+++ b/src/bse/K_stored_in_a_big_matrix.F
@@ -5,21 +5,43 @@
 !
 ! Authors (see AUTHORS file for details): AM MG DS
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine K_stored_in_a_big_matrix(i_BS_mat,BS_mat_dim,BS_mat,iq,what)
  !
  ! Here I fill the kernel (coupling included) in an entire BIG matrix to be
  ! used for diagonalization and/or invertion
  !
+ ! Below how the matrix is computed
+ ! - coupling (c) and anti-coupling (q) blocks are computed only if coupling is requested
+ ! - in case 2 the diagonal terms of coupling and anti-coupling are both computed, although this is not needed
+ ! - cases 3 and 4 are for debugging purposes only
+ !
+ !         Case 1                           Case 2                              Case 3                          Case 4
+ !   l_BS_ares_from_res=.true.      l_BS_ares_from_res=.false.           l_BS_ares_from_res=.true.        l_BS_ares_from_res=.false.
+ !   l_BSE_matrix_full=.false.      l_BSE_matrix_full=.false.            l_BSE_matrix_full=.true.         l_BSE_matrix_full=.true.
+ !   coupling block is a square     coupling block maybe rectangular     coupling block is a square       coupling block maybe rectangular
+ !
+ !       (r r r c c c)                    (r r r c c)                        (r r r c c c)                    (r r r c c)
+ !       (- r r - c c)                    (- r r c c)                        (r r r c c c)                    (r r r c c)
+ !       (- - r - - c)                    (- - r c c)                        (r r r c c c)                    (r r r c c)
+ !       (- - - - - -)                    (- - - a a)                        (- - - - - -)                    (q q q a a)
+ !       (- - - - - -)                    (- - - - a)                        (- - - - - -)                    (q q q a a)
+ !       (- - - - - -)                                                       (- - - - - -)
+ !
  use pars,           ONLY:SP,cI,cZERO,cONE
  use parallel_int,   ONLY:PP_redux_wait
  use BS,             ONLY:BS_K_coupling,n_BS_blks,BS_res_ares_n_mat,&
-&                         l_BS_ares_from_res
+&                         l_BS_ares_from_res,l_BSE_kernel_full
  use BS_solvers,     ONLY:BS_blk,BS_H_dim,BS_K_dim,&
 &                         BSS_perturbative_width,run_inversion,run_Haydock,&
 &                         BSS_eh_E,BSS_eh_W
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  integer,      intent(in) :: i_BS_mat,BS_mat_dim,iq
  complex(SP), allocatable, target :: BS_mat(:,:) 
@@ -46,8 +68,15 @@ subroutine K_stored_in_a_big_matrix(i_BS_mat,BS_mat_dim,BS_mat,iq,what)
    !
    H_shift=0
    if(BS_blk(i_B)%mode=="C") H_shift(2)=BS_K_dim(1)
+   if(BS_blk(i_B)%mode=="Q") H_shift(1)=BS_K_dim(1)
    if(BS_blk(i_B)%mode=="A") H_shift(:)=BS_K_dim(1)
    !
+   ! Skip Q and A block
+   if (   l_BS_ares_from_res   .and. H_shift(1)>0) cycle
+   !
+   ! Skip Q block
+   if ((.not.l_BSE_kernel_full).and. H_shift(1)>H_shift(2)) cycle
+   !
    do i_r=1,BS_blk(i_B)%size(1)
      !
      H_pos(1)=BS_blk(i_B)%coordinate(1)+i_r-1
@@ -56,10 +85,13 @@ subroutine K_stored_in_a_big_matrix(i_BS_mat,BS_mat_dim,BS_mat,iq,what)
        !
        H_pos(2)=BS_blk(i_B)%coordinate(2)+i_c-1
        !
-       ! Then the upper triangle of each block and direct symmetrization
-       !
-       if (H_pos(1)+H_shift(1)>H_pos(2)+H_shift(2)) cycle
-       if (l_BS_ares_from_res.and.H_pos(1)>H_pos(2)) cycle
+       if ( .not.l_BSE_kernel_full) then
+         ! The upper triangle only of the whole matrix
+         if( H_pos(1)+H_shift(1)>H_pos(2)+H_shift(2)) cycle
+         !
+         ! The upper triangle only of each block
+         if (l_BS_ares_from_res  .and.  H_pos(1)>H_pos(2)) cycle
+       endif
        !
        if(     allocated(BS_blk(i_B)%mat)) Mij=BS_blk(i_B)%mat(i_r,i_c)
        if(.not.allocated(BS_blk(i_B)%mat)) Mij=cZERO
@@ -77,35 +109,47 @@ subroutine K_stored_in_a_big_matrix(i_BS_mat,BS_mat_dim,BS_mat,iq,what)
        case("R")
            BS_mat(H_pos(1)            ,H_pos(2)            )=       Mij
            ! The resonant block is hermitial
-           BS_mat(H_pos(2)            ,H_pos(1)            )= conjg(Mij)
+           if (.not.l_BSE_kernel_full) &
+           & BS_mat(H_pos(2)          ,H_pos(1)            )= conjg(Mij)
            if (l_BS_ares_from_res.and.BS_K_coupling) then
              ! The anti-resonant block is A=-R*
              BS_mat(H_pos(1)+BS_K_dim(1),H_pos(2)+BS_K_dim(1))=-conjg(Mij)
              ! The anti-resonant block is hermitian
-             BS_mat(H_pos(2)+BS_K_dim(1),H_pos(1)+BS_K_dim(1))=      -Mij
+             if (.not.l_BSE_kernel_full) &
+             & BS_mat(H_pos(2)+BS_K_dim(1),H_pos(1)+BS_K_dim(1))=    -Mij
            endif
        case("C")
            BS_mat(H_pos(1)            ,H_pos(2)+BS_K_dim(1))=       Mij
-           ! Anti-coupling from coupling: the whole BSE matrix is Pseudo-HErmitian
-           BS_mat(H_pos(2)+BS_K_dim(1),H_pos(1)            )=-conjg(Mij)
+           ! Anti-coupling from coupling: the whole BSE matrix is Pseudo-Hermitian
+           if (.not.l_BSE_kernel_full) &
+           & BS_mat(H_pos(2)+BS_K_dim(1),H_pos(1)            )=-conjg(Mij)
+           ! In case l_BS_ares_from_res=.false. the coupling an anti-couling 
+           ! bocks maybe rectangular and I cannot symmetrize them
            if (l_BS_ares_from_res) then
              ! The coupling block and the anti-coupling block are symmetric
              BS_mat(H_pos(2)            ,H_pos(1)+BS_K_dim(1))=       Mij
-             BS_mat(H_pos(1)+BS_K_dim(1),H_pos(2)            )=-conjg(Mij)
+             if (.not.l_BSE_kernel_full) &
+             & BS_mat(H_pos(1)+BS_K_dim(1),H_pos(2)            )=-conjg(Mij)
            endif
        case("A")
-         ! The anti-resonant block is hermitial
          if(BS_res_ares_n_mat==1) then
+           ! Case with coupling
            BS_mat(H_pos(1)+BS_K_dim(1),H_pos(2)+BS_K_dim(1))=       Mij
-           BS_mat(H_pos(2)+BS_K_dim(1),H_pos(1)+BS_K_dim(1))= conjg(Mij)
+           ! The anti-resonant block is hermitial
+           if (.not.l_BSE_kernel_full) &
+           & BS_mat(H_pos(2)+BS_K_dim(1),H_pos(1)+BS_K_dim(1))= conjg(Mij)
          else
+           ! Case without coupling, there are two BS_mat
            BS_mat(H_pos(1)            ,H_pos(2)            )=       Mij
-           BS_mat(H_pos(2)            ,H_pos(1)            )= conjg(Mij)
+           ! The anti-resonant block is hermitial
+           if (.not.l_BSE_kernel_full) &
+           & BS_mat(H_pos(2)            ,H_pos(1)            )= conjg(Mij)
          endif
        case("Q")
            BS_mat(H_pos(1)+BS_K_dim(1),H_pos(2)            )=       Mij
            ! Coupling from anti-coupling: the whole BSE matrix is Pseudo-HErmitian
-           BS_mat(H_pos(2)            ,H_pos(1)+BS_K_dim(1))=-conjg(Mij)
+           if (.not.l_BSE_kernel_full) &
+           & BS_mat(H_pos(2)            ,H_pos(1)+BS_K_dim(1))=-conjg(Mij)
        end select
        !
      enddo
diff --git a/src/bse/K_stored_in_a_nest_matrix.F b/src/bse/K_stored_in_a_nest_matrix.F
index 5b7fd1b61f..d8aab2b722 100644
--- a/src/bse/K_stored_in_a_nest_matrix.F
+++ b/src/bse/K_stored_in_a_nest_matrix.F
@@ -10,7 +10,6 @@
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
 #include <petsc/finclude/petscmat.h>
-#include <petsc/finclude/petscvec.h>
 #include <slepc/finclude/slepcsys.h>
 #include <slepc/finclude/slepceps.h>
 !
@@ -51,6 +50,7 @@ subroutine K_stored_in_a_nest_matrix(i_BS_mat,iq,slepc_mat)
  PetscInt            :: H_pos(2),SL_K_dim(2),SL_H_dim
  PetscErrorCode      :: ierr
  !
+#if PETSC_VERSION_GE(3,22,0)
  call timing('BSE mat slepc redist',OPR='start')
  !
  SL_H_dim=BS_H_dim
@@ -165,5 +165,6 @@ subroutine K_stored_in_a_nest_matrix(i_BS_mat,iq,slepc_mat)
  call MatCreateBSE(R,C,slepc_mat,ierr);
  !
  call timing('BSE mat slepc redist',OPR='stop')
+#endif
  !
 end subroutine K_stored_in_a_nest_matrix
diff --git a/src/bse/K_stored_in_a_slepc_matrix.F b/src/bse/K_stored_in_a_slepc_matrix.F
index 6b3b73d826..6578f4faf7 100644
--- a/src/bse/K_stored_in_a_slepc_matrix.F
+++ b/src/bse/K_stored_in_a_slepc_matrix.F
@@ -10,7 +10,6 @@
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
 #include <petsc/finclude/petscmat.h>
-#include <petsc/finclude/petscvec.h>
 #include <slepc/finclude/slepcsys.h>
 #include <slepc/finclude/slepceps.h>
 !
@@ -24,7 +23,7 @@ subroutine K_stored_in_a_slepc_matrix(i_BS_mat,iq,slepc_mat)
  !
  use pars,           ONLY:cI,cONE,lchlen
  use BS,             ONLY:BS_K_dim,BS_H_dim,BS_blk,n_BS_blks,BS_K_coupling,&
- &                        BS_res_ares_n_mat,l_BS_ares_from_res,BS_ID
+ &                        BS_res_ares_n_mat,l_BSE_kernel_full,l_BS_ares_from_res,BS_ID
  use BS_solvers,     ONLY:BSS_eh_E,BSS_eh_W,BSS_perturbative_width,&
  &                        run_inversion,run_Haydock,BSS_kernel_last_stored_block
  use IO_int,         ONLY:io_control
@@ -49,6 +48,8 @@ subroutine K_stored_in_a_slepc_matrix(i_BS_mat,iq,slepc_mat)
  !
  call timing('BSE mat slepc redist',OPR='start')
  !
+ if (l_BSE_kernel_full) call error("Slepc with matrix not coded with full BSE mode")
+ !
  if(     BS_K_coupling) SL_H_dim=BS_H_dim
  if(.not.BS_K_coupling) SL_H_dim=BS_K_dim(i_BS_mat)
  SL_K_dim=BS_K_dim
diff --git a/src/bse/YDIAGO_driver.F b/src/bse/LDIAGO_driver.F
similarity index 98%
rename from src/bse/YDIAGO_driver.F
rename to src/bse/LDIAGO_driver.F
index f37a63bf65..61830121fb 100644
--- a/src/bse/YDIAGO_driver.F
+++ b/src/bse/LDIAGO_driver.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): NM
 !
-subroutine YDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
+subroutine LDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
     & neigs_this_cpu, neig_shift, neigs_range, eigvals_range, &
     & BS_VL, BS_overlap, solver_type, elpasolver)
   !
@@ -30,7 +30,7 @@ subroutine YDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
   use openmp,         ONLY:n_threads_K
   use LIVE_t,         ONLY:live_timing
   use com,            ONLY:msg
-  use ydiago_interface
+  use ldiago_interface
 #if defined _MPI
   USE mpi
 #endif
@@ -95,6 +95,10 @@ subroutine YDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
   integer                            :: evec_fac = 1
   ! evec_fac = 2 if bse_solver function was used to diagonalize else 1
 
+#if !defined _MPI
+  call error("LDIAGO implementation needs MPI. Do not use LDIAGO solver.")
+#endif
+
   if(PRESENT(neigs_range)) then
     if(neigs_range(1)>0) then
       neig_tmp=neigs_range(2)-neigs_range(1)+1
@@ -428,4 +432,4 @@ subroutine YDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
   neigs_this_cpu = evec_fac*neigs_this_cpu
   neig_shift     = evec_fac*neig_shift
   !
-end subroutine YDIAGO_driver 
+end subroutine LDIAGO_driver 
diff --git a/src/bse/PL_diago_residual.F b/src/bse/PL_diago_residual.F
index 811ada9a96..f71802cdc6 100644
--- a/src/bse/PL_diago_residual.F
+++ b/src/bse/PL_diago_residual.F
@@ -5,37 +5,49 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_overlap) 
+subroutine PL_diago_herm_residual(BS_mat,BS_R_PL) 
  !
- ! PL Residuals
+ ! Resonant PL Residuals
+ !
+  ! Resonant Residuals
+ !====================
+ ! BS_R_right(i) = \sum_k <q0|k><k|i> =
+ !                 \sum_k BS_mat(k,i) x conjg( BSS_dipoles(k) )
+ ! BS_R_left(i)  = conjg( BS_R_right(i) )
  !
  use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper,        ONLY:V_dot_V
  use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
  use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
  use LIVE_t,         ONLY:live_timing
- use BS,             ONLY:BS_H_dim,BS_K_dim
- use BS_solvers,     ONLY:BSS_eh_f
+ use com,            ONLY:msg
+ use BS,             ONLY:BS_H_dim,BS_K_dim,BS_dip_size
+ use BS_solvers,     ONLY:BSS_eh_f_RES,BSS_n_eig, BS_E
  use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ use functions,      ONLY:boltzman_f
+#if defined _RT
+ use RT_control,     ONLY:NEQ_Residuals
+#endif
+ use D_lattice,      ONLY:Boltz_Temp
  !
  implicit none
  !
- logical,    intent(in)  :: K_is_not_hermitian
- complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BS_H_dim),BS_V_right(BS_H_dim,BS_H_dim)
- real(SP)   ,intent(out) :: BS_R_PL(2,BS_H_dim)
- complex(SP),optional,intent(in)  :: BS_overlap(BS_H_dim,BS_H_dim)
+ complex(SP),intent(in)  :: BS_mat(BS_K_dim(1),BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
  !
  ! Workspace
  !
  type(PP_indexes)     ::px
- integer              ::i_l,i_c,i_K,i_lr,i_ll
- complex(SP)          ::P_x_fZ(2,BS_H_dim,3),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
-&                       R_over_R_x_A(2,BS_H_dim),B(BS_H_dim)
- !
+ logical              ::compute_ares_explicitly
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_K_dim(1),3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A(2),&
+&                       R_over_R_x_A(BS_K_dim(1),2)
+!Write to a file
  ! Parallel indexes
  !
  call PP_indexes_reset(px)
- call PARALLEL_index(px,(/BS_H_dim/))
+ call PARALLEL_index(px,(/BSS_n_eig/))
  !
  ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
  !
@@ -45,115 +57,191 @@ subroutine PL_diago_residual(BS_V_left,BS_V_right,BS_R_PL,K_is_not_hermitian,BS_
  P_x_fZ        =cZERO
  R_over_R_x_A  =cZERO
  !
- !
  ! Live-Timing
  !
  call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ !
+ ! Hermitian residual
+ !====================
+ !
+ ! 1=> left
+ ! 2=> right
+ !
+#if defined _RT
+ if (NEQ_Residuals .and. (Boltz_Temp > 0._SP) ) then
+   call error('The user can specify either boltzmann or external residual occupations, not both')
+ endif
+ ! 
+ if (NEQ_Residuals) call msg('s','Running with NEQ Residuals')
+#else
+ if (.not. (Boltz_Temp > 0._SP) ) call error("Boltz temperature not set in input")
+#endif
+ if (       Boltz_Temp > 0._SP  ) call msg('s','Running with Boltz excitonic occupations')
+ !
+ do i_c=1,BS_dip_size
+   do i_K=1,BS_K_dim(1)
+     P_x_fZ(i_K,i_c,1)=conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(cmplx(BSS_eh_f_RES(i_K))) !Pi^* left sqrt(R)
+     P_x_fZ(i_K,i_c,2)=      BSS_dipoles_PL(i_c,i_K) *sqrt(cmplx(BSS_eh_f_RES(i_K))) !Pi^* left sqrt(R)
+   enddo
+ enddo
+ !
+ do i_l=1,BSS_n_eig
+  !
+  if (.not.px%element_1D(i_l)) cycle !parallelization
+  !
+  do i_c=1,BS_dip_size
+    !dim(P_x_fz) = BS_H_dim
+    ! we have a problem in this multiplication
+    Pi_x_R_x_adA_left(i_c) =V_dot_V_omp(BS_K_dim(1),P_x_fZ(:,i_c,2),      BS_mat(:,i_l) ) !resonan Pi^* Ar*sqrt(R)                                   
+    Pi_x_R_x_adA_right(i_c)=V_dot_V_omp(BS_K_dim(1),P_x_fZ(:,i_c,1),conjg(BS_mat(:,i_l))) !resonan Pi^* Ar*sqrt(R)                                   
+    ! Pi_x_R_x_adA_left =   Pi sqrt(R) dipoles_PL*BS_mat/R
+  enddo
+  !
+  !Check dimensions for these vectors to be consistent that we have only resonant and antiresonant
+  BS_R_PL(:,i_l)=real(V_dot_V(BS_dip_size,Pi_x_R_x_adA_left,Pi_x_R_x_adA_right))
+  !
+#if defined _RT
+  ! R_over_R_x_A = R<_j/R_j A^r_ij
+  ! Here I create an If statement if I want to populate L lesser with electornic or excitonic populations
+  if (NEQ_Residuals) then
+    !                       
+    ! R_over_R_x_A = R<_j/R_j A^l_j
+    !
+    R_over_R_x_A(:,1) = BSS_PL_f(:,1)*BS_mat(:,i_l)/BSS_eh_f_RES(:)
+    R_over_R_x_A(:,2) = BSS_PL_f(:,2)*BS_mat(:,i_l)/BSS_eh_f_RES(:)
+    !
+    ! adA_x_R_over_R_x_A = (A^l_j)* R<_j/R_j A^l_j
+    !
+    adA_x_R_over_R_x_A(1)=V_dot_V_omp(BS_K_dim(1),conjg(BS_mat(:,i_l)),R_over_R_x_A(:,1))
+    adA_x_R_over_R_x_A(2)=V_dot_V_omp(BS_K_dim(1),conjg(BS_mat(:,i_l)),R_over_R_x_A(:,2))
+    !
+  else if (Boltz_Temp .gt. 0) then
+#endif
+    !
+    adA_x_R_over_R_x_A=boltzman_f(real(BS_E(i_l)-BS_E(1)))
+    !
+#if defined _RT
+  endif
+#endif
+  !
+  BS_R_PL(1,i_l)=BS_R_PL(1,i_l)*adA_x_R_over_R_x_A(1) !resonant
+  BS_R_PL(2,i_l)=BS_R_PL(2,i_l)*adA_x_R_over_R_x_A(2) !resonant
+  !Pi*Al*Pi^**Ar*Al*R</R*Ar
+  !
+ enddo
+ !
+ call live_timing()
+ !
+ call PP_redux_wait(BS_R_PL)
+ !
+ call PP_indexes_reset(px)
+ !
+end subroutine PL_diago_herm_residual
+
+subroutine PL_diago_non_herm_residual(BS_V_left,BS_V_right,BS_R_PL,BS_overlap) 
  !
  ! Non-Hermitian residual
  !========================
  !
- if (K_is_not_hermitian) then
-   !
-   ! 1=> left
-   ! 2=> right
-   !
-   do i_c=1,3
-     do i_K=1,BS_K_dim(1)
-       P_x_fZ(1,i_K,i_c)=conjg(BSS_dipoles_PL(i_K,i_c))*sqrt(BSS_eh_f(i_K))
-       P_x_fZ(2,i_K,i_c)=      BSS_dipoles_PL(i_K,i_c) *sqrt(BSS_eh_f(i_K))
-     enddo
-     do i_K=BS_K_dim(1)+1,BS_H_dim
-       P_x_fZ(1,i_K,i_c)=cI*conjg(BSS_dipoles_PL(i_K,i_c))*sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
-       P_x_fZ(2,i_K,i_c)=cI*      BSS_dipoles_PL(i_K,i_c) *sqrt(BSS_eh_f(i_K-BS_K_dim(1)))
-     enddo
-   enddo
-   !
-   do i_lr=1,BS_H_dim
-     !
-     if (.not.px%element_1D(i_lr)) cycle
-     !
-     ! B_i = \sum_lp Overlap^*(lp,l) A^ll_i
-     !
-     B=cZERO
-     do i_ll=1,BS_H_dim
-       B(:)=B(:)+conjg(BS_overlap(i_ll,i_lr))*BS_V_left(:,i_ll)
-     enddo
-     !
-     ! R_over_R_x_A = R<_j/R_j B^l_j
-     !
-     R_over_R_x_A(1,:) = BSS_PL_f(:)*B(:)/BSS_eh_f(:)
-     !
-     ! adA_x_R_over_R_x_A = B^l*_j    R<_j/R_j B^l_j
-     !
-     adA_x_R_over_R_x_A(1)=Vstar_dot_V_omp(BS_H_dim,B,R_over_R_x_A(1,:))
-     !
-     do i_c=1,3
-       !
-       ! Pi_x_R_x_adA_left =   sqrt(R) Pi_i* A^lr_i
-       !
-       Pi_x_R_x_adA_left(i_c) =V_dot_V_omp(BS_H_dim,P_x_fZ(1,:,i_c),BS_V_right(:,i_lr))
-       !
-       ! Pi_x_R_x_adA_right=   A^lr*_i Pi_i sqrt(R)  
-       !
-       Pi_x_R_x_adA_right(i_c)=Vstar_dot_V_omp(BS_H_dim,BS_V_right(:,i_lr),P_x_fZ(2,:,i_c))
-       !
-     enddo
-     !
-     BS_R_PL(1,i_lr)=real((Pi_x_R_x_adA_left(1)*Pi_x_R_x_adA_right(1)+&
-&                          Pi_x_R_x_adA_left(2)*Pi_x_R_x_adA_right(2)+&
-&                          Pi_x_R_x_adA_left(3)*Pi_x_R_x_adA_right(3))*adA_x_R_over_R_x_A(1))
-     !
-     call live_timing(steps=1)
-     !
-   enddo
-   !
- else
-   !
-   ! Hermitian residual
-   !====================
-   !
-   do i_c=1,3
-     P_x_fZ(1,:,i_c)=BSS_dipoles_PL(:,i_c)*sqrt(BSS_eh_f(:))
-   enddo
-   !
-   do i_l=1,BS_K_dim(1)
-     !
-     if (.not.px%element_1D(i_l)) cycle
-     !                       
-     ! R_over_R_x_A = R<_j/R_j A^l_j
-     !
-     R_over_R_x_A(1,:) =  BSS_PL_f(1:BS_K_dim(1)) *BS_V_right(:,i_l)/BSS_eh_f(:)
-     R_over_R_x_A(2,:) =  BSS_PL_f(BS_K_dim(1)+1:)*BS_V_right(:,i_l)/BSS_eh_f(:)
-     !
-     ! adA_x_R_over_R_x_A = (A^l_j)* R<_j/R_j A^l_j
-     !
-     adA_x_R_over_R_x_A(1)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(:,i_l)),R_over_R_x_A(1,:))
-     adA_x_R_over_R_x_A(2)=V_dot_V_omp(BS_K_dim(1),conjg(BS_V_right(:,i_l)),R_over_R_x_A(2,:))
-     !
-     do i_c=1,3
-       !                                   
-       ! Pi_x_R_x_adA_right =   Pi sqrt(R)  A^l_i
-       !
-       Pi_x_R_x_adA_right(i_c)=V_dot_V_omp(BS_K_dim(1),BS_V_right(:,i_l),P_x_fZ(1,:,i_c))
-       !
-     enddo
-     !
-     ! Note that the 2nd component (anti-resonant) should take a -1  from BSS_eh_f and a i^2 from sqrt(R)
-     ! that cancel out.
-     !
-     BS_R_PL(:,i_l)=dot_product(Pi_x_R_x_adA_right,Pi_x_R_x_adA_right)*adA_x_R_over_R_x_A(:)
-     !
-     call live_timing(steps=1)
-     !
-   enddo
-   !
- endif
+ ! Resonant PL Residuals
+ !
+ use pars,           ONLY:SP,rZERO,cZERO,cI
+ use wrapper_omp,    ONLY:V_dot_V_omp,Vstar_dot_V_omp
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
+ use LIVE_t,         ONLY:live_timing
+ use BS,             ONLY:BS_H_dim,BS_K_dim,BS_dip_size
+ use BS_solvers,     ONLY:BSS_eh_f_RES,BSS_n_eig
+ use PHOTOLUM,       ONLY:BSS_dipoles_PL,BSS_PL_f
+ !
+ implicit none
+ !
+ complex(SP),intent(in)  :: BS_V_left(BS_H_dim,BSS_n_eig),BS_V_right(BS_H_dim,BSS_n_eig)
+ real(SP)   ,intent(out) :: BS_R_PL(2,BSS_n_eig)
+ complex(SP),optional,intent(in)  :: BS_overlap(BSS_n_eig,BSS_n_eig)
+ !
+ ! Workspace
+ !
+ type(PP_indexes)     ::px
+ integer              ::i_l,i_c,i_K,i_lr,i_ll, ik
+ complex(SP)          ::P_x_fZ(BS_H_dim,3,2),Pi_x_R_x_adA_left(3),Pi_x_R_x_adA_right(3),adA_x_R_over_R_x_A,&
+&                       R_over_R_x_A(BS_H_dim),B_var(BS_H_dim)
+ !
+ ! Parallel indexes
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/BSS_n_eig/))
+ !
+ ! sqrt(R_i) Re[ (Pi_i)* A^l_i (A^l_j)* R<_j/R_j A^l_j (A^l_k)* Pi_k ] sqrt(R_k)
+ !
+ ! Zeroing
+ !
+ BS_R_PL       =rZERO
+ P_x_fZ        =cZERO
+ R_over_R_x_A  =cZERO
+ !
+ ! Live-Timing
+ !
+ call live_timing('PL    residuals',px%n_of_elements(myid+1))
+ !
+ ! 1=> left
+ ! 2=> right
+ !
+ do i_c=1,BS_dip_size
+    do i_K=1,BS_H_dim
+      P_x_fZ(i_K,i_c,1)=conjg(BSS_dipoles_PL(i_c,i_K))*sqrt(cmplx(BSS_eh_f_RES(i_K)))
+      P_x_fZ(i_K,i_c,2)=      BSS_dipoles_PL(i_c,i_K)* sqrt(cmplx(BSS_eh_f_RES(i_K)))
+    enddo
+  enddo
+  !
+  do i_lr=1,BS_H_dim
+    !
+    if (.not.px%element_1D(i_lr)) cycle
+    !
+    ! B_i = \sum_ll Overlap^*(ll,lr) A^ll_i
+    !
+    B_var=cZERO
+    do i_ll=1,BS_H_dim
+      B_var(:)=B_var(:)+conjg(BS_overlap(i_ll,i_lr))*BS_V_left(:,i_ll)
+    enddo
+    !
+    ! R_over_R_x_A = R<_j/R_j B^l_j
+    !
+    do i_ll=1,BS_H_dim
+      R_over_R_x_A(i_ll) = BSS_PL_f(i_ll,1)*B_var(i_ll)/BSS_eh_f_RES(i_ll)
+    enddo
+    !
+    ! adA_x_R_over_R_x_A = B^l*_j    R<_j/R_j B^l_j
+    !
+    adA_x_R_over_R_x_A=Vstar_dot_V_omp(BS_H_dim,B_var,R_over_R_x_A)
+    !
+    do i_c=1,BS_dip_size
+      !
+      ! Pi_x_R_x_adA_left =   sqrt(R) Pi_i* A^lr_i
+      !
+      Pi_x_R_x_adA_left(i_c) =V_dot_V_omp(BS_H_dim,P_x_fZ(:,i_c,1),BS_V_right(:,i_lr))
+      !
+      ! Pi_x_R_x_adA_right=   A^lr*_i Pi_i sqrt(R)  
+      !
+      Pi_x_R_x_adA_right(i_c)=Vstar_dot_V_omp(BS_H_dim,BS_V_right(:,i_lr),P_x_fZ(:,i_c,2))
+      !
+      BS_R_PL(1,i_lr)=BS_R_PL(1,i_lr)+real(Pi_x_R_x_adA_left(i_c)*Pi_x_R_x_adA_right(i_c))
+      !
+    enddo
+    !
+    BS_R_PL(1,i_lr)=BS_R_PL(1,i_lr)*adA_x_R_over_R_x_A
+    !
+    call live_timing(steps=1)
+    !
+  enddo
  !
  call live_timing()
  !
  call PP_redux_wait(BS_R_PL)
  !
+ ! In the coupling case, BSS_add_ares=.false. always
+ BS_R_PL(2,:)=cZERO
+ !
  call PP_indexes_reset(px)
  !
-end subroutine PL_diago_residual
+end subroutine PL_diago_non_herm_residual
diff --git a/src/bse/PL_via_perturbative_inversion.F b/src/bse/PL_via_perturbative_inversion.F
index 5910870a45..d36d5d41c8 100644
--- a/src/bse/PL_via_perturbative_inversion.F
+++ b/src/bse/PL_via_perturbative_inversion.F
@@ -3,9 +3,9 @@
 !
 ! Copyright (C) 2016 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM
+! Authors (see AUTHORS file for details): AM PM
 !
-integer function PL_via_perturbative_inversion(n,Lo_dim,n_loop,D,C,f,Lo)
+integer function PL_via_perturbative_inversion(n,Lo_dim,D,C,f,Lo)
  !
  use pars,           ONLY:SP,rZERO
  use wrapper_omp,    ONLY:Vstar_dot_V_omp,M_by_V_omp
@@ -13,16 +13,16 @@ integer function PL_via_perturbative_inversion(n,Lo_dim,n_loop,D,C,f,Lo)
  !
  implicit none
  !
- integer,    intent(in)  ::n,Lo_dim,n_loop
+ integer,    intent(in)  ::n,Lo_dim
  real(SP),   intent(in)  ::C
- complex(SP),intent(in)  ::D(n,3),Lo(n,Lo_dim)
+ complex(SP),intent(in)  ::D(3,N),Lo(n,Lo_dim)
  real(SP)   ,intent(out) ::f
  !
  ! Work Space
  !
  integer     ::BSS_inv_iter_max=51
  integer     ::it,ic
- complex(SP) ::R_x_D_nm1(n,n_loop,3),R_x_D_n(n,n_loop,3),WK(n),CUMULATIVE(n,n_loop,3)
+ complex(SP) ::R_x_D_nm1(n,Lo_dim,3),R_x_D_n(n,Lo_dim,3),WK(n),CUMULATIVE(n,Lo_dim,3)
  real(SP)    ::f_previous,fo,delta_f
  !
  PL_via_perturbative_inversion=0
@@ -32,12 +32,12 @@ integer function PL_via_perturbative_inversion(n,Lo_dim,n_loop,D,C,f,Lo)
  fo=rZERO
  !
  do ic=1,3
-   R_x_D_nm1(:,1,ic)=conjg(Lo(:,1))*D(:,ic)
-   WK=Lo(:,1+n_loop)*R_x_D_nm1(:,1,ic)
+   R_x_D_nm1(:,1,ic)=conjg(Lo(:,1))*D(ic,:)
+   WK=Lo(:,1)*R_x_D_nm1(:,1,ic)
    fo=fo+C*Vstar_dot_V_omp(n,R_x_D_nm1(:,1,ic),WK)
-   if(n_loop==2) then
-     R_x_D_nm1(:,2,ic)=Lo(:,2)*conjg(D(:,ic))
-     WK=Lo(:,2+n_loop)*R_x_D_nm1(:,2,ic)
+   if(Lo_dim==2) then
+     R_x_D_nm1(:,2,ic)=Lo(:,2)*conjg(D(ic,:))
+     WK=Lo(:,2)*R_x_D_nm1(:,2,ic)
      fo=fo+C*Vstar_dot_V_omp(n,R_x_D_nm1(:,2,ic),WK)
    endif
  enddo
@@ -62,10 +62,10 @@ integer function PL_via_perturbative_inversion(n,Lo_dim,n_loop,D,C,f,Lo)
        call M_by_V_omp('n',n,BS_mat,R_x_D_nm1(:,1,ic),R_x_D_n(:,1,ic))
      endif
      CUMULATIVE(:,1,ic)=CUMULATIVE(:,1,ic)+conjg(Lo(:,1))*R_x_D_n(:,1,ic)
-     WK=Lo(:,1+n_loop)*CUMULATIVE(:,1,ic)
+     WK=Lo(:,1)*CUMULATIVE(:,1,ic)
      f=f+C*Vstar_dot_V_omp(n,CUMULATIVE(:,1,ic),WK)
      !
-     if (n_loop==2) then
+     if (Lo_dim==2) then
        if (K_slk%kind=="SLK") then
 #if defined _SCALAPACK
          call PARALLEL_M_by_V('n',n,K_slk,R_x_D_nm1(:,2,ic),R_x_D_n(:,2,ic))
@@ -74,7 +74,7 @@ integer function PL_via_perturbative_inversion(n,Lo_dim,n_loop,D,C,f,Lo)
          call M_by_V_omp('n',n,BS_mat,R_x_D_nm1(:,2,ic),R_x_D_n(:,2,ic))
        endif
        CUMULATIVE(:,2,ic)=CUMULATIVE(:,2,ic)+Lo(:,2)*R_x_D_n(:,2,ic)
-       WK=Lo(:,2+n_loop)*CUMULATIVE(:,2,ic)
+       WK=Lo(:,2)*CUMULATIVE(:,2,ic)
        f=f+C*Vstar_dot_V_omp(n,CUMULATIVE(:,2,ic),WK)
        !
      endif
diff --git a/src/bse/RT_project.dep b/src/bse/RT_project.dep
index ba43b0f31e..053b62454a 100644
--- a/src/bse/RT_project.dep
+++ b/src/bse/RT_project.dep
@@ -2,4 +2,5 @@
  K_Transitions_setup.o
  K_driver_init.o
  K_inversion_driver.o
+ PL_diago_residual.o
 
diff --git a/src/bse/SC_project.dep b/src/bse/SC_project.dep
new file mode 100644
index 0000000000..30d039b87a
--- /dev/null
+++ b/src/bse/SC_project.dep
@@ -0,0 +1,6 @@
+  EPS_via_perturbative_inversion.o
+ K_diago_driver.o
+ K_inversion_driver.o
+ K_inversion_engine.o
+ PL_via_perturbative_inversion.o
+
diff --git a/src/bz_ops/.objects b/src/bz_ops/.objects
index c8824fdfaf..ec8e8a3ffa 100644
--- a/src/bz_ops/.objects
+++ b/src/bz_ops/.objects
@@ -1,2 +1,2 @@
 objs = k_lattice.o k_expand.o k_ibz2bz.o k_reduce.o k_build_up_BZ_tables.o \
-       bz_samp_indexes.o k_the_nearest.o k_map_nearest_by_dir.o 
+       k_small_group.o bz_samp_indexes.o k_the_nearest.o k_map_nearest_by_dir.o 
diff --git a/src/bz_ops/DOUBLE_project.dep b/src/bz_ops/DOUBLE_project.dep
index 19ed906216..604673dfc2 100644
--- a/src/bz_ops/DOUBLE_project.dep
+++ b/src/bz_ops/DOUBLE_project.dep
@@ -5,5 +5,6 @@
  k_lattice.o
  k_map_nearest_by_dir.o
  k_reduce.o
+ k_small_group.o
  k_the_nearest.o
 
diff --git a/src/bz_ops/bz_samp_indexes.F b/src/bz_ops/bz_samp_indexes.F
index 971cd4a298..7f433ac9fd 100644
--- a/src/bz_ops/bz_samp_indexes.F
+++ b/src/bz_ops/bz_samp_indexes.F
@@ -3,7 +3,11 @@
 !
 ! Copyright (C) 2006 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM
+! Authors (see AUTHORS file for details): AM DS
+!
+! headers
+!
+#include<y_memory.h>
 !
 subroutine bz_samp_indexes(en,k,Xk,q)
  !
@@ -26,8 +30,8 @@ subroutine bz_samp_indexes(en,k,Xk,q)
  ! qindx_C(ikbz,iqbz,1)=okbz
  ! qindx_C(ikbz,iqbz,2)=iGo
  !
- use pars,           ONLY:SP,lchlen,zero_dfl
- use drivers,        ONLY:l_bse,l_elel_scatt,l_setup,l_nl_optics
+ use pars,           ONLY:SP,schlen,lchlen,zero_dfl
+ use drivers,        ONLY:l_bse,l_elel_scatt,l_setup,l_nl_optics,l_EXCPH_gkkp
  use com,            ONLY:msg,fat_log
  use parallel_m,     ONLY:PAR_K_scheme,master_cpu,PP_indexes,myid,PP_indexes_reset
  use parallel_int,   ONLY:PP_wait,PP_redux_wait,PARALLEL_index,PARALLEL_global_indexes
@@ -51,11 +55,12 @@ subroutine bz_samp_indexes(en,k,Xk,q)
 #if defined _ELPH
  use ELPH,           ONLY:elph_use_q_grid
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
- type(levels) ::en
- type(bz_samp)::k,Xk,q
+ type(levels),      intent(in)    :: en
+ type(bz_samp),     intent(inout) :: k,Xk,q
  !
  ! Work Space
  !
@@ -67,24 +72,31 @@ subroutine bz_samp_indexes(en,k,Xk,q)
 #if defined _ELPH
  logical :: bse_setup_scatt
 #endif
+ character(schlen)    :: convention
  integer, external    :: G_index
  character(lchlen)    :: string(2)
  integer, allocatable :: q_map(:,:,:),q_iptbz(:,:)
  type(PP_indexes)     :: PAR_IND_Xk,PAR_IND_k
- integer, external    :: io_QINDX 
+ integer, external    :: io_QINDX
+ integer, external    :: io_kpts
  !
  call section('*','Transferred momenta grid and indexing')
  !========================================================
  ! .... defaults
  X_scattering    =.true.
  sigma_scattering=.true.
- bse_scattering  =l_bse .and. (BS_res_K_corr.or.BS_cpl_K_corr)
- coll_scattering =l_elel_scatt
+ bse_scattering  =(l_bse .and. (BS_res_K_corr.or.BS_cpl_K_corr))
+ coll_scattering =(l_elel_scatt.or.l_EXCPH_gkkp)
+ convention="minus_q"
  !
  ! ... User defined
+ !
 #if defined _ELPH
  call parser('BSEscatt',bse_setup_scatt)
- if (.not.bse_scattering ) bse_scattering =elph_use_q_grid.or.bse_setup_scatt.or.trim(q_source)=="Electron-Phonon databases"
+ call warning(' BSE and COLL scattering equal to TRUE')
+! if (.not.bse_scattering ) bse_scattering =elph_use_q_grid.or.bse_setup_scatt.or.trim(q_source)=="Electron-Phonon databases"
+ bse_scattering=.TRUE.
+ coll_scattering=.TRUE.
 #endif
  !
 #if defined _NL
@@ -104,6 +116,7 @@ subroutine bz_samp_indexes(en,k,Xk,q)
    coll_scattering= index(k_GRIDS_string,"C")/=0
    X_scattering=    index(k_GRIDS_string,"X")/=0
    sigma_scattering=index(k_GRIDS_string,"S")/=0
+   if (index(k_GRIDS_string,"plus_q")/=0) convention="plus_q"
  endif
  !
  ! ... some over-rule needed
@@ -115,9 +128,17 @@ subroutine bz_samp_indexes(en,k,Xk,q)
  !
  q%description='q'
  !
+ !call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=io_db)
+ !io_err=io_kpts(k,Xk,q,io_db)
+ !!
+ !if (io_err/=0) then
+ !  call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1,2/),MODE=VERIFY,ID=io_db)
+ !  io_err=io_kpts(k,Xk,q,io_db)
+ !endif
+ !
  call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1,2,3,4,6/),MODE=VERIFY,ID=io_db)
+ io_err=io_QINDX(Xk,q,io_db,trim(convention))
  !
- io_err=io_QINDX(Xk,q,io_db)
  if (io_err==0) then
    call k_expand(q)
    d3q_factor=RL_vol/real(q%nbz)
@@ -290,10 +311,12 @@ subroutine bz_samp_indexes(en,k,Xk,q)
      if ((.not.do_qindx_X_ik).and.(.not.do_qindx_B)) cycle
      !
      do ikbz_p=1,Xk%nbz
+       !
+       if (trim(convention)=="minus_q") v1=Xk%ptbz(ikbz,:)-Xk%ptbz(ikbz_p,:) ! K-K_p= Q + Go
+       if (trim(convention)=="plus_q" ) v1=Xk%ptbz(ikbz_p,:)-Xk%ptbz(ikbz,:) ! K_p-K= Q + Go
        !
        if(Xk_grid_new_mapping) then
          !  
-         v1=Xk%ptbz(ikbz,:)-Xk%ptbz(ikbz_p,:)
          v1=v1-nint(v1)       ! Bring in the BZ, remove the G0
          where(v1<-zero_dfl)  ! Bring between [0,1)
            v1(:)=1._SP+v1(:)
@@ -307,8 +330,8 @@ subroutine bz_samp_indexes(en,k,Xk,q)
          enddo
          !
        else
-         !  
-         v1=matmul(k_b_m1,Xk%ptbz(ikbz,:)-Xk%ptbz(ikbz_p,:)) ! K-K_p= Q + Go
+         !
+         v1=matmul(k_b_m1,v1)
          iv1=nint(v1)
          call k_grid_shift(iv1)
          !
@@ -325,7 +348,8 @@ subroutine bz_samp_indexes(en,k,Xk,q)
        if (do_qindx_X) qindx_X(iqibz,ikbz,1)=ikbz_p
        if (do_qindx_B) qindx_B(ikbz_p,ikbz_mem,1)=iqbz
        !
-       v1=Xk%ptbz(ikbz,:)-Xk%ptbz(ikbz_p,:)-q%ptbz(iqbz,:)
+       if (trim(convention)=="minus_q") v1=Xk%ptbz(ikbz,:)-Xk%ptbz(ikbz_p,:)-q%ptbz(iqbz,:)
+       if (trim(convention)=="plus_q" ) v1=Xk%ptbz(ikbz_p,:)-Xk%ptbz(ikbz,:)-q%ptbz(iqbz,:)
        call c2a(v_in=v1,mode='ka2i')
        !
        if (do_qindx_X) qindx_X(iqibz, ikbz    ,2)=G_index(v1,.true.)
@@ -402,11 +426,13 @@ subroutine bz_samp_indexes(en,k,Xk,q)
      !
      do ikbz_p=1,k%nbz
        !
-       if (.not.PAR_IND_k%element_1D(ikbz_p)) cycle 
+       if (.not.PAR_IND_k%element_1D(ikbz_p)) cycle
+       !
+       if (trim(convention)=="minus_q") v1=k%ptbz(ikbz,:)-k%ptbz(ikbz_p,:) ! K-K_p= Q + Go
+       if (trim(convention)=="plus_q" ) v1=k%ptbz(ikbz_p,:)-k%ptbz(ikbz,:) ! K_p-K= Q + Go
        !
        if(Xk_grid_new_mapping) then
          !
-         v1=Xk%ptbz(ikbz,:)-Xk%ptbz(ikbz_p,:)
          v1=v1-nint(v1)       ! Bring in the BZ, remove the G0
          where(v1<-zero_dfl)  ! Bring between [0,1)
            v1(:)=1._SP+v1(:)
@@ -420,8 +446,8 @@ subroutine bz_samp_indexes(en,k,Xk,q)
          enddo
          !
        else
-         !   
-         v1=matmul(k_b_m1,k%ptbz(ikbz,:)-k%ptbz(ikbz_p,:))
+         !
+         v1=matmul(k_b_m1,v1)
          iv1=nint(v1)
          !
          if (.not.v_is_zero(v1-real(iv1,SP),zero_=local_zero)) then
@@ -439,7 +465,8 @@ subroutine bz_samp_indexes(en,k,Xk,q)
        !
        if (iks==1) qindx_S(ikibz,iqbz,1)=ikbz_p
        if (coll_scattering) qindx_C(ikbz,iqbz,1)=ikbz_p
-       v1=k%ptbz(ikbz,:)-k%ptbz(ikbz_p,:)-q%ptbz(iqbz,:)
+       if (trim(convention)=="minus_q") v1=k%ptbz(ikbz,:)-k%ptbz(ikbz_p,:)-q%ptbz(iqbz,:)
+       if (trim(convention)=="plus_q" ) v1=k%ptbz(ikbz_p,:)-k%ptbz(ikbz,:)-q%ptbz(iqbz,:)
        call c2a(v_in=v1,mode='ka2i')
        if (iks==1) qindx_S(ikibz,iqbz,2)=G_index(v1,.false.)
        if (coll_scattering) qindx_C(ikbz,iqbz,2)=G_index(v1,.false.)
@@ -494,7 +521,7 @@ subroutine bz_samp_indexes(en,k,Xk,q)
  !
  if(master_cpu) then
    call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1,2,3,4,6/),ID=io_db)
-   io_err=io_QINDX(Xk,q,io_db)
+   io_err=io_QINDX(Xk,q,io_db,trim(convention))
  endif
  !
  ! Before writing section 5 (qindx_B) I need to wait that the master is done
@@ -510,7 +537,7 @@ subroutine bz_samp_indexes(en,k,Xk,q)
  if (do_qindx_B) then
    call io_control(ACTION=OP_APP_CL,COM=REP,SEC=(/5/),ID=io_db,&
 &                  COMM=PAR_K_scheme%COM_ibz_index,DO_it=PAR_K_scheme%COM_ibz_A2A%CPU_id==0)
-   io_err=io_QINDX(Xk,q,io_db)
+   io_err=io_QINDX(Xk,q,io_db,trim(convention))
  endif
  !
  !CLEAN
@@ -543,6 +570,8 @@ subroutine k_GRIDS_update()
      if (bse_scattering  ) k_GRIDS_string=trim(k_GRIDS_string)//" B"
      if (coll_scattering ) k_GRIDS_string=trim(k_GRIDS_string)//" C"
      if (sigma_scattering) k_GRIDS_string=trim(k_GRIDS_string)//" S"
+     if (trim(convention)=="plus_q" ) k_GRIDS_string=trim(k_GRIDS_string)//" plus_q"
+     if (trim(convention)=="minus_q") k_GRIDS_string=trim(k_GRIDS_string)//" minus_q"
      !
      if (len_trim(k_GRIDS_string)==0) k_GRIDS_string="none"
      !
diff --git a/src/bz_ops/k_build_up_BZ_tables.F b/src/bz_ops/k_build_up_BZ_tables.F
index 89bf37e520..0d2be509d1 100644
--- a/src/bz_ops/k_build_up_BZ_tables.F
+++ b/src/bz_ops/k_build_up_BZ_tables.F
@@ -3,7 +3,11 @@
 !
 ! Copyright (C) 2015 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM
+! Authors (see AUTHORS file for details): AM DS
+!
+! headers
+!
+#include<y_memory.h>
 !
 subroutine k_build_up_BZ_tables(k)
  !
@@ -11,7 +15,7 @@ subroutine k_build_up_BZ_tables(k)
  !
  !  s_table such that
  !
- !  R k_ibz = S k_ibz with S=k%s_table(k,R)  
+ !  R k_ibz = S k_ibz with S=k%s_table(k,R)
  !  R k_ibz = k_bz    with k_bz=k%k_table(k,R)
  !
  use pars,           ONLY:SP
@@ -19,24 +23,32 @@ subroutine k_build_up_BZ_tables(k)
  use D_lattice,      ONLY:nsym
  use R_lattice,      ONLY:rl_sop,bz_samp
  use zeros,          ONLY:k_rlu_zero
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(bz_samp)::k
  !
  ! Work Space
  !
  integer      ::i1,i2,is1
- logical      ::s_table_is_ok,k_table_is_ok
+ logical      ::s_table_is_ok,k_table_is_ok,g_table_is_ok
  real(SP)     ::v(3)
  !
+ integer, external :: G_index
+ !
  s_table_is_ok=.FALSE.
  k_table_is_ok=.FALSE.
+ g_table_is_ok=.FALSE.
  if (allocated(k%s_table)) then
    if (size(k%s_table)==k%nibz*nsym) s_table_is_ok=.TRUE.
  endif
  if (allocated(k%k_table)) then
    if (size(k%k_table)==k%nibz*nsym) k_table_is_ok=.TRUE.
  endif
+ if (allocated(k%g_table)) then
+   if (size(k%g_table)==k%nibz*nsym) g_table_is_ok=.TRUE.
+ endif
  if (.not.k_table_is_ok) then
    YAMBO_FREE(k%k_table)
    YAMBO_ALLOC(k%k_table,(k%nibz,nsym))
@@ -45,6 +57,11 @@ subroutine k_build_up_BZ_tables(k)
    YAMBO_FREE(k%s_table)
    YAMBO_ALLOC(k%s_table,(k%nibz,nsym))
  endif
+ if (.not.g_table_is_ok) then
+   YAMBO_FREE(k%g_table)
+   YAMBO_ALLOC(k%g_table,(k%nibz,nsym))
+   k%g_table=1
+ endif
  !
  call k_ibz2bz(k,'a',.false.)
  k%s_table=0
@@ -53,11 +70,12 @@ subroutine k_build_up_BZ_tables(k)
    do is1=1,nsym
      call c2a(v_in=matmul(rl_sop(:,:,is1),k%pt(i1,:)),v_out=v,mode='ki2a')
      do i2=1,k%nbz
-       if (rlu_v_is_zero(v-k%ptbz(i2,:),zero_=k_rlu_zero)) then
-         k%s_table(i1,is1)=k%sstar(i2,2)
-         k%k_table(i1,is1)=i2
-         exit
-       endif
+       if ( .not.rlu_v_is_zero(v-k%ptbz(i2,:),zero_=k_rlu_zero) ) cycle
+       k%s_table(i1,is1)=k%sstar(i2,2)
+       k%k_table(i1,is1)=i2
+       call c2a(v_in=(v-k%ptbz(i2,:)),v_out=v,mode='ka2i')
+       k%g_table(i1,is1)=G_index(v)
+       exit
      enddo
    enddo
  enddo
diff --git a/src/bz_ops/k_expand.F b/src/bz_ops/k_expand.F
index 839fedee5b..f75255f825 100644
--- a/src/bz_ops/k_expand.F
+++ b/src/bz_ops/k_expand.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_expand(k)
  !
  ! Outputs:
@@ -49,7 +53,9 @@ subroutine k_expand(k)
  use D_lattice,      ONLY:nsym
  use R_lattice,      ONLY:rl_sop,bz_samp
  use zeros,          ONLY:k_rlu_zero
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(bz_samp)::k
  !
@@ -90,12 +96,12 @@ subroutine k_expand(k)
  k%weights(:)=k%weights(:)/sum(k%weights)
  k%nbz=sum(k%nstar(:))
  !
- YAMBO_ALLOC(k%sstar,(k%nbz,2))
+ YAMBO_ALLOC(k%sstar,(k%nbz,3))
  i2=0
  do i1=1,k%nibz
    do is=1,k%nstar(i1)
      i2=i2+1
-     k%sstar(i2,:)=(/i1,k%star(i1,is)/)
+     k%sstar(i2,:)=(/i1,k%star(i1,is),is/)
    enddo
  enddo
  !
diff --git a/src/bz_ops/k_ibz2bz.F b/src/bz_ops/k_ibz2bz.F
index 04dc73fd6d..49cb6b98c3 100644
--- a/src/bz_ops/k_ibz2bz.F
+++ b/src/bz_ops/k_ibz2bz.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_ibz2bz(k,units,FORCE_BZ)
  !
  ! Output
@@ -14,8 +18,9 @@ subroutine k_ibz2bz(k,units,FORCE_BZ)
  use pars,           ONLY:SP,schlen
  use vec_operate,    ONLY:c2a,k2bz
  use R_lattice,      ONLY:rl_sop,bz_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)::k
  character(1) ::units
diff --git a/src/bz_ops/k_lattice.F b/src/bz_ops/k_lattice.F
index 009ed247da..c07a9d6709 100644
--- a/src/bz_ops/k_lattice.F
+++ b/src/bz_ops/k_lattice.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function k_lattice(k,Xk,G_shell,be_verbose)
  !
  ! Output(s)
@@ -21,8 +25,9 @@ integer function k_lattice(k,Xk,G_shell,be_verbose)
 &                         bz_samp,g_vec,ng_in_shell,Xk_grid_new_mapping
  use zeros,          ONLY:k_rlu_zero
  use drivers,        ONLY:l_rim_w
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)::k,Xk
  integer      ::G_shell
diff --git a/src/bz_ops/k_map_nearest_by_dir.F b/src/bz_ops/k_map_nearest_by_dir.F
index eeff76072b..0bbbfb67ca 100644
--- a/src/bz_ops/k_map_nearest_by_dir.F
+++ b/src/bz_ops/k_map_nearest_by_dir.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_map_nearest_by_dir(Xk,k_map)
  !
  ! Map k-points on a regular grid along the b directions
@@ -13,8 +17,9 @@ subroutine k_map_nearest_by_dir(Xk,k_map)
  use R_lattice,      ONLY:bz_samp,bz_map,WF_shifts
  use com,            ONLY:msg
  use pars,           ONLY:zero_dfl
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_map), intent(inout) :: k_map
  type(bz_samp),intent(inout) :: Xk
@@ -67,7 +72,7 @@ subroutine k_map_nearest_by_dir(Xk,k_map)
  if(dshift>zero_dfl) then
    write(dump_ch,'(a,3f12.6)') " Shifted grid: ",k_shift
    call msg("sr",dump_ch)
-   call error(' Shifted grids not implemented yet in covariant dipoles ')
+   call warning('Yambo_nl works only with a single-shift k-point grid! ')
  else
    k_shift=0._SP
  endif
diff --git a/src/bz_ops/k_reduce.F b/src/bz_ops/k_reduce.F
index d3e671fa36..b885eadd4e 100644
--- a/src/bz_ops/k_reduce.F
+++ b/src/bz_ops/k_reduce.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_reduce(k,FORCE_BZ)
  !
  ! Input:
@@ -21,8 +25,9 @@ subroutine k_reduce(k,FORCE_BZ)
  use D_lattice,      ONLY:nsym
  use R_lattice,      ONLY:b,rl_sop,bz_samp
  use zeros,          ONLY:k_rlu_zero
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)::k
  logical      ::FORCE_BZ
diff --git a/src/bz_ops/k_small_group.F b/src/bz_ops/k_small_group.F
new file mode 100644
index 0000000000..0e4f4a62ec
--- /dev/null
+++ b/src/bz_ops/k_small_group.F
@@ -0,0 +1,64 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2006 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+! headers
+!
+#include<y_memory.h>
+!
+subroutine k_small_group(k)
+ !
+ ! Outputs:
+ ! 
+ !   k%grp_table  k%grp_nsym
+ !
+ !Given the kpoints and the simmetry operations
+ !this sub. computes which symmetries belong to the small group of k
+ !
+ use pars,           ONLY:SP
+ use vec_operate,    ONLY:rlu_v_is_zero,c2a,k2bz
+ use D_lattice,      ONLY:nsym
+ use R_lattice,      ONLY:rl_sop,bz_samp
+ use zeros,          ONLY:k_rlu_zero
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp)::k
+ !
+ ! Work Space
+ !
+ integer         :: ik,is,istark,ikbz,isk
+ real(SP)        :: kref(3),krot(3)
+ !
+ if (allocated(k%grp_table)) return
+ !
+ YAMBO_ALLOC(k%grp_nsym,(k%nbz))
+ YAMBO_ALLOC(k%grp_table,(nsym,k%nbz))
+ YAMBO_ALLOC(k%grp_table_m1,(nsym,k%nbz))
+ !
+ k%grp_nsym=0
+ k%grp_table=-1
+ k%grp_table_m1=-1
+ do ik=1,k%nibz
+   do istark=1,k%nstar(ik)
+     isk =k%star(ik,istark)
+     ikbz=k%k_table(ik,isk)
+     do is=1,nsym
+       kref=k%ptbz(ikbz,:)
+       call c2a(v_in=kref,mode='ki2a')
+       krot=matmul(rl_sop(:,:,is),k%ptbz(ikbz,:))
+       call c2a(v_in=krot,mode='ki2a')
+       if (.not.rlu_v_is_zero(krot-kref,zero_=k_rlu_zero)) cycle
+       k%grp_nsym(ikbz)=k%grp_nsym(ikbz)+1
+       k%grp_table(k%grp_nsym(ikbz),ikbz)=is
+       k%grp_table_m1(is,ikbz)=k%grp_nsym(ikbz)
+     enddo
+   enddo
+ enddo
+ !
+end subroutine
diff --git a/src/collisions/Build_LSEX_collisions.F b/src/collisions/Build_LSEX_collisions.F
index 32cae39518..44ed8dc4ed 100644
--- a/src/collisions/Build_LSEX_collisions.F
+++ b/src/collisions/Build_LSEX_collisions.F
@@ -4,18 +4,23 @@
 ! Copyright (C) 2020 the YAMBO team
 !
 ! Authors (see AUTHORS file for details): CA
+!
+! headers
+!
+#include<y_memory.h>
 ! 
 subroutine Build_LSEX_collisions(k,q,i_qp,COLL) 
  !
  ! This subroutine calculate the long-range part of the Screened exchange
  !
- use electrons,      ONLY:n_sp_pol
+ use electrons,      ONLY:n_sp_pol,spin
  use pars,           ONLY:SP,cI,cZERO
  use collision_ext,  ONLY:OSCLL,W,ng_oscll,COLL_bands
  use QP_m,           ONLY:QP_table,QP_nk,QP_n_states
  use R_lattice,      ONLY:bz_samp,qindx_S
- ! 
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(bz_samp), intent(in)  :: k,q
  integer,       intent(in)  :: i_qp
@@ -26,17 +31,18 @@ subroutine Build_LSEX_collisions(k,q,i_qp,COLL)
  integer :: i_np,iqbz,i_kmq ! self-energy internal indexes
  integer :: ig1,ig2
  integer :: i_n,i_m
- integer :: i_k
+ integer :: i_k,i_sp
  !
  ! Collisions output
  !
- complex(SP), intent(out) :: COLL(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk)
+ complex(SP), intent(out) :: COLL(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk,n_sp_pol)
  !
  ! Coll indexes
  !
  i_n=QP_table(i_qp,1)
  i_m=QP_table(i_qp,2)
  i_k=QP_table(i_qp,3)
+ i_sp=spin(QP_table(i_qp,:))
  !
  ! Build Collisions from the Oscillators
  !
@@ -45,7 +51,7 @@ subroutine Build_LSEX_collisions(k,q,i_qp,COLL)
  do i_mp=COLL_bands(1),COLL_bands(2)
     do i_np=COLL_bands(1),COLL_bands(2)
       !
-      COLL(i_np,i_mp,:)=cZERO
+      COLL(i_np,i_mp,:,i_sp)=cZERO
       ! 
       do iqbz=1,q%nbz
          i_kmq   = k%sstar(qindx_S(i_k,iqbz,1),1)
@@ -54,8 +60,8 @@ subroutine Build_LSEX_collisions(k,q,i_qp,COLL)
          !
          do ig1=1,ng_oscll
            do ig2=1,ng_oscll
-             COLL(i_np,i_mp,i_kmq)=COLL(i_np,i_mp,i_kmq)+OSCLL(i_n,i_np,i_k,iqbz,ig2) &
-&                                                 *conjg(OSCLL(i_m,i_mp,i_k,iqbz,ig1))*W(iqbz,ig2,ig1) 
+             COLL(i_np,i_mp,i_kmq,i_sp)=COLL(i_np,i_mp,i_kmq,i_sp)+OSCLL(i_n,i_np,i_k,i_sp,iqbz,ig2) &
+&                                                 *conjg(OSCLL(i_m,i_mp,i_k,i_sp,iqbz,ig1))*W(iqbz,ig2,ig1) 
             enddo
          enddo
          !
diff --git a/src/collisions/COLLISIONS_HXC.F b/src/collisions/COLLISIONS_HXC.F
index f1b36b9f9b..85dae953fb 100644
--- a/src/collisions/COLLISIONS_HXC.F
+++ b/src/collisions/COLLISIONS_HXC.F
@@ -5,53 +5,64 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
-subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
+! headers
+!
+#include<y_memory.h>
+!
+subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw,IO_BS_err,ID_BS)
  !
  ! HXC collisions 
  !
  use drivers,       ONLY:l_use_COH_collisions
  use pars,          ONLY:SP,pi,cZERO
+ use parser_m,      ONLY:parser
  use com,           ONLY:msg
  use frequency,     ONLY:w_samp
- use electrons,     ONLY:levels,spin_occ,spin,n_spin,n_spinor
+ use electrons,     ONLY:levels,spin_occ,spin,n_spin,n_spinor,n_full_bands,n_met_bands,n_sp_pol
  use LIVE_t,        ONLY:live_timing
  use hamiltonian,   ONLY:H_potential
+ use BS,            ONLY:BS_blk,n_BS_blks
  use D_lattice,     ONLY:DL_vol,sop_inv
  use tddft,         ONLY:tddft_wf_t
  use xc_functionals,ONLY:F_xc,V_xc,F_xc_mat,magn,XC_potential_driver
  use global_XC,     ONLY:WF_kind,WF_xc_functional
- use parallel_int,  ONLY:PP_redux_wait,PP_wait
+ use stderr,        ONLY:intc
+ use parallel_int,  ONLY:PP_redux_wait
  use parallel_m,    ONLY:PAR_IND_Q_bz,MPI_COMM_NULL,ncpu,myid,COMM_copy,yMPI_COMM,&
 &                        PAR_n_Bp_mat_elements,PAR_COM_QP_A2A,PAR_COM_QP_INDEX,PAR_COM_Q_INDEX,&
 &                        PAR_IND_Bp_mat,PAR_nQ_bz,PAR_COM_G_b_A2A,PAR_COM_G_b_index_global
  use collision_el,  ONLY:elemental_collision,elemental_collision_free,elemental_collision_alloc
  use collision_ext, ONLY:COLLISIONS_parallel_SC_structure,COLLISIONS_HXC_local,COLLISIONS_HXC_MB,&
 &                        COLL_bands,HXC_COLL_element,HXC_collisions,COH_COLL_element,COH_collisions,&
-&                        COLLISIONS_HXC_use_TDDFT,LAST_COLL_sync,PAR_COLL_min
+&                        COLL_bands_frozen,COLLISIONS_HXC_use_TDDFT,PAR_COLL_min,COLLISIONS_CV_only
  use timing_m,      ONLY:timing
  use QP_m,          ONLY:QP_ng_SH,QP_ng_Sx,QP_ng_Sc
  use IO_int,        ONLY:io_control,IO_and_Messaging_switch
  use IO_m,          ONLY:OP_RD_CL,REP,VERIFY,NONE,io_COLLs
  use ALLOC,         ONLY:X_ALLOC_elemental
  use X_m,           ONLY:X_mat,X_t
- use interfaces,    ONLY:WF_load,TDDFT_ALDA_eh_space_R_kernel
+ use interfaces,    ONLY:WF_load,TDDFT_ALDA_eh_space_R_kernel!,eval_G_minus_G
  use wave_func,     ONLY:WF,WF_buffer,WF_buffered_IO
- use R_lattice,     ONLY:qindx_S,bz_samp,g_rot,minus_G
+ use R_lattice,     ONLY:qindx_S,bz_samp,g_rot,minus_G,G_m_G
  use hamiltonian,   ONLY:B_mat_index
  use fft_m,         ONLY:fft_size
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)             ::E
  type(bz_samp)            ::k,q,Xk
  type(X_t)                ::X
  type(w_samp)             ::Xw
+ integer                  ::ID_BS
+ integer, intent(in)      ::IO_BS_err
  !
  ! Work Space
  !
  integer                 :: i_coll,first_coll,last_coll,ig1,ig2,iqbz,iqibz,alloc_err,iqs,iGo_max,NG_max,NG_XC,LOCAL_COMM(2),&
-&                           i_n,i_k,i_k_s,i_sp,i_m,i_mp,N_g_vecs_H,N_g_vecs_X,N_g_vecs_C,i_star,i_p,i_p_s,i_loop,&
-&                           i_np,i_kmq,i_kmq_bz,i_kmq_s,i_kmq_G0,PAR_NC_local,n_steps,bands_to_load(2),n_colls
+&                           i_n,i_k,i_k_s,i_sp,ip_sp,i_m,i_mp,N_g_vecs_H,N_g_vecs_X,N_g_vecs_C,i_star,i_p,i_p_s,i_loop,iB,&
+&                           i_np,i_kmq,i_kmq_bz,i_kmq_s,i_kmq_G0,PAR_NC_local,n_steps,i_steps,bands_to_load(2),n_colls,n_GmG,&
+&                           n_skip_colls,n_incl_colls
  logical                 :: l_HXC_collisions,eval_coll,l_compute_screening
  !
  type(elemental_collision):: isc_XC,iscp_XC,isc_H,iscp_H
@@ -62,6 +73,7 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
  !
  complex(SP), allocatable :: EM1s(:,:),rhotw_tmp(:)
  integer                  :: io_err,ID_X,ID_HXC,ID_COH,TMP_VEC(ncpu)
+ logical                  :: doing_cv_colls,cc_vv_prime_only
  integer, external        :: io_X
  !
  call timing(trim(H_potential)//' Collisions',OPR='start')
@@ -78,6 +90,8 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
  !
  ! Setup
  !
+ n_skip_colls      = 0
+ n_incl_colls      = 0
  l_compute_screening  = index(H_potential,"COH")>0   .or. index(H_potential,"SEX")>0
  l_HXC_collisions     = COLLISIONS_HXC_local .or. COLLISIONS_HXC_MB 
  !
@@ -85,7 +99,7 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
  call k_build_up_BZ_tables(q)
  call k_expand(k)
  !
- if (l_compute_screening) then
+ if (l_compute_screening.and.(IO_BS_err/=0.or.(.not.COLLISIONS_CV_only))) then
    !
    ! Plasmon-Pole/Static interaction DB I/O
    !
@@ -94,8 +108,12 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
    if (io_err<0) call error(' Incomplete and/or broken PPA/Static diel. fun. database')
    !
    if (X%ng<QP_ng_Sc) then
-     QP_ng_Sc=X%ng
-     call warning('SEX size (CORRLvcs) reduced to match the Screening size (NGsBlkXs)')
+     if (IO_BS_err/=0) then
+       QP_ng_Sc=X%ng
+       call warning('SEX size (CORRLvcs) reduced to match the Screening size (NGsBlkXs)')
+     else
+       call error('SEX size (CORRLvcs) bigger than Screening size (NGsBlkXs)')
+     endif
    endif 
    !
  endif
@@ -107,19 +125,37 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
  N_g_vecs_C =0
  iGo_max    =maxval(qindx_S(:,:,2))
  !
- if(COLLISIONS_HXC_local                      ) N_g_vecs_H  = QP_ng_SH
- if(COLLISIONS_HXC_MB                         ) N_g_vecs_X  = QP_ng_Sx
- if(COLLISIONS_HXC_MB .or.l_use_COH_collisions) N_g_vecs_C  = QP_ng_Sc
+ if (COLLISIONS_HXC_local) N_g_vecs_H  = QP_ng_SH
+ if (COLLISIONS_HXC_MB   ) N_g_vecs_X  = QP_ng_Sx
+ if (l_compute_screening ) N_g_vecs_C  = QP_ng_Sc
  !
- call fft_check_size(iGo_max,N_g_vecs_C,N_g_vecs_H,"SH")
- call fft_check_size(iGo_max,N_g_vecs_C,N_g_vecs_X,"SF")
+ if (IO_BS_err/=0.or.(.not.COLLISIONS_CV_only)) then
+   call fft_check_size(iGo_max,N_g_vecs_C,N_g_vecs_H,"SH")
+   call fft_check_size(iGo_max,N_g_vecs_C,N_g_vecs_X,"SF")
+   if (IO_BS_err==0) then
+     if(QP_ng_SH/=N_g_vecs_H) call warning("Hartree cutoff is different between cv and cc/vv channels")
+     if(QP_ng_Sx/=N_g_vecs_X) call warning("Exchange cutoff is different between cv and cc/vv channels")
+   endif
+ endif
  !
  if(COLLISIONS_HXC_local) QP_ng_SH=N_g_vecs_H
- if(COLLISIONS_HXC_MB) QP_ng_Sx=N_g_vecs_X
+ if(COLLISIONS_HXC_MB)    QP_ng_Sx=N_g_vecs_X
  !
  NG_max=maxval((/N_g_vecs_H,N_g_vecs_X,N_g_vecs_C/))
  NG_XC =max(N_g_vecs_X,N_g_vecs_C)
  !
+ ! DEBUG <
+ ! 2022/06/30 DS: This is a check for consistency with BSE
+ ! - In the BSE case the elemental collisions are always computed with iqs=1 and iqG=1
+ !   and later there are symmetry operations with G_m_G which require to increase the O_ng (i.e. NG_XC)
+ ! - In the RT/SC collisions case the symmetries and the Go shifts are directly handled by scatter Gamp
+ !   via fft_g_table. So it is not needed to increase NG_XC
+ !if (COLLISIONS_HXC_MB) then
+ !  n_GmG=eval_G_minus_G(NG_XC,iGo_max)
+ !  NG_XC=maxval(G_m_G)
+ !endif
+ ! DEBUG >
+ !
  call msg('s', '['//trim(H_potential)//'] Plane waves (H,X,C) ',(/N_g_vecs_H,N_g_vecs_X,N_g_vecs_C/))
  !
  ! WFs
@@ -130,77 +166,60 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
    bands_to_load(2)=max(COLL_bands(2),maxval(E%nbm))
  endif
  !
- if (WF_buffered_IO) call WF_load(WF_buffer,0,1,bands_to_load,(/1,k%nibz/),space='B',title='-BUFFER')
- !
- call WF_load(WF,NG_max,iGo_max,bands_to_load,(/1,k%nibz/),title='-'//trim(H_potential)//' Collisions')
- !
- if (COLLISIONS_HXC_local) then
-   !
-   iscp_H%qs   =1
-   iscp_H%iqref=0
-   iscp_H%ngrho=N_g_vecs_H
-   !
-   isc_H%qs    =1
-   isc_H%iqref =0
-   isc_H%ngrho =N_g_vecs_H
-   !
-   call elemental_collision_alloc(isc_H ,NG=N_g_vecs_H,GAMP_NG=(/N_g_vecs_H,    1   /),TITLE='HARTREE')
-   call elemental_collision_alloc(iscp_H,NG=N_g_vecs_H,                                TITLE='HARTREE')
-   !
-   YAMBO_ALLOC(rhotw_tmp,(N_g_vecs_H))
-   !
- endif
- !
- iscp_XC%qs   =1
- iscp_XC%iqref=0
- iscp_XC%ngrho=NG_XC
- !
- isc_XC%qs    =1
- isc_XC%iqref =0
- isc_XC%ngrho =NG_XC
- !
- call elemental_collision_alloc(iscp_XC,NG=NG_XC,GAMP_NG=(/N_g_vecs_X,  1  /),TITLE=trim(H_potential))
- !
- if (l_compute_screening) then
-   !
-   ! Test the spatial Inversion
-   !   
-   call WF_spatial_inversion(E,Xk)
-   !
-   ! Allocation (EM1S can be bigger than N_g_vecs_C)
+ if (IO_BS_err/=0.or.(.not.COLLISIONS_CV_only)) then
    !
-   call X_ALLOC_elemental('X',(/X%ng,X%ng,1/))
-   YAMBO_ALLOC(EM1s,(N_g_vecs_C,N_g_vecs_C))
+   if (WF_buffered_IO) call WF_load(WF_buffer,0,1,bands_to_load,(/1,k%nibz/),space='B',title='-BUFFER')
+   call WF_load(WF,NG_max,iGo_max,bands_to_load,(/1,k%nibz/),title='-'//trim(H_potential)//' Collisions')
    !
-   call elemental_collision_alloc(isc_XC, NG=NG_XC,GAMP_NG=(/N_g_vecs_C,N_g_vecs_C/),TITLE=trim(H_potential))
-   !
- else
+   if (COLLISIONS_HXC_local) then
+     !
+     call elemental_collision_alloc(isc_H ,NG=N_g_vecs_H,GAMP_NG=(/N_g_vecs_H,    1   /),TITLE='HARTREE')
+     call elemental_collision_alloc(iscp_H,NG=N_g_vecs_H,                                TITLE='HARTREE')
+     !
+     YAMBO_ALLOC(rhotw_tmp,(N_g_vecs_H))
+     !
+   endif
    !
-   call elemental_collision_alloc(isc_XC, NG=NG_XC,                                  TITLE=trim(H_potential))
+   call elemental_collision_alloc(iscp_XC,NG=NG_XC,GAMP_NG=(/N_g_vecs_X,  1  /),TITLE=trim(H_potential))
    !
- endif
- !
- Co=1._SP
- if (COLLISIONS_HXC_use_TDDFT) then
-   Co=4._SP*pi/DL_vol/real(q%nbz,SP)
-   YAMBO_ALLOC(tddft_wf%rhotwr1,(fft_size*n_spinor*n_spinor))
-   YAMBO_ALLOC(tddft_wf%rhotwr2,(fft_size*n_spinor*n_spinor))
-   YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor))
-   YAMBO_ALLOC(tddft_wf%WF_symm2,(fft_size,n_spinor))
-   YAMBO_ALLOC(F_xc,(fft_size,n_spin,n_spin))
-   YAMBO_ALLOC(V_xc,(fft_size,n_spin))
-   V_xc=0._SP
-   if(n_spin>1) then
-     YAMBO_ALLOC(magn,(fft_size,3))
+   if (l_compute_screening) then
+     !
+     ! Allocation (EM1S can be bigger than N_g_vecs_C)
+     !
+     call X_ALLOC_elemental('X',(/X%ng,X%ng,1/))
+     YAMBO_ALLOC(EM1s,(N_g_vecs_C,N_g_vecs_C))
+     !
+     call elemental_collision_alloc(isc_XC, NG=NG_XC,GAMP_NG=(/N_g_vecs_C,N_g_vecs_C/),TITLE=trim(H_potential))
+     !
+   else
+     !
+     call elemental_collision_alloc(isc_XC, NG=NG_XC,                                  TITLE=trim(H_potential))
+     !
    endif
-   call XC_potential_driver(E,K,WF_kind,WF_xc_functional,2)
-   YAMBO_ALLOC(F_xc_mat,(fft_size,n_spin,n_spin,n_spin,n_spin))
-   call Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
-   YAMBO_FREE(F_xc)
-   YAMBO_FREE(V_xc)
-   if(n_spin>1) then
-     YAMBO_FREE(magn)
+   !
+   Co=1._SP
+   if (COLLISIONS_HXC_use_TDDFT) then
+     Co=4._SP*pi/DL_vol/real(q%nbz,SP)
+     YAMBO_ALLOC(tddft_wf%rhotwr1,(fft_size*n_spinor*n_spinor))
+     YAMBO_ALLOC(tddft_wf%rhotwr2,(fft_size*n_spinor*n_spinor))
+     YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor))
+     YAMBO_ALLOC(tddft_wf%WF_symm2,(fft_size,n_spinor))
+     YAMBO_ALLOC(F_xc,(fft_size,n_spin,n_spin))
+     YAMBO_ALLOC(V_xc,(fft_size,n_spin))
+     V_xc=0._SP
+     if(n_spin>1) then
+       YAMBO_ALLOC(magn,(fft_size,3))
+     endif
+     call XC_potential_driver(E,K,WF_kind,WF_xc_functional,2)
+     YAMBO_ALLOC(F_xc_mat,(fft_size,n_spin,n_spin,n_spin,n_spin))
+     call Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
+     YAMBO_FREE(F_xc)
+     YAMBO_FREE(V_xc)
+     if(n_spin>1) then
+       YAMBO_FREE(magn)
+     endif
    endif
+   !
  endif
  !
  n_colls=max(HXC_collisions%N,COH_collisions%N)
@@ -213,43 +232,45 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
  first_coll=-1
  last_coll =0
  !
- do i_loop=1,2
-   if (allocated(HXC_collisions%PAR_IND%element_1D)) then
-     PAR_NC_local=0
-     do i_coll=1,HXC_collisions%N
-       if (.not.HXC_collisions%PAR_IND%element_1D(i_coll)) cycle
-       last_coll=i_coll
-       if (allocated(HXC_collisions%IO_status)) then
-         if (HXC_collisions%IO_status(i_coll)/=-1) cycle
-       endif
-       if (first_coll==-1) first_coll=i_coll
-       PAR_NC_local=PAR_NC_local+1
-       if (i_loop==2.and.PAR_NC_local<=PAR_COLL_min) LAST_COLL_sync=i_coll
-     enddo
-   else if (allocated(COH_collisions%PAR_IND%element_1D)) then
-     PAR_NC_local=0
-     do i_coll=1,COH_collisions%N
-       if (.not.COH_collisions%PAR_IND%element_1D(i_coll)) cycle
-       last_coll=i_coll
-       if (allocated(COH_collisions%IO_status)) then
-         if (COH_collisions%IO_status(i_coll)/=-1) cycle
-       endif
-       if (first_coll==-1) first_coll=i_coll
-       PAR_NC_local=PAR_NC_local+1
-       if (i_loop==2.and.PAR_NC_local<=PAR_COLL_min) LAST_COLL_sync=i_coll
-     enddo
-   endif
-   if(i_loop==1) then
-     TMP_VEC=0
-     TMP_VEC(myid+1)=PAR_NC_local
-     call PP_redux_wait(TMP_VEC)
-     PAR_COLL_min=minval(TMP_VEC)
-   endif
- enddo
+ if (allocated(HXC_collisions%PAR_IND%element_1D)) then
+   PAR_NC_local=0
+   do i_coll=1,HXC_collisions%N
+     if (.not.HXC_collisions%PAR_IND%element_1D(i_coll)) cycle
+     last_coll=i_coll
+     if (allocated(HXC_collisions%IO_status)) then
+       if (HXC_collisions%IO_status(i_coll)/=-1) cycle
+     endif
+     if (first_coll==-1) first_coll=i_coll
+     PAR_NC_local=PAR_NC_local+1
+   enddo
+ else if (allocated(COH_collisions%PAR_IND%element_1D)) then
+   PAR_NC_local=0
+   do i_coll=1,COH_collisions%N
+     if (.not.COH_collisions%PAR_IND%element_1D(i_coll)) cycle
+     last_coll=i_coll
+     if (allocated(COH_collisions%IO_status)) then
+       if (COH_collisions%IO_status(i_coll)/=-1) cycle
+     endif
+     if (first_coll==-1) first_coll=i_coll
+     PAR_NC_local=PAR_NC_local+1
+   enddo
+ endif
+ TMP_VEC=0
+ TMP_VEC(myid+1)=PAR_NC_local
+ call PP_redux_wait(TMP_VEC)
+ PAR_COLL_min=minval(TMP_VEC)
  !
  n_steps=0
- if (COLLISIONS_HXC_local)                      n_steps=n_steps+PAR_NC_local*K%nibz*PAR_n_Bp_mat_elements
- if (COLLISIONS_HXC_MB.or.l_use_COH_collisions) n_steps=n_steps+PAR_NC_local*PAR_nQ_bz*PAR_n_Bp_mat_elements
+ if (COLLISIONS_HXC_local)                      n_steps=n_steps+PAR_NC_local*K%nibz*PAR_n_Bp_mat_elements*n_sp_pol
+ if (COLLISIONS_HXC_MB.or.l_use_COH_collisions) n_steps=n_steps+PAR_NC_local*PAR_nQ_bz*PAR_n_Bp_mat_elements*n_sp_pol
+ !
+ if (IO_BS_err==0) then
+   i_steps=2
+   if (COLLISIONS_CV_only) then
+     i_steps=1
+     n_steps=PAR_NC_local*K%nibz*PAR_n_Bp_mat_elements*n_sp_pol
+   endif
+ endif
  !
  ! Define the local COMM
  !
@@ -297,18 +318,40 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
      i_m     = COH_collisions%state(i_coll,2)
      i_k     = COH_collisions%state(i_coll,3)
      i_sp    = COH_collisions%state(i_coll,4)
-   endif   
+   endif
+   !
+   i_k_s      = 1
    !
    if( l_HXC_collisions )     HXC_COLL_element(1)%v3_c=cZERO
    if( l_use_COH_collisions ) COH_COLL_element(1)%v3_c=cZERO
    !
-   ! HARTREE collisions
-   !
-   if ( COLLISIONS_HXC_local )                      call compute_loc_HXC_collisions()
-   !
-   ! XC collisions within MBPT
-   !
-   if ( COLLISIONS_HXC_MB.or.l_use_COH_collisions ) call compute_MBPT_XC_collisions()
+   if (IO_BS_err==0) then
+     !
+     doing_cv_colls=.not.((i_n> n_met_bands(i_sp)  .and. i_m>n_met_bands(i_sp) ) .or. &
+     &                    (i_n<=n_full_bands(i_sp) .and. i_m<=n_full_bands(i_sp))     )
+     !
+     ! HXC collisions from BSE kernel if in the cv channel
+     if (doing_cv_colls) call map_BSE_to_HXC_collisions()
+     !
+     if (.not.COLLISIONS_CV_only) then
+       !
+       ! If (in,im) is in the cv channel, I need (inp,imp) only in the cc/vv channel
+       ! If (in,im) is not in the cv channel, I need (inp,imp) everywhere
+       cc_vv_prime_only=doing_cv_colls
+       ! Hartree collisions
+       if ( COLLISIONS_HXC_local )                      call compute_Hartree_collisions(cc_vv_prime_only)
+       ! XC collisions within MBPT
+       if ( COLLISIONS_HXC_MB.or.l_use_COH_collisions ) call compute_MBPT_XC_collisions(cc_vv_prime_only)
+     endif
+     !
+   else
+     !
+     ! Hartree collisions
+     if ( COLLISIONS_HXC_local )                      call compute_Hartree_collisions(.false.)
+     ! XC collisions within MBPT
+     if ( COLLISIONS_HXC_MB.or.l_use_COH_collisions ) call compute_MBPT_XC_collisions(.false.)
+     !
+   endif
    !
    if(l_HXC_collisions) then
      call PP_redux_wait(HXC_COLL_element(1)%v3_c,COMM=LOCAL_COMM(1))
@@ -326,6 +369,9 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
    !
  enddo ! loop on i_coll
  !
+ if(n_skip_colls>0) call warning(" BSE conversion, ignored "//&
+ &     trim(intc(n_skip_colls))//"/"//trim(intc(n_incl_colls))//" collisions")
+ !
  call live_timing()
  !
  ! CLEAN
@@ -352,19 +398,215 @@ subroutine COLLISIONS_HXC(X,Xk,E,k,q,Xw)
  !
  contains
    !
-   subroutine compute_loc_HXC_collisions()
+   subroutine map_BSE_to_HXC_collisions()
+     !
+     ! Yambo uses the BSE matrix  with the square root of the occupations
+     ! The real time collisions must be constructed with the standard form
+     ! This requires a cI factor in the coupling terms.
+     ! Moreover, since the occupation factors are contained in the density matrix,
+     ! there is a -1 factor for "Q" and "A"
+     !
+     ! The excitonic matrix contains the occupations and it is pseudo-hermitian.
+     !  R      iC       R  iC 
+     ! -(iC)*  -R*  =  iC* -R*
+     !
+     ! The collisions do not contain the occupations and the matrix is hermitian.
+     !   R   C
+     !   C*  R*
+     !
+     use electrons,     ONLY:spin_occ
+     use pars,          ONLY:cI,cZERO
+     use BS,            ONLY:BS_blk,BS_K_dim,BS_K_io_map,l_BSE_kernel_full
+     use BS_solvers,    ONLY:BSS_eh_table_m1
+     use IO_int,        ONLY:io_control
+     use IO_m,          ONLY:RD
+     !
+     complex(SP)  :: M_ij
+     integer      :: i_BSE,j_BSE,i_k_bz_bse,i_p_bz_bse,iB,jB,i_q_fake,i_p_bz
+     logical      :: l_use_mat,l_conj,l_cI,l_m1
+     !
+     iB=-1
+     jB=-1
+     !
+     i_k_bz_bse=BS_K_io_map(k%k_table(i_k,1))
+     !
+     i_BSE=BSS_eh_table_m1(i_k_bz_bse,i_m,i_n,i_sp,i_sp)
+     iB=mod(i_BSE-1,BS_K_dim(1))+1
+     !
+     if (i_BSE>0) then
+       !
+       do jB=iB,iB+BS_K_dim(1),BS_K_dim(1)
+         allocate(BS_blk(jB)%mat (BS_blk(jB)%size(1),BS_blk(jB)%size(2)))
+         allocate(BS_blk(jB)%tam (BS_blk(jB)%zise(1),BS_blk(jB)%zise(2)))
+         BS_blk(jB)%mat=cZERO
+         BS_blk(jB)%tam=cZERO
+         call io_control(ACTION=RD,ID=ID_BS)
+         call read_BS_par_collisions(1,jB,ID_BS)
+       enddo
+       !
+     else
+       !DEBUG<
+       !write(*,*) "Block ignored ",i_k_bz_bse,i_m,i_n,i_sp,i_sp
+       !DEBUG>
+     endif
+     !
+     do ip_sp=1,n_sp_pol
+     do i_q_fake=1,q%nbz
+       !
+       if (.not.PAR_IND_Q_bz%element_1D(i_q_fake)) cycle
+       !
+       i_p_bz=qindx_S(i_k,i_q_fake,1)
+       i_p   =k%sstar(i_p_bz,1)
+       !i_p_s =k%sstar(i_p_bz,2)
+       !
+       do i_mp=COLL_bands(1),COLL_bands(2)
+         !
+         if ( COLL_bands_frozen(i_mp)==1 ) cycle
+         !
+         do i_np=COLL_bands(1),COLL_bands(2)
+           !
+           if ( COLL_bands_frozen(i_mp)==1 ) cycle
+           !
+           if ( (i_np> n_met_bands(ip_sp)  .and. i_mp> n_met_bands(ip_sp) ) ) cycle
+           if ( (i_np<=n_full_bands(ip_sp) .and. i_mp<=n_full_bands(ip_sp)) ) cycle
+           !
+           if ( .not. PAR_IND_Bp_mat%element_1D(B_mat_index(i_np,i_mp,COLL_bands) ) ) cycle
+           !
+           !i_p_bz_bse=BS_K_io_map(k%k_table(i_p,i_p_s))
+           i_p_bz_bse=BS_K_io_map(i_p_bz)
+           !
+           i_BSE=BSS_eh_table_m1(i_k_bz_bse,i_m,i_n,i_sp,i_sp)
+           j_BSE=BSS_eh_table_m1(i_p_bz_bse,i_mp,i_np,ip_sp,ip_sp)
+           !
+           if (i_BSE==0.or.j_BSE==0) then
+             n_skip_colls=n_skip_colls+1
+             !DEBUG<
+             !if(j_BSE==0) write(*,*) "Transition ignored ",i_p_bz_bse,i_mp,i_np,i_sp
+             !DEBUG>
+             cycle
+           else
+             n_incl_colls=n_incl_colls+1
+           endif
+           !
+           if (i_BSE<=BS_K_dim(1) .and. j_BSE<=BS_K_dim(1)) then
+             !
+             l_cI=.false.
+             l_m1=.false.
+             if (l_BSE_kernel_full) then
+               ! Resonant block
+               iB=i_BSE ; jB=j_BSE; l_conj=.false.
+               l_use_mat=.true. 
+             else
+               iB=i_BSE
+               ! Resonant block
+               if(j_BSE>=i_BSE) then; l_use_mat=.true. ; jB=j_BSE-i_BSE+1; l_conj=.false.; endif
+               ! Resonant is hermitian
+               if(j_BSE< i_BSE) then; l_use_mat=.false.; jB=j_BSE  ; l_conj=.true. ; endif
+             endif
+             !
+           else if(i_BSE<=BS_K_dim(1) .and. j_BSE>BS_K_dim(1)) then
+             !
+             l_cI=.true.
+             l_m1=.false.
+             l_conj=.false.
+             j_BSE=j_BSE-BS_K_dim(1)
+             if (l_BSE_kernel_full) then
+               ! Coupling block
+               iB=i_BSE+BS_K_dim(1) ; jB=j_BSE
+               l_use_mat=.true.
+             else
+               iB=i_BSE+BS_K_dim(1)
+               ! Coupling block
+               if(j_BSE>=i_BSE) then ; l_use_mat=.true. ; jB=j_BSE-i_BSE+1; endif
+               ! Coupling is symmetric
+               if(j_BSE< i_BSE) then ; l_use_mat=.false.; jB=j_BSE ; endif
+             endif
+             !
+           else if(i_BSE> BS_K_dim(1) .and. j_BSE<=BS_K_dim(1)) then
+             !
+             l_cI=.true.
+             l_m1=.true.
+             l_conj=.true.
+             i_BSE=i_BSE-BS_K_dim(1)
+             if (l_BSE_kernel_full) then
+               ! Anti-Coupling
+               iB=j_BSE+BS_K_dim(1); jB=i_BSE
+               l_use_mat=.true.
+             else
+               iB=i_BSE+BS_K_dim(1)
+               ! Anti-Coupling from coupling: the whole BSE matrix is Pseudo-Hermitian
+               if(j_BSE>=i_BSE) then; l_use_mat=.true. ; jB=j_BSE-i_BSE+1; endif
+               ! Anti-Coupling is symmetric
+               if(j_BSE< i_BSE) then; l_use_mat=.false.; jB=j_BSE  ; endif
+             endif
+             !
+           else if(i_BSE> BS_K_dim(1) .and. j_BSE> BS_K_dim(1)) then
+             !
+             l_cI=.false.
+             l_m1=.true.
+             i_BSE=i_BSE-BS_K_dim(1)
+             j_BSE=j_BSE-BS_K_dim(1)
+             if (l_BSE_kernel_full) then
+               ! Anti-resonant from Resonant
+               iB=j_BSE; jB=i_BSE-j_BSE+1; l_conj=.true.
+               l_use_mat=.true.
+             else
+               iB=i_BSE
+               ! Anti-resonant from Resonant: the whole BSE matrix is Pseudo-Hermitian
+               if(j_BSE>=i_BSE) then; l_use_mat=.true. ; jB=j_BSE-i_BSE+1; l_conj=.true.; endif
+               ! Anti-resonant is hermitian
+               if(j_BSE< i_BSE) then; l_use_mat=.false.; jB=j_BSE  ; l_conj=.false.; endif
+             endif
+             !
+           endif
+           !
+           if (     l_use_mat) M_ij= BS_blk(iB)%mat(1,jB)
+           if (.not.l_use_mat) M_ij= BS_blk(iB)%tam(jB,1)
+           !
+           if(l_conj) M_ij= conjg(M_ij)
+           if(l_cI)   M_ij=  cI * M_ij
+           if(l_m1)   M_ij=     - M_ij
+           !
+           ! - The occupation factor here is already contained in the density matrix
+           ! - Compared to the BSE case, the collisions are summed over the points in the star of i_p
+           !
+           HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p,ip_sp)=HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p,ip_sp)-M_ij/spin_occ
+           !
+           call live_timing(steps=i_steps)
+           !
+         enddo
+       enddo
+     enddo
+     enddo
+     !
+     i_BSE=BSS_eh_table_m1(i_k_bz_bse,i_m,i_n,i_sp,i_sp)
+     iB=mod(i_BSE-1,BS_K_dim(1))+1
+     !
+     if (i_BSE>0) then
+       do jB=iB,iB+BS_K_dim(1),BS_K_dim(1)
+         deallocate(BS_blk(jB)%mat)
+         deallocate(BS_blk(jB)%tam)
+       enddo
+     endif
+     !
+   end subroutine map_BSE_to_HXC_collisions
+   !
+   subroutine compute_Hartree_collisions(cc_vv_only)
      !
      ! Scattering geometry
      !=====================           _ \ _
      !                               /  /  \
-     !                   G_p{np,mp} |      | (i_p,i_p_s)
+     !             G_p{np,mp,ip_sp} |      | (i_p,i_p_s)
      !                              \_ _ _/
      !                                 | 
      !                                 | W(q=0)
      !                                 |
-     !     (i_n,i_k,1)------>----------.---------->----(i_m,i_k,1)
-     !                          
+     !  (i_n,i_k,i_sp)------>----------.---------->----(i_m,i_k,i_sp)
      !                          
+     !
+     logical, intent(in)  :: cc_vv_only
+     !
+     logical :: cc_vv_colls
      integer :: i_q_fake, i_p_bz
      !                          
      isc_H%qs   = 1
@@ -372,13 +614,12 @@ subroutine compute_loc_HXC_collisions()
      !
      call scatter_Gamp(isc_H,'x')
      !
-     i_k_s      = 1
-     !
      isc_H%is  =(/i_n,i_k,i_k_s,i_sp/)
      isc_H%os  =(/i_m,i_k,i_k_s,i_sp/)
      !
      call scatter_Bamp(isc_H)
      !
+     do ip_sp=1,n_sp_pol
      do i_q_fake=1,q%nbz
        !
        if (.not.PAR_IND_Q_bz%element_1D(i_q_fake)) cycle
@@ -389,13 +630,24 @@ subroutine compute_loc_HXC_collisions()
        if (k%sstar(i_p_bz,2)/=1) cycle
        !
        do i_mp=COLL_bands(1),COLL_bands(2)
+         !
+         if ( COLL_bands_frozen(i_mp)==1 ) cycle
+         !
          do i_np=i_mp,COLL_bands(2)
+           !
+           if ( COLL_bands_frozen(i_np)==1 ) cycle
+           !
+           cc_vv_colls=(i_np> n_met_bands(ip_sp)  .and. i_mp>n_met_bands(ip_sp) ) .or. &
+           &           (i_np<=n_full_bands(ip_sp) .and. i_mp<=n_full_bands(ip_sp))
+           !
+           if ( COLLISIONS_CV_only .and.       cc_vv_colls  ) cycle
+           if (         cc_vv_only .and. (.not.cc_vv_colls) ) cycle
            !
            if ( .not. PAR_IND_Bp_mat%element_1D( B_mat_index(i_np,i_mp,COLL_bands) ) .and. &
 &               .not. PAR_IND_Bp_mat%element_1D( B_mat_index(i_mp,i_np,COLL_bands) )         ) cycle
            !
-           iscp_H%is =(/i_np,i_p,1,i_sp/)
-           iscp_H%os =(/i_mp,i_p,1,i_sp/)
+           iscp_H%is =(/i_np,i_p,1,ip_sp/)
+           iscp_H%os =(/i_mp,i_p,1,ip_sp/)
            !
            eval_coll=.not.all((/i_n==i_np,i_m==i_mp,i_k==i_p/))
            if (     eval_coll) call scatter_Bamp(iscp_H)
@@ -436,7 +688,11 @@ subroutine compute_loc_HXC_collisions()
                enddo
              endif
              !
-             HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p)=4._SP*pi*pre_factor1+Co*pre_factor2
+             HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p,ip_sp)=HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p,ip_sp) &
+             &                                                        +4._SP*pi*pre_factor1+Co*pre_factor2
+             ! DS: 2023/10/25 Merge Check
+             !HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p,ip_sp)=HXC_COLL_element(1)%v3_c(i_np,i_mp,i_p,ip_sp) &
+             !&                                                 +4._SP*pi*conjg(pre_factor1)+Co*pre_factor2
              !
              call live_timing(steps=1)
              !
@@ -465,7 +721,8 @@ subroutine compute_loc_HXC_collisions()
                enddo
              endif
              !
-             HXC_COLL_element(1)%v3_c(i_mp,i_np,i_p)=4._SP*pi*pre_factor1+Co*pre_factor2
+             HXC_COLL_element(1)%v3_c(i_mp,i_np,i_p,ip_sp)=HXC_COLL_element(1)%v3_c(i_mp,i_np,i_p,ip_sp) &
+             &                                                        +4._SP*pi*pre_factor1+Co*pre_factor2
              !
              call live_timing(steps=1)
              !
@@ -476,10 +733,11 @@ subroutine compute_loc_HXC_collisions()
        enddo
        !
      enddo
+     enddo
      !
-   end subroutine compute_loc_HXC_collisions
+   end subroutine compute_Hartree_collisions
    !
-   subroutine compute_MBPT_XC_collisions()
+   subroutine compute_MBPT_XC_collisions(cc_vv_only)
      !
      ! Scattering geometry
      !=====================
@@ -490,11 +748,13 @@ subroutine compute_MBPT_XC_collisions()
      !                        _ _ _ _ \ _ _ _ _
      !                       /        /        \
      !                      /                   \
-     !                     /     G_{np,mp}       \
-     !     (i_n,i_k,1)-->--.---------->----------.---->----(i_m,i_k,1)
+     !                     /   G_{np,mp,i_sp}    \
+     !  (i_n,i_k,i_sp)-->--.----------->---------.---->----(i_m,i_k,i_sp)
      !                          (i_kmq,i_kmq_s)
      !
+     logical, intent(in)  :: cc_vv_only
      !
+     logical :: cc_vv_colls
      integer :: TMP_INDEX(COLL_bands(1):COLL_bands(2))
      !
      if (l_compute_screening.and.i_coll==first_coll) then
@@ -509,27 +769,39 @@ subroutine compute_MBPT_XC_collisions()
        iqibz    =q%sstar(iqbz,1)
        iqs      =q%sstar(iqbz,2)
        !
-       isc_XC%qs   =(/0,iqibz,iqs/)
-       iscp_XC%qs  =(/0,iqibz,iqs/)
-       !
-       i_k_s   = 1
-       !
        i_kmq_bz =qindx_S(i_k,iqbz,1)
        i_kmq_G0 =qindx_S(i_k,iqbz,2)
        !
        i_kmq   = k%sstar(i_kmq_bz,1)
        i_kmq_s = k%sstar(i_kmq_bz,2)
        !
+       isc_XC%qs  =(/i_kmq_G0,iqibz,iqs/)
        isc_XC%is  =(/i_n,i_k,  i_k_s,  i_sp/)
        isc_XC%os  =(/0  ,i_kmq,i_kmq_s,i_sp/)
        !
+       iscp_XC%qs =(/i_kmq_G0,iqibz,iqs/)
        iscp_XC%is =(/i_m,i_k,  i_k_s,  i_sp/)
        iscp_XC%os =(/0,  i_kmq,i_kmq_s,i_sp/)
        !
-       isc_XC%qs(1) =i_kmq_G0
-       iscp_XC%qs(1)=i_kmq_G0
+       ! DS 2022/07/29
+       ! The G shifts "i_kmq_G0" are need to construct M_{k,p}.
+       ! The same procedure is applyed in the BSE subroutines.
+       !
+       ! However, for some reason, I get a non hermitian matrix, i.e. K_{nmk,n'm'k'} /= K^*_{n'm'k',nmk}
+       ! due to a "-1" factor in the real part of some matrix elements.
+       ! This issue was never seen in the BSE, since the hermiticity is imposed there.
+       ! However, implementing the option to compute the full BSE blocks, i.e. without imposing 
+       ! the hermiticity, the issue is there as well.
+       !
+       ! Without them the matrix M_{k,k+q}=M_{k,p+G} would be obtained
+       ! Indeed setting i_kmq_G0=1 (i.e. G=0) the results are very bad.
+       ! Both in the BSE and in the collisions, i.e. the matrix is not at all hermitian.
        !
-       if (iqibz/=isc_XC%iqref) call scatter_Gamp(iscp_XC,'x')
+       ! In the subrtouine QP_ppa_cohsex and XCo_Hartree_Fock.
+       ! There the difference M_{k,k+q} vs M_{k,p} maybe non crucial, since
+       ! the matrix is multiplied by a quantity (the occupaitons) which are invariant under G0 shift
+       !
+       if (iqibz/=iscp_XC%iqref) call scatter_Gamp(iscp_XC,'x')
        !
        if( iqibz/=isc_XC%iqref .and. l_compute_screening) then
          !
@@ -557,6 +829,8 @@ subroutine compute_MBPT_XC_collisions()
        endif
        !
        do i_mp=COLL_bands(1),COLL_bands(2)
+         !
+         if ( COLL_bands_frozen(i_mp)==1 ) cycle
          !
          do i_np=COLL_bands(1),COLL_bands(2)
            TMP_index(i_np)=B_mat_index(i_np,i_mp,COLL_bands)
@@ -569,6 +843,14 @@ subroutine compute_MBPT_XC_collisions()
          call scatter_Bamp(iscp_XC)
          !
          do i_np=COLL_bands(1),COLL_bands(2)
+           !
+           if ( COLL_bands_frozen(i_np)==1 ) cycle
+           !
+           cc_vv_colls=(i_np> n_met_bands(i_sp)  .and. i_mp>n_met_bands(i_sp) ) .or. &
+           &           (i_np<=n_full_bands(i_sp) .and. i_mp<=n_full_bands(i_sp))
+           !
+           if ( COLLISIONS_CV_only .and.       cc_vv_colls  ) cycle
+           if (         cc_vv_only .and. (.not.cc_vv_colls) ) cycle
            !
            ! COLLISIONS(n,n',qp_n_states) = \sum_{G,G',q} ( rho(m,n,G,q,k) x EM1s(G ,G') x rho*(m',n',G',q,k) /|q+G|/|q+G'| )
            !
@@ -576,6 +858,8 @@ subroutine compute_MBPT_XC_collisions()
            !
            isc_XC%os(1)=i_np
            !
+           ! DS: 2023/10/25 Merge Check
+           !if (any(isc_XC%is/=iscp_XC%is).or.any(isc_XC%os/=iscp_XC%os).or.any(isc_XC%qs/=iscp_XC%qs)) then
            if (any(isc_XC%is/=iscp_XC%is).or.any(isc_XC%os/=iscp_XC%os)) then
              call scatter_Bamp(isc_XC)
            else
@@ -587,26 +871,39 @@ subroutine compute_MBPT_XC_collisions()
              !$omp parallel do default(shared), private(ig1), reduction(+:pre_factor1)
              do ig1=1,N_g_vecs_X
                pre_factor1=pre_factor1+isc_XC%rhotw(ig1)*iscp_XC%gamp(ig1,1)*conjg(iscp_XC%rhotw(ig1))
+               ! DS: 2023/10/25 Merge Check
+               !ig2=G_m_G(ig1,minus_G(i_kmq_G0))
+               !pre_factor1=pre_factor1+conjg(isc_XC%rhotw(ig2))*iscp_XC%gamp(ig2,1)*iscp_XC%rhotw(ig2)
              enddo
              !$omp end parallel do
            endif
            !
+           ! DEBUG <
+           !ig2=G_m_G(1,minus_G(i_kmq_G0))
+           !if ( (i_n==5 .and. i_m==9 .and. i_k==2 .and.  i_np==5 .and. i_mp==10 .and. i_kmq==1)  .or. &
+           !&    (i_n==5 .and. i_m==10 .and. i_k==1 .and.  i_np==5 .and. i_mp==9 .and. i_kmq==2)  )    &
+           !& write(201,*) i_n,i_m,i_k,i_np,i_mp,i_kmq,iqbz,iqibz,i_kmq_G0, &
+           !&              conjg(isc_XC%rhotw(ig2)),iscp_XC%gamp(ig2,1),iscp_XC%rhotw(ig2)
+           ! DEBUG >
+           !
            pre_factor2=cZERO
            if(index(H_potential,"COH")>0.or.index(H_potential,"SEX")>0) then 
              !$omp parallel do default(shared), private(ig1,ig2), reduction(+:pre_factor2)
              do ig1=1,N_g_vecs_C
                do ig2=1,N_g_vecs_C
                  pre_factor2=pre_factor2+isc_XC%rhotw(ig2)*EM1s(ig2,ig1)*conjg(iscp_XC%rhotw(ig1))
+                 ! DS: 2023/10/25 Merge Check
+                 !pre_factor2=pre_factor2+conjg(isc_XC%rhotw(ig2))*EM1s(ig2,ig1)*conjg(iscp_XC%rhotw(ig1))
                enddo
              enddo
              !$omp end parallel do
            endif
            !
            if(index(H_potential,"SEX")>0 .or. index(H_potential,"FOCK")>0) &
-&            HXC_COLL_element(1)%v3_c(i_np,i_mp,i_kmq)=HXC_COLL_element(1)%v3_c(i_np,i_mp,i_kmq) &
+&            HXC_COLL_element(1)%v3_c(i_np,i_mp,i_kmq,i_sp)=HXC_COLL_element(1)%v3_c(i_np,i_mp,i_kmq,i_sp) &
 &                                                      -4._SP/spin_occ*pi*(pre_factor1+pre_factor2)
            if(index(H_potential,"COH")>0 ) &
-&            COH_COLL_element(1)%v3_c(i_np,i_mp,i_kmq)=COH_COLL_element(1)%v3_c(i_np,i_mp,i_kmq) &
+&            COH_COLL_element(1)%v3_c(i_np,i_mp,i_kmq,i_sp)=COH_COLL_element(1)%v3_c(i_np,i_mp,i_kmq,i_sp) &
 &                                                      +2._SP         *pi*             pre_factor2
            !
            call live_timing(steps=1)
diff --git a/src/collisions/COLLISIONS_NEQ_GW_static.F b/src/collisions/COLLISIONS_NEQ_GW_static.F
index 0f901378c0..822acb5b2c 100644
--- a/src/collisions/COLLISIONS_NEQ_GW_static.F
+++ b/src/collisions/COLLISIONS_NEQ_GW_static.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
  !
  ! NEQ GW collisions 
@@ -31,8 +35,9 @@ subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
  use R_lattice,     ONLY:qindx_S,qindx_C,bz_samp
  use D_lattice,     ONLY:DL_vol
  use wrapper,       ONLY:M_by_V
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)             ::E
  type(bz_samp)            ::k,q,Xk
@@ -84,10 +89,6 @@ subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
  !
  call timing('NEQ GW collisions',OPR='start')
  !
- ! Test the spatial Inversion
- !   
- call WF_spatial_inversion(E,Xk)
- !
  ! ALLOCATION
  !------------
  !
@@ -156,7 +157,7 @@ subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
        !
        ! Poles and Residuals
        !
-       forall(i1=1:X%ng) X_mat(i1,i1,1)=X_mat(i1,i1,1)+1._SP
+       forall (i1=1:X%ng) X_mat(i1,i1,1)=X_mat(i1,i1,1)+1._SP
        !
 #if defined _OPENMP
 !$omp parallel do default(shared), private(i1,i2)
@@ -193,9 +194,7 @@ subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
      ! Spins
      !
      i_k_sp  = GW_NEQ_collisions%state(i_coll1,4)
-     i_p_sp  = i_k_sp
      i_kmq_sp= i_k_sp
-     i_pmq_sp= i_k_sp
      !
      isc%is  =(/i_n ,i_k,i_k_s,i_k_sp/)
      isc%os  =(/i_np,i_kmq,i_kmq_s,i_kmq_sp/)
@@ -209,6 +208,8 @@ subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
        i_m     = GW_NEQ_collisions%state(i_coll2,1)
        i_mp    = GW_NEQ_collisions%state(i_coll2,2)
        i_p     = GW_NEQ_collisions%state(i_coll2,3)
+       i_p_sp  = GW_NEQ_collisions%state(i_coll2,4)
+       i_pmq_sp= i_k_sp
        !
        do i_p_star=1,k%nstar(i_p)
          !
@@ -260,13 +261,13 @@ subroutine COLLISIONS_NEQ_GW_static(X,Xk,E,k,q,Xw)
          !
          if (io_COLLs) then
            !
-           GW_NEQ_COLL_element(1)%v3_r(i_J,1,iqbz)=&
-&             GW_NEQ_COLL_element(1)%v3_r(i_J,1,iqbz)                 +abs(pre_factor)**2
+           GW_NEQ_COLL_element(1)%v3_r(i_J,1,iqbz,i_p_sp)=&
+&             GW_NEQ_COLL_element(1)%v3_r(i_J,1,iqbz,i_p_sp)                 +abs(pre_factor)**2
            !
          else
            !
-           GW_NEQ_COLL_element(i_coll1_mem)%v3_r(i_J_mem,1,i_q_mem)=&
-&             GW_NEQ_COLL_element(i_coll1_mem)%v3_r(i_J_mem,1,i_q_mem)+abs(pre_factor)**2
+           GW_NEQ_COLL_element(i_coll1_mem)%v3_r(i_J_mem,1,i_q_mem,i_p_sp)=&
+&             GW_NEQ_COLL_element(i_coll1_mem)%v3_r(i_J_mem,1,i_q_mem,i_p_sp)+abs(pre_factor)**2
            !
          endif
          !
diff --git a/src/collisions/COLLISIONS_alloc_and_free.F b/src/collisions/COLLISIONS_alloc_and_free.F
index d7f3faf098..c41ef5af4d 100644
--- a/src/collisions/COLLISIONS_alloc_and_free.F
+++ b/src/collisions/COLLISIONS_alloc_and_free.F
@@ -5,28 +5,34 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine COLLISIONS_alloc_and_free(what,elements,actions,I)
  !
  use pars,           ONLY:cZERO,rZERO,DP,SP
  use plasma,         ONLY:N_plasma_poles_global
  use R_lattice,      ONLY:nqbz,nkibz
+ use electrons,      ONLY:n_met_bands,n_full_bands,n_sp_pol
  use parallel_m,     ONLY:PAR_nPlasma,PAR_nQ_bz
  use collision_ext,  ONLY:COLLISIONS_element,COH_COLL_element,HXC_COLL_element,GW_NEQ_COLL_element,&
-&                         COLL_bands,COLLISIONS_load_SP,&
+&                         COLL_bands,COLLISIONS_load_SP,COLLISIONS_CV_only,&
 &                         GW_NEQ_collisions,COH_collisions,HXC_collisions,P_collisions,COLLISIONS_group
 #if defined _QED
  use QP_m,           ONLY:QP_QED_ng
  use collision_ext,  ONLY:P_COLL_element
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)             :: what,elements,actions
  integer                  :: I
  !
  ! Work Space
  !
- integer :: D(3,2),COLL_range(2),J,K,mem_size(5),size_
+ integer :: D(3,4),COLL_range(2),J,K,mem_size(5),size_
  logical :: l_COH,l_HXC,l_GW_NEQ,l_v3,l_v1,l_table,l_all,l_alloc,&
 &           l_clean,l_collision,l_distribute,l_Momentum,l_complex,l_real
  !
@@ -68,10 +74,12 @@ subroutine COLLISIONS_alloc_and_free(what,elements,actions,I)
  l_clean     =index(actions,"clean")>0
  l_distribute=index(actions,"distribute")>0
  !
+ D=0
+ !
  if (l_COH) then
-   D(1,:)=COLL_bands
-   D(2,:)=COLL_bands
-   D(3,:)=(/1,nkibz/)
+   D(1,1:2)=COLL_bands
+   D(2,1:2)=COLL_bands
+   D(3,1:2)=(/1,nkibz/)
    if (l_collision.and.l_alloc) then
      allocate(COH_COLL_element(size_))
      COH_COLL_element%N=size_
@@ -86,9 +94,32 @@ subroutine COLLISIONS_alloc_and_free(what,elements,actions,I)
  endif
  !
  if (l_HXC) then
-   D(1,:)=COLL_bands
-   D(2,:)=COLL_bands
-   D(3,:)=(/1,nkibz/)
+   ! These numbers are used in:
+   ! (i) src/io_parallel/io_COLLISIONS.F (all components of D in case CV_only)
+   !     * COLLISIONS_CV_only case
+   !       ! This one only defines the I/O dimension. The factor 2 on the kpts
+   !       ! exist because, for each k, there is the cv and the vc block
+   !       N(1)=(COLL_grp%D(3,2)-COLL_grp%D(3,1)+1)*2  ! 2*Nk
+   !       N(2)=COLL_grp%D(2,2)-COLL_grp%D(2,3)        ! Nc
+   !       N(3)=COLL_grp%D(1,4)-COLL_grp%D(1,1)+1      ! Nv
+   !     * General case
+   !       N(1)=(COLL_grp%D(3,2)-COLL_grp%D(3,1)+1)    ! Nk
+   !       N(2)=COLL_grp%D(2,2)-COLL_grp%D(2,1)        ! Nb1
+   !       N(3)=COLL_grp%D(1,2)-COLL_grp%D(1,1)+1      ! Nb2
+   ! (ii) src/collisions/COLLISIONS_linearize_and_IO.F (only components D(1:3,:))
+   !     do i3=COLL_grp%D(3,1),COLL_grp%D(3,2)       ! ik
+   !       do i2=COLL_grp%D(2,1),COLL_grp%D(2,2)     ! ib1
+   !         do i1=COLL_grp%D(1,1),COLL_grp%D(1,2)   ! ib2
+   D(1,1:2)=COLL_bands
+   D(2,1:2)=COLL_bands
+   D(3,1:2)=(/1,nkibz/)
+   D(:,3:4)=-1 ! unused
+   if (COLLISIONS_CV_only) then
+     D(1,3)=-1 ! unused
+     D(2,3)=minval(n_full_bands)+1 ! lower bound for conduction
+     D(1,4)=maxval(n_met_bands)    ! upper bound for valence
+     D(2,4)=-1 ! unused
+   endif
    if (l_collision.and.l_alloc) then
      allocate(HXC_COLL_element(size_))
      HXC_COLL_element%N=size_
@@ -105,9 +136,9 @@ subroutine COLLISIONS_alloc_and_free(what,elements,actions,I)
 #if defined _QED
  !
  if (l_Momentum) then
-   D(1,:)=(/1,QP_QED_ng/)
-   D(2,:)=COLL_bands
-   D(3,:)=(/1,nqbz/)
+   D(1,1:2)=(/1,QP_QED_ng/)
+   D(2,1:2)=COLL_bands
+   D(3,1:2)=(/1,nqbz/)
    if (l_collision.and.l_alloc) then
      allocate(P_COLL_element(size_))
      P_COLL_element%N=size_
@@ -124,12 +155,12 @@ subroutine COLLISIONS_alloc_and_free(what,elements,actions,I)
 #endif
  !
  if (l_GW_NEQ) then
-   D(1,:)=(/1,N_plasma_poles_global/)
-   D(2,:)=(/1,1/)
-   D(3,:)=(/1,nqbz/)
+   D(1,1:2)=(/1,N_plasma_poles_global/)
+   D(2,1:2)=(/1,1/)
+   D(3,1:2)=(/1,nqbz/)
    if (l_distribute.and.(l_v3.or.l_table)) then
-     D(1,:)=(/1,PAR_nPlasma/)
-     D(3,:)=(/1,PAR_nQ_bz/)
+     D(1,1:2)=(/1,PAR_nPlasma/)
+     D(3,1:2)=(/1,PAR_nQ_bz/)
    endif
    if (l_collision.and.l_alloc) then
      allocate(GW_NEQ_COLL_element(size_))
@@ -146,9 +177,9 @@ subroutine COLLISIONS_alloc_and_free(what,elements,actions,I)
  !
  contains
    !
-   subroutine local_alloc_and_free(COLL,COLL_grp)
+   subroutine local_alloc_and_free(COLL_element,COLL_grp)
      type(COLLISIONS_group)   :: COLL_grp
-     type(COLLISIONS_element) :: COLL(COLL_grp%N)
+     type(COLLISIONS_element) :: COLL_element(COLL_grp%N)
      if(COLL_grp%N==0) return
      mem_size=0
      do J=COLL_range(1),COLL_range(2)
@@ -157,64 +188,64 @@ subroutine local_alloc_and_free(COLL,COLL_grp)
        ! allocated. This is because J runs on the parallel components and not on the real ones.
        !
        do K=1,COLL_grp%N
-         if (COLL_grp%PAR_map(K)==J) COLL(J)%I=K
+         if (COLL_grp%PAR_map(K)==J) COLL_element(J)%I=K
        enddo
        !
        if (l_alloc.and.(l_v3.or.l_all)) then
          if(l_real) then
-           YAMBO_ALLOC(COLL(J)%v3_r,(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2)))
-           COLL(J)%v3_r=rZERO
-           mem_size(1)=mem_size(1)+size(COLL(J)%v3_r)
+           YAMBO_ALLOC(COLL_element(J)%v3_r,(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2),n_sp_pol))
+           COLL_element(J)%v3_r=rZERO
+           mem_size(1)=mem_size(1)+size(COLL_element(J)%v3_r)
          endif
          if(l_complex) then
-           YAMBO_ALLOC(COLL(J)%v3_c,(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2)))
-           COLL(J)%v3_c=cZERO
-           mem_size(2)=mem_size(2)+size(COLL(J)%v3_c)
+           YAMBO_ALLOC(COLL_element(J)%v3_c,(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2),n_sp_pol))
+           COLL_element(J)%v3_c=cZERO
+           mem_size(2)=mem_size(2)+size(COLL_element(J)%v3_c)
          endif
          COLL_grp%D=D
        endif
        if (l_clean.and.(l_v3.or.l_all)) then
-         if (l_real   .and.allocated(COLL(J)%v3_r)) then
-           YAMBO_FREE(COLL(J)%v3_r)
+         if (l_real   .and.allocated(COLL_element(J)%v3_r)) then
+           YAMBO_FREE(COLL_element(J)%v3_r)
          endif
-         if (l_complex.and.allocated(COLL(J)%v3_c)) then
-            YAMBO_FREE(COLL(J)%v3_c)
+         if (l_complex.and.allocated(COLL_element(J)%v3_c)) then
+            YAMBO_FREE(COLL_element(J)%v3_c)
          endif
        endif
        if (l_alloc.and.(l_v1.or.l_all)) then
          if(l_real) then
-           YAMBO_ALLOC(COLL(J)%v_r,(COLL(J)%N))
-           COLL(J)%v_r=rZERO
-           mem_size(3)=mem_size(3)+size(COLL(J)%v_r)
+           YAMBO_ALLOC(COLL_element(J)%v_r,(COLL_element(J)%N))
+           COLL_element(J)%v_r=rZERO
+           mem_size(3)=mem_size(3)+size(COLL_element(J)%v_r)
          endif
          if(l_complex) then
-           YAMBO_ALLOC(COLL(J)%v_c,(COLL(J)%N))
-           COLL(J)%v_c=cZERO
-           mem_size(4)=mem_size(4)+size(COLL(J)%v_c)
+           YAMBO_ALLOC(COLL_element(J)%v_c,(COLL_element(J)%N))
+           COLL_element(J)%v_c=cZERO
+           mem_size(4)=mem_size(4)+size(COLL_element(J)%v_c)
            if(SP==DP.and.COLLISIONS_load_SP) then
-             YAMBO_ALLOC(COLL(J)%v_c_SP,(COLL(J)%N))
-             COLL(J)%v_c_SP=cZERO
-             mem_size(4)=mem_size(4)+size(COLL(J)%v_c_SP)
+             YAMBO_ALLOC(COLL_element(J)%v_c_SP,(COLL_element(J)%N))
+             COLL_element(J)%v_c_SP=cZERO
+             mem_size(4)=mem_size(4)+size(COLL_element(J)%v_c_SP)
            endif
          endif
        endif
        if (l_clean.and.(l_v1.or.l_all)) then
-         YAMBO_FREE(COLL(J)%v_c)
-         if(SP==DP.and.allocated(COLL(J)%v_c_SP)) then
-            YAMBO_FREE(COLL(J)%v_c_SP)
+         YAMBO_FREE(COLL_element(J)%v_c)
+         if(SP==DP.and.allocated(COLL_element(J)%v_c_SP)) then
+            YAMBO_FREE(COLL_element(J)%v_c_SP)
          endif
-         YAMBO_FREE(COLL(J)%v_r)
+         YAMBO_FREE(COLL_element(J)%v_r)
        endif
        if (l_alloc.and.(l_table.or.l_all)) then
-         !AMBO_ALLOC(COLL(J)%table,(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2)))
-         allocate(COLL(J)%table(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2)))
-         COLL(J)%table='f'
+         !AMBO_ALLOC(COLL_element(J)%table,(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2)))
+         allocate(COLL_element(J)%table(D(1,1):D(1,2),D(2,1):D(2,2),D(3,1):D(3,2),n_sp_pol))
+         COLL_element(J)%table='f'
          COLL_grp%D=D
-         mem_size(5)=mem_size(5)+size(COLL(J)%table)
+         mem_size(5)=mem_size(5)+size(COLL_element(J)%table)
        endif
        if (l_clean.and.(l_table.or.l_all)) then
-         !AMBO_FREE(COLL(J)%table)
-         deallocate(COLL(J)%table)
+         !AMBO_FREE(COLL_element(J)%table)
+         deallocate(COLL_element(J)%table)
        endif
      enddo
      !
diff --git a/src/collisions/COLLISIONS_basic_operations.F b/src/collisions/COLLISIONS_basic_operations.F
index 7c1468711a..04e86a45d6 100644
--- a/src/collisions/COLLISIONS_basic_operations.F
+++ b/src/collisions/COLLISIONS_basic_operations.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine COLLISIONS_basic_operations( what, COLL )
  !
  ! Here I allocate and define some objects of the COLLISIONS_group kind.
@@ -17,13 +21,15 @@ subroutine COLLISIONS_basic_operations( what, COLL )
  !  integer,     pointer  :: IO_status(:) => null()
  ! end type
  !
- use collision_ext, ONLY:COLLISIONS_group,COLL_bands,COLLISIONS_CV_only
+ use collision_ext, ONLY:COLLISIONS_group,COLLISIONS_CV_only,&
+ &                       COLL_bands,COLL_bands_frozen
  use R_lattice,     ONLY:nXkibz
  use electrons,     ONLY:n_sp_pol,n_full_bands,n_met_bands
  use wave_func,     ONLY:states_to_load
  use IO_m,          ONLY:io_COLLs
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)           :: what
  type(COLLISIONS_group) :: COLL
@@ -77,23 +83,26 @@ subroutine COLLISIONS_basic_operations( what, COLL )
      !
      COLL%N=0
      !
-     do i_k=1,nXkibz
-       do i_n=COLL_bands(1),COLL_bands(2)
-         do i_mix=0,COLL_bands(2)-COLL_bands(1)
-           !
-           i_m=i_n+i_mix
-           !
-           ! No Off-diagonal elements in the case of P collisions
+     do i_sp=1,n_sp_pol
+       do i_k=1,nXkibz
+         do i_n=COLL_bands(1),COLL_bands(2)
            !
-           if (trim(COLL%name)=="Momentum".and.i_mix>0)  cycle
+           if ( COLLISIONS_CV_only .and. i_n> n_met_bands(i_sp)  ) cycle
+           if ( COLL_bands_frozen(i_n)==1 ) cycle
            !
-           if (i_m>COLL_bands(2)) cycle
-           !
-           do i_sp=1,n_sp_pol
+           do i_mix=0,COLL_bands(2)-COLL_bands(1)
+             !
+             i_m=i_n+i_mix
              !
-             if ( COLLISIONS_CV_only .and. i_n> n_met_bands(i_sp)  ) cycle
              if ( COLLISIONS_CV_only .and. i_m<=n_full_bands(i_sp) ) cycle
              !
+             if (i_m>COLL_bands(2)) cycle
+             if ( COLL_bands_frozen(i_m)==1 ) cycle
+             !
+             ! No Off-diagonal elements in the case of P collisions
+             !
+             if (trim(COLL%name)=="Momentum".and.i_mix>0)  cycle
+             !
              COLL%N=COLL%N+1
              if (i_c==2) then
                COLL%state(COLL%N,:4)=(/i_n,i_m,i_k,i_sp/)
@@ -104,6 +113,7 @@ subroutine COLLISIONS_basic_operations( what, COLL )
                  COLL%state(COLL%N,:4)=(/i_m,i_n,i_k,i_sp/)
                endif
              endif
+             !
            enddo
          enddo
        enddo
diff --git a/src/collisions/COLLISIONS_compose_nl.F b/src/collisions/COLLISIONS_compose_nl.F
index 01430ecef7..a75c7f611a 100644
--- a/src/collisions/COLLISIONS_compose_nl.F
+++ b/src/collisions/COLLISIONS_compose_nl.F
@@ -11,8 +11,9 @@ subroutine COLLISIONS_compose_nl(G_in)
  ! 
  use pars,             ONLY:SP,cZERO,cI
  use drivers,          ONLY:l_sc_fock,l_sc_coh,l_sc_sex,l_sc_exx,l_sc_hartree!l_sc_hf,l_sc_cohsex,l_sc_exx
- use collision_ext,    ONLY:HXC_COLL_element,COLL_bands,HXC_collisions,COLLISIONS_load_SP
- use electrons,        ONLY:spin
+ use collision_ext,    ONLY:HXC_COLL_element,COLL_bands,HXC_collisions,COLLISIONS_load_SP,&
+ &                          COLLISIONS_CV_only,COLL_bands_frozen
+ use electrons,        ONLY:spin,n_met_bands,n_full_bands,n_sp_pol
  use R_lattice,        ONLY:nXkibz
  use parallel_m,       ONLY:HEAD_QP_cpu,PAR_COM_Xk_ibz_INDEX
  use parallel_int,     ONLY:PP_redux_wait
@@ -23,13 +24,14 @@ subroutine COLLISIONS_compose_nl(G_in)
  !
  implicit none
  !
- complex(SP), intent(in)   :: G_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz)
+ complex(SP), intent(in)   :: G_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz,n_sp_pol)
  !
  ! Work Space
  !
- integer           :: i_coll,ib,ibp,i_k,i_k_NL,i_kmq,i_comp,i_coll_mem,i_spin
+ integer           :: i_coll,ib,ibp,i_k,i_k_NL,i_kmq,i_comp,i_coll_mem,i_sp,ip_sp
  integer           :: i_n,i_m
  complex(SP)       :: COLL
+ logical           :: do_collisions(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),n_sp_pol),coll_sp
  !
  if(.not.l_sc_fock.and..not.l_sc_coh.and..not.l_sc_sex.and..not.l_sc_exx.and..not.l_sc_hartree) return
  !
@@ -37,7 +39,24 @@ subroutine COLLISIONS_compose_nl(G_in)
  call timing('NL COLL_compose',OPR='start')
 #endif
  !
+ do_collisions=.false.
+ coll_sp=COLLISIONS_load_SP
+ !
+ do i_sp=1,n_sp_pol
+ do ibp=COLL_bands(1),COLL_bands(2)
+   if ( COLL_bands_frozen(ibp)==1 ) cycle
+   do ib=COLL_bands(1),COLL_bands(2)
+     if ( COLLISIONS_CV_only .and. (ib> n_met_bands(i_sp)  .and. ibp> n_met_bands(i_sp) ) ) cycle
+     if ( COLLISIONS_CV_only .and. (ib<=n_full_bands(i_sp) .and. ibp<=n_full_bands(i_sp)) ) cycle
+     if ( COLL_bands_frozen(ib)==1 ) cycle
+     do_collisions(ib,ibp,i_sp)=.true.
+   enddo
+ enddo
+ enddo
+ !
+!$OMP PARALLEL WORKSHARE  
  RT_Vnl_xc=cZERO
+!$OMP END PARALLEL WORKSHARE  
  !
  do i_coll=1,HXC_collisions%N
    !
@@ -46,31 +65,35 @@ subroutine COLLISIONS_compose_nl(G_in)
    i_k       =HXC_collisions%state(i_coll,3)
    i_n       =HXC_collisions%state(i_coll,1)
    i_m       =HXC_collisions%state(i_coll,2)
-   i_spin    =HXC_collisions%state(i_coll,4)
+   i_sp      =HXC_collisions%state(i_coll,4)
    i_coll_mem=HXC_collisions%PAR_map(i_coll)
    !
    i_comp=1
    !
    i_k_NL=i_k
    !
+   do ip_sp=1,n_sp_pol
    do i_kmq=1,nXkibz
      !
       do ibp=COLL_bands(1),COLL_bands(2)
        do ib=COLL_bands(1),COLL_bands(2)
          !
-         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=='t') then
-           if(COLLISIONS_load_SP) then
-             COLL=cmplx(HXC_COLL_element(i_coll_mem)%v_c_SP(i_comp),kind=SP)
-           else
-             COLL=cmplx(HXC_COLL_element(i_coll_mem)%v_c(i_comp),kind=SP)
-           endif
-           RT_Vnl_xc(i_n,i_m,i_k_NL,i_spin)=RT_Vnl_xc(i_n,i_m,i_k_NL,i_spin)-cI*COLL*G_in(ib,ibp,i_kmq)
-           i_comp=i_comp+1
+         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq,ip_sp)=='f') cycle
+         !
+         if ( .not.do_collisions(ib,ibp,ip_sp) ) cycle
+         !
+         if(coll_sp) then
+           COLL=cmplx(HXC_COLL_element(i_coll_mem)%v_c_SP(i_comp),kind=SP)
+         else
+           COLL=cmplx(HXC_COLL_element(i_coll_mem)%v_c(i_comp),kind=SP)
          endif
+         RT_Vnl_xc(i_n,i_m,i_k_NL,i_sp)=RT_Vnl_xc(i_n,i_m,i_k_NL,i_sp)-cI*COLL*G_in(ib,ibp,i_kmq,ip_sp)
+         i_comp=i_comp+1
          !
        enddo
      enddo
    enddo
+   enddo
    !
  enddo
  !
@@ -82,9 +105,10 @@ subroutine COLLISIONS_compose_nl(G_in)
    i_k       =HXC_collisions%state(i_coll,3)
    i_n       =HXC_collisions%state(i_coll,1)
    i_m       =HXC_collisions%state(i_coll,2)
+   i_sp      =HXC_collisions%state(i_coll,4)
    !
-   RT_Vnl_xc(i_n,i_n,i_k,:) =real(RT_Vnl_xc(i_n,i_n,i_k,:),SP)
-   RT_Vnl_xc(i_m,i_n,i_k,:)=conjg(RT_Vnl_xc(i_n,i_m,i_k,:))
+   RT_Vnl_xc(i_n,i_n,i_k,i_sp) =real(RT_Vnl_xc(i_n,i_n,i_k,i_sp),SP)
+   RT_Vnl_xc(i_m,i_n,i_k,i_sp)=conjg(RT_Vnl_xc(i_n,i_m,i_k,i_sp))
    ! 
  enddo
  !
diff --git a/src/collisions/COLLISIONS_compose_rt.F b/src/collisions/COLLISIONS_compose_rt.F
index 63f05bb381..467b37d6c6 100644
--- a/src/collisions/COLLISIONS_compose_rt.F
+++ b/src/collisions/COLLISIONS_compose_rt.F
@@ -5,31 +5,30 @@
 !
 ! Authors (see AUTHORS file for details): CA AM DS
 !
-subroutine COLLISIONS_compose_rt(dG_in)
+subroutine COLLISIONS_compose_rt(dG_avg)
  !
  ! HF/SEX(state) = - (0.,1.) * \sum_{n,n',q} COLLISIONS(n,n',q,state) x G_lesser(n,n',k-q)
  ! 
  use pars,             ONLY:SP,cZERO,cI
  use drivers,          ONLY:l_sc_fock,l_sc_coh,l_sc_sex,l_sc_exx,l_sc_hartree
- use collision_ext,    ONLY:HXC_COLL_element,COLL_bands,HXC_collisions
- use electrons,        ONLY:spin
+ use collision_ext,    ONLY:HXC_COLL_element,COLL_bands,HXC_collisions,COLLISIONS_CV_only,&
+ &                          COLL_bands_frozen
+ use electrons,        ONLY:spin,n_met_bands,n_full_bands,n_sp_pol
  use R_lattice,        ONLY:nXkibz
  use parallel_int,     ONLY:PP_redux_wait
- use parallel_m,       ONLY:PAR_COM_Xk_ibz_INDEX,PAR_COM_Q_INDEX,&
-&                           PAR_IND_Xk_ibz,PAR_IND_Bp_mat,PAR_G_k_range
+ use parallel_m,       ONLY:PAR_COM_Xk_ibz_A2A,PAR_IND_Xk_ibz,PAR_IND_Bp_mat
  use hamiltonian,      ONLY:B_mat_index
- use real_time,        ONLY:RTibz,RT_Vnl_xc,RT_bands
+ use real_time,        ONLY:RT_Vnl_xc,RT_bands
  use timing_m,         ONLY:timing
  !
  implicit none
  !
- complex(SP), intent(in)   :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP), intent(in)   :: dG_avg(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz,n_sp_pol)
  !
  ! Work Space
  !
- integer           :: i_coll,i_comp,i_coll_mem,i_spin,i_n,i_m,i_kmq,ib,ibp,i_k,i_k_RT
- logical           :: do_collisions(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2))
- complex(SP)       :: G_avg(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz)
+ integer           :: i_coll,i_comp,i_coll_mem,i_sp,ip_sp,i_n,i_m,i_kmq,ib,ibp,i_k
+ logical           :: do_collisions(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),n_sp_pol,2)
  !
  if(.not.l_sc_fock.and..not.l_sc_coh.and..not.l_sc_sex.and..not.l_sc_exx.and..not.l_sc_hartree) return
  !
@@ -37,40 +36,20 @@ subroutine COLLISIONS_compose_rt(dG_in)
  !
  do_collisions=.false.
  !
+ do i_sp=1,n_sp_pol
  do ibp=COLL_bands(1),COLL_bands(2)
+   if ( COLL_bands_frozen(ibp)==1 ) cycle
    do ib=COLL_bands(1),COLL_bands(2)
+     if ( COLLISIONS_CV_only .and. (ib> n_met_bands(i_sp)  .and. ibp> n_met_bands(i_sp) ) ) cycle
+     if ( COLLISIONS_CV_only .and. (ib<=n_full_bands(i_sp) .and. ibp<=n_full_bands(i_sp)) ) cycle
+     if ( COLL_bands_frozen(ib)==1 ) cycle
+     do_collisions(ib,ibp,i_sp,1)=.true.
      if (.not.PAR_IND_Bp_mat%element_1D( B_mat_index(ib,ibp,COLL_bands) ) ) cycle
-     do_collisions(ib,ibp)=.true.
+     do_collisions(ib,ibp,i_sp,2)=.true.
    enddo
  enddo
- !
- G_avg=cZERO
- !
- do i_k=1,nXkibz
-   !
-   if (.not.PAR_IND_Xk_ibz%element_1D(i_k) ) cycle
-   !
-   do i_k_RT=RTibz%k_range(i_k,1),RTibz%k_range(i_k,2)
-     G_avg(:,:,i_k)=G_avg(:,:,i_k)+dG_in(:,:,i_k_RT)
-   enddo
-   !
-   ! The 1/real(RT_k_FT%N_mapped(i_k),SP) factor is needed here because I'm integrating
-   ! over kmq. With the DbGd the size of the integrating factor has to be reduced
-   ! 
-   G_avg(:,:,i_k)=G_avg(:,:,i_k)/real(RTibz%N_mapped(i_k),SP)/real(PAR_COM_Q_INDEX%n_CPU)
-   !
  enddo
  !
- call timing('COLLISIONS (compose)',OPR='stop')
- !
- call timing('COLLISIONS (redux)',OPR='start')
- !
- call PP_redux_wait(G_avg,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
- !
- call timing('COLLISIONS (redux)',OPR='stop')
- !
- call timing('COLLISIONS (compose)',OPR='start')
- !
  RT_Vnl_xc=cZERO
  !
  do i_coll=1,HXC_collisions%N
@@ -80,7 +59,7 @@ subroutine COLLISIONS_compose_rt(dG_in)
    i_k       =HXC_collisions%state(i_coll,3)
    i_n       =HXC_collisions%state(i_coll,1)
    i_m       =HXC_collisions%state(i_coll,2)
-   i_spin    =HXC_collisions%state(i_coll,4)
+   i_sp      =HXC_collisions%state(i_coll,4)
    i_coll_mem=HXC_collisions%PAR_map(i_coll)
    !
    ! If the COLLISIONS are read from a DB with more bands than RT_bands:
@@ -90,43 +69,107 @@ subroutine COLLISIONS_compose_rt(dG_in)
    !
    i_comp=0
    !
+   do ip_sp=1,n_sp_pol
    do i_kmq=1,nXkibz
      !
      do ibp=COLL_bands(1),COLL_bands(2)
        do ib=COLL_bands(1),COLL_bands(2)
          !
-         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=='f') cycle
+         if (.not.do_collisions(ib,ibp,ip_sp,1)) cycle
+         !
+         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq,ip_sp)=='f') cycle
          !
          i_comp=i_comp+1
          !
-         if (.not.do_collisions(ib,ibp)) cycle
+         if (.not.do_collisions(ib,ibp,ip_sp,2)) cycle
          !
-         RT_Vnl_xc(i_n,i_m,i_k,i_spin)=RT_Vnl_xc(i_n,i_m,i_k,i_spin) &
-&                                      -cI*HXC_COLL_element(i_coll_mem)%v_c(i_comp)*G_avg(ib,ibp,i_kmq)
+         RT_Vnl_xc(i_n,i_m,i_k,i_sp)=RT_Vnl_xc(i_n,i_m,i_k,i_sp) &
+&                                      -cI*HXC_COLL_element(i_coll_mem)%v_c(i_comp)*dG_avg(ib,ibp,i_kmq,ip_sp)
          !
        enddo
      enddo
      !
    enddo
+   enddo
    !
  enddo
  !
- ! All 2 All
- !===========
+ call timing('COLLISIONS (compose)',OPR='stop')
+ !
+ call timing('COLLISIONS (sym-red)',OPR='start')
  !
- call PP_redux_wait(RT_Vnl_xc)
+ ! Redux over bands
+ !==================
+ call PP_redux_wait(RT_Vnl_xc,COMM=PAR_COM_Xk_ibz_A2A%COMM)
  !
  ! Symmetrization
  !================
+ do i_sp=1,n_sp_pol
  do i_k=1,nXkibz
+   if (.not.PAR_IND_Xk_ibz%element_1D(i_k) ) cycle
    do i_n=COLL_bands(1),COLL_bands(2)
-     RT_Vnl_xc(i_n,i_n,i_k,:)= real(RT_Vnl_xc(i_n,i_n,i_k,:))
+     RT_Vnl_xc(i_n,i_n,i_k,i_sp)= real(RT_Vnl_xc(i_n,i_n,i_k,i_sp))
      do i_m=i_n+1,COLL_bands(2)
-       RT_Vnl_xc(i_m,i_n,i_k,:)=conjg(RT_Vnl_xc(i_n,i_m,i_k,:))
+       RT_Vnl_xc(i_m,i_n,i_k,i_sp)=conjg(RT_Vnl_xc(i_n,i_m,i_k,i_sp))
      enddo
    enddo
  enddo
+ enddo
  !
- call timing('COLLISIONS (compose)',OPR='stop')
+ ! Redux over kpts (is this needed?)
+ !====================================
+ !call PP_redux_wait(RT_Vnl_xc,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
+ !
+ call timing('COLLISIONS (sym-red)',OPR='stop')
  !
 end subroutine COLLISIONS_compose_rt
+!
+!
+subroutine Build_dG_avg(dG_in,dG_avg)
+ !
+ use pars,             ONLY:SP,cZERO
+ use drivers,          ONLY:l_sc_fock,l_sc_coh,l_sc_sex,l_sc_exx,l_sc_hartree
+ use electrons,        ONLY:n_sp_pol
+ use R_lattice,        ONLY:nXkibz
+ use parallel_int,     ONLY:PP_redux_wait
+ use parallel_m,       ONLY:PAR_COM_Q_INDEX,PAR_COM_Xk_ibz_INDEX,PAR_IND_Xk_ibz,PAR_G_k_range
+ use real_time,        ONLY:RTibz,RT_bands
+ use timing_m,         ONLY:timing
+ !
+ implicit none
+ !
+ complex(SP), intent(in)   :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ complex(SP), intent(out)  :: dG_avg(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz,n_sp_pol)
+ !
+ ! Work Space
+ !
+ integer           :: i_k,i_k_RT,i_sp
+ !
+ if(.not.l_sc_fock.and..not.l_sc_coh.and..not.l_sc_sex.and..not.l_sc_exx.and..not.l_sc_hartree) return
+ !
+ call timing('COLLISIONS (G average)',OPR='start')
+ !
+ dG_avg=cZERO
+ !
+ do i_sp=1,n_sp_pol
+ do i_k=1,nXkibz
+   !
+   if (.not.PAR_IND_Xk_ibz%element_1D(i_k) ) cycle
+   !
+   do i_k_RT=RTibz%k_range(i_k,1),RTibz%k_range(i_k,2)
+     dG_avg(:,:,i_k,i_sp)=dG_avg(:,:,i_k,i_sp)+dG_in(:,:,i_k_RT,i_sp)
+   enddo
+   !
+   ! The 1/real(RT_k_FT%N_mapped(i_k),SP) factor is needed here because I'm integrating
+   ! over kmq. With the DbGd the size of the integrating factor has to be reduced
+   ! 
+   dG_avg(:,:,i_k,i_sp)=dG_avg(:,:,i_k,i_sp)/real(RTibz%N_mapped(i_k),SP)/real(PAR_COM_Q_INDEX%n_CPU)
+   !
+ enddo
+ enddo
+ !
+ call PP_redux_wait(dG_avg,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
+ !
+ call timing('COLLISIONS (G average)',OPR='stop')
+ !
+end subroutine Build_dG_avg
diff --git a/src/collisions/COLLISIONS_compose_sc.F b/src/collisions/COLLISIONS_compose_sc.F
index 415a859437..296b8fb388 100644
--- a/src/collisions/COLLISIONS_compose_sc.F
+++ b/src/collisions/COLLISIONS_compose_sc.F
@@ -10,21 +10,22 @@ subroutine COLLISIONS_compose_sc(G_sex,G_coh)
  ! HF(qp_state) = - (0.,1.) * \sum_{n,n',q} Collisions(n,n',q,qp_state) x G_sex/coh(n,n',k-q)
  ! 
  use pars,             ONLY:SP,cZERO,cI
+ use electrons,        ONLY:n_sp_pol
  use drivers,          ONLY:l_sc_fock,l_sc_coh,l_sc_sex,l_sc_exx,l_sc_hartree
  use QP_m,             ONLY:QP_nk,QP_nb,QP_n_states,QP_Sc,QP_Vnl_xc
  use collision_ext,    ONLY:COH_COLL_element,HXC_COLL_element,COLL_bands,HXC_collisions,&
-&                           COH_collisions,COLLISIONS_have_HARTREE
+&                           COH_collisions,COLLISIONS_have_HARTREE,COLLISIONS_CV_only
  use parallel_m,       ONLY:PAR_IND_QP,HEAD_b_cpu,PAR_IND_G_k
  use parallel_int,     ONLY:PP_redux_wait
  use timing_m,         ONLY:timing
  implicit none
  !
- complex(SP), intent(in)   :: G_sex(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk)
- complex(SP), intent(in)   :: G_coh(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk)
+ complex(SP), intent(in)   :: G_sex(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk,n_sp_pol)
+ complex(SP), intent(in)   :: G_coh(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),QP_nk,n_sp_pol)
  !
  ! Work Space
  !
- integer           :: i_qp,ib,ibp,i_kmq,ic1,ic2,i_coll_mem
+ integer           :: i_qp,ib,ibp,i_kmq,ic1,ic2,i_coll_mem,i_sp
  logical           :: l_sex
  !
  if(.not.l_sc_fock.and..not.l_sc_coh.and..not.l_sc_sex.and..not.l_sc_exx.and.&
@@ -37,6 +38,8 @@ subroutine COLLISIONS_compose_sc(G_sex,G_coh)
  !
  if (.not.allocated(QP_Vnl_xc))  allocate(QP_Vnl_xc(QP_n_states))
  !
+ if (COLLISIONS_CV_only) call error(" cannot use cv only collisions in yambo_sc")
+ !
  l_sex    =l_sc_sex.or.l_sc_fock.or.(l_sc_hartree.and.COLLISIONS_have_HARTREE)
  QP_Vnl_xc=cZERO
  !
@@ -55,14 +58,15 @@ subroutine COLLISIONS_compose_sc(G_sex,G_coh)
    ic1=1
    ic2=1
    !
+   do i_sp=1,n_sp_pol
    do i_kmq=1,QP_nk
      !
      if (.not.PAR_IND_G_k%element_1D(i_kmq)) then 
        if (l_sc_coh) then
-         ic1=ic1+count(COH_COLL_element(i_coll_mem)%table(COLL_bands(1):QP_nb,COLL_bands(1):QP_nb,i_kmq)=='t')
+         ic1=ic1+count(COH_COLL_element(i_coll_mem)%table(COLL_bands(1):QP_nb,COLL_bands(1):QP_nb,i_kmq,i_sp)=='t')
        endif
        if (l_sex) then
-         ic2=ic2+count(HXC_COLL_element(i_coll_mem)%table(COLL_bands(1):QP_nb,COLL_bands(1):QP_nb,i_kmq)=='t')
+         ic2=ic2+count(HXC_COLL_element(i_coll_mem)%table(COLL_bands(1):QP_nb,COLL_bands(1):QP_nb,i_kmq,i_sp)=='t')
        endif
        cycle
      endif
@@ -73,8 +77,8 @@ subroutine COLLISIONS_compose_sc(G_sex,G_coh)
          ! COH part
          !
          if (l_sc_coh) then
-           if (COH_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=='t') then
-             QP_Vnl_xc(i_qp)=QP_Vnl_xc(i_qp)-cI*COH_COLL_element(i_coll_mem)%v_c(ic1)*G_coh(ib,ibp,i_kmq)
+           if (COH_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq,i_sp)=='t') then
+             QP_Vnl_xc(i_qp)=QP_Vnl_xc(i_qp)-cI*COH_COLL_element(i_coll_mem)%v_c(ic1)*G_coh(ib,ibp,i_kmq,i_sp)
              ic1=ic1+1
            endif
          endif
@@ -82,8 +86,8 @@ subroutine COLLISIONS_compose_sc(G_sex,G_coh)
          ! SEX/HF/HARTREE parts
          !
          if (l_sex) then
-           if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=='t') then
-             QP_Vnl_xc(i_qp)=QP_Vnl_xc(i_qp)-cI*HXC_COLL_element(i_coll_mem)%v_c(ic2)*G_sex(ib,ibp,i_kmq)
+           if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq,i_sp)=='t') then
+             QP_Vnl_xc(i_qp)=QP_Vnl_xc(i_qp)-cI*HXC_COLL_element(i_coll_mem)%v_c(ic2)*G_sex(ib,ibp,i_kmq,i_sp)
              ic2=ic2+1
            endif
          endif
@@ -92,6 +96,7 @@ subroutine COLLISIONS_compose_sc(G_sex,G_coh)
      enddo
      !
    enddo
+   enddo
  enddo
  !
  call PP_redux_wait(QP_Vnl_xc)
diff --git a/src/collisions/COLLISIONS_compress.F b/src/collisions/COLLISIONS_compress.F
index 3999aeba6e..72ad22cefd 100644
--- a/src/collisions/COLLISIONS_compress.F
+++ b/src/collisions/COLLISIONS_compress.F
@@ -5,23 +5,30 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! This subroutine works directly with collisions in single-precision
 ! if present
 !
 subroutine COLLISIONS_compress(COLL)
  !
  use pars,            ONLY:SP,DP,SP6,cZERO
- use collision_ext,   ONLY:COLLISIONS_element,COLL_bands,COLLISIONS_cutoff
+ use collision_ext,   ONLY:COLLISIONS_element,COLL_bands,COLLISIONS_cutoff,COLLISIONS_CV_only,&
+ &                         COLL_bands_frozen
  use R_lattice,       ONLY:nXkibz
+ use electrons,       ONLY:n_met_bands,n_full_bands,n_sp_pol
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(COLLISIONS_element), intent(inout) :: COLL
  !
  ! Work Space
  !
  real(SP)  :: COLLISIONS_max
- integer   :: ib,ibp,i_kmq,ic
+ integer   :: ib,ibp,i_kmq,ic,i_sp
  logical   :: l_coll_SP
  !
  if(allocated(COLL%v_c_SP)) then
@@ -35,27 +42,38 @@ subroutine COLLISIONS_compress(COLL)
  !
  ! Search for non-zero collisions
  !
- YAMBO_ALLOC(COLL%v3_c,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),1:nXkibz))
+ YAMBO_ALLOC(COLL%v3_c,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),1:nXkibz,n_sp_pol))
  !
  ic=0
  !
- COLL%table='t'
+ COLL%table='f'
+ !
+ do i_sp=1,n_sp_pol
  do i_kmq=1,nXkibz
    do ibp=COLL_bands(1),COLL_bands(2)
+     !
+     if ( COLL_bands_frozen(ibp)==1 ) cycle
+     !
      do ib=COLL_bands(1),COLL_bands(2)
+       !
+       if ( COLLISIONS_CV_only .and. (ib> n_met_bands(i_sp)  .and. ibp> n_met_bands(i_sp) ) ) cycle
+       if ( COLLISIONS_CV_only .and. (ib<=n_full_bands(i_sp) .and. ibp<=n_full_bands(i_sp)) ) cycle
+       !
+       if ( COLL_bands_frozen(ib)==1 ) cycle
        !
        ic=ic+1
        !
-       if(l_coll_SP)      COLL%v3_c(ib,ibp,i_kmq)=cmplx(COLL%v_c_SP(ic),kind=SP)
-       if(.not.l_coll_SP) COLL%v3_c(ib,ibp,i_kmq)=COLL%v_c(ic)
+       if(     l_coll_SP) COLL%v3_c(ib,ibp,i_kmq,i_sp)=cmplx(COLL%v_c_SP(ic),kind=SP)
+       if(.not.l_coll_SP) COLL%v3_c(ib,ibp,i_kmq,i_sp)=      COLL%v_c(ic)
        !
-       if(abs(COLL%v3_c(ib,ibp,i_kmq))<COLLISIONS_max*COLLISIONS_cutoff)  COLL%table(ib,ibp,i_kmq)='f'
+       if(abs(COLL%v3_c(ib,ibp,i_kmq,i_sp))>=COLLISIONS_max*COLLISIONS_cutoff) COLL%table(ib,ibp,i_kmq,i_sp)='t'
        !
      enddo
    enddo
  enddo
+ enddo
  !
- COLL%N=count(COLL%table(:,:,:)=='t')
+ COLL%N=count(COLL%table(:,:,:,:)=='t')
  !      
  if(l_coll_SP) then
    YAMBO_FREE(COLL%v_c_SP)
@@ -68,20 +86,21 @@ subroutine COLLISIONS_compress(COLL)
  ! Fill collisions again
  !
  ic=0
+ do i_sp=1,n_sp_pol
  do i_kmq=1,nXkibz
    do ibp=COLL_bands(1),COLL_bands(2)
      do ib=COLL_bands(1),COLL_bands(2)
        !
-       if(COLL%table(ib,ibp,i_kmq)=='t') then
-         ic=ic+1
-         if(l_coll_SP)      COLL%v_c_SP(ic)=cmplx(COLL%v3_c(ib,ibp,i_kmq),kind=SP6)
-         if(.not.l_coll_SP) COLL%v_c(ic)   =COLL%v3_c(ib,ibp,i_kmq)
-         !
-       endif
+       if(.not.COLL%table(ib,ibp,i_kmq,i_sp)=='t') cycle
+       !
+       ic=ic+1
+       if(l_coll_SP)      COLL%v_c_SP(ic)=cmplx(COLL%v3_c(ib,ibp,i_kmq,i_sp),kind=SP6)
+       if(.not.l_coll_SP) COLL%v_c(ic)   =COLL%v3_c(ib,ibp,i_kmq,i_sp)
        !
      enddo
    enddo
  enddo
+ enddo
  !
  YAMBO_FREE(COLL%v3_c)
  !
diff --git a/src/collisions/COLLISIONS_eval.F b/src/collisions/COLLISIONS_eval.F
index 939ba28555..5972a252e1 100644
--- a/src/collisions/COLLISIONS_eval.F
+++ b/src/collisions/COLLISIONS_eval.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<collisions.h>
+#include<y_memory.h>
+!
 subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
  !
  ! The EOM looks like:
@@ -30,13 +35,15 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
  use wave_func,      ONLY:WF
  use hamiltonian,    ONLY:H_potential,H_kind
  use global_XC,      ONLY:QP_SE_GW_NEQ,QP_SE_GW_QED
- use collision_ext,  ONLY:COLLISIONS_parallel_SC_structure,COLLISIONS_group,&
+ use collision_ext,  ONLY:COLLISIONS_parallel_SC_structure,COLLISIONS_group,COLLISIONS_from_BSE,&
 &                        COH_collisions,HXC_collisions,P_collisions,GW_NEQ_collisions,COLLISIONS_naming
  use X_m,            ONLY:X_t
  use plasma,         ONLY:EH_gas
  use frequency,      ONLY:w_samp
+ use BS,             ONLY:BS_kpt_bz,BS_kpt_ibz
  use IO_int,         ONLY:io_control
- use IO_m,           ONLY:OP_RD_CL,OP_RD,RD_CL_IF_END,VERIFY,REP,io_COLLs,manage_action
+ use IO_m,           ONLY:OP_RD_CL,OP_RD,RD_CL,RD_CL_IF_END,VERIFY,REP,NONE,DUMP,&
+&                         io_COLLs,manage_action,netcdf_dim_size
  use parallel_m,     ONLY:PAR_IND_G_k,PAR_IND_Bp_mat,PAR_IND_Plasma
  use parallel_int,   ONLY:PP_wait,PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
  use interfaces,     ONLY:WF_free
@@ -51,9 +58,13 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
  !
  ! WorkSpace
  !
- integer       :: ID,IO_err,IO_ACT
+ logical       :: l_collisions_from_bse,l_tmp
+ integer       :: ID,IO_err_COLL,IO_err_BS,IO_ACT,ID_head,ID_mat,ID_cmpr,&
+ &                BS_nk_bz,BS_nk_ibz
  character(20) :: PAR_ch,FREE_ch
  !
+ integer, external :: io_BS_header,io_BS_PAR_init
+ !
  call timing('COLLISIONS_eval',OPR='start')
  !
  call section('*','Extended collisions')
@@ -123,9 +134,42 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
    if(l_use_Hxc_collisions) call CHECK_and_ALLOC(HXC_collisions)
    if(l_use_COH_collisions) call CHECK_and_ALLOC(COH_collisions)
    !
-   if (IO_err/=0) then
+   if (.not.COLLISIONS_from_BSE) IO_err_BS=-1
+   !
+   if (l_use_Hxc_collisions.and.COLLISIONS_from_BSE) then
+     !
+     ! Check if I can use the BSE kernel for the collisions
+     !
+     call io_control(ACTION=OP_RD,COM=NONE,MODE=DUMP,ID=ID_head)
+     IO_err_BS=io_BS_header(1,X(2),ID_head,"connect")
+     BS_nk_bz =netcdf_dim_size(ID_head,"n_kpt_bz")
+     BS_nk_ibz=netcdf_dim_size(ID_head,"n_kpt_ibz")
+     allocate(BS_kpt_bz(3,BS_nk_bz))
+     allocate(BS_kpt_ibz(3,BS_nk_ibz))
+     call io_control(ACTION=RD_CL,COM=NONE,MODE=DUMP,ID=ID_head)
+     IO_err_BS=io_BS_header(1,X(2),ID_head,"full")
+     !
+     if (IO_err_BS/=0) then
+        call error("Collisions from BSE, error while reading io_BS_header")
+     else
+     !if (IO_err_BS==0) then
+       call set_BS_blks(Xk)
+       call io_control(ACTION=OP_RD,COM=REP,MODE=DUMP,ID=ID_mat)
+       IO_err_BS=io_BS_PAR_init(1,ID_mat,"full")
+       call build_inverse_BS_eh_table(Xk,ID_mat)!,BS_nk_bz)
+       call check_BS_parameters_consistency(Xk,BS_nk_bz,BS_nk_ibz)
+       call build_BS_kpt_map(Xk)
+     endif
+     deallocate(BS_kpt_bz)
+     deallocate(BS_kpt_ibz)
+     !
+     if (IO_err_BS/=0) call error("Collisions from BSE, error while reading io_BS_PAR")
+     !
+   endif
+   !
+   if (IO_err_COLL/=0) then
      !
-     call COLLISIONS_HXC(X(2),Xk,E,k,q,Xw(2))
+     call COLLISIONS_HXC(X(2),Xk,E,k,q,Xw(2),IO_err_BS,ID_mat)
      !
      ! COHSEX clean
      !--------------
@@ -135,6 +179,11 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
      !
    endif
    !
+   if (l_use_Hxc_collisions.and.IO_err_BS==0) then 
+     call io_control(ACTION=RD_CL,COM=REP,ID=ID_mat)
+     call io_BS_PAR_free(ID_head,ID_mat,"full",.true.)
+   endif
+   !
  endif
  !
 #if defined _QED
@@ -147,7 +196,7 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
    !
    call CHECK_and_ALLOC(P_collisions)
    !
-   if (IO_err/=0) then
+   if (IO_err_COLL/=0) then
      !
      call COLLISIONS_momentum(E,k,q)
      !
@@ -179,7 +228,7 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
    !
    call CHECK_and_ALLOC(GW_NEQ_collisions)
    !
-   if (IO_err/=0) then 
+   if (IO_err_COLL/=0) then 
      !
      call COLLISIONS_NEQ_GW_static(X(2),Xk,E,k,q,Xw(2)) 
      !
@@ -214,7 +263,7 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
      !
    endif
    !
-   if (IO_err==0) return
+   if (IO_err_COLL==0) return
    !
    FREE_ch="all"
    !
@@ -230,9 +279,8 @@ subroutine COLLISIONS_eval(E,X,k,xk,q,Xw)
  !
  contains
    !
-   subroutine CHECK_and_ALLOC(COLL)
+   subroutine CHECK_and_ALLOC(COLL_grp)
      !
-#include<collisions.h>
      !
      ! Define io_COLLs_header to handle either serial or parallel I/O
      !
@@ -243,27 +291,27 @@ subroutine CHECK_and_ALLOC(COLL)
      !
      integer :: i_coll
      !
-     type(COLLISIONS_group) :: COLL
+     type(COLLISIONS_group) :: COLL_grp
      !
-     IO_err=-1
+     IO_err_COLL=-1
      !
      if (io_COLLs) then
-       COLL%IO_status=-1
+       COLL_grp%IO_status=-1
        call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
-       IO_err=io_COLLs_header(ID,COLL)
+       IO_err_COLL=io_COLLs_header(ID,COLL_grp)
 #if defined _PAR_IO
        call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),ID=ID)
-       io_err=io_COLLISIONS(ID,COLL)
-       if(io_err==0) then
+       io_err_COLL=io_COLLISIONS(ID,COLL_grp)
+       if(io_err_COLL==0) then
          call warning(" Restarting from previous database")
-         do i_coll=1,COLL%N
-           IO_ACT=manage_action(RD_CL_IF_END,i_coll,1,COLL%N)
+         do i_coll=1,COLL_grp%N
+           IO_ACT=manage_action(RD_CL_IF_END,i_coll,1,COLL_grp%N)
            call io_control(ACTION=IO_ACT,COM=REP,SEC=(/4/),ID=ID)
-           io_err=io_COLLISIONS(ID,COLL,i_coll=i_coll)
+           io_err_COLL=io_COLLISIONS(ID,COLL_grp,i_coll=i_coll)
          enddo
        endif
 #endif
-       if (any(COLL%IO_status/=1)) IO_err=-1
+       if (any(COLL_grp%IO_status/=1)) IO_err_COLL=-1
      endif
      !
      ! In case io_Colls is /=0 all CPU have to sincronize before starting
@@ -271,21 +319,209 @@ subroutine CHECK_and_ALLOC(COLL)
      !
      call PP_wait()
      !
-     if (IO_err==0) return
+     if (IO_err_COLL==0) return
      !
      if (io_COLLs) then
-       call COLLISIONS_alloc_and_free(trim(COLL%name),"collision","alloc",1)
-       call COLLISIONS_alloc_and_free(trim(COLL%name),"v3 table","alloc",1)
+       call COLLISIONS_alloc_and_free(trim(COLL_grp%name),"collision","alloc",1)
+       call COLLISIONS_alloc_and_free(trim(COLL_grp%name),"v3 table","alloc",1)
      else
-       call COLLISIONS_alloc_and_free(trim(COLL%name),"collision","alloc",0)
-       if (COLL%kind==QP_SE_GW_NEQ.or.COLL%kind==QP_SE_GW_QED) then
-         call COLLISIONS_alloc_and_free(trim(COLL%name),"v3","alloc distribute",0)
+       call COLLISIONS_alloc_and_free(trim(COLL_grp%name),"collision","alloc",0)
+       if (COLL_grp%kind==QP_SE_GW_NEQ.or.COLL_grp%kind==QP_SE_GW_QED) then
+         call COLLISIONS_alloc_and_free(trim(COLL_grp%name),"v3","alloc distribute",0)
        else
-         call COLLISIONS_alloc_and_free(trim(COLL%name),"v3","alloc",1)
-         call COLLISIONS_alloc_and_free(trim(COLL%name),"table","alloc",0)
+         call COLLISIONS_alloc_and_free(trim(COLL_grp%name),"v3","alloc",1)
+         call COLLISIONS_alloc_and_free(trim(COLL_grp%name),"table","alloc",0)
        endif
      endif
      !
-   end subroutine
+   end subroutine CHECK_and_ALLOC
    !
-end subroutine
+end subroutine COLLISIONS_eval
+!
+!
+subroutine set_BS_blks(Xk)
+ !
+ use pars,         ONLY:cZERO
+ use BS,           ONLY:BS_K_dim,n_BS_blks,n_BS_blks_min,BS_blk,BS_res_ares_n_mat, &
+&                       BSK_IO_mode,l_BSE_kernel_full
+ use R_lattice,    ONLY:bz_samp
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in) :: Xk
+ !
+ integer   :: iB
+ !
+ BS_res_ares_n_mat=1
+ n_BS_blks=BS_K_dim(1)*2
+ n_BS_blks_min=n_BS_blks
+ !
+ allocate(BS_blk(n_BS_blks))
+ BS_blk(:)%size(1)=1
+ ! dual part
+ BS_blk(:)%zise(2)=1
+ !
+ do iB=1,BS_K_dim(1)
+   BS_blk(iB            )%coordinate(1)=iB
+   BS_blk(iB+BS_K_dim(1))%coordinate(1)=iB
+   ! dual part
+   BS_blk(iB            )%poordinate(1)=1
+   BS_blk(iB+BS_K_dim(1))%poordinate(1)=1
+   ! mode
+   BS_blk(iB            )%mode="R"
+   BS_blk(iB+BS_K_dim(1))%mode="C"
+ enddo
+ !
+ if (l_BSE_kernel_full) then
+   BS_blk(:)%size(2)=BS_K_dim(1)
+   BS_blk(:)%coordinate(2)=1
+ else
+   do iB=1,BS_K_dim(1)
+     BS_blk(iB            )%size(2)=BS_K_dim(1)-iB+1
+     BS_blk(iB+BS_K_dim(1))%size(2)=BS_K_dim(1)-iB+1
+     BS_blk(iB            )%coordinate(2)=iB
+     BS_blk(iB+BS_K_dim(1))%coordinate(2)=iB
+     ! dual part
+     BS_blk(iB            )%zise(1)=BS_K_dim(1)-BS_blk(iB)%size(2)
+     BS_blk(iB+BS_K_dim(1))%zise(1)=BS_K_dim(1)-BS_blk(iB)%size(2)
+     BS_blk(iB            )%poordinate(2)=iB
+     BS_blk(iB+BS_K_dim(1))%poordinate(2)=iB
+   enddo
+ endif
+ !
+end subroutine set_BS_blks
+!
+!
+subroutine check_BS_parameters_consistency(Xk,BS_nk_bz,BS_nk_ibz)
+ !
+ use R_lattice,       ONLY:bz_samp
+ use D_lattice,       ONLY:nsym
+ use QP_m,            ONLY:QP_ng_SH,QP_ng_Sx,QP_ng_Sc
+ use BS,              ONLY:BS_blk,BS_n_g_W,BS_n_g_exch,BS_K_dim,BS_bands
+ use collision_ext,   ONLY:COLL_bands
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in) :: Xk
+ integer,       intent(in) :: BS_nk_bz,BS_nk_ibz
+ !
+ integer :: i_H
+ !
+ if ( Xk%nbz/=BS_nk_bz ) &
+ & call error(" Different number of k-points between bse kernel and SAVE folder")
+ !
+ if ( Xk%nibz/=BS_nk_ibz .and. BS_nk_ibz/=0 ) then
+  ! This will lead to wrong results due to phases issues. Indeed the k-pts
+  ! outside the new IBZ maybe reached by a new sym, if the original one
+  ! is not available anymore
+  if ( nsym>1  ) call error("nsym>1 and BSE kernel generated with more syms then SAVE")
+  ! This works only if the SAVE without symmetries has been generated by
+  ! directly expanding the SAVE used of the BSE run
+  if ( nsym==1 ) call warning("nsym=1 and BSE kernel generated with syms")
+ endif
+ !
+ if (BS_n_g_exch/=QP_ng_SH) &
+ & call error(" Different cutoff between coll(hartree) and bse(exch)")
+ !
+ if (BS_n_g_W   /=QP_ng_Sx) &
+ & call error(" Different cutoff between coll(fock) and bse(direct)")
+ !
+ if (BS_n_g_W   /=QP_ng_Sc) &
+ & call error(" Different cutoff between coll(corr) and bse(direct)")
+ !
+ if ( any(BS_bands/=COLL_bands) ) then
+   if ( BS_bands(1)>COLL_bands(1) .or. BS_bands(2)<COLL_bands(2) ) then
+     call error(" Too few bands in BSE kernel")
+   else
+     call warning(" More bands than needed in BSE kernel")
+   endif
+ endif
+ !
+end subroutine check_BS_parameters_consistency
+!
+!
+subroutine build_inverse_BS_eh_table(Xk,ID_mat)!,BS_nk)
+ !
+ use electrons,       ONLY:n_sp_pol
+ use R_lattice,       ONLY:bz_samp
+ use BS,              ONLY:BS_bands,BS_K_dim,BS_blk
+ use BS_solvers,      ONLY:BSS_eh_table_m1
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in) :: Xk
+ integer, intent(in)       :: ID_mat
+ !integer, intent(out)      :: BS_nk
+ !
+ integer :: i_res_ares,i_H,ik_bz,iv,ic,i_sp_c,i_sp_v
+ !
+ allocate(BS_blk(1)%table(5,BS_K_dim(1)))
+ allocate(BS_blk(2)%table(5,BS_K_dim(1)))
+ !
+ call read_BS_table(1,ID_mat)
+ !
+ !BS_nk=-1
+ !do i_res_ares=1,2
+ !do i_H=1,BS_K_dim(1)
+ !  BS_nk = max(BS_blk(i_res_ares)%table(1,i_H),BS_nk)
+ !enddo
+ !enddo
+ !
+ allocate(BSS_eh_table_m1(Xk%nbz,BS_bands(1):BS_bands(2),BS_bands(1):BS_bands(2),n_sp_pol,n_sp_pol))
+ BSS_eh_table_m1=0
+ !call live_timing('BSE inverse eh-table',BS_H_dim)
+ !
+ do i_res_ares=1,2
+ do i_H=1,BS_K_dim(1)
+   !
+   ik_bz    = BS_blk(i_res_ares)%table(1,i_H)
+   iv       = BS_blk(i_res_ares)%table(2,i_H)
+   ic       = BS_blk(i_res_ares)%table(3,i_H)
+   i_sp_c   = BS_blk(i_res_ares)%table(4,i_H)
+   i_sp_v   = BS_blk(i_res_ares)%table(5,i_H)
+   !
+   BSS_eh_table_m1(ik_bz,iv,ic,i_sp_c,i_sp_v)=i_H+(i_res_ares-1)*BS_K_dim(1)
+   !
+   !call live_timing(steps=1)
+   !
+ enddo
+ enddo
+ !call live_timing( )
+ !
+ deallocate(BS_blk(1)%table)
+ deallocate(BS_blk(2)%table)
+ !
+end subroutine build_inverse_BS_eh_table
+!
+!
+subroutine build_BS_kpt_map(Xk)
+ !
+ use pars,           ONLY:SP
+ use BS,             ONLY:BS_K_io_map,BS_kpt_bz
+ use R_lattice,      ONLY:bz_samp
+ use vec_operate,    ONLY:v_is_zero,rlu_v_is_zero,c2a
+ use zeros,          ONLY:k_rlu_zero
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in) :: Xk
+ !
+ integer  :: ik1,ik2
+ real(SP) :: tmp_kpt(3)
+ !
+ allocate(BS_K_io_map(Xk%nbz))
+ BS_K_io_map=0
+ !
+ call k_ibz2bz(Xk,'i',.false.)
+ do ik1=1,Xk%nbz
+   do ik2=1,Xk%nbz
+     call c2a(v_in=Xk%ptbz(ik1,:)-BS_kpt_bz(:,ik2),v_out=tmp_kpt,mode="ki2a")
+     if (.not.rlu_v_is_zero(tmp_kpt,zero_=k_rlu_zero)) cycle
+     BS_K_io_map(ik1)=ik2
+     exit
+   enddo
+ enddo
+ call k_ibz2bz(Xk,'d',.false.)
+ !
+end subroutine build_BS_kpt_map
diff --git a/src/collisions/COLLISIONS_linearize_and_IO.F b/src/collisions/COLLISIONS_linearize_and_IO.F
index 6a9c51171d..0ead5a424d 100644
--- a/src/collisions/COLLISIONS_linearize_and_IO.F
+++ b/src/collisions/COLLISIONS_linearize_and_IO.F
@@ -3,14 +3,20 @@
 !
 ! Copyright (C) 2013 The Yambo Team
 !
-! Authors (see AUTHORS file for details): CA AM
+! Authors (see AUTHORS file for details): CA AM DS
 !
-subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL,COLL_element,COLL_ID,COMM)
+! headers
+!
+#include<collisions.h>
+!
+subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL_grp,COLL_element,COLL_ID,COMM)
  !
  use pars,            ONLY:SP,cZERO
  use parallel_m,      ONLY:master_cpu,yMPI_comm
  use parallel_int,    ONLY:PP_wait
- use collision_ext,   ONLY:COLLISIONS_element,COLLISIONS_cutoff,COLLISIONS_group
+ use electrons,       ONLY:n_met_bands,n_full_bands,n_sp_pol
+ use collision_ext,   ONLY:COLLISIONS_element,COLLISIONS_cutoff,COLLISIONS_group,&
+&                          COLLISIONS_CV_only,COLL_bands_frozen
  use IO_int,          ONLY:io_control
  use IO_m,            ONLY:REP,OP_APP_CL,OP_WR_CL,OP_APP,WR,WR_CL,io_COLLs
  use COLL_interfaces, ONLY:io_COLLISIONS
@@ -18,20 +24,23 @@ subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL,COLL_element,C
  !
  implicit none                                              
  !
-#include<collisions.h>
  !
  character(*)                            :: what
  integer,                  intent(in)    :: i_coll,last_coll
  type(yMPI_comm),          intent(in)    :: COMM
  integer,                  intent(inout) :: COLL_ID
- type(COLLISIONS_group),   intent(inout) :: COLL
- type(COLLISIONS_element), intent(inout) :: COLL_element(COLL%N)
+ type(COLLISIONS_group),   intent(inout) :: COLL_grp
+ type(COLLISIONS_element), intent(inout) :: COLL_element(COLL_grp%N)
  !
  ! Work Space
  !
- integer   :: ic,i1,i2,i3,i_cycle,io_err,i_ref
+ integer   :: ic,i1,i2,i3,i4,i_cycle,io_err,i_ref
  logical   :: store_it,l_HXC,l_GW_NEQ,l_QED_P,l_COH
  real(SP)  :: COLLISIONS_max
+ ! DEBUG <
+ !character(1):: str
+ !complex(SP) :: Mij
+ ! DEBUG >
  !
 #if defined _PAR_IO
  integer, external :: io_COLLs_header
@@ -42,7 +51,7 @@ subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL,COLL_element,C
      if(master_cpu) then
        !Only the master writes the header
        call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=COLL_ID)
-       io_err=io_COLLs_header(COLL_ID,COLL)
+       io_err=io_COLLs_header(COLL_ID,COLL_grp)
      endif
      call PP_wait()
 #if defined _PAR_IO
@@ -51,7 +60,7 @@ subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL,COLL_element,C
      !However, since there is a partial redux of the collisions, only few in practice are writing
      !Setting DO_IT=.true. would also work. However some info would be written twice in the same place
      call io_control(ACTION=OP_APP,COM=REP,SEC=(/1,2/),ID=COLL_ID,COMM=COMM)
-     io_err=io_COLLISIONS(COLL_ID,COLL,COLL_element=COLL_element(1),i_coll=i_coll)
+     io_err=io_COLLISIONS(COLL_ID,COLL_grp,COLL_element=COLL_element(1),i_coll=i_coll)
 #endif
    endif
    call PP_wait()
@@ -66,50 +75,73 @@ subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL,COLL_element,C
  l_QED_P    = index(what,"Momentum")>0
  !
  i_ref=1
- if (.not.io_COLLs) i_ref=COLL%PAR_map(i_coll)
+ if (.not.io_COLLs) i_ref=COLL_grp%PAR_map(i_coll)
  !
  ! Reference value for the reduction of the tiny elements of COHSEX/HF collision
  ! integrals.
  !
- if ( l_HXC .or. l_COH ) COLLISIONS_max=maxval(abs(COLL_element(1)%v3_c(:,:,:)))        
+ if ( l_HXC .or. l_COH ) COLLISIONS_max=maxval(abs(COLL_element(1)%v3_c(:,:,:,:)))        
  !
- COLL_element(i_ref)%table(:,:,:)='f'
+ COLL_element(i_ref)%table(:,:,:,:)='f'
  store_it=.true.
  !
  do i_cycle=1,2
    !
    ic=0
    !
-   do i3=COLL%D(3,1),COLL%D(3,2)
-     do i2=COLL%D(2,1),COLL%D(2,2)
-       do i1=COLL%D(1,1),COLL%D(1,2)
+   do i4=1,n_sp_pol                            ! i_sp_pol
+   do i3=COLL_grp%D(3,1),COLL_grp%D(3,2)       ! ik
+     do i2=COLL_grp%D(2,1),COLL_grp%D(2,2)     ! ib1
+       if ( COLL_bands_frozen(i2)==1 ) cycle
+       do i1=COLL_grp%D(1,1),COLL_grp%D(1,2)   ! ib2
+         !
+         if ( COLLISIONS_CV_only .and. (i1> n_met_bands(i4)  .and. i2> n_met_bands(i4) ) ) cycle
+         if ( COLLISIONS_CV_only .and. (i1<=n_full_bands(i4) .and. i2<=n_full_bands(i4)) ) cycle
+         !
+         if ( COLL_bands_frozen(i1)==1 ) cycle
          !
          ! In the COHSEX/HF case a cutoff can be applied
          !
          if (l_HXC.or.l_COH) then
-           store_it=abs(COLL_element(1)%v3_c(i1,i2,i3))>=COLLISIONS_max*COLLISIONS_cutoff .or. COLLISIONS_cutoff<0.0
+           store_it=abs(COLL_element(1)%v3_c(i1,i2,i3,i4))>=COLLISIONS_max*COLLISIONS_cutoff .or. COLLISIONS_cutoff<0.0
          endif
-#if defined _PAR_IO
-         if (.not.store_it) then
-           COLL_element(1)%v3_c(i1,i2,i3)=cZERO
+         if ((.not.store_it).and.io_COLLs) then
+           COLL_element(1)%v3_c(i1,i2,i3,i4)=cZERO
            store_it=.true.
          endif
-#endif
          !
-         if (store_it) then
-           ic=ic+1
-           if (i_cycle==2) then
-             !
-             COLL_element(i_ref)%table(i1,i2,i3)='t'
-             if (l_HXC   .or.l_COH   ) COLL_element(i_ref)%v_c(ic) = COLL_element(1)%v3_c(i1,i2,i3)
-             if (l_GW_NEQ.or.l_QED_P ) COLL_element(i_ref)%v_r(ic) = COLL_element(1)%v3_r(i1,i2,i3)
-             !
-           endif
+         if (.not.store_it) cycle
+         ic=ic+1
+         !
+         if (i_cycle==2) then
+           !
+           COLL_element(i_ref)%table(i1,i2,i3,i4)='t'
+           if (l_HXC   .or.l_COH   ) COLL_element(i_ref)%v_c(ic) = COLL_element(1)%v3_c(i1,i2,i3,i4)
+           if (l_GW_NEQ.or.l_QED_P ) COLL_element(i_ref)%v_r(ic) = COLL_element(1)%v3_r(i1,i2,i3,i4)
+           !
+           ! DEBUG <
+           !Mij=COLL_element(1)%v3_c(i1,i2,i3,i4)
+           !if (abs( real(Mij))<1.e-10) Mij=cmplx(0._SP,aimag(Mij),kind=SP)
+           !if (abs(aimag(Mij))<1.e-10) Mij=cmplx(real(Mij),0._SP,kind=SP)
+           !str="N"
+           !if (COLL_grp%state(i_coll,1)<COLL_grp%state(i_coll,2) .and. i1<i2 ) str="R"
+           !if (COLL_grp%state(i_coll,1)>COLL_grp%state(i_coll,2) .and. i1<i2 ) str="Q"
+           !if (COLL_grp%state(i_coll,1)<COLL_grp%state(i_coll,2) .and. i1>i2 ) str="C"
+           !if (COLL_grp%state(i_coll,1)>COLL_grp%state(i_coll,2) .and. i1>i2 ) str="A"
+           !!
+           !              write(100,*) COLL_grp%state(i_coll,1:3),i1,i2,i3,i4,str,real(Mij),aimag(Mij)
+           !if (str=="R") write(101,*) COLL_grp%state(i_coll,1:3),i1,i2,i3,i4,str,real(Mij),aimag(Mij)
+           !if (str=="C") write(102,*) COLL_grp%state(i_coll,1:3),i1,i2,i3,i4,str,real(Mij),aimag(Mij)
+           !if (str=="Q") write(103,*) COLL_grp%state(i_coll,1:3),i1,i2,i3,i4,str,real(Mij),aimag(Mij)
+           !if (str=="A") write(104,*) COLL_grp%state(i_coll,1:3),i1,i2,i3,i4,str,real(Mij),aimag(Mij)
+           ! DEBUG >
+           !
          endif
          !
        enddo
      enddo
    enddo
+   enddo
    !
    COLL_element(i_ref)%N=ic
    !
@@ -119,22 +151,16 @@ subroutine COLLISIONS_linearize_and_IO(what,i_coll,last_coll,COLL,COLL_element,C
  !
  if (IO_colls) then
    !
-#if defined _PAR_IO
    call io_control(ACTION=WR,SEC=(/3,4/),ID=COLL_ID)
-#else
-   call io_control(ACTION=OP_APP_CL,SEC=(/2,3/),ID=COLL_ID)
-#endif
-   io_err=io_COLLISIONS(COLL_ID,COLL,COLL_element=COLL_element(1),i_coll=i_coll)
+   io_err=io_COLLISIONS(COLL_ID,COLL_grp,COLL_element=COLL_element(1),i_coll=i_coll)
    !
    call COLLISIONS_alloc_and_free(what,"v1","clean",1)
    !
-#if defined _PAR_IO
    if(i_coll==last_coll) then
      call PP_wait()
      call io_control(ACTION=WR_CL,SEC=(/5/),ID=COLL_ID)
-     io_err=io_COLLISIONS(COLL_ID,COLL,COLL_element=COLL_element(1),i_coll=i_coll)
+     io_err=io_COLLISIONS(COLL_ID,COLL_grp,COLL_element=COLL_element(1),i_coll=i_coll)
    endif
-#endif
    !
  endif
  !
diff --git a/src/collisions/COLLISIONS_load.F b/src/collisions/COLLISIONS_load.F
index d00322cae5..93a7758f5e 100644
--- a/src/collisions/COLLISIONS_load.F
+++ b/src/collisions/COLLISIONS_load.F
@@ -5,6 +5,13 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<collisions.h>
+! Inside collisions.h, io_COLLs_header is defined as either
+! - io_COLLISIONS        (serial I/O) or
+! - io_COLLISIONS_header (parallel I/O case)
+!
 subroutine COLLISIONS_load(CHECK_EXISTENCE_ONLY)
  !
  ! Load all collisions contribution from any kind of self-energy 
@@ -16,8 +23,10 @@ subroutine COLLISIONS_load(CHECK_EXISTENCE_ONLY)
  use LIVE_t,          ONLY:live_timing
  use QP_m,            ONLY:QP_QED_ng
  use R_lattice,       ONLY:nqbz
+ use electrons,       ONLY:n_sp_pol
  use plasma,          ONLY:N_plasma_poles_global
  use hamiltonian,     ONLY:H_kind
+ use xc_functionals,  ONLY:GS_xc_kind
  use global_XC,       ONLY:H_SE_FOCK,H_SE_COH,H_SE_SEX,QP_SE_GW_NEQ,H_SE_HARTREE
  use collision_ext,   ONLY:HXC_COLL_element,COH_COLL_element,COLLISIONS_have_HARTREE,&
 &                          COH_collisions,HXC_collisions,COLLISIONS_group,COLLISIONS_naming, &
@@ -35,9 +44,6 @@ subroutine COLLISIONS_load(CHECK_EXISTENCE_ONLY)
  use collision_ext,   ONLY:P_COLL_element,COLL_bands,P_collisions,GW_NEQ_collisions,&
 &                          GW_NEQ_COLL_element
 #endif
- !
- ! Define io_COLLs_header to handle either serial or parallel I/O
-#include<collisions.h>
  !
  implicit none
  !
@@ -168,13 +174,14 @@ subroutine local_IO( COLL )
      !
      type(COLLISIONS_group) :: COLL
      integer :: iqbz,i_J,ic,i_coll,&
-&               i_coll_mem,i_q_mem,i_J_mem,i_b,i_g
+&               i_coll_mem,i_q_mem,i_J_mem,i_b,i_g,i_sp
      logical :: l_COH,l_HXC,l_NGW,l_QED,l_NGW_init,l_QED_init
      !
      call live_timing(trim(COLL%name)//' collisions I/O',COLL%PAR_N)
      !
      l_COH=(COLL%kind==H_SE_COH)
-     l_HXC=(COLL%kind==H_SE_FOCK.or.COLL%kind==H_SE_SEX.or.COLL%kind>=H_SE_HARTREE)
+     l_HXC=(COLL%kind==H_SE_FOCK.or.COLL%kind==H_SE_SEX.or.&
+     &      COLL%kind==GS_xc_KIND.or.COLL%kind>=H_SE_HARTREE)
 #if defined _QED
      l_QED=(COLL%kind==QP_SE_GW_QED)
 #endif
@@ -285,6 +292,7 @@ subroutine local_IO( COLL )
          !
          ic=0
          !
+         do i_sp=1,n_sp_pol
          do iqbz=1,nqbz
            !
            i_q_mem=PAR_Q_bz_index(iqbz)
@@ -299,18 +307,19 @@ subroutine local_IO( COLL )
                ic=ic+1
                if (.not.PAR_IND_Plasma%element_1D(i_J)) cycle
                i_J_mem  =PAR_PLASMA_index(i_J)
-               GW_NEQ_COLL_element(i_coll_mem)%v3_r(i_J_mem,1,i_q_mem)=GW_NEQ_COLL_element(1)%v_r(ic)
+               GW_NEQ_COLL_element(i_coll_mem)%v3_r(i_J_mem,1,i_q_mem,i_sp)=GW_NEQ_COLL_element(1)%v_r(ic)
              enddo
            else
              do i_b=COLL_bands(1),COLL_bands(2)
                do i_g=1,QP_QED_ng
                  ic=ic+1
-                 P_COLL_element(i_coll_mem)%v3_r(i_g,i_b,iqbz)=P_COLL_element(1)%v_r(ic)
+                 P_COLL_element(i_coll_mem)%v3_r(i_g,i_b,iqbz,i_sp)=P_COLL_element(1)%v_r(ic)
                enddo
              enddo
            endif
            !
          enddo
+         enddo
        endif
 #endif
        !
diff --git a/src/collisions/COLLISIONS_map_to_QP_table.F b/src/collisions/COLLISIONS_map_to_QP_table.F
index fb00938725..3f19a897d4 100644
--- a/src/collisions/COLLISIONS_map_to_QP_table.F
+++ b/src/collisions/COLLISIONS_map_to_QP_table.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine COLLISIONS_map_to_QP_table(what,COLL)
  !
  use collision_ext, ONLY:COLLISIONS_group,COLL_bands
  use QP_m,          ONLY:QP_nk,QP_nb,QP_table,QP_n_states
  use electrons,     ONLY:n_sp_pol
  use R_lattice,     ONLY:nXkibz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)                            :: what
  type(COLLISIONS_group),   intent(inout) :: COLL
diff --git a/src/collisions/LSEX_potential.F b/src/collisions/LSEX_potential.F
index e1dbf75cb8..c9f2a2f825 100644
--- a/src/collisions/LSEX_potential.F
+++ b/src/collisions/LSEX_potential.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine LSEX_potential(q,X,Xw) 
  !
  ! Construct the screeend potential W(G=G'=0,q)
@@ -28,8 +32,9 @@ subroutine LSEX_potential(q,X,Xw)
  use collision_ext,       ONLY:ng_oscll
  use stderr,              ONLY:intc
  use parallel_int,        ONLY:PP_wait
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in) :: q
  type(w_samp)              :: Xw
@@ -60,15 +65,15 @@ subroutine LSEX_potential(q,X,Xw)
  if(Correlation==LSEX) then
    call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),ID=ID)
    io_err=io_X(X,Xw,ID)
-   if (io_err<0) call error('Incomplete and/or broken tatic diel. fun. database')
+   if (io_err<0) call error('Incomplete and/or broken static diel. fun. database')
  endif
  !
  YAMBO_ALLOC(W,(q%nbz,ng_oscll,ng_oscll))
  iqref=0
  !
-!$OMP WORKSHARE 
+!$OMP PARALLEL WORKSHARE 
  W=cZERO
-!$OMP END WORKSHARE 
+!$OMP END PARALLEL WORKSHARE 
  ! 
  do iqbz=1,q%nbz
    !
@@ -80,11 +85,9 @@ subroutine LSEX_potential(q,X,Xw)
    !
    ! The bare exchange v
    !
-!$OMP WORKSHARE 
-   forall(ig1=1:X%ng)
+   do ig1=1,X%ng
      W(iqbz,ig1,ig1)=-4._SP/spin_occ*pi*isc%gamp(ig1,ig1)
-   end forall
-!$OMP END WORKSHARE 
+   enddo
    !
    if(Correlation==LSEX) then
      !      
@@ -103,11 +106,9 @@ subroutine LSEX_potential(q,X,Xw)
      !
      !  Screened part of the exchange  \tilde W = W - v
      !
-!$OMP WORKSHARE 
      forall(ig1=1:X%ng,ig2=1:X%ng)
-         W(iqbz,ig1,ig2)=W(iqbz,ig1,ig2)-4._SP/spin_occ*pi*X_mat(ig1,ig2,1)*isc%gamp(ig1,ig2)
+       W(iqbz,ig1,ig2)=W(iqbz,ig1,ig2)-4._SP/spin_occ*pi*X_mat(ig1,ig2,1)*isc%gamp(ig1,ig2)
      end forall
-!$OMP END WORKSHARE 
      !
    endif
    !
diff --git a/src/collisions/OSCLL_compose_collision.F b/src/collisions/OSCLL_compose_collision.F
index 70ef858e7e..8bdf6735c7 100644
--- a/src/collisions/OSCLL_compose_collision.F
+++ b/src/collisions/OSCLL_compose_collision.F
@@ -20,7 +20,7 @@
 ! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
 ! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
 !
-subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL) 
+subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,i_sp,COLL) 
  !
  ! This subroutine use long-range part of the Screened exchange
  ! and the oscillators to reconstruct the collisions
@@ -30,6 +30,7 @@ subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL)
  use nl_optics,      ONLY:NL_bands,Correlation,LSEX,LHF
  use collision_ext,  ONLY:OSCLL,W,ng_oscll
  use QP_m,           ONLY:QP_nk
+ use electrons,      ONLY:n_sp_pol
  use R_lattice,      ONLY:bz_samp,qindx_S
  use parallel_m,     ONLY:PAR_Xk_ibz_index
  use wrapper,        ONLY:V_dot_V,M_by_V
@@ -42,7 +43,7 @@ subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL)
  type(bz_samp), intent(in)  :: k,q
  !
  ! Collision indexes
- integer,       intent(in)  :: i_n,i_m,i_k
+ integer,       intent(in)  :: i_n,i_m,i_k,i_sp
  !
  ! Work Space
  !
@@ -53,7 +54,7 @@ subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL)
  !
  ! Collisions output
  !
- complex(SP), intent(out) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ complex(SP), intent(out) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol)
  !
 #if defined _TIMING
  call timing('OSCLL compose',OPR='start')
@@ -74,23 +75,19 @@ subroutine OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL)
      !      
      do ig1=1,ng_oscll
        do ig2=1,ng_oscll
-!$OMP WORKSHARE	 
-	 forall(ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) 
-           COLL(ib,ibp,i_kmq)=COLL(ib,ibp,i_kmq)+OSCLL(i_n,ib ,ik_mem,iqbz,ig2)*conjg(OSCLL(i_m,ibp,ik_mem,iqbz,ig1)) &
-& 			      *W(iqbz,ig2,ig1) 
+         forall (ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) 
+           COLL(ib,ibp,i_kmq,i_sp)=COLL(ib,ibp,i_kmq,i_sp)+OSCLL(i_n,ib ,ik_mem,i_sp,iqbz,ig2)* &
+           &                            conjg(OSCLL(i_m,ibp,ik_mem,i_sp,iqbz,ig1))*W(iqbz,ig2,ig1) 
          end forall
-!$OMP END WORKSHARE 
        enddo
      enddo
      !
    elseif(Correlation==LHF) then
     do ig1=1,ng_oscll
-!$OMP WORKSHARE	 
-      forall(ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) 
-        COLL(ib,ibp,i_kmq)=COLL(ib,ibp,i_kmq)+OSCLL(i_n,ib ,ik_mem,iqbz,ig1)*conjg(OSCLL(i_m,ibp,ik_mem,iqbz,ig1)) &
-& 			      *W(iqbz,ig1,ig1) 
+      forall (ibp=NL_bands(1):NL_bands(2),ib=NL_bands(1):NL_bands(2)) 
+        COLL(ib,ibp,i_kmq,i_sp)=COLL(ib,ibp,i_kmq,i_sp)+OSCLL(i_n,ib,ik_mem,i_sp,iqbz,ig1)* &
+        &                          conjg(OSCLL(i_m,ibp,ik_mem,i_sp,iqbz,ig1))*W(iqbz,ig1,ig1) 
       end forall
-!$OMP END WORKSHARE 
     enddo
    endif
    !
diff --git a/src/collisions/OSCLL_compose_nl.F b/src/collisions/OSCLL_compose_nl.F
index 5e952b6eca..499871c1b9 100644
--- a/src/collisions/OSCLL_compose_nl.F
+++ b/src/collisions/OSCLL_compose_nl.F
@@ -15,6 +15,7 @@ subroutine OSCLL_compose_nl(dG,H_nl_sc,k,q,E,i_k,i_sp_pol)
  use nl_optics,      ONLY:NL_bands,NL_nbands
  use collision_ext,  ONLY:ng_oscll,OSCLL,W
  use QP_m,           ONLY:QP_nk
+ use electrons,      ONLY:n_sp_pol
  use R_lattice,      ONLY:bz_samp,qindx_S,nXkibz
  use wrapper_omp,    ONLY:V_dot_V
  use parallel_m,     ONLY:PAR_Xk_ibz_index
@@ -28,11 +29,11 @@ subroutine OSCLL_compose_nl(dG,H_nl_sc,k,q,E,i_k,i_sp_pol)
  integer,     intent(in)    :: i_k,i_sp_pol
  type(bz_samp), intent(in)  :: k,q
  type(levels),  intent(in)  :: E
- complex(SP),  intent(in)   :: dG(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ complex(SP),  intent(in)   :: dG(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol)
  !
  ! Work Space
  !
- complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol)
 #if defined _DOUBLE
  complex(SP), external :: ZDOTU
 #else
@@ -47,12 +48,12 @@ subroutine OSCLL_compose_nl(dG,H_nl_sc,k,q,E,i_k,i_sp_pol)
  do i_n=NL_bands(1),NL_bands(2)
    do i_m=i_n,NL_bands(2)
     !
-    call OSCLL_compose_collision(k,q,i_n,i_m,i_k,COLL)
+    call OSCLL_compose_collision(k,q,i_n,i_m,i_k,i_sp_pol,COLL)
     !
 #if defined _DOUBLE
-        H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*ZDOTU(NL_nbands**2*QP_nk,COLL,1,dG,1)
+        H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*ZDOTU(NL_nbands**2*QP_nk*n_sp_pol,COLL,1,dG,1)
 #else
-        H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*CDOTU(NL_nbands**2*QP_nk,COLL,1,dG,1)
+        H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)-cI*CDOTU(NL_nbands**2*QP_nk*n_sp_pol,COLL,1,dG,1)
 #endif
     !
    enddo
diff --git a/src/collisions/OSCLL_compose_vbands.F b/src/collisions/OSCLL_compose_vbands.F
index d83534cdf4..90be8f2242 100644
--- a/src/collisions/OSCLL_compose_vbands.F
+++ b/src/collisions/OSCLL_compose_vbands.F
@@ -75,26 +75,20 @@ subroutine OSCLL_compose_vbands(V_input,H_nl_sc,k,q,E,i_k,i_sp_pol)
  !
  ! Build Oscillators for the time-dependent valence bands
  !
-!$OMP WORKSHARE
-forall(iv=1:E%nbf(i_sp_pol),iqbz=1:q%nbz,i_n=NL_bands(1):NL_bands(2),ig1=1:ng_oscll)
-   OSCLL_val(iv,i_n,ig1,iqbz)=sum(OSCLL(i_n,:,ik_mem,iqbz,ig1)*V_kmq(NL_bands(1):,iv,iqbz))
- end forall
-!$OMP END WORKSHARE
+ forall (iv=1:E%nbf(i_sp_pol),iqbz=1:q%nbz,i_n=NL_bands(1):NL_bands(2),ig1=1:ng_oscll) &
+ & OSCLL_val(iv,i_n,ig1,iqbz)=sum(OSCLL(i_n,:,ik_mem,i_sp_pol,iqbz,ig1)*V_kmq(NL_bands(1):,iv,iqbz))
  !
  ! Remove equilibrium Sigma-sex
  !
-!$OMP WORKSHARE
+!$OMP PARALLEL WORKSHARE
  H_nl_sc=H_nl_sc-Sigma_SEX_EQ(:,:,i_sp_pol,ik_mem)
-!$OMP END WORKSHARE
+!$OMP END PARALLEL WORKSHARE
  !
  do i_n=NL_bands(1),NL_bands(2)
    do i_m=i_n,NL_bands(2)
     !
-!$OMP WORKSHARE
-    forall(iqbz=1:q%nbz,ig1=1:ng_oscll,ig2=1:ng_oscll)
-      OSCLL_sum(iqbz,ig1,ig2)=dot_product(OSCLL_val(:,i_m,ig2,iqbz),OSCLL_val(:,i_n,ig2,iqbz))
-    end forall
-!$OMP END WORKSHARE
+    forall (iqbz=1:q%nbz,ig1=1:ng_oscll,ig2=1:ng_oscll) &
+    &  OSCLL_sum(iqbz,ig1,ig2)=dot_product(OSCLL_val(:,i_m,ig2,iqbz),OSCLL_val(:,i_n,ig2,iqbz))
     !
 #if defined _DOUBLE
     H_nl_sc(i_n,i_m)=H_nl_sc(i_n,i_m)+spin_occ*ZDOTU(q%nbz*ng_oscll**2,OSCLL_sum,1,W,1)
diff --git a/src/collisions/OSCLL_eval.F b/src/collisions/OSCLL_eval.F
index da9a126a6e..b73619a115 100644
--- a/src/collisions/OSCLL_eval.F
+++ b/src/collisions/OSCLL_eval.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 !> @brief Calculate oscillators  <\psi_k | exp[i*(q+G)] |\psi_{k+q} > for few G-vectors
 !
 !! @param[out]      OSCLL_k	              Oscillators for each k-point
@@ -33,9 +38,9 @@ subroutine OSCLL_eval(q,k,en)
  use parallel_int,        ONLY:PP_redux_wait,PP_wait
  use parallel_m,          ONLY:PAR_IND_Xk_ibz,PAR_COM_Xk_ibz_INDEX,master_cpu,PAR_IND_G_b, &
 &                              PAR_Xk_nibz,PAR_Xk_ibz_index,PAR_COM_Xk_ibz_A2A,PAR_COM_G_b_INDEX
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !  
  type(bz_samp), intent(in) :: q,k
  type(levels),  intent(in) :: en
@@ -125,6 +130,7 @@ subroutine OSCLL_eval(q,k,en)
  !
  COLL_nbands=COLL_bands(2)-COLL_bands(1)+1
  !
+ do i_sp_pol=1,n_sp_pol
  do i_k=1,QP_nk
    !
    if (.not.PAR_IND_Xk_ibz%element_1D(i_k)) cycle
@@ -149,10 +155,8 @@ subroutine OSCLL_eval(q,k,en)
      i_kmq   = k%sstar(qindx_S(i_k,iqbz,1),1)
      i_kmq_s = k%sstar(qindx_S(i_k,iqbz,1),2)
      !
-     !  Spin-Polarization not supported!!
-     ! 
-     i_k_sp  = 1
-     i_kmq_sp= 1
+     i_k_sp  = i_sp_pol
+     i_kmq_sp= i_sp_pol
      !
      ! Load only two k-points 
      !	   
@@ -194,6 +198,7 @@ subroutine OSCLL_eval(q,k,en)
    call live_timing(steps=1)
    !
  enddo
+ enddo
  !
  call IO_and_Messaging_switch("RESTORE")
  !
diff --git a/src/collisions/OSCLL_load.F b/src/collisions/OSCLL_load.F
index 54848b8971..b30b2d5a2f 100644
--- a/src/collisions/OSCLL_load.F
+++ b/src/collisions/OSCLL_load.F
@@ -4,6 +4,10 @@
 ! Copyright (C) 2020 the YAMBO team
 !
 ! Authors (see AUTHORS file for details): CA
+!
+! headers
+!
+#include<y_memory.h>
 ! 
 subroutine OSCLL_load(q,k) 
  !
@@ -17,8 +21,9 @@ subroutine OSCLL_load(q,k)
  use IO_m,                ONLY:OP_RD,RD_CL_IF_END,VERIFY,REP,manage_action,deliver_IO_error_message
  use IO_int,              ONLY:io_control,IO_and_Messaging_switch
  use com,                 ONLY:msg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in) :: q,k
  !
diff --git a/src/collisions/PLASMA_parallel_setup.F b/src/collisions/PLASMA_parallel_setup.F
index ed943354af..54c927101e 100644
--- a/src/collisions/PLASMA_parallel_setup.F
+++ b/src/collisions/PLASMA_parallel_setup.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PLASMA_parallel_setup(EH_gas)
  !
  use parallel_m,    ONLY:PAR_IND_Plasma,PAR_COM_PLASMA_INDEX,PAR_IND_PLASMA_ID,PAR_PLASMA_index,&
 &                        PAR_nPlasma
  use parallel_int,  ONLY:PARALLEL_live_message,PARALLEL_index
  use plasma,        ONLY:plasma_gas
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(plasma_gas) :: EH_gas
  !
diff --git a/src/collisions/PLASMA_tables_and_dimensions.F b/src/collisions/PLASMA_tables_and_dimensions.F
index a95886934a..bf01a00871 100644
--- a/src/collisions/PLASMA_tables_and_dimensions.F
+++ b/src/collisions/PLASMA_tables_and_dimensions.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function PLASMA_tables_and_dimensions(iqbz,E,k,q,poles,N)
  !
  use pars,          ONLY:SP
@@ -14,8 +18,9 @@ integer function PLASMA_tables_and_dimensions(iqbz,E,k,q,poles,N)
  use plasma,        ONLY:EH_gas
  use parallel_m,    ONLY:PAR_nQ_bz,PAR_Q_bz_index,PAR_IND_Q_bz
  use collision_ext, ONLY:GW_NEQ_collisions
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer          :: iqbz,N
  type(levels)     :: E 
diff --git a/src/common/.objects b/src/common/.objects
index abfa642cda..7f3e03a2b5 100644
--- a/src/common/.objects
+++ b/src/common/.objects
@@ -8,6 +8,7 @@ objs = eval_G_minus_G.o G_rot_grid.o G_index.o G_index_energy_factor.o eval_Grad
        Kramers_Kronig.o Convolve.o RIntegrate.o Laplace_transform.o \
        CIntegrate.o QPartilize.o Lorentzian_FT.o \
        OCCUPATIONS_Fermi.o OCCUPATIONS_Extend.o OCCUPATIONS_Gaps.o OCCUPATIONS_Extend_Double_Grid.o \
-       LEVELS_respect_degenerations.o LEVELS_mirror.o f_build_ext.o \
+       LEVELS_respect_degenerations.o OCCUPATIONS_Merge.o OCCUPATIONS_Quasi_Fermi.o \
+       LEVELS_mirror.o f_build_ext.o \
        crystal_lattice.o QP_state_print.o QP_state_group.o \
        spline_fit.o OBS_rotate.o $(RT_objects)
diff --git a/src/common/DOUBLE_project.dep b/src/common/DOUBLE_project.dep
index 4ef61512e2..8673e6055d 100644
--- a/src/common/DOUBLE_project.dep
+++ b/src/common/DOUBLE_project.dep
@@ -23,6 +23,8 @@
  OCCUPATIONS_Extend_Double_Grid.o
  OCCUPATIONS_Fermi.o
  OCCUPATIONS_Gaps.o
+ OCCUPATIONS_Merge.o
+ OCCUPATIONS_Quasi_Fermi.o
  QP_state_group.o
  QP_state_print.o
  QPartilize.o
@@ -30,7 +32,6 @@
  crystal_lattice.o
  eval_G_minus_G.o
  eval_Gradient.o
- eval_Gradient_cpu.o
  eval_Gradient_gpu.o
  f_build_ext.o
  spline_fit.o
diff --git a/src/common/FREQUENCIES_coarse_grid.F b/src/common/FREQUENCIES_coarse_grid.F
index 0ce5af4c83..033c03207a 100644
--- a/src/common/FREQUENCIES_coarse_grid.F
+++ b/src/common/FREQUENCIES_coarse_grid.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine FREQUENCIES_coarse_grid(title,bg_pt,npts,cg_percentual,in_pt,lterm)
  !
  ! Input
@@ -31,8 +35,9 @@ subroutine FREQUENCIES_coarse_grid(title,bg_pt,npts,cg_percentual,in_pt,lterm)
  use com,         ONLY:msg
  use frequency,   ONLY:bare_grid_N,coarse_grid_N,coarse_grid_Pt,ordered_grid_index,coarse_grid_index
  use vec_operate, ONLY:sort
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*),intent(in) :: title
  integer,intent(in)      :: npts
@@ -78,8 +83,10 @@ subroutine FREQUENCIES_coarse_grid(title,bg_pt,npts,cg_percentual,in_pt,lterm)
    coarse_grid_N=npts
    bare_grid_N=1
    coarse_grid_Pt=bg_pt
-   forall ( i_bg=1:npts) ordered_grid_index(i_bg)=i_bg
-   forall ( i_bg=1:npts) coarse_grid_index(i_bg)=i_bg
+   do i_bg=1,npts
+     ordered_grid_index(i_bg)=i_bg
+     coarse_grid_index(i_bg) =i_bg
+   enddo
    return
  endif
  !
@@ -172,7 +179,9 @@ subroutine FREQUENCIES_coarse_grid(title,bg_pt,npts,cg_percentual,in_pt,lterm)
      i_vec(i_bg)=i_cg
    enddo
  enddo
- forall(i_bg=1:npts) coarse_grid_index(i_bg)=i_vec( ordered_grid_index(i_bg) )
+ do i_bg=1,npts
+   coarse_grid_index(i_bg)=i_vec( ordered_grid_index(i_bg) )
+ enddo
  YAMBO_FREE(i_vec)
  !
  if(.not.trim(title)=='COLL') then
diff --git a/src/common/FREQUENCIES_damping.F b/src/common/FREQUENCIES_damping.F
index 64ee46b415..3780bd8534 100644
--- a/src/common/FREQUENCIES_damping.F
+++ b/src/common/FREQUENCIES_damping.F
@@ -11,12 +11,14 @@ function FREQUENCIES_damping(W,E)
  use frequency, ONLY:w_samp
  implicit none
  !
- real(SP)      :: FREQUENCIES_damping,E,beta
+ real(SP)      :: FREQUENCIES_damping
+ real(SP),     intent(in)  :: E
  type(w_samp)  :: W
  ! 
  ! Work Space
  !
- real(SP):: e1,e2,d1,d2,E_
+ real(SP) :: e1,e2,d1,d2,E_
+ real(SP) :: beta
  !
  e1=W%er(1)
  e2=W%er(2)
diff --git a/src/common/FREQUENCIES_merge.F b/src/common/FREQUENCIES_merge.F
index 74bc9862cb..bd376d0620 100644
--- a/src/common/FREQUENCIES_merge.F
+++ b/src/common/FREQUENCIES_merge.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine FREQUENCIES_merge(W1,W2,table)
  !
  ! Merge W1 into W2 and create a table of the W1 points in the new merged type
@@ -13,7 +17,9 @@ subroutine FREQUENCIES_merge(W1,W2,table)
  !
  use pars,           ONLY:SP,cZERO
  use frequency,      ONLY:w_samp,W_reset
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(w_samp), intent(in)   :: W1
  type(w_samp), intent(inout):: W2
diff --git a/src/common/FREQUENCIES_reset.F b/src/common/FREQUENCIES_reset.F
index 8ce7936094..d88aed251b 100644
--- a/src/common/FREQUENCIES_reset.F
+++ b/src/common/FREQUENCIES_reset.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine FREQUENCIES_reset(Xw,what)
  !
  ! Input
@@ -18,7 +22,9 @@ subroutine FREQUENCIES_reset(Xw,what)
  !
  use frequency,   ONLY:w_samp,bare_grid_N,coarse_grid_Pt,ordered_grid_index,coarse_grid_index
  use X_m,         ONLY:X_poles_tab
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(w_samp)  :: Xw
  character(*)  :: what
diff --git a/src/common/FREQUENCIES_setup.F b/src/common/FREQUENCIES_setup.F
index 8b877b32a2..99ba7c5eed 100644
--- a/src/common/FREQUENCIES_setup.F
+++ b/src/common/FREQUENCIES_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine FREQUENCIES_setup(W)
  !
  ! %er %dr %n_freqs -> %p
@@ -12,7 +16,9 @@ subroutine FREQUENCIES_setup(W)
  use pars,          ONLY:SP,cI
  use frequency,     ONLY:w_samp
  use parser_m,      ONLY:parser
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(w_samp):: W 
  ! 
@@ -68,7 +74,7 @@ subroutine FREQUENCIES_setup(W)
  !
  delta_w=(W%er(2)-W%er(1))/(W%n_freqs-1)
  !
- forall(i1=1:W%n_freqs-1) W%p(i1+1)=cmplx(W%er(1)+delta_w*real(i1,SP),0.,SP)
+ forall (i1=1:W%n_freqs-1) W%p(i1+1)=cmplx(W%er(1)+delta_w*real(i1,SP),0.,SP)
  !
  do i1=1,W%n_freqs
    W%p(i1)=real(W%p(i1))+FREQUENCIES_damping(W,real(W%p(i1)))*cI
diff --git a/src/common/G_index_energy_factor.F b/src/common/G_index_energy_factor.F
index d3923740d4..95ca8a7bf7 100644
--- a/src/common/G_index_energy_factor.F
+++ b/src/common/G_index_energy_factor.F
@@ -8,6 +8,7 @@
 integer function G_index_energy_factor(ig_in,factor)
  !
  use pars,         ONLY:SP
+ use stderr,       ONLY:real2ch
  use vec_operate,  ONLY:iku_v_norm
  use R_lattice,    ONLY:g_vec,ng_in_shell,n_g_shells,E_of_shell
  !
@@ -25,13 +26,15 @@ integer function G_index_energy_factor(ig_in,factor)
  ig_out=0
  do i_shell=2,n_g_shells
    delta1=(E_of_shell(i_shell)-E_of_shell(i_shell-1))/2._SP
-   delta2=(E_of_shell(i_shell+1)-E_of_shell(i_shell))/2._SP
+   if (i_shell< n_g_shells) delta2=(E_of_shell(i_shell+1)-E_of_shell(i_shell))/2._SP
+   if (i_shell==n_g_shells) delta2=delta1
    if( .not. ( (E_of_shell(i_shell)-delta1)<E_of_shell_g_out .and. E_of_shell_g_out<(E_of_shell(i_shell)+delta2) ) ) cycle
    ig_out=ng_in_shell(i_shell)
    exit
  enddo
  !
- if(ig_out==0) call error(" Requested g-index is above maximum g-vector")
+ if(ig_out==0) call error(" Requested g-index energy "//trim(real2ch(E_of_shell_g_out))//&
+ &                        " is above maximum energy "//trim(real2ch(E_of_shell(n_g_shells))))
  !
  G_index_energy_factor=ig_out
  !
diff --git a/src/common/G_rot_grid.F b/src/common/G_rot_grid.F
index 6cb176d914..9b2da9f175 100644
--- a/src/common/G_rot_grid.F
+++ b/src/common/G_rot_grid.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine G_rot_grid(is,call_kind)
  !
  use pars,         ONLY:SP
@@ -12,7 +16,9 @@ subroutine G_rot_grid(is,call_kind)
  use D_lattice,    ONLY:dl_sop,alat
  use zeros,        ONLY:G_iku_zero
  use vec_operate,  ONLY:v_is_zero
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer               :: is
  character(*)          :: call_kind
@@ -32,8 +38,12 @@ subroutine G_rot_grid(is,call_kind)
  !
  identity=reshape((/ 1._SP, 0._SP, 0._SP, 0._SP, 1._SP, 0._SP, 0._SP, 0._SP, 1._SP/),(/3,3/))
  !
- if(is>0) forall (i2=1:3,i3=1:3) rl_sop_local(i2,i3)=dl_sop(i2,i3,is)*alat(i2)/alat(i3)
- if(is<0) forall (i2=1:3,i3=1:3) rl_sop_local(i2,i3)=-identity(i2,i3)*alat(i2)/alat(i3)
+ if(is>0) then 
+   forall (i2=1:3,i3=1:3) rl_sop_local(i2,i3)=dl_sop(i2,i3,is)*alat(i2)/alat(i3)
+ endif
+ if(is<0) then
+   forall (i2=1:3,i3=1:3) rl_sop_local(i2,i3)=-identity(i2,i3)*alat(i2)/alat(i3)
+ endif
  !
  rl_sop_square=matmul(rl_sop_local,rl_sop_local)
  ! 
diff --git a/src/common/LEVELS_mirror.F b/src/common/LEVELS_mirror.F
index 053e6edf87..759764b4ed 100644
--- a/src/common/LEVELS_mirror.F
+++ b/src/common/LEVELS_mirror.F
@@ -16,17 +16,17 @@ subroutine LEVELS_mirror(what,E,X,Dip,l_USER_field)
  use stderr,        ONLY:STRING_same
  use QP_m,          ONLY:QP_n_G_bands
 #if defined _SC 
- use SC,            ONLY:SC_bands
+ use SC,            ONLY:SC_bands,SC_bands_frozen
 #endif
 #if defined _RT || defined _QED || defined _NL
- use real_time,     ONLY:RT_bands
+ use real_time,     ONLY:RT_bands,RT_bands_frozen
 #endif
 #if defined _SC || defined _RT || defined _QED || defined _NL
- use collision_ext, ONLY:COLL_bands
- use hamiltonian,   ONLY:H_ref_bands
+ use collision_ext, ONLY:COLL_bands,COLL_bands_frozen
+ use hamiltonian,   ONLY:H_ref_bands,H_ref_bands_frozen
 #endif
 #if defined _NL
- use nl_optics,      ONLY:NL_bands
+ use nl_optics,      ONLY:NL_bands,NL_bands_frozen
 #endif
  !
  implicit none
@@ -35,9 +35,10 @@ subroutine LEVELS_mirror(what,E,X,Dip,l_USER_field)
  type(levels)   ::E
  type(DIPOLE_t) ::Dip
  type(X_t)      ::X(5) 
- logical        ::l_USER_field(16)
+ logical        ::l_USER_field(16),dip_bands_defined
  !
- integer :: i1,SCATT_bands(2),DIP_bands(2)
+ integer :: i1,SCATT_bands(2),SCATT_hole_bands(2),DIP_bands(2)
+ logical :: SCATT_bands_frozen(E%nb)
  !
  if (STRING_same(what,"USER_fields")) then
    !
@@ -81,17 +82,24 @@ subroutine LEVELS_mirror(what,E,X,Dip,l_USER_field)
    ! Bands (Electronic levels)
    !---------------------------
    SCATT_bands=-1
+   SCATT_bands_frozen=.false.
    do i1=8,12
      if(.not.(l_USER_field(i1))) cycle
      !
 #if defined _RT || defined _QED || defined _NL
-     if(i1==8) SCATT_bands=RT_bands
+     if(i1==8) then
+       SCATT_bands=RT_bands
+       SCATT_bands_frozen=RT_bands_frozen
+     endif
 #endif
 #if defined _SC
      if(i1==9) SCATT_bands=SC_bands
 #endif
 #if defined _NL
-     if(i1==10) SCATT_bands=NL_bands
+     if(i1==10) then
+       SCATT_bands=NL_bands
+       SCATT_bands_frozen=NL_bands_frozen
+     endif
 #endif
 #if defined _SC 
      if(i1==11) then
@@ -102,7 +110,9 @@ subroutine LEVELS_mirror(what,E,X,Dip,l_USER_field)
 #if defined _RT || defined _QED || defined _NL
      if(i1==11) then
        RT_bands   =COLL_bands
+       RT_bands_frozen=COLL_bands_frozen
        SCATT_bands=COLL_bands
+       SCATT_bands_frozen=COLL_bands_frozen
      endif
 #endif
 #if defined _QED
@@ -113,32 +123,39 @@ subroutine LEVELS_mirror(what,E,X,Dip,l_USER_field)
    ! Bands (Dipoles)
    !----------------
    DIP_bands=-1
-   do i1=1,6
+   dip_bands_defined=.false.
+   ! Start from input if present
+   if( l_USER_field(6) )  call def_dip_bands(Dip%ib)
+   !
+   ! Test scatt bands
+   if( all((/SCATT_bands/=-1/) ) ) call def_dip_bands(SCATT_bands)
+   !
+   ! Test other fields
+   do i1=1,5
      if(.not.(l_USER_field(i1))) cycle
-     if(i1< 6) DIP_bands=X(i1)%ib
-     if(i1==6) DIP_bands=Dip%ib
+     call def_dip_bands(X(i1)%ib)
    enddo
-   if (all((/DIP_bands<0/))) then
-     if(     l_USER_field(7) ) DIP_bands=BS_bands
-     if(.not.l_USER_field(7) ) DIP_bands=SCATT_bands
+   !
+   if( l_USER_field(7) ) call def_dip_bands(BS_bands)
 #if defined _RT || defined _QED || defined _NL
-     if(     l_USER_field(8) ) DIP_bands=RT_bands
+   if( l_USER_field(8) ) call def_dip_bands(RT_bands)
 #endif
 #if defined _NL
-     if(     l_USER_field(10)) DIP_bands=NL_bands
+   if( l_USER_field(10)) call def_dip_bands(NL_bands)
 #endif
-   endif
    !
    ! Any negative bands?
    !---------------------
    call fix_me(SCATT_bands)
    call fix_me(DIP_bands)
    !
-   ! Now MIRROR in related bands ranges!
+   ! Now MIRROR in related bands ranges
    !------------------------------------
 #if defined _SC || defined _RT || defined _QED || defined _NL
    COLL_bands =SCATT_bands
+   COLL_bands_frozen=SCATT_bands_frozen
    H_ref_bands=SCATT_bands     
+   H_ref_bands_frozen=SCATT_bands_frozen
 #endif
    Dip%ib=DIP_bands
    !
@@ -154,4 +171,103 @@ subroutine fix_me(bands)
      if (bands(2)<0) bands(2)=E%nb
    end subroutine
    !
+   subroutine def_dip_bands(bands_in)
+       integer, intent(in)    :: bands_in(2)
+       if (dip_bands_defined) then
+          DIP_bands(1)=min(DIP_bands(1),bands_in(1))
+          DIP_bands(2)=max(DIP_bands(2),bands_in(2))
+       else
+          dip_bands_defined=.true.
+          DIP_bands=bands_in
+       endif
+   end subroutine def_dip_bands
+   !
 end subroutine
+!
+!
+subroutine LEVELS_frozen(en)
+ !
+ use electrons,     ONLY:levels
+ use BS,            ONLY:BS_bands,BS_bands_frozen,BS_bands_frozen_ch
+#if defined _SC 
+ use SC,            ONLY:SC_bands,SC_bands_frozen,SC_bands_frozen_ch
+#endif
+#if defined _RT || defined _QED || defined _NL
+ use real_time,     ONLY:RT_bands,RT_bands_frozen,RT_bands_frozen_ch
+#endif
+#if defined _SC || defined _RT || defined _QED || defined _NL
+ use collision_ext, ONLY:COLL_bands,COLL_bands_frozen,COLL_bands_frozen_ch
+ use hamiltonian,   ONLY:H_ref_bands,H_ref_bands_frozen
+#endif
+#if defined _NL
+ use nl_optics,      ONLY:NL_bands,NL_bands_frozen,NL_bands_frozen_ch
+#endif
+ !
+ implicit none
+ !
+ type(levels), intent(in) :: en          
+ !
+ if(allocated(BS_bands_frozen)) deallocate(BS_bands_frozen)
+ allocate(BS_bands_frozen(BS_bands(1):BS_bands(2)))
+ call LEVELS_frozen_local(BS_bands,BS_bands_frozen,BS_bands_frozen_ch,"BSE",BS_bands(2)-BS_bands(1)+1)
+ !
+#if defined _SC || defined _RT || defined _NL
+ if(allocated(H_ref_bands_frozen)) deallocate(H_ref_bands_frozen)
+ allocate(H_ref_bands_frozen(en%nb))
+ H_ref_bands_frozen=0
+ if(allocated(COLL_bands_frozen)) deallocate(COLL_bands_frozen)
+ allocate(COLL_bands_frozen(en%nb))
+ call LEVELS_frozen_local((/1,COLL_bands(2)/),COLL_bands_frozen,COLL_bands_frozen_ch,"COLL",en%nb)
+#endif
+#if defined _RT
+ if(allocated(RT_bands_frozen)) deallocate(RT_bands_frozen)
+ allocate(RT_bands_frozen(en%nb))
+ call LEVELS_frozen_local((/1,RT_bands(2)/),RT_bands_frozen,RT_bands_frozen_ch,"RT",en%nb)
+#endif
+#if defined _SC
+ if(allocated(SC_bands_frozen)) deallocate(SC_bands_frozen)
+ allocate(SC_bands_frozen(en%nb))
+ call LEVELS_frozen_local((/1,SC_bands(2)/),SC_bands_frozen,SC_bands_frozen_ch,"SC",en%nb)
+#endif
+#if defined _NL
+ if(allocated(NL_bands_frozen)) deallocate(NL_bands_frozen)
+ allocate(NL_bands_frozen(en%nb))
+ call LEVELS_frozen_local((/1,NL_bands(2)/),NL_bands_frozen,NL_bands_frozen_ch,"NL",en%nb)
+#endif
+ !
+end subroutine LEVELS_frozen
+!
+!
+subroutine LEVELS_frozen_local(TMP_bands,TMP_bands_frozen,TMP_bands_frozen_ch,TMP_kind,nb)
+ !
+ use pars,          ONLY:LP,schlen
+ use parser_m,      ONLY:parser
+ use stderr,        ONLY:STRING_split,intc
+ !
+ implicit none
+ !
+ integer,           intent(in)  :: TMP_bands(2),nb
+ character(schlen), intent(in)  :: TMP_bands_frozen_ch
+ character(*),      intent(in)  :: TMP_kind
+ integer(LP),       intent(out) :: TMP_bands_frozen(nb)
+ !
+ integer :: ib1,ib2
+ logical :: l_freeze_bands
+ character(schlen) :: TMP_bands_frozen_list(nb)
+ !
+ TMP_bands_frozen=0
+ !
+ call parser(trim(TMP_kind)//'FrozenBands', l_freeze_bands)
+ if (l_freeze_bands) then
+   call STRING_split(TMP_bands_frozen_ch,TMP_bands_frozen_list)
+   ib1=1
+   do ib2=TMP_bands(1),TMP_bands(2)
+     !write(*,*) ib1,ib2,trim(intc(ib2))," ",trim(TMP_bands_frozen_list(ib1)),&
+     !&                  trim(intc(ib2))==trim(TMP_bands_frozen_list(ib1))
+     if( .not. trim(intc(ib2))==trim(TMP_bands_frozen_list(ib1)) ) cycle
+     TMP_bands_frozen(ib2-TMP_bands(1)+1)=1
+     ib1=ib1+1
+   enddo
+ endif
+ !
+end subroutine LEVELS_frozen_local
diff --git a/src/common/LEVELS_respect_degenerations.F b/src/common/LEVELS_respect_degenerations.F
index daff75a6f3..ca519255af 100644
--- a/src/common/LEVELS_respect_degenerations.F
+++ b/src/common/LEVELS_respect_degenerations.F
@@ -7,10 +7,10 @@
 !
 subroutine LEVELS_respect_degenerations(E,Dip,X)
  !
- use electrons,     ONLY:levels
+ use electrons,     ONLY:levels,deg_threshold
  use stderr,        ONLY:intc
  use vec_operate,   ONLY:degeneration_finder
- use BS,            ONLY:BS_bands
+ use BS,            ONLY:BS_bands,BS_bands_frozen
  use X_m,           ONLY:X_t
  use DIPOLES,       ONLY:DIPOLE_t
  use QP_m,          ONLY:QP_n_G_bands
@@ -107,6 +107,7 @@ subroutine LEVELS_respect_degenerations(E,Dip,X)
  !
  ! MIRROR
  !--------
+ call LEVELS_frozen(E)
  call LEVELS_mirror("MIRROR_bands",E,X,Dip,l_USER_fields)
  !
  contains
@@ -128,7 +129,7 @@ subroutine levels_check(Ein,Bin)
    !
    do ik=1,nkibz
      !
-     call degeneration_finder(E%E(:n_bands,ik,1),n_bands,first_el,n_of_el,n_deg_grp,0.0001/HA2EV)
+     call degeneration_finder(n_bands,first_el,n_of_el,n_deg_grp,Er=E%E(:n_bands,ik,1),deg_accuracy=deg_threshold)
      !
      do i_g=1,n_deg_grp
        do i_c=1,2
diff --git a/src/common/Lorentzian_FT.F b/src/common/Lorentzian_FT.F
index a349da0e46..f60b332202 100644
--- a/src/common/Lorentzian_FT.F
+++ b/src/common/Lorentzian_FT.F
@@ -12,14 +12,18 @@ function Lorentzian_FT(W,pole,ordering)
  ! Fourier transform of the Lorentzian function
  ! theta(t)*e^{-i*pole*t} [this is the retarded version, other ordering are computed]
  !
- ! AM,01/08/16. Note that aimag(pole) is always negative while aimag(W)>0
+ ! AM,01/08/16. Note that aimag(pole)<0, while aimag(W)>0
  !
- ! DS,31/10/18. Note that the present structure uses the relation e_{n,k}=e_{n,-k},
+ ! DS,31/10/18. Note that the present structure, at finite q, uses the relation e_{n,k}=e_{n,-k},
  !              See Fetter-Walecka, p.158 from Eq.(12.29) to (12.30)
- !              Such relation is always true. However it
- !              implies that f_{n,k}=f_{n,-k} and rho_{n,k}=rho_{n,-k} (eventually with a star)
- !              when the occupations and the oscillators are used.
- !              Here "n" is a generalized index for the transition when speaking about oscillators
+ !              Such relation is not always true. See for example magnetic systems with SOC and no SI.
+ !              See also finite-q BSE where resonant and anti-resonant block are explicitly computed if needed
+ !              via the flag l_BS_res_from_ares
+ !
+ !              Moreover, as used in Xo, it implies
+ !              - f_{n,k}=f_{n,-k} (occupations)
+ !              - R_{nm,k}=R_{nm,-k} (residuals, eventually with a star)
+ !              For chi_rr, R_{nmk}=rho_{nm,k}
  !              There are two cases where this is not true:
  !              a) with SOC rho_{n,k}/=rho_{n,-k} if SI is not a symmetry of the system
  !              b) in NEQ f_{n,k}=f_{n,-k} may not be true
diff --git a/src/common/OBS_rotate.F b/src/common/OBS_rotate.F
index 6c5756ba97..e01ad7e0dd 100644
--- a/src/common/OBS_rotate.F
+++ b/src/common/OBS_rotate.F
@@ -37,7 +37,9 @@ subroutine OBS_rotate(R,O,N,direction)
  complex(SP) :: O_rot(N,N),R_(N,N)
  !
  if (direction ==  1) R_=R
- if (direction == -1) forall(i=1:N,j=1:N) R_(i,j)=conjg(R(j,i))
+ if (direction == -1) then 
+   forall (i=1:N,j=1:N) R_(i,j)=conjg(R(j,i))
+ endif
  !
  ! [1] O_rot = (R_^{\dagger}) O
  !
diff --git a/src/common/OCCUPATIONS_Extend.F b/src/common/OCCUPATIONS_Extend.F
index 631a5f1e98..bb3e8dca73 100644
--- a/src/common/OCCUPATIONS_Extend.F
+++ b/src/common/OCCUPATIONS_Extend.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OCCUPATIONS_Extend(Xe,E,k)
  !
  ! The occupation of E levels are decided using the Fermi "Levels" of Xe.
@@ -20,8 +24,9 @@ subroutine OCCUPATIONS_Extend(Xe,E,k)
  use electrons,  ONLY:levels,n_sp_pol
  use R_lattice,  ONLY:bz_samp
  use interfaces, ONLY:OCCUPATIONS_Gaps
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)    ::Xe,E
  type(bz_samp)   ::k
diff --git a/src/common/OCCUPATIONS_Fermi.F b/src/common/OCCUPATIONS_Fermi.F
index bc1d6836c5..0cac67b854 100644
--- a/src/common/OCCUPATIONS_Fermi.F
+++ b/src/common/OCCUPATIONS_Fermi.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OCCUPATIONS_Fermi(E,K,OBJ,mode,n_bands,impose_balance)
  !
  ! Mode = "FERMI" 
@@ -31,8 +35,9 @@ subroutine OCCUPATIONS_Fermi(E,K,OBJ,mode,n_bands,impose_balance)
 #if defined _SC
  use SC,            ONLY:SC_neq_kind,SC_mu,SC_occ
 #endif 
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::E
  type(bz_samp) ::K
@@ -118,6 +123,8 @@ subroutine OCCUPATIONS_Fermi(E,K,OBJ,mode,n_bands,impose_balance)
  ! Start Loop
  ! 
  i_trials=1
+ ! Setting this to 1.E-4 is needed for CrI3,
+ ! however, it make Benzene tests fail
  nel_diff_zero=1.E-5
  Fermi_is_converged=.false.
  !
diff --git a/src/common/OCCUPATIONS_Gaps.F b/src/common/OCCUPATIONS_Gaps.F
index 25f9137220..4a5fca5a39 100644
--- a/src/common/OCCUPATIONS_Gaps.F
+++ b/src/common/OCCUPATIONS_Gaps.F
@@ -8,7 +8,8 @@
 subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,Nbf,Nbm,I_dir,E_k_dir,E_k_ind)
  !
  use pars,          ONLY:SP
- use electrons,     ONLY:levels,n_sp_pol,filled_tresh,spin_occ
+ use electrons,     ONLY:levels,n_sp_pol,filled_tresh,spin_occ,&
+&                         get_spin_majority
  !
  implicit none
  !
@@ -75,6 +76,9 @@ subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,Nbf,Nbm,I_dir,E_k_dir,E_k_ind)
  !
  E%nbf=Nbf_
  E%nbm=Nbm_
+ !
+ call get_spin_majority(E)
+ !
 #if defined _RT
  if(all(E%nbc/=0)) then
    do i_sp_pol=1,n_sp_pol
diff --git a/src/common/OCCUPATIONS_Merge.F b/src/common/OCCUPATIONS_Merge.F
new file mode 100644
index 0000000000..01a3a33381
--- /dev/null
+++ b/src/common/OCCUPATIONS_Merge.F
@@ -0,0 +1,83 @@
+!
+!        Copyright (C) 2000-2017 Claudio Attaccalite
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OCCUPATIONS_Merge(E,E_minus,E_plus,K)
+ !
+ use pars,          ONLY:SP,DP
+ use electrons,     ONLY:levels,default_nel
+ use com,           ONLY:msg
+ use R_lattice,     ONLY:bz_samp
+ !
+ implicit none
+ !
+ type(levels)  :: E,E_minus,E_plus
+ type(bz_samp) :: K
+ !
+ ! Work Space
+ !
+ integer :: nval
+ !
+ nval=nint(default_nel/2._SP) !nel/2._SP ! number of valence bands 
+ !
+ E%nbm=E_plus%nbm   ! from negative charged distribution
+ E%nbf=E_minus%nbf  ! from positive charged distribution
+ !
+ ! I do not change E_fermi so the top valence of the un-excited system
+ ! is set to zero
+ !
+ if(.not.allocated(E%f)) return 
+ !
+ E%f(:nval,:,:)  =E_minus%f(:nval,:,:)
+ E%f(nval+1:,:,:)=E_plus%f(nval+1:,:,:)
+ !
+ ! Check number of electorns
+ !
+ call msg("sr","Number of electrons in Quasi-Fermi: ",N_electrons(E,k))
+ !
+ contains 
+   !  
+   real(SP) function N_electrons(E_in,K_in)
+     !  
+     use electrons,     ONLY:n_sp_pol,spin_occ
+     !
+     implicit none
+     !
+     type(levels)  :: E_in
+     type(bz_samp) :: K_in
+     !
+     ! Work Space
+     !
+     integer  :: ib,ik,is
+     !
+     N_electrons=0._SP
+     !
+     do ib=1,E_in%nb
+       do ik=1,E_in%nk
+         do is=1,n_sp_pol
+           N_electrons=N_electrons+E_in%f(ib,ik,is)*K_in%weights(ik)
+         enddo
+       enddo
+     enddo
+     !
+   end function N_electrons
+ !
+end subroutine
diff --git a/src/common/OCCUPATIONS_Quasi_Fermi.F b/src/common/OCCUPATIONS_Quasi_Fermi.F
new file mode 100644
index 0000000000..a6d73835e0
--- /dev/null
+++ b/src/common/OCCUPATIONS_Quasi_Fermi.F
@@ -0,0 +1,120 @@
+!
+!        Copyright (C) 2000-2017 Claudio Attaccalite
+!
+! Authors (see AUTHORS file for details): CA
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine OCCUPATIONS_Quasi_Fermi(E,K,E_qf,E_fermi_h,E_fermi_e)
+ !
+ ! Mode = 0 
+ !  Update the Fermi energy
+ !  Output : E%E_fermi
+ !
+ ! Mode = 1 -> Mode 0 +
+ !  Define E%f and reports variations in Ef/nbf/nbm
+ !  Output : E%nbf,E%nbm
+ !  E%E are shifted 
+ !
+ ! Mode > 1 -> Mode 1 +
+ !  Full report of system electronic character
+ !
+ use pars,          ONLY:SP,DP
+ use units,         ONLY:HA2EV
+ use electrons,     ONLY:levels,nel_cond,nel,E_duplicate,E_reset,n_sp_pol
+ use R_lattice,     ONLY:bz_samp
+ use IO_int,        ONLY:IO_and_Messaging_switch
+ use com,           ONLY:msg
+ use parser_m,      ONLY:parser
+ use interfaces,    ONLY:OCCUPATIONS_Fermi
+ !
+ implicit none
+ !
+ type(levels), intent(out) :: E_qf
+ real(SP),     intent(out) :: E_fermi_h,E_fermi_e
+ type(levels), intent(in)  :: E 
+ type(bz_samp),intent(in)  :: K
+ !
+ ! Work Space
+ !
+ type(levels) :: E_plus,E_minus
+ real(SP) :: nel_save
+ integer  :: nbf_save,nbm_save
+ integer  :: nval,ik 
+ !
+ call msg('rs','Quasi-Fermi occupation')
+ !
+ call  OCCUPATIONS_Fermi(E,K,'EQ',mode="OCCUPATIONS")
+ !
+ if(nel_cond==0._SP) return
+ !
+ nel_save=nel
+ !
+ call E_reset(E_plus)
+ call E_reset(E_minus)
+ call E_reset(E_qf)
+ call E_duplicate(E,E_plus)
+ call E_duplicate(E,E_minus)
+ call E_duplicate(E,E_qf)
+ !
+ if(any(E%nbf/=E%nbm)) call error("Quasi-Fermi distribution only for insulators/semiconductors")
+ !
+ if(n_sp_pol/=1) call error("Quasi-Fermi distribution not implemented for spin-polarized systems")
+ !
+ nval=E%nbf(1)
+ nel =nel+nel_cond
+ !
+ call IO_and_Messaging_switch("-report -log -io_in -io_out")
+ call OCCUPATIONS_Fermi(E_plus,K,'Eplus',mode="OCCUPATIONS")
+ !
+ nbm_save=E_plus%nbm(1)
+ !
+ nel =nel_save
+ nel =nel-nel_cond
+ !
+ call OCCUPATIONS_Fermi(E_minus,K,'Eminus',mode="OCCUPATIONS")
+ call IO_and_Messaging_switch("+report +log +io_in +io_out")
+ !
+ nbf_save=E_minus%nbf(1)
+ !
+ call REPORT_Occupations('E_minus',E_minus)
+ call REPORT_Occupations('E_plus',E_plus)
+ !
+ call OCCUPATIONS_Merge(E_qf,E_minus,E_plus,K) ! Generate a Quasi-Fermi distribution in E using E_minus and E_plus
+ !
+ nel  =nel_save
+ E_fermi_h=E_minus%E_fermi
+ E_fermi_e= E_plus%E_fermi
+ !
+ call msg('rsn','Number of metalic bands : ',E_qf%nbm(1))
+ call msg('rsn','Number of filled  bands : ',E_qf%nbf(1))
+ call msg('rsn','Number of valence bands : ',nval)
+ call msg('rsm','Hole Fermi level        : ',E_fermi_h*HA2EV,'[eV]')
+ call msg('rsm','Electron Fermi level    : ',E_fermi_e*HA2EV,'[eV]')
+ !
+ call REPORT_Occupations('E quasi-fermi',E_qf)
+ !
+ K_LOOP: do ik=1,K%nibz
+   call REPORT_Energies(E_qf%E,K,K,E_qf%nb,(/ik,ik/),'E',.FALSE.,.TRUE.)
+   call REPORT_Energies(E_qf%f/HA2EV,K,K,E_qf%nb,(/ik,ik/),'occ',.FALSE.,.TRUE.)
+ enddo K_LOOP
+ !
+ call E_reset(E_plus)
+ call E_reset(E_minus)
+ !
+end subroutine
diff --git a/src/common/QP_state_print.F b/src/common/QP_state_print.F
index d15a6134a0..80d566e913 100644
--- a/src/common/QP_state_print.F
+++ b/src/common/QP_state_print.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_state_print(qp)
  !
  ! Print the contents of the QP_state logical array
@@ -14,8 +18,9 @@ subroutine QP_state_print(qp)
  use com,           ONLY:msg
  use stderr,        ONLY:intc
  use descriptors,   ONLY:IO_desc_add
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(QP_t), optional :: qp    
  !
diff --git a/src/common/eval_G_minus_G.F b/src/common/eval_G_minus_G.F
index b43b72e21d..b3ff740f28 100644
--- a/src/common/eval_G_minus_G.F
+++ b/src/common/eval_G_minus_G.F
@@ -5,9 +5,13 @@
 !
 ! Authors (see AUTHORS file for details): AMAFDS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
 !
-integer function eval_G_minus_G(iG,iGo,COMM)
+!
+integer function eval_G_minus_G(iG,iGo,opr_in,COMM)
  !
  ! Evaluates the  G-G' table :
  !
@@ -16,6 +20,10 @@ integer function eval_G_minus_G(iG,iGo,COMM)
  ! and returns the orginal iG that is redefined in output
  ! in such a way that G_m_G(iG,j) exists for all j
  !
+ ! If operation is "+" fills the table with:
+ !
+ !  g_vec( G_m_G(i,j) ) = g_vec(i)+g_vec(j)
+ !
  use pars,         ONLY:SP
  use vec_operate,  ONLY:iku_v_norm, c2a
  use parallel_m,   ONLY:yMPI_comm,PP_indexes,PP_indexes_reset,myid
@@ -23,28 +31,36 @@ integer function eval_G_minus_G(iG,iGo,COMM)
  use R_lattice,    ONLY:G_m_G,DEV_VAR(G_m_G),G_m_G_maxval,g_vec,ng_in_shell,n_g_shells,E_of_shell
  use gpu_m,        ONLY:have_gpu
  use timing_m,     ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer :: iG,iGo
+ character(1),    optional :: opr_in
  type(yMPI_comm), optional :: COMM
  !
  ! Work Space
  !
  integer :: i1,i2,is
- integer :: iG_shell,iGo_shell,iG_shell_0,iG_shell_max,iG_,iGo_,iG_alloc,iGo_alloc
- integer :: ngx1,ngx2,ngx3, iv1_rlu(3)
+ integer :: iG_shell,iGo_shell,iG_shell_0,iG_shell_max,iG_,iGo_,&
+ &          iG_alloc,iGo_alloc,iG_closed,iGo_closed
+ integer :: ngx1,ngx2,ngx3, iv1_rlu(3),iG_max
  real(SP):: E_iG, E_iGo, E_max
  real(SP):: v1(3),v2(3)
  real(SP), allocatable :: E_G_m_G(:,:),g_vec_rlu(:,:)
  integer,  allocatable :: ig_vec_rlu(:,:), imap(:,:,:)
  integer,  allocatable :: G_m_G_maxval_tmp(:)
+ character(1)          :: opr
  type(PP_indexes)  :: PAR_IND_G
  !
  integer, external :: G_index
+ integer, external :: G2G
  !
  ! Init
  !
+ opr = "-"
+ if(present(opr_in)) opr=opr_in
+ !
  iG_=iG
  iGo_=iGo
  if (iGo==0) iGo_=iG
@@ -64,31 +80,43 @@ integer function eval_G_minus_G(iG,iGo,COMM)
  call timing("eval_G_minus_G",opr="start")
  !
  ! aux data
- YAMBO_ALLOC(E_G_m_G,(iG_,iGo_))
+ iG_closed=G2G(iG_)
+ iGo_closed=G2G(iGo_)
+ YAMBO_ALLOC(E_G_m_G,(iG_closed,iGo_closed))
+ iG_max=iG_closed
  !
  ! setup MPI parallelism if required
  !
  call PP_indexes_reset(PAR_IND_G)
  if (present(COMM)) then
    !
-   call PARALLEL_index(PAR_IND_G,(/iGo_/),COMM=COMM)
+   call PARALLEL_index(PAR_IND_G,(/iGo_closed/),COMM=COMM)
    !
-   call PARALLEL_live_message("[G_m_G] G-vectors",LOADED=PAR_IND_G%n_of_elements(myid+1),TOTAL=iGo_)
+   call PARALLEL_live_message("[G_m_G] G-vectors",LOADED=PAR_IND_G%n_of_elements(myid+1),TOTAL=iGo_closed)
    !
  endif
  !
  !$omp parallel do default(shared), private(i2,i1,v1), schedule(dynamic)
- do i2=1,iGo_
+ do i2=1,iGo_closed
    !
    if(present(COMM)) then
      E_G_m_G(:,i2)=0.0_SP
      if (.not.PAR_IND_G%element_1D(i2)) cycle
    endif
    !
-   do i1=1,iG_
-     v1(:)=g_vec(i1,:)-g_vec(i2,:)
-     E_G_m_G(i1,i2)=0.5_SP*iku_v_norm(v1)**2
-   enddo
+   if (opr=="-") then
+     do i1=1,iG_closed
+       v1(:)=g_vec(i1,:)-g_vec(i2,:)
+       E_G_m_G(i1,i2)=0.5_SP*iku_v_norm(v1)**2
+     enddo
+   else if (opr=="+") then
+     do i1=1,iG_closed
+       v1(:)=g_vec(i1,:)+g_vec(i2,:)
+       E_G_m_G(i1,i2)=0.5_SP*iku_v_norm(v1)**2
+     enddo
+   else
+     call error(" eval_g_minus_g: wrong operation in input")
+   endif
  enddo
  !$omp end parallel do
  !
@@ -136,6 +164,11 @@ integer function eval_G_minus_G(iG,iGo,COMM)
    iG_= ng_in_shell(is)
    if (iGo==0) iGo_=iG_
    !
+   if(iG_>iG_max) then
+      iG_shell=is-1
+      exit shell2_loop
+   endif
+   !
    if( maxval(E_G_m_G(1:iG_,1:iGo_)) > E_max ) then
       iG_shell=is-1
       exit shell2_loop
@@ -220,12 +253,17 @@ integer function eval_G_minus_G(iG,iGo,COMM)
      if (.not.PAR_IND_G%element_1D(i2)) cycle
    endif
    !
-   do i1=1,iG_
-     !
-     iv1_rlu(:)=ig_vec_rlu(i1,:)-ig_vec_rlu(i2,:)
-     G_m_G(i1,i2)=imap(iv1_rlu(1),iv1_rlu(2),iv1_rlu(3))
-     !
-   enddo
+   if (opr=="-") then
+     do i1=1,iG_
+       iv1_rlu(:)=ig_vec_rlu(i1,:)-ig_vec_rlu(i2,:)
+       G_m_G(i1,i2)=imap(iv1_rlu(1),iv1_rlu(2),iv1_rlu(3))
+     enddo
+   else if (opr=="+") then
+     do i1=1,iG_
+       iv1_rlu(:)=ig_vec_rlu(i1,:)+ig_vec_rlu(i2,:)
+       G_m_G(i1,i2)=imap(iv1_rlu(1),iv1_rlu(2),iv1_rlu(3))
+     enddo
+   endif
    !
    G_m_G_maxval_tmp(i2)=maxval(G_m_G(:,i2)) 
    !
diff --git a/src/common/eval_Gradient.F b/src/common/eval_Gradient.F
index 67aa43997c..1d9fae06c9 100644
--- a/src/common/eval_Gradient.F
+++ b/src/common/eval_Gradient.F
@@ -3,9 +3,13 @@
 !
 ! Copyright (C) 2012 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM MG AF
+! Authors (see AUTHORS file for details): AM MG AF DS
 !
-subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
+! headers
+!
+#include<y_memory.h>
+!
+subroutine eval_Gradient(f,f_gradient,ng_)
  !
  ! The gradient of a periodic function f(r):
  ! 
@@ -21,41 +25,27 @@ subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
  use FFT_m,         ONLY:fftw_plan
 #endif
  use R_lattice,     ONLY:g_vec,ng_vec
- use wave_func,     ONLY:wf_ng
+ use wave_func,     ONLY:wf_ng,rho_map,rho_map_size
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
- integer,          intent(in) ::spinor_size
- complex(SP),      intent(in) ::f(fft_size,spinor_size)
- complex(SP),      intent(out)::f_gradient(fft_size,spinor_size,3)
- character(len=*), intent(in) ::f_type       ! "wave", "density" or "potential"
+ integer,          intent(in) ::ng_
+ complex(SP),      intent(in) ::f(rho_map_size)
+ complex(SP),      intent(out)::f_gradient(rho_map_size,3)
  !
  ! Work Space 
  !
- integer :: ng_,ic,ig,i_spinor
- complex(DP), allocatable :: Vr(:), V3g(:,:)
- !
- select case( trim(f_type) )
- case( "wave" )
-   ng_=min(wf_ng,size(fft_g_table,1))
- case( "density")
-   ng_=min(ng_vec,size(fft_g_table,1))
- case( "potential")
-   ng_=min(ng_vec,size(fft_g_table,1))
- case default
-   call error(' [FFT] unknown f_type when computing gradient')
- end select
- !
- YAMBO_ALLOC(Vr,(fft_size))
- YAMBO_ALLOC(V3g,(ng_,3))
+ integer :: ic,ig
+ complex(DP) :: Vr(fft_size), V3g(ng_,3)
+ !
  V3g(:,:) = cZERO_DP
  !
  f_gradient = cZERO
  !
- do i_spinor=1,spinor_size
-   !
-   Vr=cmplx(f(:,i_spinor),kind=DP)
-   !
+ if(rho_map_size< fft_size) Vr(rho_map(:))=cmplx(f(:),kind=DP)
+ if(rho_map_size==fft_size) Vr(     :    )=cmplx(f(:),kind=DP)
+ !
 #if defined _FFTW
  call dfftw_destroy_plan(fftw_plan)
  fftw_plan = 0
@@ -63,12 +53,12 @@ subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
 #else
  call fft_3d(Vr,fft_dim,-1)
 #endif
-   !
-   forall (ic=1:3,ig=1:ng_) V3g(ig,ic)=cI*real(g_vec(ig,ic),DP)*Vr(fft_g_table(ig,1))/real(fft_size,DP)
-   !
-   do ic = 1,3
-     Vr = cZERO_DP
-     Vr(fft_g_table(1:ng_,1)) = V3g(1:ng_,ic)
+ !
+ forall (ic=1:3,ig=1:ng_) V3g(ig,ic)=cI*real(g_vec(ig,ic),DP)*Vr(fft_g_table(ig,1))/real(fft_size,DP)
+ !
+ do ic = 1,3
+   Vr = cZERO_DP
+   Vr(fft_g_table(1:ng_,1)) = V3g(1:ng_,ic)
 #if defined _FFTW
    call dfftw_destroy_plan(fftw_plan)
    fftw_plan = 0
@@ -76,12 +66,56 @@ subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
 #else
    call fft_3d(Vr,fft_dim,1)
 #endif
-     f_gradient(:,i_spinor,ic) = cmplx(Vr(:),kind=SP)
-   enddo
+   if(rho_map_size< fft_size) f_gradient(:,ic) = cmplx(Vr(rho_map(:)),kind=SP)
+   if(rho_map_size==fft_size) f_gradient(:,ic) = cmplx(Vr(        : ),kind=SP)
    !
  enddo
  !
- YAMBO_FREE(Vr)
- YAMBO_FREE(V3g)
- !
 end subroutine eval_Gradient
+!
+!
+subroutine eval_Gradient_wf(wf,wf_gradient)
+ !
+ use pars,          ONLY:SP
+ use FFT_m,         ONLY:fft_g_table
+ use wave_func,     ONLY:wf_ng
+ use electrons,     ONLY:n_spinor
+ use wave_func,     ONLY:rho_map_size
+ !
+ implicit none
+ !
+ complex(SP),      intent(in) ::wf(rho_map_size,n_spinor)
+ complex(SP),      intent(out)::wf_gradient(rho_map_size,n_spinor,3)
+ !
+ complex(SP) :: wf_tmp(rho_map_size,3)
+ !
+ integer  :: i_spinor,ng_
+ !
+ ng_=min(wf_ng,size(fft_g_table,1))
+ !
+ do i_spinor=1,n_spinor
+   call eval_Gradient(wf(:,i_spinor),wf_tmp,ng_)
+   wf_gradient(:,i_spinor,:)=wf_tmp
+ enddo
+ !
+end subroutine eval_Gradient_wf
+!
+!
+subroutine eval_Gradient_rho_pot(Vrho,Vrho_gradient)
+ !
+ use pars,          ONLY:SP
+ use FFT_m,         ONLY:fft_g_table
+ use R_lattice,     ONLY:g_vec,ng_vec
+ use wave_func,     ONLY:rho_map_size
+ !
+ implicit none
+ !
+ complex(SP),      intent(in) ::Vrho(rho_map_size)
+ complex(SP),      intent(out)::Vrho_gradient(rho_map_size,3)
+ !
+ integer :: ng_
+ !
+ ng_=min(ng_vec,size(fft_g_table,1))
+ call eval_Gradient(Vrho,Vrho_gradient,ng_)
+ !
+end subroutine eval_Gradient_rho_pot
diff --git a/src/common/eval_Gradient_cpu.F b/src/common/eval_Gradient_cpu.F
deleted file mode 100644
index 67aa43997c..0000000000
--- a/src/common/eval_Gradient_cpu.F
+++ /dev/null
@@ -1,87 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2012 The Yambo Team
-!
-! Authors (see AUTHORS file for details): AM MG AF
-!
-subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
- !
- ! The gradient of a periodic function f(r):
- ! 
- ! f(r) = \sum f(G) exp(iGr) => FFT: f(G)
- !
- ! f'(r) = \sum iGf(G) exp(iGr) =           
- !
- ! \sum f'(G) exp(iGr)       => FFT^-1
- !
- use pars,          ONLY:SP,DP,cI,cZERO,cZERO_DP
- use FFT_m,         ONLY:fft_size,fft_dim,fft_g_table
-#if defined _FFTW
- use FFT_m,         ONLY:fftw_plan
-#endif
- use R_lattice,     ONLY:g_vec,ng_vec
- use wave_func,     ONLY:wf_ng
- !
-#include<y_memory.h>
- !
- integer,          intent(in) ::spinor_size
- complex(SP),      intent(in) ::f(fft_size,spinor_size)
- complex(SP),      intent(out)::f_gradient(fft_size,spinor_size,3)
- character(len=*), intent(in) ::f_type       ! "wave", "density" or "potential"
- !
- ! Work Space 
- !
- integer :: ng_,ic,ig,i_spinor
- complex(DP), allocatable :: Vr(:), V3g(:,:)
- !
- select case( trim(f_type) )
- case( "wave" )
-   ng_=min(wf_ng,size(fft_g_table,1))
- case( "density")
-   ng_=min(ng_vec,size(fft_g_table,1))
- case( "potential")
-   ng_=min(ng_vec,size(fft_g_table,1))
- case default
-   call error(' [FFT] unknown f_type when computing gradient')
- end select
- !
- YAMBO_ALLOC(Vr,(fft_size))
- YAMBO_ALLOC(V3g,(ng_,3))
- V3g(:,:) = cZERO_DP
- !
- f_gradient = cZERO
- !
- do i_spinor=1,spinor_size
-   !
-   Vr=cmplx(f(:,i_spinor),kind=DP)
-   !
-#if defined _FFTW
- call dfftw_destroy_plan(fftw_plan)
- fftw_plan = 0
- call fft_3d(Vr,fft_dim,-1,fftw_plan)
-#else
- call fft_3d(Vr,fft_dim,-1)
-#endif
-   !
-   forall (ic=1:3,ig=1:ng_) V3g(ig,ic)=cI*real(g_vec(ig,ic),DP)*Vr(fft_g_table(ig,1))/real(fft_size,DP)
-   !
-   do ic = 1,3
-     Vr = cZERO_DP
-     Vr(fft_g_table(1:ng_,1)) = V3g(1:ng_,ic)
-#if defined _FFTW
-   call dfftw_destroy_plan(fftw_plan)
-   fftw_plan = 0
-   call fft_3d(Vr,fft_dim,1,fftw_plan)
-#else
-   call fft_3d(Vr,fft_dim,1)
-#endif
-     f_gradient(:,i_spinor,ic) = cmplx(Vr(:),kind=SP)
-   enddo
-   !
- enddo
- !
- YAMBO_FREE(Vr)
- YAMBO_FREE(V3g)
- !
-end subroutine eval_Gradient
diff --git a/src/common/eval_Gradient_gpu.F b/src/common/eval_Gradient_gpu.F
index e53ddb4f29..5a641a10ec 100644
--- a/src/common/eval_Gradient_gpu.F
+++ b/src/common/eval_Gradient_gpu.F
@@ -5,7 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM MG AF
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
 !
 subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
  !
@@ -36,8 +39,9 @@ subroutine eval_Gradient(f,f_gradient,spinor_size,f_type)
  use mkl_dfti_omp_offload
 #endif
  use timing_m
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,          intent(in) :: spinor_size
  complex(SP),      intent(in) :: f(fft_size,spinor_size)
diff --git a/src/communicate/DESC_compare.F b/src/communicate/DESC_compare.F
index f8e19c3ab7..577c88375a 100644
--- a/src/communicate/DESC_compare.F
+++ b/src/communicate/DESC_compare.F
@@ -39,14 +39,29 @@ subroutine DESC_compare(D1,D2,i_err,exclude,only)
      do i_v=1,D1%size(i_d)
        if (D1%kind(i_d)=='i'.and.D1%ival(i_v,i_d)/=D2%ival(i_v,i_dp)) i_err=(/i_d,i_dp/)
        if (D1%kind(i_d)=='r'.and.D1%rval(i_v,i_d)/=D2%rval(i_v,i_dp)) i_err=(/i_d,i_dp/)
+       ! DEBUG <
+       !if (D1%kind(i_d)=='i'.and.D1%ival(i_v,i_d)/=D2%ival(i_v,i_dp)) write(*,*) D1%ival(i_v,i_d),D2%ival(i_v,i_dp)
+       !if (D1%kind(i_d)=='r'.and.D1%rval(i_v,i_d)/=D2%rval(i_v,i_dp)) write(*,*) D1%rval(i_v,i_d),D2%rval(i_v,i_dp)
+       ! DEBUG >
      enddo
      if (D1%kind(i_d)=='d'.and.real(D1%dval(i_d))/=real(D2%dval(i_dp))) i_err=(/i_d,i_dp/)
+     ! DEBUG <
+     !if (D1%kind(i_d)=='d'.and.real(D1%dval(i_d))/=real(D2%dval(i_dp))) write(*,*) real(D1%dval(i_d)),real(D2%dval(i_dp))
+     ! DEBUG >
      if (D1%kind(i_d)=='c') then
        if (real(D1%cval(i_d)) /= real(D2%cval(i_dp))) i_err=(/i_d,i_dp/)
        if (aimag(D1%cval(i_d))/=aimag(D2%cval(i_dp))) i_err=(/i_d,i_dp/)
+       ! DEBUG <
+       !if (real(D1%cval(i_d)) /= real(D2%cval(i_dp))) write(*,*) real(D1%cval(i_d)),real(D2%cval(i_dp))
+       !if (aimag(D1%cval(i_d))/=aimag(D2%cval(i_dp))) write(*,*) aimag(D1%cval(i_d)),aimag(D2%cval(i_dp))
+       ! DEBUG >
      endif
      if (D1%kind(i_d)=='s'.and..not.STRING_same(D1%sval(i_d),D2%sval(i_dp))) i_err=(/i_d,i_dp/)
      if (D1%kind(i_d)=='l'.and.D1%lval(i_d).neqv.D2%lval(i_dp)) i_err=(/i_d,i_dp/)
+     ! DEBUG >
+     !if (D1%kind(i_d)=='s'.and..not.STRING_same(D1%sval(i_d),D2%sval(i_dp))) write(*,*) trim(D1%sval(i_d)),trim(D2%sval(i_dp))
+     !if (D1%kind(i_d)=='l'.and.D1%lval(i_d).neqv.D2%lval(i_dp)) write(*,*) D1%lval(i_d),D2%lval(i_dp)
+     ! DEBUG <
    enddo
  enddo
  !
diff --git a/src/communicate/REPORT_Occupations.F b/src/communicate/REPORT_Occupations.F
index dd3d8a7cc6..7aeea0c9bb 100644
--- a/src/communicate/REPORT_Occupations.F
+++ b/src/communicate/REPORT_Occupations.F
@@ -12,7 +12,8 @@ subroutine REPORT_Occupations(what,E)
  use D_lattice,     ONLY:Tel,T_holes,T_elecs,Bose_Temp,DL_vol
  use drivers,       ONLY:Finite_Tel
  use com,           ONLY:msg,COMPOSE_spin_msg
- use electrons,     ONLY:levels,n_sp_pol,nel,spin_string,spin_occ
+ use electrons,     ONLY:levels,n_sp_pol,nel,spin_string,spin_occ,&
+ &                       i_spin_majority
  use interfaces,    ONLY:OCCUPATIONS_Gaps
  !
  implicit none
@@ -44,8 +45,7 @@ subroutine REPORT_Occupations(what,E)
  !
  ! Initial Report
  !
- if (n_sp_pol==1) call msg('r',title(1:title_len)//"=== General ===")
- if (n_sp_pol==2) call msg('r',title(1:title_len)//"=== Spin Independent ===")
+ call msg('r',title(1:title_len)//"=== General ===")
  if (T_elecs==T_holes) call msg('r',COMPOSE_spin_msg(title,title_len,'Electronic Temperature',0),&
 &                                                                (/Tel*HA2EV,Tel*HA2KEL/),'[eV K]')
  if (T_elecs/=T_holes) call msg('r',COMPOSE_spin_msg(title,title_len,'Holes / Elecs Temp',0),    &
@@ -58,7 +58,7 @@ subroutine REPORT_Occupations(what,E)
  do i_sp_pol=1,n_sp_pol
    !
    if (n_sp_pol==2) then
-     call msg('r',title(1:title_len)//"=== Spin "//spin_string(i_sp_pol)//" channel ===")
+     call msg('r',title(1:title_len)//"=== Gaps and Widths in "//spin_string(i_sp_pol)//" channel ===")
    else
      call msg('r',title(1:title_len)//"=== Gaps and Widths ===")
    endif
@@ -95,6 +95,8 @@ subroutine REPORT_Occupations(what,E)
    !
  enddo
  !
+ if (n_sp_pol==2) call msg('rn','Spin majority channel is '//spin_string(i_spin_majority)//" channel")
+ !
  if (n_sp_pol==2) then
    if (all(Nbf==Nbm).and.all(Nbf>0)) then
      call msg('r',title(1:title_len)//"=== Overall gap ===")
diff --git a/src/communicate/com_utilities_subroutines.F b/src/communicate/com_utilities_subroutines.F
index 81b460b19f..22ce7a0a4a 100644
--- a/src/communicate/com_utilities_subroutines.F
+++ b/src/communicate/com_utilities_subroutines.F
@@ -138,7 +138,7 @@ end subroutine of_open_close
 subroutine write_the_logo(unit_,comment_)
  !
  use LIVE_t,       ONLY:LIVE_message
- use LOGO,         ONLY:pickup_a_logo,logo_line,n_logo_lines
+ use LOGO,         ONLY:pickup_a_lumen_logo,logo_line,n_logo_lines
  use pars,         ONLY:schlen
  !
  implicit none
@@ -148,7 +148,7 @@ subroutine write_the_logo(unit_,comment_)
  character(schlen)::space_,ch ! Work Space
  integer          ::i1        !
  !
- call pickup_a_logo(unit_)
+ call pickup_a_lumen_logo(unit_)
  !
  if (unit_<0) return
  !
@@ -264,6 +264,27 @@ subroutine error(mesg)
 end subroutine error
 !
 !
+subroutine exp_user_warning(mesg)
+ !
+ use com,          ONLY:exp_user
+ !
+ implicit none
+ !
+ character(*) :: mesg
+ !
+ if (exp_user) then
+   call warning(mesg)
+   call warning("Assuming experienced user and proceeding")
+ endif
+ !
+ if (.not.exp_user) then
+   call warning("Exp user error. Launch the code with -expuser to skip this ")
+   call error(mesg)
+ endif
+ !
+end subroutine exp_user_warning
+!
+!
 subroutine yambo_flush(unit)
   !
   !
diff --git a/src/coulomb/col_driver.F b/src/coulomb/col_driver.F
index 3cb5c296dd..1207b3b5e1 100644
--- a/src/coulomb/col_driver.F
+++ b/src/coulomb/col_driver.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 ! DESCRIPTION :
 ! 
 ! the variables generated/read in this series of rotuines are:
@@ -24,7 +29,6 @@
 ! 4 pi / |q+G|^2 = 2 pi DLVol NqBZ qpg(q,G)                                  
 ! |q+G| = Sqrt[ 2 / ( DLVol NqBZ qpg(q,G)) ]                                  
 !
-#include<dev_defs.h>
 !
 subroutine col_driver(bare_NG,q)
  !
@@ -42,12 +46,13 @@ subroutine col_driver(bare_NG,q)
  use R_lattice,     ONLY:d3q_factor,RIM_is_diagonal,RIM_qpg,bare_qpg,&
 &                        nqibz,g_vec,q_pt,nqbz,RIM_ng,RIM_n_rand_pts,&
 &                        bz_samp,q_norm,cutoff_presets,bare_qpg_d,&
-&                        cut_geometry,cut_is_slab,idir,q0_def_norm
+&                        cut_geometry,cut_is_slab,idir,q0_def_norm,eps_env
  use wave_func,     ONLY:wf_ng
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD_CL,OP_WR_CL,REP,VERIFY
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer       :: bare_NG
  type(bz_samp) :: q
@@ -88,8 +93,7 @@ subroutine col_driver(bare_NG,q)
  !
  if (.not.l_rim) then
    RIM_ng=0
-   call parser('RandQpts',l_RandQpts)
-   if (.not.l_RandQpts) RIM_n_rand_pts=0
+   RIM_n_rand_pts=0
  endif
  !
  call parser('QpgFull',RIM_is_diagonal)
@@ -176,6 +180,16 @@ subroutine col_driver(bare_NG,q)
    call cutoff_driver(q)
    q_norm(1)=q0_def_norm
    !
+   ! Here I rescale bare_qpg and RIM_qpg by a constant dielectric
+   ! function due to the environment. A similar rescaling is also done
+   ! in the subroutine rim_integrate_w for RIM_W. However, since RIM_W
+   ! is not linear in q+G, the rescaling there is more involved
+   !
+   ! bare_qpg= q+G
+   if (allocated(bare_qpg)) bare_qpg=bare_qpg*sqrt(eps_env)
+   ! RIM_qpg= \int C/|q+G|**2
+   if (allocated(RIM_qpg)) RIM_qpg =RIM_qpg/eps_env
+   !
  endif
  !
  ! copy again,
diff --git a/src/coulomb/cutoff_box.F b/src/coulomb/cutoff_box.F
index 0663b02305..0efe933aa2 100644
--- a/src/coulomb/cutoff_box.F
+++ b/src/coulomb/cutoff_box.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine cutoff_box(q,is_cut)
  !
  ! Vc(q,G)=1/(Vdl Nq)\sum_{q' G'} V(q'+G') F(q'+G',q+G) [3D BOX] 
@@ -25,8 +29,9 @@ subroutine cutoff_box(q,is_cut)
  use LIVE_t,        ONLY:live_timing
  use zeros,         ONLY:k_iku_zero,G_iku_zero
  use openmp,        ONLY:OPENMP_update,n_threads,master_thread,OPENMP_set_threads
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  logical      ::is_cut(3)
  type(bz_samp)::q
diff --git a/src/coulomb/cutoff_cylinder.F b/src/coulomb/cutoff_cylinder.F
index b92893d967..9ce8b03853 100644
--- a/src/coulomb/cutoff_cylinder.F
+++ b/src/coulomb/cutoff_cylinder.F
@@ -5,19 +5,24 @@
 !
 ! Authors (see AUTHORS file for details): DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine cutoff_cylinder(q,is_cut)
  !
- use pars,          ONLY:SP,pi
+ use pars,          ONLY:SP,pi,schlen
  use wave_func,     ONLY:wf_ng
  use D_lattice,     ONLY:alat
- use R_lattice,     ONLY:cyl_ph_radius,bare_qpg,cyl_length,cyl_cut,g_vec,&
+ use R_lattice,     ONLY:cyl_ph_radius,bare_qpg,cyl_cut,g_vec,&
 &                        bz_samp,cyl_vr_save,cyl_vz_save
  use parallel_m,    ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int,  ONLY:PP_redux_wait,PARALLEL_index 
  use LIVE_t,        ONLY:live_timing
  use com,           ONLY:msg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(bz_samp) ::q
  logical       ::is_cut(3)
  !
@@ -27,7 +32,7 @@ subroutine cutoff_cylinder(q,is_cut)
 &                lenw2,limit,leniw,maxp1
  real(SP)      ::cyl_ph_radius2,cyl_cut2,c1,c2,Vd,abserr,epsabs
  complex(SP)   ::V_cut(q%nibz,wf_ng)
- logical       ::infcyl
+ character(schlen) :: cyl_direction
 #if defined _DOUBLE
  real(SP),  external    ::DBESJ1_
 #else
@@ -40,33 +45,31 @@ subroutine cutoff_cylinder(q,is_cut)
  !
  call PP_indexes_reset(px)
  !
- infcyl=cyl_length==0.
- !
  ! Check cylinder axis in along one principal axis
  !
- if(is_cut(1).and.is_cut(2)) call error('Check cylinder axis direction')
- if(is_cut(2).and.is_cut(3)) call error('Check cylinder axis direction')
- if(is_cut(1).and.is_cut(3)) call error('Check cylinder axis direction')
+ if(all(is_cut)) call error('Infinite cylinder, but cutoff in all direction')
+ cyl_direction="unknonw"
+ if(is_cut(2).and.is_cut(3)) cyl_direction="x"
+ if(is_cut(1).and.is_cut(3)) cyl_direction="y"
+ if(is_cut(1).and.is_cut(2)) cyl_direction="z"
+ if(trim(cyl_direction)=="unkonwn") call error('Unknown cylinder direction')
  !
- if(is_cut(1)) then
+ if(trim(cyl_direction)=="x") then
    ipar=1
    iort1=2
    iort2=3
- elseif(is_cut(2)) then
+ elseif(trim(cyl_direction)=="y") then
    ipar=2
    iort1=1
    iort2=3
- elseif(is_cut(3)) then
+ elseif(trim(cyl_direction)=="z") then
    ipar=3
    iort1=2
    iort2=1
  endif
  !
- cyl_cut=cyl_length
- if (infcyl) then
-  cyl_cut=abs(1/(q%pt(2,ipar))*alat(ipar))-1.
-  call msg('r','Infinite cylinder: Length set to 1/dq     ',cyl_cut,"[a.u.]")
- endif
+ cyl_cut=abs(1/(q%pt(2,ipar))*alat(ipar))-1.
+ call msg('r','Infinite cylinder: Length set to 1/dq     ',cyl_cut,"[a.u.]")
  !
  cyl_cut2 = cyl_cut**2
  cyl_ph_radius2  = cyl_ph_radius**2
@@ -77,68 +80,8 @@ subroutine cutoff_cylinder(q,is_cut)
  !
  call live_timing('Cylinder',px%n_of_elements(myid+1))
  !
- ! finite cylinder
+ ! infinite cylinder
  !
- if (.not.infcyl) then 
-   epsabs=8e-1
-   key=4
-   limit=2
-   lenw=limit*4
-   leniw=8
-   maxp1=2
-   lenw2=leniw*2+maxp1*25
-   allocate (iwork(limit),work(lenw))
-   allocate (iwork2(leniw),work2(lenw2))
-   do ig=1,wf_ng
-     do iq=1,q%nibz
-       if (.not.px%element_2D(iq,ig)) cycle
-       cyl_vz_save=(q%pt(iq,ipar)+g_vec(ig,ipar) )*2.*pi/alat(ipar)
-       cyl_vr_save=sqrt(((q%pt(iq,iort1)+g_vec(ig,iort1))*2.*pi/alat(iort1))**2+&
-&            ((q%pt(iq,iort2)+g_vec(ig,iort2))*2.*pi/alat(iort2))**2)
-       !  
-       if (cyl_vr_save==0..and.cyl_vz_save/=0) then
-         c1=1./cyl_vz_save**2-cos(cyl_vz_save*cyl_cut)/cyl_vz_save**2-&
-&        cyl_cut*sin(cyl_vz_save*cyl_cut)/cyl_vz_save
-         c2=sin(cyl_vz_save*cyl_cut)*sqrt(cyl_ph_radius2+cyl_cut2)
-#if defined _DOUBLE
-         call dqawo (bessel_F3,0.,cyl_cut,cyl_vz_save,2,epsabs,0.,Vd,&
-&                   abserr,neval,ier,leniw,maxp1,lenw2,last,iwork2,work2)
-#else
-         call qawo (bessel_F3,0.,cyl_cut,cyl_vz_save,2,epsabs,0.,Vd,&
-&                   abserr,neval,ier,leniw,maxp1,lenw2,last,iwork2,work2)
-#endif
-         V_cut(iq,ig)=c1+(c2-Vd)/cyl_vz_save
-       elseif (cyl_vz_save==0.) then
-#if defined _DOUBLE
-         call dqag(bessel_F4,0.,cyl_ph_radius,epsabs,0.,key,Vd,abserr, &
-&                 neval,ier,limit,lenw,last,iwork,work)
-#else
-         call qag(bessel_F4,0.,cyl_ph_radius,epsabs,0.,key,Vd,abserr, &
-&                 neval,ier,limit,lenw,last,iwork,work)
-#endif
-        V_cut(iq,ig)=Vd
-       else
-#if defined _DOUBLE
-         call dqawo(bessel_F2,0.,cyl_cut,cyl_vz_save,1,epsabs,0.,Vd,  &
-&                  abserr,neval,ier,leniw,maxp1,lenw2,last,iwork2,work2)
-#else
-         call qawo(bessel_F2,0.,cyl_cut,cyl_vz_save,1,epsabs,0.,Vd,  &
-&                  abserr,neval,ier,leniw,maxp1,lenw2,last,iwork2,work2)
-#endif
-     
-         V_cut(iq,ig)=Vd
-       endif
-      call live_timing(steps=1)
-     enddo
-   enddo
-   YAMBO_FREE(iwork)
-   YAMBO_FREE(iwork2)
-   YAMBO_FREE(work)
-   YAMBO_FREE(work2)
-   !
-   ! infinite
-   !
- else   
    !
    limit=2
    lenw=limit*4
@@ -171,7 +114,6 @@ subroutine cutoff_cylinder(q,is_cut)
      enddo
    enddo
    YAMBO_FREE(work)
- endif
  !
  ! MPI 2 all
  !
@@ -183,7 +125,7 @@ subroutine cutoff_cylinder(q,is_cut)
  !
  call live_timing()
  !
- forall (iq=1:q%nibz,ig=1:wf_ng) bare_qpg(iq,ig)=sqrt(1./V_cut(iq,ig))
+ forall (iq=1:q%nibz,ig=1:wf_ng) bare_qpg(iq,ig)=sqrt(1._SP/V_cut(iq,ig))
  !
  ! cleanup
  call PP_indexes_reset(px)
diff --git a/src/coulomb/cutoff_driver.F b/src/coulomb/cutoff_driver.F
index 3c7cd0e2e0..8e3149968d 100644
--- a/src/coulomb/cutoff_driver.F
+++ b/src/coulomb/cutoff_driver.F
@@ -30,7 +30,7 @@ subroutine cutoff_driver(q)
  use pars,          ONLY:SP,schlen,lchlen
  use vec_operate,   ONLY:v_is_zero
  use com,           ONLY:msg
- use R_lattice,     ONLY:cyl_ph_radius,box_length,cyl_length,cut_geometry,bz_samp,cut_description,ws_cutoff,&
+ use R_lattice,     ONLY:cyl_ph_radius,box_length,cut_geometry,bz_samp,cut_description,ws_cutoff,&
                          cut_is_slab
  use stderr,        ONLY:STRING_split,STRING_pack
  use IO_int,        ONLY:io_control
@@ -94,7 +94,6 @@ subroutine cutoff_driver(q)
  !
  if (.not.cut_is_box)                        box_length=0.
  if (.not.cut_is_sphere.and..not.cut_is_cyl) cyl_ph_radius=0.
- if (.not.cut_is_cyl)                        cyl_length=0.
  if (.not.is_cut(1))                         box_length(1)=0.
  if (.not.is_cut(2))                         box_length(2)=0.
  if (.not.is_cut(3))                         box_length(3)=0.
@@ -131,7 +130,7 @@ subroutine cutoff_driver(q)
  call section('*',trim(ch1))
  !
  ch1=           'Cut directions       '
- ch2=' '
+ ch2= ' '
  if (is_cut(1)) ch1=STRING_pack(ch1,'X')
  if (is_cut(1)) ch2=STRING_pack(ch2,'x')
  if (is_cut(2)) ch1=STRING_pack(ch1,'Y')
@@ -170,17 +169,9 @@ subroutine cutoff_driver(q)
    call msg('r','Box sides            ',length(:i1),"[a.u.]")
    write (cut_description,'(a,3f6.3)') trim(cut_geometry)//' ', length(:i1)
  else if (cut_is_cyl) then
-   if (cyl_length > 0.) then
-     call msg('r','Cyl. length          ',cyl_length,"[a.u.]")
-   else
-     call msg('r','Cyl. length          : infinite')
-   endif
-   call msg('r',  'Cyl. radius          ',cyl_ph_radius,"[a.u.]")
-   if (cyl_length > 0.) then
-     write (cut_description,'(a,f6.3,a,f6.3)') trim(cut_geometry)//' ',cyl_length,' ',cyl_ph_radius 
-   else
-     write (cut_description,'(a,a,a,f6.3)') trim(cut_geometry)//' ','infinite',' ',cyl_ph_radius
-   endif
+   call msg('r','Cyl. length          : infinite')
+   call msg('r','Cyl. radius          ',cyl_ph_radius,"[a.u.]")
+   write (cut_description,'(a,a,a,f6.3)') trim(cut_geometry)//' ','infinite',' ',cyl_ph_radius
  endif
  !
  ! Checking geometry symmetry
@@ -191,14 +182,7 @@ subroutine cutoff_driver(q)
  ! symmetry operations.
  !
  sym_points=0.
- if (cut_is_cyl) then
-   do i1=1,3
-     if (is_cut(i1)) then
-       sym_points(1,i1)=-cyl_length/2.
-       sym_points(2,i1)= cyl_length/2.
-     endif
-   enddo
- else if (cut_is_box) then
+ if (cut_is_box) then
    i4=0
    do i1=-1,1,2
      do i2=-1,1,2
diff --git a/src/coulomb/cutoff_test.F b/src/coulomb/cutoff_test.F
index e1e3f9dd16..fbc3926845 100644
--- a/src/coulomb/cutoff_test.F
+++ b/src/coulomb/cutoff_test.F
@@ -3,14 +3,14 @@
 !
 ! Copyright (C) 2006 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AMDV
+! Authors (see AUTHORS file for details): AM DV
 !
 subroutine cutoff_test(cut_is_sphere,cut_is_box,cut_is_cyl,cut_is_ws,cut_is_slab,is_cut,q)
  !
  use pars,          ONLY:SP,schlen,pi
  use vec_operate,   ONLY:v_norm
  use D_lattice,     ONLY:alat,sop_inv,a,DL_vol
- use R_lattice,     ONLY:bz_samp,cyl_ph_radius,bare_qpg,cyl_length,box_length,&
+ use R_lattice,     ONLY:bz_samp,cyl_ph_radius,bare_qpg,box_length,&
 &                        g_vec,g_rot,nqibz
  use wave_func,     ONLY:wf_ng
  use com,           ONLY:msg,of_open_close
@@ -75,9 +75,9 @@ subroutine cutoff_test(cut_is_sphere,cut_is_box,cut_is_cyl,cut_is_ws,cut_is_slab
  !
  if (cut_is_cyl) then
    dummy=(/v_norm(a(1,:)),v_norm(a(2,:)),v_norm(a(3,:))/)
-   if (is_cut(1).and.cyl_length==0) dummy(1)=2.0_SP*plot_factor*dummy(1)*10.0
-   if (is_cut(2).and.cyl_length==0) dummy(2)=2.0_SP*plot_factor*dummy(2)*10.0
-   if (is_cut(3).and.cyl_length==0) dummy(3)=2.0_SP*plot_factor*dummy(3)*10.0
+   if (is_cut(1)) dummy(1)=2.0_SP*plot_factor*dummy(1)*10.0
+   if (is_cut(2)) dummy(2)=2.0_SP*plot_factor*dummy(2)*10.0
+   if (is_cut(3)) dummy(3)=2.0_SP*plot_factor*dummy(3)*10.0
  endif
  if (cut_is_box) then
    if (is_cut(1)) dummy(1)=2.0_SP*plot_factor*box_length(1)
@@ -245,16 +245,9 @@ real(SP) function V_in_box(r)
    real(SP) function V_in_cyl(r)
      real(SP) :: r(3)
      V_in_cyl=0.
-     if (cyl_length>0) then 
-       if ((is_cut(1).and.abs(r(1))<cyl_length.and.abs(r(2))**2+abs(r(3))**2<cyl_ph_radius**2).or.&
-&           (is_cut(2).and.abs(r(2))<cyl_length.and.abs(r(1))**2+abs(r(3))**2<cyl_ph_radius**2).or.&
-&           (is_cut(3).and.abs(r(3))<cyl_length.and.abs(r(1))**2+abs(r(2))**2<cyl_ph_radius**2)) &
-&       V_in_cyl=1./v_norm(r)
-     else
        if ((is_cut(1).and.abs(r(2))**2+abs(r(3))**2<cyl_ph_radius**2).or.&
 &           (is_cut(2).and.abs(r(1))**2+abs(r(3))**2<cyl_ph_radius**2).or.&
 &           (is_cut(3).and.abs(r(1))**2+abs(r(2))**2<cyl_ph_radius**2)) V_in_cyl=1._SP/v_norm(r)
-     endif
    end function
 
    real(SP) function V_in_ws(r)
diff --git a/src/coulomb/rim.F b/src/coulomb/rim.F
index b958b4af11..701fad3379 100644
--- a/src/coulomb/rim.F
+++ b/src/coulomb/rim.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine rim(mode,Xw)
  !
  use pars,          ONLY:SP,pi,DP
@@ -21,8 +25,9 @@ subroutine rim(mode,Xw)
 &                        cut_is_slab,idir
  use timing_m,      ONLY:timing
  use frequency,     ONLY:w_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(1)   :: mode
  type(w_samp)   :: Xw
diff --git a/src/coulomb/rim_integrate_w.F b/src/coulomb/rim_integrate_w.F
index 5f0cfcb7c5..086c28e7f9 100644
--- a/src/coulomb/rim_integrate_w.F
+++ b/src/coulomb/rim_integrate_w.F
@@ -10,7 +10,7 @@ subroutine rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
  use pars,          ONLY:SP,DP,pi
  use vec_operate,   ONLY:iku_v_norm
  use R_lattice,     ONLY:g_vec,RIM_n_rand_pts,k_grid_uc_vol,q0_def_norm,&
-&                        RIM_W_ng,q_pt,b,RIM_W_is_diagonal,&
+&                        RIM_W_ng,q_pt,b,RIM_W_is_diagonal,eps_env,&
 &                        RIM_W,f_coeff,cut_is_slab,idir,RIM_id_epsm1_reference
  use D_lattice,     ONLY:alat,a
  use frequency,     ONLY:w_samp
@@ -49,12 +49,21 @@ subroutine rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
 &                             ((qr(idir(3),i2)+q_pt(iq,idir(3))+g_vec(i1,idir(3)))/alat(idir(3)))**2)
      pre_factor=(1.-exp(-slab_vplane1*lcut)*cos(slab_vz1*lcut))
      !
+     ! This is the sqrt of eq. 7 in NPJ Mat 9, 44 (2023) 
+     ! Accordingly here vslab=sqrt[v_G(q+q')]
      vslab(i2,i1) = sqrt(4._SP*pi*pre_factor/(slab_vplane1**2+slab_vz1**2))
      !
    enddo
    !
  enddo
  !
+ ! Here I rescale vslab by a constant dielectric function due to the environment.
+ ! A similar rescaling is also done in col_driver for bare_qpg and RIM_qpg
+ ! The final goal is to resale RIM_W. However, since RIM_W
+ ! is not linear in q+G, I need to rescale vslab
+ !
+ !vslab=vslab/sqrt(eps_env)
+ !
  ! q /= 0 all terms
  !
  !$omp parallel do default(shared), private(i1,i2,i3,func,RIM_acc)
@@ -72,6 +81,8 @@ subroutine rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
 &                                     +qr(2,i3)*(f_coeff(3,i1,i2,iq,iw)+qr(2,i3)*f_coeff(6,i1,i2,iq,iw)&
 &                                     +2._DP*qr(1,i3)*f_coeff(5,i1,i2,iq,iw))
          ! Accumulate W
+         ! This is eq. 16 + eq 14 in NPJ Mat 9, 44 (2023)
+         ! Accordingly here vslab=sqrt[v_G(q+q')]
          RIM_acc = RIM_acc + rfac*vslab(i3,i1)**2*func*vslab(i3,i2)**2/(1._DP-vslab(i3,i1)*func*vslab(i3,i2))
        enddo
        !
@@ -99,6 +110,8 @@ subroutine rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
      !Regularization
      if (slab_vplane1 < 1.e-5) then
        vslab(i1,1) = sqrt(4._DP*pi*(1.-exp(-q0_def_norm*lcut))/q0_def_norm**2)
+       ! Again, rescaling of vslab due to epsilon environment
+       !vslab(i1,1) = vslab(i1,1)/sqrt(eps_env)
        RIM_acc = RIM_acc + rfac*f_coeff(1,1,1,1,iw)*(4*pi*lcut)**2
        RIM_acc_anis = RIM_acc_anis + rfac*f_coeff(1,1,1,1,iw)*(4*pi*lcut)**2 &
 &                       *0.5_SP*(em1_anis(idir(2))+em1_anis(idir(3)))
@@ -107,6 +120,9 @@ subroutine rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
      !
      !Evaluate v_slab
      vslab2=4._DP*pi*(1.-exp(-slab_vplane1*lcut))
+     ! Again, rescaling of vslab due to environment.
+     ! Notice that here this is not the sqrt.
+     !vslab2 = vslab2/eps_env
      !
      !Evaluate interpolation function
      func = f_coeff(1,1,1,1,iw)*exp(-sqrt((f_coeff(2,1,1,1,iw)*(qr(2,i1)+a(2,1)/a(1,1)*qr(1,i1)))**2+&
@@ -119,7 +135,8 @@ subroutine rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
      func = func*anis_fact
      RIM_acc_anis = RIM_acc_anis + rfac*vslab2*func*vslab2/(r1**2*(1-vslab2*func))
      !
-     !Store the square root of vslab
+     ! Store the square root of vslab
+     ! Here vlsab2 is already rescaled
      vslab(i1,1) = sqrt(vslab2)/r1
      !
    enddo
diff --git a/src/coulomb/rim_spherical.F b/src/coulomb/rim_spherical.F
index 13196d5e51..a01ac3bf71 100644
--- a/src/coulomb/rim_spherical.F
+++ b/src/coulomb/rim_spherical.F
@@ -51,7 +51,7 @@ subroutine rim_spherical(Np,points,p_weight,radius,power,MULTIPLY_BY_Q)
  use LIVE_t,        ONLY:live_timing
  use vec_operate,   ONLY:v_norm
  use D_lattice,     ONLY:alat
- use R_lattice,     ONLY:RIM_n_rand_pts
+ use R_lattice,     ONLY:RIM_sphe_n_rand_pts
  implicit none
  !
  integer,  intent(in)    :: Np,power
@@ -62,11 +62,11 @@ subroutine rim_spherical(Np,points,p_weight,radius,power,MULTIPLY_BY_Q)
  ! Work Space
  !
  integer   :: ip,ir,N_out,N_in,ic
- real(SP)  :: v_rand(3),qr(RIM_n_rand_pts,3),sphere_vol,box_vol,pt_cc(3)
+ real(SP)  :: v_rand(3),qr(RIM_sphe_n_rand_pts,3),sphere_vol,box_vol,pt_cc(3)
  integer            :: iseed(8)
  real(DP), external :: dlaran
  !
- if (RIM_n_rand_pts==0)then
+ if (RIM_sphe_n_rand_pts==0)then
    do ip=1,Np
      p_weight(ip)=0.
      pt_cc=points(ip,:)*2.*pi/alat(:)
@@ -92,8 +92,8 @@ subroutine rim_spherical(Np,points,p_weight,radius,power,MULTIPLY_BY_Q)
  N_in=1
  N_out=0
  !
- call live_timing('Random points',RIM_n_rand_pts)
- loop: do while(.not.N_in==RIM_n_rand_pts+1)
+ call live_timing('Random points',RIM_sphe_n_rand_pts)
+ loop: do while(.not.N_in==RIM_sphe_n_rand_pts+1)
    !
    do ic=1,3
      v_rand(ic)=( 2.*dlaran(iseed(4:))-1. )*radius*1.2
@@ -115,14 +115,14 @@ subroutine rim_spherical(Np,points,p_weight,radius,power,MULTIPLY_BY_Q)
  sphere_vol=4./3.*pi*radius**3.
  !
  call msg('r', 'Sphere volume       [au]',sphere_vol)
- call msg('rn','Integrated volume   [au]',box_vol*real(RIM_n_rand_pts)/real(N_out))
+ call msg('rn','Integrated volume   [au]',box_vol*real(RIM_sphe_n_rand_pts)/real(N_out))
  !
  call live_timing('Integrals',Np)
  !
  do ip=1,Np
    p_weight(ip)=0.
    pt_cc=points(ip,:)*2.*pi/alat(:)
-   do ir=1,RIM_n_rand_pts
+   do ir=1,RIM_sphe_n_rand_pts
      p_weight(ip)=p_weight(ip)+1./v_norm(qr(ir,:)+pt_cc(:))**power*box_vol/real(N_out)
    enddo
    p_weight(ip)=p_weight(ip)/sphere_vol/real(Np)
diff --git a/src/dipoles/.objects b/src/dipoles/.objects
index c54fe600ab..7db8e515e5 100644
--- a/src/dipoles/.objects
+++ b/src/dipoles/.objects
@@ -1,7 +1,4 @@
-MAGN_objects = DIPOLE_orb_magn_forCD.o
-#if defined _RT
-MAGN_objects = DIPOLE_orbital_magnetization.o
-#endif
+MAGN_objects = DIPOLE_orbital_magnetization_cd.o DIPOLE_orbital_magnetization_rt.o
 #if defined _SC
 SC_objects = DIPOLE_SC_rotate.o
 #endif
@@ -9,8 +6,8 @@ SC_objects = DIPOLE_SC_rotate.o
 NL_objects = Berry_polarization_EQ.o Ionic_polarization.o POLARIZATION_output.o 
 #endif
 objs = Build_Overlaps_Det_EQ.o $(SC_objects) $(MAGN_objects) $(NL_objects) \
-       DIPOLE_setup.o \
+       DIPOLE_setup.o OVERLAPS_IO.o \
        DIPOLE_IO.o DIPOLE_driver.o DIPOLE_covariant.o DIPOLE_x_real_space.o DIPOLE_spin_magnetization.o \
        DIPOLE_kb_abinit_def_dim.o DIPOLE_kb_abinit_comp.o DIPOLE_kb_sum.o DIPOLE_kb_Ylm.o DIPOLE_dimensions.o \
        DIPOLE_kb_pwscf_def_dim.o DIPOLE_kb_pwscf_comp.o DIPOLE_kb_init.o DIPOLE_p_matrix_elements.o DIPOLE_overlaps.o \
-       DIPOLE_g_space.o DIPOLE_shifted_grids.o DIPOLE_check_shifted_grids.o DIPOLE_rotate.o
+       DIPOLE_g_space.o DIPOLE_shifted_grids.o DIPOLE_check_shifted_grids.o DIPOLE_build_der_k.o DIPOLE_rotate.o
diff --git a/src/dipoles/Berry_polarization_EQ.F b/src/dipoles/Berry_polarization_EQ.F
index d2149ee9fe..ca7ae366cf 100644
--- a/src/dipoles/Berry_polarization_EQ.F
+++ b/src/dipoles/Berry_polarization_EQ.F
@@ -4,6 +4,16 @@
 ! Copyright (C) 2019 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): MG CA
+!
+!> @brief Calculate polarization in terms of geometric Berry phase
+!!
+!! @param[in]       All_S_det                   Overlaps determinants
+!! @param[in]       en                          KS energies
+!! @param[in]       Xk                          k-points sampling
+!! @param[in]       k_map                       k-points map
+!!
+!! @param[out]      P_Berry_Red                 Polarization in reduce cord.
+!
 !                  
 subroutine Berry_polarization_EQ(P_Berry_Red,All_S_det,en,Xk,k_map)
  !
@@ -12,7 +22,7 @@ subroutine Berry_polarization_EQ(P_Berry_Red,All_S_det,en,Xk,k_map)
  ! Berry phase is in reduced (crystalline) coordinates
  ! 
  ! Reference:
- ! R.D. King-Smith and D. Vanderbilt, "Theory of polarization of          
+ ! R.D. King-Smith and D. Vanderbilt, "Theory of polarization of        
  ! crystaline solids", Phys Rev B 47, 1651 (1993).      
  !
  use pars,           ONLY:DP,SP,cZERO,pi,cONE
@@ -20,7 +30,6 @@ subroutine Berry_polarization_EQ(P_Berry_Red,All_S_det,en,Xk,k_map)
  use R_lattice,      ONLY:bz_samp,bz_map
  use D_lattice,      ONLY:a
  use vec_operate,    ONLY:get_id_perpendicular
- use electric,       ONLY:l_P_periodic
  !
  implicit none           
  !
@@ -69,19 +78,21 @@ subroutine Berry_polarization_EQ(P_Berry_Red,All_S_det,en,Xk,k_map)
        ! Do we miss a factor 2?
        !
        imag_zeta=aimag(log(zeta))
-       if(l_P_periodic) imag_zeta=imag_zeta-1._DP*pi*nint(imag_zeta/(1._DP*pi))
+!       Removed feature: force periodicity is a wrong idea
+!       if(l_P_periodic) imag_zeta=imag_zeta-1._DP*pi*nint(imag_zeta/(1._DP*pi))
        !
        P_Berry_RED(id_in)=P_Berry_RED(id_in)+imag_zeta
        !
      enddo
    enddo
    !
-   P_Berry_RED(id_in)=-P_Berry_RED(id_in)/real(Nperpend,SP)
+   P_Berry_RED(id_in)=P_Berry_RED(id_in)/real(Nperpend,SP)
    !
  enddo
  !
  P_Berry_RED=P_Berry_RED/(2._SP*pi)*spin_occ
  !
- if(l_P_periodic) P_Berry_RED=real(P_Berry_RED,SP)-spin_occ*NINT(real(P_Berry_RED,SP)/spin_occ)
+!       Removed feature: force periodicity is a wrong idea
+! if(l_P_periodic) P_Berry_RED=real(P_Berry_RED,SP)-spin_occ*NINT(real(P_Berry_RED,SP)/spin_occ)
  !
 end subroutine Berry_polarization_EQ
diff --git a/src/dipoles/Build_Overlaps_Det_EQ.F b/src/dipoles/Build_Overlaps_Det_EQ.F
index d3c5694f3a..8ad0f8ecaf 100644
--- a/src/dipoles/Build_Overlaps_Det_EQ.F
+++ b/src/dipoles/Build_Overlaps_Det_EQ.F
@@ -50,7 +50,7 @@ subroutine Build_Overlaps_Det_EQ(Xen,i_sp_pol,ikbz,Sm1_plus,Sm1_minus,S_det)
        !
        nb=Xen%nbm(i_sp_pol)
        !
-       ! Overlap S(k, k-dk)
+       ! Overlap S(k, k-dk)=S^\dag(k-dk,k)
        !
        S_val=hermitian(DIP_S(1:nb,1:nb,id+(istep-1)*3,ikm,i_sp_pol))
        call SERIAL_inversion(nb,S_val,det,.false.)
diff --git a/src/dipoles/DIPOLE_IO.F b/src/dipoles/DIPOLE_IO.F
index 7f1fe1afc5..1bf0fb832c 100644
--- a/src/dipoles/DIPOLE_IO.F
+++ b/src/dipoles/DIPOLE_IO.F
@@ -14,7 +14,7 @@ subroutine DIPOLE_IO(Xk,Xen,Dip,check_read_or_write,io_err,PAR_K_scheme_kind)
  use IO_m,        ONLY:io_DIP
  use R_lattice,   ONLY:bz_samp
  use electrons,   ONLY:levels,n_sp_pol
- use DIPOLES,     ONLY:DIPOLE_t,covariant_obs
+ use DIPOLES,     ONLY:DIPOLE_t,covariant_obs,der_k_obs
  use IO_int,      ONLY:io_control
  use IO_m,        ONLY:manage_action,VERIFY,DUMP,REP,OP_WR_CL,OP_WR,OP_RD_CL,&
 &                      OP_IF_START_APP_CL_IF_END,OP_IF_START_RD_CL_IF_END
@@ -35,11 +35,9 @@ subroutine DIPOLE_IO(Xk,Xen,Dip,check_read_or_write,io_err,PAR_K_scheme_kind)
  !
  ! Work space
  !
- logical           :: write_header,     read_header,     check,reading,writing,&
-&                     write_header_ovlp,read_header_ovlp,l_project_dipoles,IO_do_it
- integer           :: ID,ID_S,IO_ACTION,IO_ACT_NOW,IO_MODE,ik_mem,ik,i_sp_pol,i_fragment,iv,ic,max_count,i_count
+ logical           :: write_header,read_header,check,reading,writing,l_project_dipoles,IO_do_it
+ integer           :: ID,IO_ACTION,IO_ACT_NOW,IO_MODE,ik_mem,ik,i_sp_pol,i_fragment,iv,ic,max_count,i_count
  integer, external :: io_DIPOLES
- integer, external :: io_Overlaps
 #if !defined _PAR_IO
  type(PP_indexes)  :: IO_par_index
 #endif
@@ -60,32 +58,22 @@ subroutine DIPOLE_IO(Xk,Xen,Dip,check_read_or_write,io_err,PAR_K_scheme_kind)
  call PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
  !
  ID=0
- ID_S=0
  io_err=0
  !
  write_header  =  (master_cpu.and.writing)
  read_header   =  .not.write_header 
  !
- write_header_ovlp  =write_header.and.len_trim(covariant_obs)>0
- read_header_ovlp   = read_header.and.len_trim(covariant_obs)>0
- !
  if(write_header) then
    call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
    io_err=io_DIPOLES(Dip,ID)
  endif
- !
- if(write_header_ovlp) then
-   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID_S)
-   io_err=io_err+io_Overlaps(Dip,ID_S)
- endif
-
  !
  ! To fix problems with asynchronous I/O 
  !
  call PP_wait(COMM=PAR_K_scheme%comm_world)
  !
  IO_MODE=VERIFY
- if((read_header.or.read_header_ovlp).and.writing) IO_MODE=DUMP
+ if(read_header.and.writing) IO_MODE=DUMP
  !
  if(read_header) then
    !
@@ -101,10 +89,6 @@ subroutine DIPOLE_IO(Xk,Xen,Dip,check_read_or_write,io_err,PAR_K_scheme_kind)
    do while(io_err/=0)
      call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=IO_MODE,ID=ID)
      io_err=io_DIPOLES(Dip,ID)
-     if(read_header_ovlp) then
-       call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=IO_MODE,ID=ID_S)
-       io_err=io_err+io_Overlaps(Dip,ID_S)
-     endif
      if(io_err/=0) call sleep(1)
      i_count=i_count+1
      if((i_count==max_count).and.io_err/=0) then
@@ -169,55 +153,6 @@ subroutine DIPOLE_IO(Xk,Xen,Dip,check_read_or_write,io_err,PAR_K_scheme_kind)
  call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/2+Xk%nibz*n_sp_pol/),ID=ID)
  io_err=io_DIPOLES(Dip,ID)
 #endif
- !
- ! OVERLAPS I/O
- !==============
- !
- if(len_trim(covariant_obs)>0) then
-   !
-#if !defined _PAR_IO
-   ! IO parallel indexes needed below by manage_action
-   !---------------------
-   call PP_indexes_reset(IO_par_index)
-   allocate(IO_par_index%element_1D(n_sp_pol*Xk%nbz))
-   IO_par_index%element_1D=.FALSE.
-   do i_sp_pol=1,n_sp_pol
-     do ik=1,Xk%nbz
-       i_fragment=ik+(i_sp_pol-1)*Xk%nbz
-       if ( .not.PAR_K_scheme%IND_bz%element_1D(ik)) cycle
-       IO_par_index%element_1D(i_fragment)=.TRUE.
-     enddo
-   enddo
-#endif
-   !
-#if defined _PAR_IO
-   IO_ACT_NOW=manage_action(IO_ACTION,0,0,2)
-   call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/0/),ID=ID_S,COMM=PAR_K_scheme%COM_ibz_index,DO_IT=IO_do_it)
-   io_err=io_Overlaps(Dip,ID_S)
-   IO_ACT_NOW=manage_action(IO_ACTION,1,0,2)
-#endif
-   do i_sp_pol=1,n_sp_pol
-     do ik=1,Xk%nbz
-       i_fragment=ik+(i_sp_pol-1)*Xk%nbz
-       if (.not.PAR_K_scheme%IND_bz%element_1D(ik).and.writing) cycle
-#if !defined _PAR_IO
-       if (.not.PAR_K_scheme%COM_ibz_A2A%CPU_id==0 .and.writing) cycle
-       if (writing) IO_ACT_NOW=manage_action(IO_ACTION,i_fragment,1,Xk%nbz*n_sp_pol,IO_par_index)
-       if (reading) IO_ACT_NOW=manage_action(IO_ACTION,i_fragment,1,Xk%nbz*n_sp_pol)
-#endif
-       call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/1+i_fragment/),ID=ID_S)
-       io_err=io_Overlaps(Dip,ID_S)
-       if (io_err/=0.and.IO_do_it) then
-         call error(" OVERLAPS I/O error when (i_sp_pol,ik)= ("//trim(intc(i_sp_pol))//","//trim(intc(ik))//")")
-       endif
-     enddo
-   enddo
-#if defined _PAR_IO
-   IO_ACT_NOW=manage_action(IO_ACTION,2,0,2)
-   call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/2+Xk%nbz*n_sp_pol/),ID=ID_S)
-   io_err=io_Overlaps(Dip,ID_S)
-#endif
- endif
  !
  call PP_wait(COMM=PAR_K_scheme%comm_world)
  !
@@ -287,8 +222,8 @@ subroutine fix_velocity(Dip,dir)
                den=Ev_m_Ec_QP
              endif
              !
-             if (abs(den)<=Dip%Energy_treshold) DIP_v(:,ic,iv,ik_mem,i_sp_pol)=cZERO
-             if (abs(den)> Dip%Energy_treshold) DIP_v(:,ic,iv,ik_mem,i_sp_pol)=DIP_v(:,ic,iv,ik_mem,i_sp_pol)*(num/den)
+             if (abs(den)<=Dip%Energy_threshold) DIP_v(:,ic,iv,ik_mem,i_sp_pol)=cZERO
+             if (abs(den)> Dip%Energy_threshold) DIP_v(:,ic,iv,ik_mem,i_sp_pol)=DIP_v(:,ic,iv,ik_mem,i_sp_pol)*(num/den)
              !
            enddo
          enddo
diff --git a/src/dipoles/DIPOLE_build_der_k.F b/src/dipoles/DIPOLE_build_der_k.F
new file mode 100644
index 0000000000..5a874fdef2
--- /dev/null
+++ b/src/dipoles/DIPOLE_build_der_k.F
@@ -0,0 +1,294 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2015 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+! headers
+!
+#include<y_memory.h>
+!
+subroutine DIPOLE_build_der_k(Xen,Xk,Dip,direction_is_todo)
+ !
+ ! Build the dipole as k-derivative using
+ ! the overlaps to construct local rotation matrices 
+ ! see Phys. Rev. B 76, 035213 (2007) and  Phys. Rev. Research 2, 013357 (2020)
+ !
+ use pars,           ONLY:SP,cI,cZERO,pi,cONE
+ use units,          ONLY:HA2EV
+ use LIVE_t,         ONLY:live_timing
+ use electrons,      ONLY:levels,n_sp_pol
+ use DIPOLES,        ONLY:DIPOLE_t,DIP_S,DIP_iR,DIP_v,l_force_SndOrd,&
+                          der_k_obs,x_space_obs
+ use R_lattice,      ONLY:bz_samp,k_map
+ use vec_operate,    ONLY:k_periodic_idx,degeneration_finder
+ use matrix_operate, ONLY:hermitian
+ use parallel_m,     ONLY:PAR_IND_DIPk_ibz,PAR_DIPk_ibz_index,PAR_IND_DIPk_ibz_ID,&
+&                         PAR_DIPk_nbz,PAR_IND_CON_BANDS_DIP, PAR_IND_VAL_BANDS_DIP, &
+&                         PAR_IND_DIPk_bz,PAR_DIPk_bz_index,PAR_IND_DIPk_bz_ID,PAR_COM_DIPk_ibz_INDEX
+ use parallel_int,   ONLY:PP_redux_wait
+ use com,            ONLY:msg
+ use D_lattice,      ONLY:a
+ use parser_m,       ONLY:parser
+ use timing_m,       ONLY:timing
+ use stderr,         ONLY:STRING_match
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels),           intent(in)     :: Xen
+ type(bz_samp),          intent(inout)  :: Xk
+ type(DIPOLE_t),         intent(inout)  :: Dip
+ logical,                intent(out)    :: direction_is_todo(3)
+ !
+ ! Work Space
+ !
+ integer     :: ik,i_sp_pol,id_red,idx(3),idir,ic,ik_mem,ikbz,iv
+ integer     :: istep,max_step,ikm1,ikp1
+ complex(SP), allocatable :: U_l(:,:,:),U_r(:,:,:),Der_k_red(:,:,:),Der_k_cart(:,:,:),delta(:,:)
+ !
+ logical     :: deg_table_k(Dip%ib(2),Dip%ib(2)),deg_table_kp(Dip%ib(2),Dip%ib(2)),deg_table_km(Dip%ib(2),Dip%ib(2))
+ integer     :: ib,ibp,i_grp,first_el_k(Dip%ib(2)),n_of_el_k(Dip%ib(2)),n_deg_grp_k
+ real(SP)    :: Ev_m_Ec_KS,deg_thresh
+ character(1)::str(3)=(/'x','y','z'/)
+ !
+ call timing("DIPOLE_buil_derk",OPR="start")
+ !
+ if(.not.Dip%force_v_g_space) Dip%Vnl_included=.true.
+ !
+ if(.not.allocated(k_map%k_map_dir)) call k_map_nearest_by_dir(Xk,k_map)
+ if(.not.allocated(Xk%k_table))      call k_build_up_BZ_tables(Xk)
+ !
+ direction_is_todo(:)= (k_map%max_kdir(:)==1)
+ !
+ if(any(direction_is_todo)) call warning(' Der-k Dipoles not in 3d. Assuming non periodic system.')
+ !
+ if(any(k_map%max_kdir(:)>=6.and..not.l_force_SndOrd)) &
+ &   call msg('rs','Using forth order approximation for der-k dipoles')
+ !
+ ! Notice that DIP_iR/DIP_v are defined only in the IBZ
+ ! while the overlaps are in the BZ
+ !
+ call live_timing("Build Rotation Matrices:",PAR_IND_DIPk_bz%n_of_elements(PAR_IND_DIPk_bz_ID+1)*n_sp_pol)
+ !
+ YAMBO_ALLOC(U_l,(Dip%ib(2),Dip%ib(2),3))
+ YAMBO_ALLOC(U_r,(Dip%ib(2),Dip%ib(2),3))
+ YAMBO_ALLOC(Der_k_cart,(Dip%ib(2),Dip%ib(2),3))
+ YAMBO_ALLOC(Der_k_red,(Dip%ib(2),Dip%ib(2),2))
+ YAMBO_ALLOC(delta,(Dip%ib(2),Dip%ib(2)))
+ !
+ delta=cZERO
+ do ib=1,Dip%ib(2)
+   delta(ib,ib)=cONE
+ enddo
+ !
+ Dip%Energy_threshold=max(Dip%Energy_threshold,0.001/HA2EV)
+ deg_thresh=Dip%Energy_threshold
+ !
+ do i_sp_pol=1,n_sp_pol
+   !
+   do ik=1,Xk%nibz 
+     !
+     if (.not.PAR_IND_DIPk_ibz%element_1D(ik)) cycle
+     !
+     ik_mem=PAR_DIPk_ibz_index(ik)
+     !
+     ikbz=Xk%k_table(ik,1)
+     !
+     call degeneration_finder(Dip%ib(2),first_el_k,n_of_el_k,n_deg_grp_k,&
+     &    Er=Xen%E(1:Dip%ib(2),ik,i_sp_pol),deg_accuracy=deg_thresh,Include_single_values=.true.)
+     !
+     deg_table_k=.false.
+     do i_grp=1,n_deg_grp_k
+       do ib=first_el_k(i_grp),first_el_k(i_grp)+n_of_el_k(i_grp)-1
+         do ibp=first_el_k(i_grp),first_el_k(i_grp)+n_of_el_k(i_grp)-1
+           deg_table_k(ib,ibp)=.true.
+           deg_table_k(ibp,ib)=.true.
+         enddo
+       enddo
+     enddo
+     !
+     U_l       =cZERO
+     U_r       =cZERO
+     !
+     Der_k_cart=cZERO
+     Der_k_red =cZERO
+     !
+     do id_red=1,3  ! Loop on the three crystal directions in the BZ
+       !
+       if(k_map%max_kdir(id_red)==1) cycle ! Non-periodic direction, skip calculation
+       !
+       max_step=1 ! Only the first neighbor
+       if(k_map%max_kdir(id_red)>=6.and..not.l_force_SndOrd) max_step=2
+       !
+       do istep=1,max_step
+         ! 
+         idx=k_map%k_map_inv(ikbz,:)
+         idx(id_red)=idx(id_red)+istep
+         idx=k_periodic_idx(idx,k_map)
+         ikp1=k_map%k_map_dir(idx(1),idx(2),idx(3))
+         !
+         idx=k_map%k_map_inv(ikbz,:)
+         idx(id_red)=idx(id_red)-istep
+         idx=k_periodic_idx(idx,k_map)
+         ikm1=k_map%k_map_dir(idx(1),idx(2),idx(3))
+         !
+         U_r(:,:,1)=          DIP_S(:,:,id_red+(istep-1)*3,ikbz,i_sp_pol)
+         call zeroing_disconnected_blocks(Xen,Xk,(/1,Dip%ib(2)/),ikp1,i_sp_pol,&
+         &                         deg_table_k,deg_thresh,U_r(:,:,1),U_r(:,:,2))
+         U_r(:,:,3)=U_r(:,:,2)
+         call SERIAL_SVD(Dip%ib(2),U_r(:,:,3),'uni',0)
+         !
+         U_l(:,:,1)=hermitian(DIP_S(:,:,id_red+(istep-1)*3,ikm1,i_sp_pol))
+         call zeroing_disconnected_blocks(Xen,Xk,(/1,Dip%ib(2)/),ikm1,i_sp_pol,&
+         &                         deg_table_k,deg_thresh,U_l(:,:,1),U_l(:,:,2))
+         U_l(:,:,3)=U_l(:,:,2)
+         call SERIAL_SVD(Dip%ib(2),U_l(:,:,3),'uni',0)
+         !
+	 ! Yambo version
+         Der_k_red(:,:,istep)=+matmul(U_r(:,:,1),hermitian(U_r(:,:,3)) )  &
+         &                    -matmul(U_l(:,:,1),hermitian(U_l(:,:,3)) )
+	 ! Virk-Sipe version
+         !Der_k_red(:,:,istep)=+matmul((U_r(:,:,1)-U_r(:,:,2)),hermitian(U_r(:,:,3)) )  &
+         !&                    -matmul((U_l(:,:,1)-U_l(:,:,2)),hermitian(U_l(:,:,3)) )
+         !
+	 ! This contains the i/2q factor 
+	 ! 1/q=k_map%max_kdir(id_red)/(4._SP*pi*real(istep,SP))
+         Der_k_red(:,:,istep)=Der_k_red(:,:,istep)*cI*k_map%max_kdir(id_red)/(4._SP*pi*real(istep,SP))/2._SP
+         !
+       enddo
+       !
+       ! Transform in cartesian coordinates 
+       !
+       if(max_step==1) then
+         !      
+         ! First order formula D(dk)      
+         !
+         do idir=1,3
+           Der_k_cart(:,:,idir)=Der_k_cart(:,:,idir)+transpose(Der_k_red(:,:,1))*a(id_red,idir)
+         enddo
+         !
+       else
+         !      
+         ! Second order formula   ( 4*D(dk) - D(2*dk) ) / 3
+         !
+         do idir=1,3
+           Der_k_cart(:,:,idir)=Der_k_cart(:,:,idir)+transpose(4._SP*Der_k_red(:,:,1)-Der_k_red(:,:,2))/3._SP*a(id_red,idir)
+         enddo
+         !
+       endif
+       !
+     enddo  ! id_red loop on crystal directions 
+     !
+     !======
+     ! <iR>
+     !======
+     do iv=Dip%ib(1),Dip%ib_lim(1)
+       if(.not.PAR_IND_VAL_BANDS_DIP%element_1D(iv)) cycle
+       do ic=max(iv,Dip%ib_lim(2)),Dip%ib(2)
+         if(.not.PAR_IND_CON_BANDS_DIP%element_1D(ic)) cycle
+         do idir=1,3
+           DIP_iR(idir,ic,iv,ik_mem,i_sp_pol)=(Der_k_cart(ic,iv,idir)+conjg(Der_k_cart(iv,ic,idir)))*cI
+         enddo
+       enddo
+     enddo
+     !
+     !=====
+     ! <v>
+     !=====
+     if(STRING_match(der_k_obs,"V")) then
+       do iv=Dip%ib(1),Dip%ib_lim(1) 
+         if(.not.PAR_IND_VAL_BANDS_DIP%element_1D(iv)) cycle
+         do ic=max(iv,Dip%ib_lim(2)),Dip%ib(2)
+           if(.not.PAR_IND_CON_BANDS_DIP%element_1D(ic)) cycle
+           Ev_m_Ec_KS   =Xen%E (iv,ik,i_sp_pol)-Xen%E (ic,ik,i_sp_pol)
+           if (allocated(Xen%Eo)) Ev_m_Ec_KS   =Xen%Eo(iv,ik,i_sp_pol)-Xen%Eo(ic,ik,i_sp_pol)
+           do idir=1,3
+             DIP_v(idir,ic,iv,ik_mem,i_sp_pol)=DIP_iR(idir,ic,iv,ik_mem,i_sp_pol)*Ev_m_Ec_KS
+           enddo
+           !
+         enddo
+       enddo
+       !
+     endif
+     !
+     call live_timing(steps=1)
+     !
+   enddo ! ik iiBZ loop
+   !
+ enddo ! i_sp_pol
+ !
+ call live_timing()
+ !
+ YAMBO_FREE(U_l)
+ YAMBO_FREE(U_r)
+ YAMBO_FREE(Der_k_red)
+ YAMBO_FREE(Der_k_cart)
+ YAMBO_FREE(delta)
+ !
+ ! Build non-periodic directions if necessary
+ ! 
+ if (any(direction_is_todo)) then
+   do ic=1,3
+     if (direction_is_todo(ic)) then
+       x_space_obs=trim(x_space_obs)//" R_"//str(ic)
+       if (STRING_match(der_k_obs,"V")) x_space_obs=trim(x_space_obs)//" V_"//str(ic)
+     endif
+   enddo 
+   Dip%computed=trim(Dip%computed)//trim(x_space_obs)//" [X-space]"
+ endif
+ !
+ call timing("DIPOLE_buil_derk",OPR="stop")
+ return
+ !
+end subroutine DIPOLE_build_der_k
+!
+!
+subroutine zeroing_disconnected_blocks(Xen,Xk,nb,ikpbz,i_sp_pol,deg_table_k,deg_thresh,U_in,U_out)
+ !
+ use pars,           ONLY:SP,cZERO
+ use R_lattice,      ONLY:bz_samp
+ use electrons,      ONLY:levels
+ use vec_operate,    ONLY:degeneration_finder
+ !
+ implicit none
+ !
+ type(levels),   intent(in) :: Xen
+ type(bz_samp),  intent(in) :: Xk
+ !
+ integer, intent(in) :: nb(2),ikpbz,i_sp_pol
+ logical, intent(in) :: deg_table_k(nb(1):nb(2),nb(1):nb(2))
+ real(SP),intent(in) :: deg_thresh
+ !
+ complex(SP), intent(in)  :: U_in(nb(1):nb(2),nb(1):nb(2))
+ complex(SP), intent(out) :: U_out(nb(1):nb(2),nb(1):nb(2))
+ !
+ ! Workspace
+ !
+ integer     :: ib,ibp,i_grp,first_el_k(nb(2)-nb(1)+1),n_of_el_k(nb(2)-nb(1)+1),n_deg_grp_k
+ logical     :: deg_table_kp(nb(1):nb(2),nb(1):nb(2))
+ !
+ call degeneration_finder(nb(2)-nb(1)+1,first_el_k,n_of_el_k,n_deg_grp_k,&
+ &    Er=Xen%E(nb(1):nb(2),Xk%sstar(ikpbz,1),i_sp_pol),deg_accuracy=deg_thresh,Include_single_values=.true.)
+ !
+ deg_table_kp=.false.
+ do i_grp=1,n_deg_grp_k
+   do ib=nb(1)+first_el_k(i_grp)-1,nb(1)+first_el_k(i_grp)+n_of_el_k(i_grp)-2
+     do ibp=nb(1)+first_el_k(i_grp)-1,nb(1)+first_el_k(i_grp)+n_of_el_k(i_grp)-2
+       deg_table_kp(ib,ibp)=.true.
+       deg_table_kp(ibp,ib)=.true.
+     enddo
+   enddo
+ enddo
+ !
+ U_out=U_in
+ !
+ do ib=nb(1),nb(2)
+   do ibp=nb(1),nb(2)
+     if(deg_table_k(ibp,ib).or.deg_table_kp(ibp,ib)) cycle
+     U_out(ib,ibp)=cZERO
+   enddo
+ enddo
+ !
+end subroutine
diff --git a/src/dipoles/DIPOLE_covariant.F b/src/dipoles/DIPOLE_covariant.F
index b8d6de6378..71fa9365d2 100644
--- a/src/dipoles/DIPOLE_covariant.F
+++ b/src/dipoles/DIPOLE_covariant.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
  !
  ! Build the covariant dipole operator at zero field
@@ -13,7 +17,8 @@ subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
  use pars,           ONLY:SP,cI,cZERO,pi
  use LIVE_t,         ONLY:live_timing
  use electrons,      ONLY:levels,n_sp_pol
- use DIPOLES,        ONLY:DIPOLE_t,DIP_S,DIP_iR,DIP_v,covariant_obs,x_space_obs
+ use DIPOLES,        ONLY:DIPOLE_t,DIP_S,DIP_iR,DIP_v,l_force_SndOrd,&
+ &                        covariant_obs,x_space_obs
  use ALLOC,          ONLY:DIPOLE_ALLOC_elemental
  use R_lattice,      ONLY:bz_samp,k_map
  use vec_operate,    ONLY:k_periodic_idx
@@ -25,8 +30,9 @@ subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
  use parser_m,       ONLY:parser
  use timing_m,       ONLY:timing
  use stderr,         ONLY:STRING_match
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),           intent(in)     ::Xen
  type(bz_samp),          intent(inout)  ::Xk
@@ -35,7 +41,7 @@ subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
  !
  ! Work Space
  !
- logical     ::l_eval_polarization,l_force_SndOrd
+ logical     ::l_eval_polarization
  integer     ::ik,iv,ic,ic_min,iv_max,i_sp_pol,ikm1,id_red,idir,idx(3),ikbz,ik_mem,istep,max_step
  real(SP)    ::Ev_m_Ec_KS
  complex(SP), allocatable :: Sm1_plus (:,:,:),Sm1_minus(:,:,:)
@@ -46,20 +52,12 @@ subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
  !
  if(.not.Dip%force_v_g_space) Dip%Vnl_included=.true.
  !
- if (any(Xen%nbf/=Xen%nbm)) call error(' Covariant dipoles not implemented for metals')
- !
- call parser('EvPolarization'  ,l_eval_polarization)
- call parser('FrSndOrd'        ,l_force_SndOrd)
- !
  if(.not.allocated(k_map%k_map_dir)) call k_map_nearest_by_dir(Xk,k_map)
  if(.not.allocated(Xk%k_table))      call k_build_up_BZ_tables(Xk)
  !
- ! Notice that DIP_S is defined in the BZ       
- !
- call DIPOLE_ALLOC_elemental('DIP_S',(/Dip%ib(2),Dip%ib(2),6,Xk%nbz/))
- DIP_S=cZERO
+ if (any(Xen%nbf/=Xen%nbm)) call error(' Covariant dipoles not implemented for metals')
  !
- call DIPOLE_overlaps(Xk,Dip)
+ call parser('EvPolarization'  ,l_eval_polarization)
  !
  direction_is_todo(:)= (k_map%max_kdir(:)==1)
  !
@@ -159,7 +157,7 @@ subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
        do ic=max(iv,Dip%ib_lim(2)),Dip%ib(2)
          if(.not.PAR_IND_CON_BANDS_DIP%element_1D(ic)) cycle
          do idir=1,3
-           DIP_iR(idir,ic,iv,ik_mem,i_sp_pol)=(DIP_tmp(idir,ic,iv)+conjg(DIP_tmp(idir,iv,ic)))/cI
+           DIP_iR(idir,ic,iv,ik_mem,i_sp_pol)=(DIP_tmp(idir,ic,iv)+conjg(DIP_tmp(idir,iv,ic)))*cI
          enddo
        enddo
      enddo
@@ -195,7 +193,7 @@ subroutine DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
  call live_timing()
  !
 #if defined _NL
- if (l_eval_polarization) call POLARIZATION_output(Xen,Xk,Dip)
+ if (l_eval_polarization) call POLARIZATION_output(Xen,Xk)
 #endif
  !
  if (any(direction_is_todo)) then
diff --git a/src/dipoles/DIPOLE_dimensions.F b/src/dipoles/DIPOLE_dimensions.F
index 9b5be729e1..95f3cbaec7 100644
--- a/src/dipoles/DIPOLE_dimensions.F
+++ b/src/dipoles/DIPOLE_dimensions.F
@@ -37,7 +37,7 @@ subroutine DIPOLE_dimensions(Xen,Dip,bands,q0)
  !
  ! Set up band limits
  !
- if (Dip%bands_ordered.or.Dip%Energy_treshold<0._SP) then
+ if (Dip%bands_ordered.or.Dip%Energy_threshold<0._SP) then
    Dip%ib_lim(1)=maxval(Xen%nbm)
    Dip%ib_lim(2)=minval(Xen%nbf)+1
    if (l_X_terminator) Dip%ib_lim(2)=Dip%ib(1)
diff --git a/src/dipoles/DIPOLE_driver.F b/src/dipoles/DIPOLE_driver.F
index 93acbf248d..86248224ca 100644
--- a/src/dipoles/DIPOLE_driver.F
+++ b/src/dipoles/DIPOLE_driver.F
@@ -22,17 +22,22 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  ! 4) covariant approach    --> computes <nk|r|mk> in reciprocal space as <nk|partial_k|mk>
  !                              takes into account the phases. symmetries not yet implemented 
  !                              and then <v> as <r>*DeltaE
+ ! 4) der_k approach        --> computes <nk|r|mk> in reciprocal space as <nk|partial_k|mk>
+ !                              takes into account the phases. symmetries not yet implemented 
+ !                              and then <v> as <r>*DeltaE
  !
- ! Note that <P> is ALWAYS calculated using the g_space approach
+ ! Note that <P/v> are ALWAYS calculated using the g_space approach
  !
  use drivers,        ONLY:l_optics
  use pars,           ONLY:SP,schlen
+ use com,            ONLY:msg
  use electrons,      ONLY:levels,n_sp_pol
  use IO_m,           ONLY:io_DIP
  use R_lattice,      ONLY:bz_samp
- use DIPOLES,        ONLY:DIPOLE_t,DIP_iR,x_space_obs,g_space_obs,covariant_obs,shifted_grids_obs,&
-&                         Vnl_commutator_warning,DIP_P,DIP_v,DIP_spin,DIP_orbital,P_square
- use ALLOC,          ONLY:DIPOLE_ALLOC_global
+ use DIPOLES,        ONLY:DIPOLE_t,DIP_iR,DIP_P,DIP_v,DIP_spin,DIP_orbital,P_square,&
+&                         x_space_obs,g_space_obs,covariant_obs,der_k_obs,shifted_grids_obs,&
+&                         Vnl_commutator_warning,eval_OVERLAPS,l_force_SndOrd
+ use ALLOC,          ONLY:DIPOLE_ALLOC_global,DIPOLE_ALLOC_elemental
  use IO_int,         ONLY:IO_and_Messaging_switch
  use parallel_m,     ONLY:PAR_IND_DIPk_ibz,PAR_DIPk_nibz,PAR_COM_DIPk_ibz_A2A,&
 &                         PAR_IND_CON_BANDS_DIP,PAR_IND_VAL_BANDS_DIP
@@ -51,8 +56,8 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  !
  ! Work Space
  !
- integer           :: ik,io_err
- character(schlen) :: msg
+ integer           :: ik,io_err,io_err_Overlaps
+ character(schlen) :: message
  logical           :: l_warning,direction_is_todo(3)
  !
  call section('*','Dipoles')
@@ -77,9 +82,15 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  io_err=-1
  call DIPOLE_IO(Xk,Xen,Dip,'check',io_err,'DIP')
  !
+ io_err_Overlaps=-1
+ if (eval_OVERLAPS) then
+   call OVERLAPS_IO(Xk,Xen,Dip,'check',io_err_Overlaps,'DIP')
+   if (io_err_Overlaps==0)  call OVERLAPS_IO(Xk,Xen,Dip,'read',io_err_Overlaps,'DIP')
+ endif
+ !
  ! In case dipoles were not found/ok then I need to compute them
  !==============================================================
- if (io_err/=0) then
+ if (io_err/=0 .or. (eval_Overlaps.and.io_err_Overlaps/=0) ) then
    !
    ! I/O privilegies: temporarly switch it on
    !
@@ -95,16 +106,34 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
    !
    call PARALLEL_WF_index(COMM=PAR_COM_DIPk_ibz_A2A)
    !
+ endif
+ !
+ ! Overlaps (DIP_S)
+ !
+ if (eval_Overlaps.and.io_err_Overlaps/=0) then
+   !
+   ! Notice that DIP_S is defined in the BZ
+   !
+   call DIPOLE_ALLOC_elemental('DIP_S',(/Dip%ib(2),Dip%ib(2),6,Xk%nbz/))
+   !
+   call DIPOLE_overlaps(Xk,Dip)
+   !
+ endif
+ !
+ if (io_err/=0) then
+   !
+   ! <R>, <V>, <P>, <P_spinor>, <P^2>
+   !
    ! Allocation
    !
    call DIPOLE_ALLOC_global(Dip,PAR_DIPk_nibz)
    !
-   ! <R>, <V>, <P>, <P_spinor>, <P^2> S_overlap
-   !
    if (len_trim(shifted_grids_obs)>0)              call DIPOLE_shifted_grids(Xen,Xk,Dip)
    !
    if (len_trim(covariant_obs)>0)                  call DIPOLE_covariant(Xen,Xk,Dip,direction_is_todo)
    !
+   if (len_trim(der_k_obs)>0)                      call DIPOLE_build_der_k(Xen,Xk,Dip,direction_is_todo)
+   !
    if (len_trim(x_space_obs)>0)                    call DIPOLE_x_real_space(Xen,Xk,Dip,direction_is_todo)
    !
    if (len_trim(g_space_obs)>0)                    call DIPOLE_g_space(Xen,Xk,Dip)
@@ -113,11 +142,9 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
    !
    call DIPOLES_ppredux_and_symmetrize()
    !
-#if defined _RT
-   if (STRING_match(Dip%computed,"M_orb"))         call DIPOLE_orbital_magnetization(Xen,Xk,Dip)
-#else
-   if (STRING_match(Dip%computed,"M_CD_orb"))      call DIPOLE_orb_magn_forCD(Xen,Xk,Dip)
-#endif
+   if (STRING_match(Dip%computed,"M_orbRT"))       call DIPOLE_orbital_magnetization_rt(Xen,Xk,Dip)
+   !
+   if (STRING_match(Dip%computed,"M_orbCD"))       call DIPOLE_orbital_magnetization_cd(Xen,Xk,Dip) 
    !
    call DIPOLES_ppredux_and_symmetrize_orbt()
    !
@@ -125,6 +152,8 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
    !
    call DIPOLE_IO(Xk,Xen,Dip,'write',io_err,'DIP')
    !
+   if (eval_Overlaps.and.io_err_Overlaps/=0)        call OVERLAPS_IO(Xk,Xen,Dip,'write',io_err_Overlaps,'DIP')
+   !
    ! I/O privilegies: RESTORE to previous values
    !
    call IO_and_Messaging_switch("RESTORE")
@@ -138,10 +167,10 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  ! Warn about missing [Sigma,r] commutator
  !
  if (      Dip%Vnl_included .and.l_warning.and.(len_trim(g_space_obs)>0.or.Dip%force_v_g_space)) then
-   msg=' [r,Vnl^pseudo] included in'
-   if (.not.len_trim(g_space_obs)>0) msg=trim(msg)//' velocity dipoles.'
-   if (     len_trim(g_space_obs)>0) msg=trim(msg)//' position and velocity dipoles.'
-   call warning(trim(msg))
+   message='[r,Vnl^pseudo] included in'
+   if (.not.len_trim(g_space_obs)>0) message=trim(message)//' velocity dipoles.'
+   if (     len_trim(g_space_obs)>0) message=trim(message)//' position and velocity dipoles.'
+   call msg('s',trim(message))
    call warning(' In case H contains other non local terms, these are neglected ')
    Vnl_commutator_warning=.TRUE.
  endif
@@ -149,10 +178,10 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  ! Warn about missing [Vnl,r] commutator
  !
  if ((.not.Dip%Vnl_included).and.l_warning) then
-   msg=' [r,Vnl^pseudo] not included in'
-   if (.not.len_trim(g_space_obs)>0) msg=trim(msg)//' velocity dipoles'
-   if (     len_trim(g_space_obs)>0) msg=trim(msg)//' position and velocity dipoles'
-   call warning(trim(msg))
+   message=' [r,Vnl^pseudo] not included in'
+   if (.not.len_trim(g_space_obs)>0) message=trim(message)//' velocity dipoles'
+   if (     len_trim(g_space_obs)>0) message=trim(message)//' position and velocity dipoles'
+   call warning(trim(message))
    call warning(' In case H contains other non local terms, also these are neglected ')
    Vnl_commutator_warning=.TRUE.
  endif
@@ -161,6 +190,10 @@ subroutine DIPOLE_driver(Xen,Xk,Xq,Dip)
  !
  if (.not.io_DIP) return
  !
+ ! Overlaps
+ call DIPOLE_ALLOC_elemental('DIP_S')
+ !
+ ! Dipoles
  call DIPOLE_ALLOC_global()
  !
  contains
@@ -262,10 +295,9 @@ subroutine DIPOLES_ppredux_and_symmetrize_orbt()
     integer  :: ic,iv,i_sp_pol,ik,ik_mem,i_dips,n_dips
     !
     n_dips=0
-    if (STRING_match(Dip%computed,"M_orb").and.STRING_match(Dip%computed,"M_it")) then
-      n_dips=2
-    else if (STRING_match(Dip%computed,"M_orb").or.STRING_match(Dip%computed,"M_CD_orb")) then
-      n_dips=1
+    if (STRING_match(Dip%computed,"M_orb")) then
+      if(     STRING_match(Dip%computed,"M_it")) n_dips=2
+      if(.not.STRING_match(Dip%computed,"M_it")) n_dips=1
     endif
     !
     do i_dips=1,n_dips
diff --git a/src/dipoles/DIPOLE_g_space.F b/src/dipoles/DIPOLE_g_space.F
index 6febc9f37f..d3ee3b5173 100644
--- a/src/dipoles/DIPOLE_g_space.F
+++ b/src/dipoles/DIPOLE_g_space.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine DIPOLE_g_space(Xen,Xk,Dip)
  !
@@ -32,8 +36,9 @@ subroutine DIPOLE_g_space(Xen,Xk,Dip)
  use gpu_m,               ONLY:have_gpu,gpu_devsync
  use devxlib,             ONLY:devxlib_memcpy_h2d,devxlib_memset_d
  use stderr,              ONLY:STRING_match
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)    :: Xk
  type(levels),  intent(in)    :: Xen
@@ -49,7 +54,7 @@ subroutine DIPOLE_g_space(Xen,Xk,Dip)
  complex(SP), allocatable DEV_ATTR :: kbv_d(:,:,:,:)
  complex(SP), allocatable DEV_ATTR :: XX(:,:,:,:,:)
  complex(SP)                  :: rho_spinor(3,n_spinor,n_spinor),rho(3),P2
- real(SP)                     :: Ev_m_Ev_KS,Dipole_Energy_treshold
+ real(SP)                     :: Ev_m_Ev_KS,Dipole_Energy_threshold
  logical                      :: LT_msg=.FALSE.
  !
  !I/O and external functions
@@ -277,9 +282,9 @@ subroutine DIPOLE_g_space(Xen,Xk,Dip)
          ! define a different thr for val-val transitions
          ! used for XTerm (here Ev_m_Ev_KS may become very large)
          !
-         Dipole_Energy_treshold=Dip%Energy_treshold
+         Dipole_Energy_threshold=Dip%Energy_threshold
          ! DEBUG <
-         ! if(l_X_terminator.and.ic<=Dip%ib_lim(1)) Dipole_Energy_treshold=Dip%Energy_treshold_vv
+         ! if(l_X_terminator.and.ic<=Dip%ib_lim(1)) Dipole_Energy_threshold=Dip%Energy_threshold_vv
          ! DEBUG >
          !
          !=====
@@ -312,8 +317,8 @@ subroutine DIPOLE_g_space(Xen,Xk,Dip)
            if (.not.allocated(Xen%Eo)) Ev_m_Ev_KS=Xen%E (iv,ik,i_sp_pol)-Xen%E (ic,ik,i_sp_pol)
            if (     allocated(Xen%Eo)) Ev_m_Ev_KS=Xen%Eo(iv,ik,i_sp_pol)-Xen%Eo(ic,ik,i_sp_pol)
            !
-           if (abs(Ev_m_Ev_KS)> Dipole_Energy_treshold) DIP_iR(:,ic,iv,ik_mem,i_sp_pol)=rho/Ev_m_Ev_KS
-           if (abs(Ev_m_Ev_KS)<=Dipole_Energy_treshold) DIP_iR(:,ic,iv,ik_mem,i_sp_pol)=cZERO
+           if (abs(Ev_m_Ev_KS)> Dipole_Energy_threshold) DIP_iR(:,ic,iv,ik_mem,i_sp_pol)=rho/Ev_m_Ev_KS
+           if (abs(Ev_m_Ev_KS)<=Dipole_Energy_threshold) DIP_iR(:,ic,iv,ik_mem,i_sp_pol)=cZERO
            !
          endif
          !
diff --git a/src/dipoles/DIPOLE_kb_init.F b/src/dipoles/DIPOLE_kb_init.F
index a3fd500572..24ac4a302f 100644
--- a/src/dipoles/DIPOLE_kb_init.F
+++ b/src/dipoles/DIPOLE_kb_init.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine DIPOLE_kb_init(Xen,Dip,io_err,ID)
  !
  use pars,                ONLY:SP,pi
@@ -17,13 +21,14 @@ subroutine DIPOLE_kb_init(Xen,Dip,io_err,ID)
  use D_lattice,           ONLY:n_atomic_species,n_atoms_species
  use pseudo,              ONLY:pp_kbv_dim,pp_n_l_times_proj_max,pp_table, &
 &                              pp_factor,pp_n_l_max
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),  intent(in)    :: Xen
  type(DIPOLE_t),intent(inout) :: Dip
  !
- integer                      :: io_err(3)
+ integer                      :: io_err(2)
  integer                      :: ID
  !
  real(SP)                     :: j,mj
@@ -136,7 +141,6 @@ subroutine DIPOLE_kb_init(Xen,Dip,io_err,ID)
  !
  io_err(1)=io_KB_abinit_err
  io_err(2)=io_KB_pwscf_err
- io_err(3)=-1
  !
  if ( pp_kbv_dim>0 ) call msg('s','[x,Vnl] computed using '//trim(intc(pp_kbv_dim))//' projectors')
  if ( pp_kbv_dim>50) call warning(' [x,Vnl] slows the Dipoles computation. To neglect it rename the ns.kb_pp file')
diff --git a/src/dipoles/DIPOLE_kb_sum.F b/src/dipoles/DIPOLE_kb_sum.F
index 060be44b4c..e742077bd4 100644
--- a/src/dipoles/DIPOLE_kb_sum.F
+++ b/src/dipoles/DIPOLE_kb_sum.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS AF
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 subroutine DIPOLE_kb_project(pp_range,ib_range,ib,i_wf,wf_ncx,nbndx,npp,&
 &                                     pp_dim_atom,pp_dim_yambo,WF,kbv,XX)
  !
@@ -73,7 +76,6 @@ subroutine DIPOLE_kb_sum(rho,pp_dim_yambo,ib_range,iv,ic,XX)
  use devxlib,       ONLY:devxlib_memcpy_d2h
  use DIPOLES,       ONLY:drho=>DEV_VAR(DIP_work_drho)
  !
-#include<dev_defs.h>
  !
  implicit none
  !
@@ -106,7 +108,14 @@ subroutine DIPOLE_kb_sum(rho,pp_dim_yambo,ib_range,iv,ic,XX)
  !DEV_ACC_DEBUG end data
  !
  call devxlib_memcpy_d2h(drho_loc,drho)
- forall(i1=1:3) rho(i1)=rho(i1)+sum(drho_loc(:,i1,:))
+ !
+ ! Code commented due to fail with intel compiler
+ ! See issue https://gitlab.com/lumen-code/lumen/-/issues/244
+ ! Also many other do concurrent reverted to forall. Only one left is here
+ !do concurrent (i1=1:3) 
+ !  rho(i1)=rho(i1)+sum(drho_loc(:,i1,:))
+ !enddo
+ forall (i1=1:3) rho(i1)=rho(i1)+sum(drho_loc(:,i1,:))
  !
 end subroutine DIPOLE_kb_sum
 
diff --git a/src/dipoles/DIPOLE_orb_magn_forCD.F b/src/dipoles/DIPOLE_orbital_magnetization_cd.F
similarity index 83%
rename from src/dipoles/DIPOLE_orb_magn_forCD.F
rename to src/dipoles/DIPOLE_orbital_magnetization_cd.F
index 059b65640f..fc0d0871b0 100644
--- a/src/dipoles/DIPOLE_orb_magn_forCD.F
+++ b/src/dipoles/DIPOLE_orbital_magnetization_cd.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
-subroutine Dipole_orb_magn_forCD(Xen,Xk,Dip)
+subroutine Dipole_orbital_magnetization_cd(Xen,Xk,Dip)
  !
  ! This routine returns <L>
  !
@@ -52,6 +52,9 @@ subroutine Dipole_orb_magn_forCD(Xen,Xk,Dip)
      !
      ik_mem=PAR_DIPk_ibz_index(ik)
      ! 
+     !> @var ib(1)  lowest valence, ib(2)     upper conduction
+     !> @var ib_lim(1) top valence, ib_lim(2) bottom conduction
+     !
      do iv=Dip%ib(1),Dip%ib_lim(1) 
        !
        if(.not.PAR_IND_VAL_BANDS_DIP%element_1D(iv)) cycle
@@ -62,12 +65,13 @@ subroutine Dipole_orb_magn_forCD(Xen,Xk,Dip)
          !
          do ib=Dip%ib(1),Dip%ib(2)
            !
-           DIP_x_loc(:)=-cI*DIP_iR(:,ib,iv,ik_mem,i_sp_pol)
-           
+           ! conjg because of how cross_product is coded in mod_vec_operate.F
+           DIP_x_loc(:)=conjg(-cI*DIP_iR(:,ib,iv,ik_mem,i_sp_pol))
+           !
            DIP_v_loc(:)=DIP_v(:,ic,ib,ik_mem,i_sp_pol)
            !
            DIP_orbital(:,ic,iv,ik_mem,i_sp_pol,1)=&
-&                     DIP_orbital(:,ic,iv,ik_mem,i_sp_pol,1)+cross_product(DIP_x_loc,DIP_v_loc)
+&                   DIP_orbital(:,ic,iv,ik_mem,i_sp_pol,1)+cross_product(DIP_x_loc,DIP_v_loc)
            !
            ! DEBUG <
            !write(*,*) "[DIP] writing DIP_orbital(..,ic,iv,ik,,) values"
@@ -88,4 +92,4 @@ subroutine Dipole_orb_magn_forCD(Xen,Xk,Dip)
  !
  call timing("DIPOLE_orbital_M_CD",OPR="stop")
  !
-end subroutine Dipole_orb_magn_forCD
+end subroutine Dipole_orbital_magnetization_cd
diff --git a/src/dipoles/DIPOLE_orbital_magnetization.F b/src/dipoles/DIPOLE_orbital_magnetization_rt.F
similarity index 74%
rename from src/dipoles/DIPOLE_orbital_magnetization.F
rename to src/dipoles/DIPOLE_orbital_magnetization_rt.F
index dea635a76e..50d36c42c7 100644
--- a/src/dipoles/DIPOLE_orbital_magnetization.F
+++ b/src/dipoles/DIPOLE_orbital_magnetization_rt.F
@@ -5,9 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
-subroutine Dipole_orbital_magnetization(Xen,Xk,Dip)
+subroutine Dipole_orbital_magnetization_rt(Xen,Xk,Dip)
  !
  ! This routine returns <L>
+ ! Using PRB 74, 024408 (2006)
  !
  use pars,                ONLY:SP,cI
  use LIVE_t,              ONLY:live_timing
@@ -33,13 +34,13 @@ subroutine Dipole_orbital_magnetization(Xen,Xk,Dip)
  real(SP)                     :: Eb,Ev,Ec
  complex(SP)                  :: DIP_pos(3,2),DIP_tmp(3)
  !
- call timing("DIPOLE_orbital_M",OPR="start")
+ call timing("DIPOLE_orbital_M_RT",OPR="start")
  !
  n_LT_steps=n_sp_pol*PAR_IND_DIPk_ibz%n_of_elements(PAR_IND_DIPk_ibz_ID+1)*&
 &                    PAR_IND_VAL_BANDS_DIP%n_of_elements(PAR_IND_VAL_BANDS_DIP_ID+1)*&
 &                    PAR_IND_CON_BANDS_DIP%n_of_elements(PAR_IND_CON_BANDS_DIP_ID+1)
  !
- if (n_LT_steps>0) call live_timing('Orbital magnetization',n_LT_steps)
+ if (n_LT_steps>0) call live_timing('Orbital magnetization (RT)',n_LT_steps)
  !
  ! Main loop over k in IBZ
  !
@@ -72,12 +73,17 @@ subroutine Dipole_orbital_magnetization(Xen,Xk,Dip)
            Eb=Xen%E(ib,ik,i_sp_pol)
            if (allocated(Xen%Eo)) Eb=Xen%Eo(ib,ik,i_sp_pol)
            !
-           if (iv< ib) DIP_pos(:,1)=conjg(-cI*DIP_iR(:,ib,iv,ik_mem,i_sp_pol))
-           if (iv>=ib) DIP_pos(:,1)=      -cI*DIP_iR(:,iv,ib,ik_mem,i_sp_pol)
-           if (ic< ib) DIP_pos(:,2)=conjg(-cI*DIP_iR(:,ib,ic,ik_mem,i_sp_pol))
-           if (ic>=ib) DIP_pos(:,2)=      -cI*DIP_iR(:,ic,ib,ik_mem,i_sp_pol)
+           !if (iv< ib) DIP_pos(:,1)=conjg(-cI*DIP_iR(:,ib,iv,ik_mem,i_sp_pol))
+           !if (iv>=ib) DIP_pos(:,1)=      -cI*DIP_iR(:,iv,ib,ik_mem,i_sp_pol)
+           !if (ic< ib) DIP_pos(:,2)=conjg(-cI*DIP_iR(:,ib,ic,ik_mem,i_sp_pol))
+           !if (ic>=ib) DIP_pos(:,2)=      -cI*DIP_iR(:,ic,ib,ik_mem,i_sp_pol)
+
+           ! conjg for first r because of how cross_product is coded in mod_vec_operate.F
+           DIP_pos(:,1)= conjg(-cI*DIP_iR(:,ib,iv,ik_mem,i_sp_pol))
+           DIP_pos(:,2)= -cI*DIP_iR(:,ic,ib,ik_mem,i_sp_pol)
            !
-           DIP_tmp=-cI*cross_product(DIP_pos(:,1),DIP_pos(:,2))
+           !DIP_tmp=-cI*cross_product(DIP_pos(:,1),DIP_pos(:,2))
+           DIP_tmp=cI*cross_product(DIP_pos(:,1),DIP_pos(:,2))
            !
            ! Local part
            !
@@ -100,6 +106,6 @@ subroutine Dipole_orbital_magnetization(Xen,Xk,Dip)
  !
  call live_timing()
  !
- call timing("DIPOLE_orbital_M",OPR="stop")
+ call timing("DIPOLE_orbital_M_RT",OPR="stop")
  !
-end subroutine Dipole_orbital_magnetization
+end subroutine Dipole_orbital_magnetization_rt
diff --git a/src/dipoles/DIPOLE_overlaps.F b/src/dipoles/DIPOLE_overlaps.F
index 2f7a6968f8..3b1b7cfca5 100644
--- a/src/dipoles/DIPOLE_overlaps.F
+++ b/src/dipoles/DIPOLE_overlaps.F
@@ -4,21 +4,28 @@
 ! Copyright (C) 2015 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): MG CA
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
 !      
 !> @brief Calculate overlaps between wave-function <\psi_k | \psi_{k+q} > for q = 1,2
 !
+!! @param[it]       Xk                  k-points sampling for the xhi
+!! @param[it]       Dip                 Dip dipoles range
 !! @param[out]      DIP_S               Overlap matrices
 !
-#include<dev_defs.h>
 !
 subroutine DIPOLE_overlaps(Xk,Dip)
  !    
  use pars,              ONLY:SP,cZERO,pi
  use LIVE_t,            ONLY:live_timing
  use R_lattice,         ONLY:k_map,bz_samp,WF_shifts,G_m_G
- use wave_func,         ONLY:WF
+ use wave_func,         ONLY:WF,WF_load_mode
  use vec_operate,       ONLY:k_periodic_idx
  use DIPOLES,           ONLY:DIPOLE_t,DIP_S
+ use ALLOC,             ONLY:DIPOLE_ALLOC_elemental
  use electrons,         ONLY:n_spinor,n_sp_pol
  use wrapper,           ONLY:Vstar_dot_V_gpu
  use wave_func,         ONLY:wf_ng_1st_BZ,wf_ng_overlaps,wf_ng
@@ -29,8 +36,9 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  use interfaces,        ONLY:WF_shift_kpoint,eval_G_minus_G,WF_load,WF_free
  use gpu_m,             ONLY:have_gpu
  use timing_m,          ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)    :: Xk 
  type(DIPOLE_t),intent(in)    :: Dip
@@ -51,12 +59,23 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  !
  call timing("DIPOLE_overlaps",OPR="start")
  !
+ if(.not.allocated(k_map%k_map_dir)) call k_map_nearest_by_dir(Xk,k_map)
+ if(.not.allocated(Xk%k_table))      call k_build_up_BZ_tables(Xk)
+ !
+ DIP_S=cZERO
+ !
  call k_find_smallest_g0(g0_idx,g0_length)
- iG0=max(maxval(g0_idx(:,1)),maxval(g0_idx(:,2)))
+ iG0=maxval(g0_idx)
  !
  ! Generate G_m_G of the needed size
  !
  YAMBO_FREE(G_m_G)
+ ng_tmp=eval_G_minus_G(iG0,iG0,opr_in="+")
+ !
+ ! New iG0 for composed shifts
+ iG0=maxval(G_m_G)
+ !
+ YAMBO_FREE(G_m_G)
  ng_tmp=eval_G_minus_G(wf_ng_1st_BZ,iG0)
  !
  wf_ng_save       =wf_ng
@@ -75,14 +94,17 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  call PARALLEL_WF_distribute(K_index=PAR_IND_OVLPk_ibz,CLEAN_UP=.TRUE.)
  !
  call PARALLEL_WF_index( )
+ !
+ if (trim(WF_load_mode)=="all") call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/1,Xk%nibz/),space='G',title='-Overlaps')
+
  !
  YAMBO_ALLOC_GPU(WF_symm,   (wf_ng_1st_BZ,n_spinor,1))
  YAMBO_ALLOC_GPU(WF_ikp,    (wf_ng_overlaps,n_spinor,1))
  YAMBO_ALLOC_GPU(WF_ik,     (wf_ng_overlaps,n_spinor,Dip%ib(2)))
  !
-!$OMP WORKSHARE
+!$OMP PARALLEL WORKSHARE
  DIP_S=cZERO
-!$OMP END WORKSHARE
+!$OMP END PARALLEL WORKSHARE
  !
  call live_timing("Overlaps",PAR_IND_DIPk_bz%n_of_elements(PAR_IND_DIPk_bz_ID+1)*n_sp_pol)
  !
@@ -97,8 +119,10 @@ subroutine DIPOLE_overlaps(Xk,Dip)
      ik = Xk%sstar(ikbz,1)
      is = Xk%sstar(ikbz,2)
      !
-     call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
-     &            space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
+     if (trim(WF_load_mode)=="on-the-fly") then
+       call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
+       &            space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
+     endif
      !
      do ib=1,Dip%ib(2)
        !
@@ -112,7 +136,7 @@ subroutine DIPOLE_overlaps(Xk,Dip)
        !
      enddo
      !
-     call WF_free(WF)
+     if (trim(WF_load_mode)=="on-the-fly") call WF_free(WF)
      !
      do id=1,3  ! Loop on the three crystal directions in the BZ
        !
@@ -136,8 +160,10 @@ subroutine DIPOLE_overlaps(Xk,Dip)
          ikp = Xk%sstar(ikbzp,1)
          isp = Xk%sstar(ikbzp,2)
          !
-         call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ikp,ikp/),(/i_sp_pol,i_sp_pol/),&
-         &              space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
+         if (trim(WF_load_mode)=="on-the-fly") then
+           call WF_load(WF,0,1,(/1,Dip%ib(2)/),(/ikp,ikp/),(/i_sp_pol,i_sp_pol/),&
+           &              space='G',title='-Oscill/G space/Overlaps',quiet=.true.)
+         endif
          !
          ! Shift the wave-function by a G-vector if the neighbor is out of the BZ (USE_shifed_wf=.true.)
          shift=WF_shifts(ikbzp,:)
@@ -151,14 +177,17 @@ subroutine DIPOLE_overlaps(Xk,Dip)
            ! 2) the orignal points ikbz that corresponds to (k+q) was 
            !    shifted in the opposite direction the two
            !    shifts cancel out and I get shift==0 ==> WF_shifts==-1
-           ! 3) the orignal points ibbz that corresponds to (k+q) was
+           ! 3) the orignal points ikbz that corresponds to (k+q) was
            !    already shifted in the right direction I keep this shift
            !    case WF_shifts==1    
            ! 
            ! see also comments in src/bz_ops/k_map_nearest_by_dir.F 
            ! 
+           ! WF_shifts==1 --> kpt(id) < 0
            if(WF_shifts(ikbzp,id)==1) shift(id)=1
+           ! WF_shifts==0 --> 0 < kpt(id) < 1
            if(WF_shifts(ikbzp,id)==0) shift(id)=1
+           ! WF_shifts==-1 --> kpt(id) > 1
            if(WF_shifts(ikbzp,id)==-1) shift(id)=0
            !
          endif
@@ -176,7 +205,7 @@ subroutine DIPOLE_overlaps(Xk,Dip)
            enddo ! ibp
          enddo ! ib
          !
-         call WF_free(WF)
+         if (trim(WF_load_mode)=="on-the-fly") call WF_free(WF)
          !
        enddo ! istep
      enddo ! idir
@@ -210,7 +239,7 @@ subroutine DIPOLE_overlaps(Xk,Dip)
  YAMBO_FREE(WF_ik)
  YAMBO_FREE(WF_ikp)
  !
- call WF_free(WF)
+ if (trim(WF_load_mode)=="all") call WF_free(WF)
  !
  call timing("DIPOLE_overlaps",OPR="stop")
  !
diff --git a/src/dipoles/DIPOLE_p_matrix_elements.F b/src/dipoles/DIPOLE_p_matrix_elements.F
index f2c8e0f1ca..dbbf9940e3 100644
--- a/src/dipoles/DIPOLE_p_matrix_elements.F
+++ b/src/dipoles/DIPOLE_p_matrix_elements.F
@@ -5,6 +5,8 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
 #include<dev_defs.h>
 !
 subroutine DIPOLE_p_matrix_elements(Xk,iv_wf,ic_wf,ik,rho,rho_spinor,P2,l_eval_P2,l_eval_M_spin)
diff --git a/src/dipoles/DIPOLE_setup.F b/src/dipoles/DIPOLE_setup.F
index f4a70422b2..ba67952e67 100644
--- a/src/dipoles/DIPOLE_setup.F
+++ b/src/dipoles/DIPOLE_setup.F
@@ -9,9 +9,10 @@ subroutine DIPOLE_setup(Dip)
  !
  use drivers,        ONLY:l_real_time,l_sc_run,l_nl_optics
  use DIPOLES,        ONLY:DIPOLE_t,x_space_obs,g_space_obs,covariant_obs,shifted_grids_obs,&
-&                         specific_obs,eval_OVERLAPS
+&                         der_k_obs,specific_obs,eval_OVERLAPS
  use IO_m,           ONLY:io_DIP
- use BS,             ONLY:BSE_prop,l_BS_magnons,l_BS_dichroism,l_BS_photolum
+ use BS,             ONLY:BSE_prop,l_BS_magnons,l_BS_dichroism,l_BS_photolum,&
+&                         l_BS_mespin,l_BS_meorb
  use electrons,      ONLY:n_spin,n_spinor
  use parser_m,       ONLY:parser
  use stderr,         ONLY:STRING_same,STRING_match
@@ -25,6 +26,7 @@ subroutine DIPOLE_setup(Dip)
  !
  ! Work Space
  !
+ logical           :: l_flag
 #if defined _NL
  logical           :: l_dip_appr_from_user
 #endif
@@ -45,10 +47,21 @@ subroutine DIPOLE_setup(Dip)
  l_BS_magnons=   (index(BSE_prop,'magn')/=0)
  l_BS_dichroism= (index(BSE_prop,'dich')/=0)
  l_BS_photolum=  (index(BSE_prop,'photolum')/=0)
+ l_BS_mespin=    (index(BSE_prop,'MEspin')/=0)
+ l_BS_meorb=     (index(BSE_prop,'MEorb')/=0)
+ call parser('ImposeAsymBSE',l_flag)
+ if(.not.l_flag) call parser('ImposeFullBSE',l_flag)
  !
  ! Bands ordering
  !----------------
- if (l_BS_dichroism.or.l_BS_magnons .or. l_BS_photolum) Dip%bands_ordered=.false.
+ if (l_BS_dichroism.or. &
+&    l_BS_magnons  .or. &
+&    l_BS_photolum .or. &
+&    l_flag        .or. &
+&    l_BS_meorb) then
+  Dip%bands_ordered=.false.
+endif
+ !
 #if defined _SLEPC && !defined _NL
  !if ( index(BSS_mode,'s')/=0 )  Dip%bands_ordered=.false.
 #endif
@@ -97,6 +110,10 @@ subroutine DIPOLE_setup(Dip)
    covariant_obs="S R V"
    if (Dip%force_v_g_space) covariant_obs="S R"
    eval_OVERLAPS=.true.
+ else if (STRING_same(Dip%approach,"derk")) then
+   der_k_obs="S R V"
+   if (Dip%force_v_g_space) der_k_obs="S R"
+   eval_OVERLAPS=.true.
  else if (STRING_same(Dip%approach,"shifted grids")) then
    shifted_grids_obs="R V"
    if (Dip%force_v_g_space) shifted_grids_obs="R"
@@ -114,9 +131,11 @@ subroutine DIPOLE_setup(Dip)
 #if defined _RT
  if (l_real_time) then
    g_space_obs=trim(g_space_obs)//" P2"
+ else
+#endif
+   if (STRING_match(Dip%computed,"p2")) g_space_obs=trim(g_space_obs)//" P2"
+#if defined _RT
  endif
-#else
- if (STRING_match(Dip%computed,"p2")) g_space_obs=trim(g_space_obs)//" P2"
 #endif
  !
  ! ... P_spinor
@@ -127,23 +146,29 @@ subroutine DIPOLE_setup(Dip)
    if (STRING_match(Dip%computed,"spin").or.l_BS_magnons) g_space_obs=trim(g_space_obs)//" P_spinor"
 #endif
  endif
- ! ...M_spin, M_orb, M_iti, M_orb_CD
+ ! ...M_spin, M_orb (CD or RT version), M_iti
  specific_obs=""
  if (n_spin>1) then
-   if (STRING_match(Dip%computed,"spin").or.l_BS_magnons) specific_obs=" M_spin"
-   if (STRING_match(Dip%computed,"orb")) specific_obs=trim(specific_obs)//" M_orb M_it"
+   if (STRING_match(Dip%computed,"spin").or.&
+       l_BS_magnons.or.&
+       l_BS_mespin) specific_obs=" M_spin"
+   if (STRING_match(Dip%computed,"orb")) then 
+     specific_obs=trim(specific_obs)//" M_orbRT M_it"
+   endif
 #if defined _RT
    ! Always force all dipoles in real-time case
-   if (l_real_time) specific_obs=" M_spin M_orb M_it"
+   if (l_real_time) specific_obs=" M_spin M_orbRT M_it"
 #endif
  endif
- if (l_BS_dichroism) specific_obs=trim(specific_obs)//" M_CD_orb"
+ if (l_BS_dichroism) specific_obs=trim(specific_obs)//" M_orbCD"
+ if (l_BS_meorb) specific_obs=trim(specific_obs)//" M_orbRT M_it"
  ! 
  ! Computed components
  !---------------------
  Dip%computed=" "
  if (len_trim(x_space_obs  )    >0) Dip%computed=trim(x_space_obs)//" [X-space]"
- if (len_trim(covariant_obs)    >0) Dip%computed=trim(covariant_obs)//" [covarian]"
+ if (len_trim(covariant_obs)    >0) Dip%computed=trim(covariant_obs)//" [covariant]"
+ if (len_trim(der_k_obs)        >0) Dip%computed=trim(der_k_obs)//" [derk]"
  if (len_trim(shifted_grids_obs)>0) Dip%computed=trim(shifted_grids_obs)//" [shift-grids]"
  if (len_trim(Dip%computed)==0) then
    Dip%computed=trim(g_space_obs)//" [G-space]"
diff --git a/src/dipoles/DIPOLE_shifted_grids.F b/src/dipoles/DIPOLE_shifted_grids.F
index 8e626eb522..22ba8c41f9 100644
--- a/src/dipoles/DIPOLE_shifted_grids.F
+++ b/src/dipoles/DIPOLE_shifted_grids.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM CH DS
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
  !
  ! Using the wfc at the k,k-q points to construct <r>
@@ -61,16 +64,16 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
  !
  character(schlen)            :: grid_paths_vector(3)
  logical                      :: idir_not_done(3)
- integer                      :: ik,ik_mem,ic_wf,iv_wf,iv_wf_sh,i_grp,ic,iv,iv_sh,iq,n_LT_steps,&
-&                                i_spin,i_spinor,i_sp_pol,ir,grid_order(3),ic_min
+ integer                      :: ik,ik_mem,ic_wf,iv_wf,iv_wf_sh,i_grp,ic,iv,in_d,iq,n_LT_steps,&
+&                                i_spinor,i_sp_pol,ir,grid_order(3),ic_min
  integer                      :: first_el(Dip%ib_lim(1)-Dip%ib(1)+1),n_of_el(Dip%ib_lim(1)-Dip%ib(1)+1),n_deg_grp
- complex(SP)                  :: rho,DIP_rotate(Dip%ib(1):Dip%ib_lim(1),Dip%ib(1):Dip%ib_lim(1)),&
-&                                DIP_tmp(Dip%ib_lim(2):Dip%ib(2),Dip%ib(1):Dip%ib_lim(1))
+ complex(SP)                  :: rho,DIP_O(Dip%ib(1):Dip%ib_lim(1),Dip%ib(1):Dip%ib_lim(1)),&
+&                                DIP_S(Dip%ib_lim(2):Dip%ib(2),Dip%ib(1):Dip%ib_lim(1)),DIP_D(2)
  real(SP)                     :: Ev_m_Ec_KS,q_mod(3)
  !
  !ioDB1 shifted grids
  !
- type(levels)                 :: E_shift
+ type(levels)                 :: E_shift(3)
  type(bz_samp)                :: Xk_shift
  type(WAVEs)                  :: WF_shifted
  !
@@ -100,6 +103,7 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
  grid_order=0
  !
  if(.not.Dip%force_v_g_space) Dip%Vnl_included=.true.
+ ! if (STRING_match(shifted_grids_obs,"V")) Dip%Vnl_included=.true.
  !
  if ( .not.all(PAR_IND_VAL_BANDS_DIP%element_1D(Dip%ib(1):Dip%ib_lim(1))) ) &
 &  call error(' DIPOLES Parallelization over v bands not accepted')
@@ -110,9 +114,11 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
    !
    call msg('s','loading info from '//trim(grid_paths_vector(iq))//' ')
    !
-   io_err(iq)=io_DB1_selective_scan('all',trim(grid_paths_vector(iq)),E_shift,Xk_shift)
+   io_err(iq)=io_DB1_selective_scan('all',trim(grid_paths_vector(iq)),E_shift(iq),Xk_shift)
    call set_parms(grid='shifted',command=' define')
    !
+   E_shift(iq)%E=E_shift(iq)%E-Xen%E_Fermi
+   !
    if (io_err(iq)/=0) call error(' Shifted grid not found in folder "'//trim(grid_paths_vector(iq))//'" ')
    !
    if (nkibz_save/=nkibz_shifted) &
@@ -161,20 +167,20 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
        !
        wf_ng_eff = min(wf_ng_save,wf_ng_shifted)
        !
-       call degeneration_finder(Xen%E(Dip%ib(1):Dip%ib_lim(1),ik,i_sp_pol),Dip%ib_lim(1)-Dip%ib(1)+1,&
-       &                           first_el,n_of_el,n_deg_grp,0.0001/HA2EV,Include_single_values=.true.)
+       call degeneration_finder(Dip%ib_lim(1)-Dip%ib(1)+1,first_el,n_of_el,n_deg_grp,&
+       &    Er=Xen%E(Dip%ib(1):Dip%ib_lim(1),ik,i_sp_pol),deg_accuracy=Dip%Energy_threshold,Include_single_values=.true.)
        !
        ! Notice that here I sum over the shifted subspace, assuming that it has equal or less degeneracies than the non shifted subspace
        !
-       DIP_tmp=cZERO
-       DIP_rotate=cZERO
+       DIP_S=cZERO
+       DIP_O=cZERO
        !
        do i_grp=1,n_deg_grp
-       do iv_sh=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
+       do in_d=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
          !
-         if(.not.PAR_IND_VAL_BANDS_DIP%element_1D(iv_sh)) cycle
+         if(.not.PAR_IND_VAL_BANDS_DIP%element_1D(in_d)) cycle
          !
-         iv_wf_sh=WF_shifted%index(iv_sh,ik,i_sp_pol)
+         iv_wf_sh=WF_shifted%index(in_d,ik,i_sp_pol)
          !
          if (iv_wf_sh==0) call error(" Error in parallel wf_shifted distribution (Dipole iv)")
          !
@@ -186,25 +192,25 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
            !
            if (iv_wf==0) call error(" Error in parallel wf distribution (Dipole iv)")
            !
-           ! DIP_rotate = < v,k+q | v,k >
+           ! DIP_O = < n,k+q | n,k > between connected bands only
            !
            do i_spinor=1,n_spinor
              !
              rho = Vstar_dot_V_gpu(wf_ng_eff,DEV_VAR(WF_shifted%c)(1:wf_ng_eff,i_spinor,iv_wf_sh), &
 &                                  DEV_VAR(WF%c)(1:wf_ng_eff,i_spinor,iv_wf))
              !
-             if(abs(rho)>1.E-3) DIP_rotate(iv_sh,iv)=DIP_rotate(iv_sh,iv)+conjg(rho)
+             if(abs(rho)>1.E-3) DIP_O(in_d,iv)=DIP_O(in_d,iv)+conjg(rho)
              !
            enddo
            !
          enddo
          !
-         iv=iv_sh
+         iv=in_d
          !
          ! loop over conduction
          !
-         ic_min=max(iv_sh,Dip%ib_lim(2))
-         if (l_X_terminator) ic_min=iv_sh
+         ic_min=max(in_d,Dip%ib_lim(2))
+         if (l_X_terminator) ic_min=in_d
          !
          do ic=ic_min,Dip%ib(2)
            !
@@ -212,14 +218,6 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
            !
            rho=cZERO
            !
-           Ev_m_Ec_KS   =Xen%E (iv,ik,i_sp_pol)-Xen%E (ic,ik,i_sp_pol)
-           if (allocated(Xen%Eo)) Ev_m_Ec_KS   =Xen%Eo(iv,ik,i_sp_pol)-Xen%Eo(ic,ik,i_sp_pol)
-           !
-           if (abs(Ev_m_Ec_KS)<=Dip%Energy_treshold .or. iv==ic) then
-             call live_timing(steps=1)
-             cycle
-           endif
-           !
            ic_wf=WF%index(ic,ik,i_sp_pol)
            !
            if (ic_wf==0) call error(" Error in parallel wf distribution (Dipole ic)")
@@ -233,12 +231,12 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
              !
            enddo
            !
-           DIP_tmp(ic,iv_sh)=rho/q0_shift_norm
+           DIP_S(ic,in_d)=rho
            !
            call live_timing(steps=1)
            !
          enddo  ! ic
-       enddo    ! iv_sh in i_grp
+       enddo    ! in_d in i_grp
        enddo    ! i_grp
        !
        do i_grp=1,n_deg_grp
@@ -249,30 +247,60 @@ subroutine DIPOLE_shifted_grids(Xen,Xk,Dip)
            !
            ! DEBUG <
            !rho=cZERO
-           !do iv_sh=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
-           !  rho=rho+abs(DIP_rotate(iv_sh,iv))**2
+           !do in_d=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
+           !  rho=rho+abs(DIP_O(in_d,iv))**2
            !enddo
-           !write(*,*) i_sp_pol,ik,iq,iv,rho
+           !ic=(first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1))-(first_el(i_grp)+Dip%ib(1))
+           !if (abs(ic)<=0) write(*,*) i_sp_pol,ik,iq,iv,ic+1,abs(rho)
+           !if (abs(ic)> 0) write(*,*) i_sp_pol,ik,iq,iv,ic+1,abs(rho),"  OK   "
            ! DEBUG >
            !
-           !======
-           ! <iR>
-           !======
-           do iv_sh=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
-             DIP_iR(ir,:,iv,ik_mem,i_sp_pol)=DIP_iR(ir,:,iv,ik_mem,i_sp_pol)+DIP_tmp(:,iv_sh)*DIP_rotate(iv_sh,iv)
-           enddo
-           !
-           !=====
-           ! <v>
-           !=====
-           if(.not.Dip%force_v_g_space) then
-             do ic=ic_min,Dip%ib(2)
-               Ev_m_Ec_KS   =Xen%E (iv,ik,i_sp_pol)-Xen%E (ic,ik,i_sp_pol)
-               if (allocated(Xen%Eo)) Ev_m_Ec_KS   =Xen%Eo(iv,ik,i_sp_pol)-Xen%Eo(ic,ik,i_sp_pol)
-               if (abs(Ev_m_Ec_KS)<=Dip%Energy_treshold .or. iv==ic) cycle
+           do ic=ic_min,Dip%ib(2)
+             !
+             !======
+             ! <iR>
+             !======
+             Ev_m_Ec_KS   =Xen%E (iv,ik,i_sp_pol)-Xen%E (ic,ik,i_sp_pol)
+             if (allocated(Xen%Eo)) Ev_m_Ec_KS   =Xen%Eo(iv,ik,i_sp_pol)-Xen%Eo(ic,ik,i_sp_pol)
+             !
+             if (abs(Ev_m_Ec_KS)>Dip%Energy_threshold) then
+               do in_d=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
+                 DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)=DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)+DIP_S(ic,in_d)*DIP_O(in_d,iv)/q0_shift_norm
+               enddo
+             else
+               ! This part needs to be fixed
+               do in_d=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
+                 DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)=cZERO
+                 !DIP_D(1)=DIP_S(ic,in_d)-DIP_O(ic,in_d)
+                 !DIP_D(2)=DIP_S(iv,in_d)-DIP_O(iv,in_d)
+                 !DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)=DIP_iR(ir,ic,iv,ik_mem,i_sp_pol) &
+                 !&      +(DIP_D(1)*DIP_O(in_d,iv)-conjg(DIP_O(in_d,ic)*DIP_D(2)))/2._SP/q0_shift_norm
+	               !! This is the equation according to the notes.
+		             !! However it looks like here DIP_O = O^\dag
+		             !! In case the above equation is the correct one
+                 !!& +(DIP_D(1)*conjg(DIP_O(iv,in_d))-DIP_O(ic,in_d)*conjg(DIP_D(2)))/2._SP/q0_shift_norm
+               enddo
+             endif
+             !
+             !=====
+             ! <v>
+             !=====
+             if(.not.Dip%force_v_g_space) then
                DIP_v(ir,ic,iv,ik_mem,i_sp_pol)=DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)*Ev_m_Ec_KS
-             enddo
-           endif
+               if (iv==ic) then
+                 DIP_v(ir,ic,iv,ik_mem,i_sp_pol)=DIP_v(ir,ic,iv,ik_mem,i_sp_pol)+&
+                 &    (E_shift(iq)%E(ic,ik,i_sp_pol)-Xen%E(iv,ik,i_sp_pol))/q0_shift_norm
+               endif
+               ! All the diagonal <v> dipoles are identical to the case where the evaluation is directly done in G-space
+               ! The only exception are the degenerate subspaces which in G-space have an extra contribution
+               ! To check if something on the line below here can give that
+               !if (abs(Ev_m_Ec_KS)<=Dip%Energy_threshold) then
+               !  DIP_v(ir,ic,iv,ik_mem,i_sp_pol)=DIP_v(ir,ic,iv,ik_mem,i_sp_pol)+&
+               !  &    (E_shift(iq)%E(ic,ik,i_sp_pol)-Xen%E(iv,ik,i_sp_pol))/q0_shift_norm*abs(DIP_O(ic,iv))**2
+               !endif
+             endif
+             !
+           enddo
            !
          enddo
        enddo
@@ -308,7 +336,7 @@ subroutine DB1_reset()
      ! not rewritten.
      !
      core_io_path = core_io_path_save
-     io_err(1)=io_DB1_selective_scan('all',trim(core_io_path),E_shift,Xk_shift)
+     io_err(1)=io_DB1_selective_scan('all',trim(core_io_path),E_shift(1),Xk_shift)
      !
      call set_parms(grid='  local', command='  reset')
      !
@@ -347,3 +375,353 @@ subroutine set_parms(grid,command)
    end subroutine set_parms
    !
 end subroutine DIPOLE_shifted_grids
+!
+!
+subroutine DIPOLE_shifted_grids_as_derk(Xen,Xk,Dip)
+  !
+  ! Using the wfc at the k,k-q points to construct <r>
+  !
+  !  i q.<nk|r|mk> =  \sum_i=1,3 q(i) <u_n(k+q0(i))|u_mk>/q0(i)
+  !
+  use units,               ONLY:HA2EV
+  use pars,                ONLY:SP,cZERO,cONE,cI,lchlen,schlen
+  use drivers,             ONLY:l_sc_run
+  use com,                 ONLY:msg,core_io_path,grid_paths
+  use stderr,              ONLY:intc,STRING_split
+  use LIVE_t,              ONLY:live_timing
+  use electrons,           ONLY:levels,n_spinor,n_sp_pol
+  use parallel_m,          ONLY:PAR_IND_DIPk_ibz,PAR_DIPk_ibz_index,PAR_IND_DIPk_ibz_ID,&
+ &                              PAR_IND_VAL_BANDS_DIP,PAR_IND_VAL_BANDS_DIP_ID,&
+ &                              PAR_IND_CON_BANDS_DIP,PAR_IND_CON_BANDS_DIP_ID
+  use interfaces,          ONLY:WF_load,WF_free,io_DB1_selective_scan
+  use R_lattice,           ONLY:bz_samp,nkibz,q0_shift_norm
+  use X_m,                 ONLY:l_X_terminator
+  use DIPOLES,             ONLY:DIPOLE_t,DIP_iR,DIP_v,num_shifted_grids,shifted_grids_obs
+  use vec_operate,         ONLY:degeneration_finder
+  use matrix_operate,      ONLY:hermitian
+  use timing_m,            ONLY:timing
+  use wrapper,             ONLY:Vstar_dot_V_gpu
+  use wave_func,           ONLY:wf_ng,wf_igk,WF,wf_ncx,WAVEs
+#if defined _SC           
+  use SC,                  ONLY:SC_bands
+#endif
+#if defined _RT
+  use real_time,           ONLY:RT_bands
+#endif
+  !
+  implicit none
+  !
+  type(bz_samp), intent(in)    :: Xk
+  type(levels),  intent(in)    :: Xen
+  type(DIPOLE_t),intent(inout) :: Dip
+  !
+  ! Saves
+  !
+  integer                      :: wf_ng_save
+  character(lchlen)            :: core_io_path_save
+  integer                      :: nkibz_save, wf_ncx_save, wf_ncx_shifted, nkibz_shifted
+  integer                      :: wf_ng_eff, wf_ng_shifted
+#if defined _SC
+  integer                      :: SC_bands_save(2)
+#endif
+#if defined _RT
+  integer                      :: RT_bands_save(2)
+#endif
+  !
+  ! Work Space
+  !
+  character(schlen)            :: grid_paths_vector(3)
+  logical                      :: idir_not_done(3)
+  integer                      :: ik,ik_mem,ic_wf,iv_wf,iv_wf_sh,i_grp,ic,iv,in_d,in_dp,iq,n_LT_steps,&
+  &                               i_spinor,i_sp_pol,ir,grid_order(3),ic_min
+  integer                      :: first_el(Dip%ib_lim(1)-Dip%ib(1)+1),n_of_el(Dip%ib_lim(1)-Dip%ib(1)+1),n_deg_grp
+  complex(SP)                  :: rho,&
+  &                               Identity(Dip%ib(1):Dip%ib_lim(1),Dip%ib(1):Dip%ib_lim(1)),&
+  &                               DIP_S(Dip%ib(1):Dip%ib_lim(1),Dip%ib(1):Dip%ib_lim(1)),&
+  &                               DIP_O(Dip%ib_lim(2):Dip%ib(2),Dip%ib(1):Dip%ib_lim(1)),&
+  &                               DIP_Q(Dip%ib_lim(2):Dip%ib(2),Dip%ib(1):Dip%ib_lim(1)),&
+  &                               Der_k(Dip%ib_lim(2):Dip%ib(2),Dip%ib(1):Dip%ib_lim(1))
+  real(SP)                     :: Ev_m_Ec_KS,q_mod(3)
+  logical                      :: deg_table_k(Dip%ib(1):Dip%ib_lim(1),Dip%ib(1):Dip%ib_lim(1))
+  !
+  !ioDB1 shifted grids
+  !
+  type(levels)                 :: E_shift(3)
+  type(bz_samp)                :: Xk_shift
+  type(WAVEs)                  :: WF_shifted
+  !
+  !I/O and external functions
+  !
+  integer                      :: io_err(3)
+  !
+  call timing("DIPOLE_shifted_grids",OPR="start")
+  !
+  call set_parms(grid='  local', command=' define')
+  !
+  if (wf_ng.lt.0.8_SP*maxval(wf_igk)) &
+ &   call warning('All/most WF G-vectors should be used in the case of shifted grids')
+  !
+  grid_paths_vector=' '
+  call STRING_split(grid_paths,grid_paths_vector)
+  !
+  num_shifted_grids=0
+  do iq=1,3
+    if(trim(grid_paths_vector(iq))/=' ') num_shifted_grids=num_shifted_grids+1
+  enddo
+  !
+  if(num_shifted_grids==0) call error(' No shifted grid specified in input')
+  !
+  ! Check grids and determine grid displacement
+  !
+  grid_order=0
+  !
+  if(.not.Dip%force_v_g_space) Dip%Vnl_included=.true.
+  ! if (STRING_match(shifted_grids_obs,"V")) Dip%Vnl_included=.true.
+  !
+  if ( .not.all(PAR_IND_VAL_BANDS_DIP%element_1D(Dip%ib(1):Dip%ib_lim(1))) ) &
+ &  call error(' DIPOLES Parallelization over v bands not accepted')
+  !
+  idir_not_done=.true.
+  !
+  do iq =1,num_shifted_grids
+    !
+    call msg('s','loading info from '//trim(grid_paths_vector(iq))//' ')
+    !
+    io_err(iq)=io_DB1_selective_scan('all',trim(grid_paths_vector(iq)),E_shift(iq),Xk_shift)
+    call set_parms(grid='shifted',command=' define')
+    !
+    E_shift(iq)%E=E_shift(iq)%E-Xen%E_Fermi
+    !
+    if (io_err(iq)/=0) call error(' Shifted grid not found in folder "'//trim(grid_paths_vector(iq))//'" ')
+    !
+    if (nkibz_save/=nkibz_shifted) &
+ &    call error(' Shifted grid in folder "'//trim(grid_paths_vector(iq))//'": k-points not consistent')
+    !
+    if (wf_ncx_save/=wf_ncx_shifted) &
+ &    call warning(' Shifted grid in folder "'//trim(grid_paths_vector(iq))//'": change in number of WFs components')
+    !
+    call DIPOLE_check_shifted_grids( Xk_shift, Xk, iq, num_shifted_grids, q_mod, grid_order)
+    !
+    idir_not_done(grid_order(iq))=.false.
+    !
+  enddo
+  !
+  call msg('r','Shifted grids found '//trim(intc(num_shifted_grids)))
+  !
+  if (num_shifted_grids<3) call warning(' Shifted grids are '//trim(intc(num_shifted_grids))//'. Assuming non periodic system.')
+  !
+  n_LT_steps=n_sp_pol*PAR_IND_DIPk_ibz%n_of_elements(PAR_IND_DIPk_ibz_ID+1)*&
+ &                    PAR_IND_VAL_BANDS_DIP%n_of_elements(PAR_IND_VAL_BANDS_DIP_ID+1)*&
+ &                    PAR_IND_CON_BANDS_DIP%n_of_elements(PAR_IND_CON_BANDS_DIP_ID+1)*num_shifted_grids
+  !
+  call live_timing(trim(shifted_grids_obs)//" [shifted grids]",n_LT_steps)
+  !
+  Identity=cZERO
+  do in_d=Dip%ib(1),Dip%ib_lim(1)
+    Identity(in_d,in_d)=cONE
+  enddo
+  !
+  do i_sp_pol = 1,n_sp_pol
+    !
+    do ik = 1, Xk%nibz
+      !
+      if (.not.PAR_IND_DIPk_ibz%element_1D(ik)) cycle
+      !
+      ik_mem=PAR_DIPk_ibz_index(ik)
+      !
+      ! Load WFs first at k and then at k+q_iq
+      ! 
+      call WF_load(WF,0,1,Dip%ib,(/ik,ik/),sp_pol_to_load=(/i_sp_pol,i_sp_pol/),space='G',&
+ &           title='-Oscillators/G space/Shifted',force_WFo=l_sc_run,keep_states_to_load=.TRUE.)
+      !
+      call degeneration_finder(Dip%ib_lim(1)-Dip%ib(1)+1,first_el,n_of_el,n_deg_grp,&
+      &    Er=Xen%E(Dip%ib(1):Dip%ib_lim(1),ik,i_sp_pol),deg_accuracy=Dip%Energy_threshold,Include_single_values=.true.)
+      !
+      deg_table_k=.false.
+      do i_grp=1,n_deg_grp
+        do in_d=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+Dip%ib(1)-1+n_of_el(i_grp)-1
+          do in_dp=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+Dip%ib(1)-1+n_of_el(i_grp)-1
+            deg_table_k(in_d,in_dp)=.true.
+            deg_table_k(in_dp,in_d)=.true.
+          enddo
+        enddo
+      enddo
+       !
+      do iq = 1, num_shifted_grids
+        !
+        ir = grid_order(iq)
+        !
+        core_io_path = trim(grid_paths_vector(iq))
+        !
+        call WF_load(WF_shifted,0,1,(/Dip%ib(1),Dip%ib_lim(1)/),(/ik,ik/),space='G',&
+ &      title='-Oscillators/G space (shifted grid)',keep_states_to_load=.TRUE.)
+        !
+        wf_ng_eff = min(wf_ng_save,wf_ng_shifted)
+        !
+        DIP_S=cZERO
+        !
+        ! Notice that here I sum over the shifted subspace, assuming that it has equal or less degeneracies than the non shifted subspace
+        !
+        do i_grp=1,n_deg_grp
+          do in_d=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
+            !
+            if(.not.PAR_IND_VAL_BANDS_DIP%element_1D(in_d)) cycle
+            !
+            iv_wf_sh=WF_shifted%index(in_d,ik,i_sp_pol)
+            !
+            if (iv_wf_sh==0) call error(" Error in parallel wf_shifted distribution (Dipole iv)")
+            !
+            iv=in_d
+            !
+            ! loop over conduction
+            !
+            ic_min=max(in_d,Dip%ib_lim(2))
+            if (l_X_terminator) ic_min=in_d
+            !
+            do ic=ic_min,Dip%ib(2)
+              !
+              if(.not.PAR_IND_CON_BANDS_DIP%element_1D(ic)) cycle
+              !
+              rho=cZERO
+              !
+              ic_wf=WF%index(ic,ik,i_sp_pol)
+              !
+              if (ic_wf==0) call error(" Error in parallel wf distribution (Dipole ic)")
+              !
+              ! S_{cv}(k,k+q) = < c k | v k+q >
+              !
+              do i_spinor=1,n_spinor
+                !
+                rho = rho + Vstar_dot_V_gpu(wf_ng_eff,DEV_VAR(WF%c)(1:wf_ng_eff,i_spinor,ic_wf), &
+                &                            DEV_VAR(WF_shifted%c)(1:wf_ng_eff,i_spinor,iv_wf_sh))
+                !
+              enddo
+              !
+              DIP_S(ic,in_d)=rho
+              !
+              call live_timing(steps=1)
+              !
+            enddo  ! ic
+          enddo    ! in_d in i_grp
+        enddo    ! i_grp
+        !
+        DIP_O=DIP_S
+        do in_d=Dip%ib(1),Dip%ib_lim(1)
+          do in_dp=Dip%ib(1),Dip%ib_lim(1)
+            if(deg_table_k(in_d,in_dp)) cycle
+            DIP_O(in_d,in_dp)=cZERO
+          enddo
+        enddo
+        DIP_Q=DIP_O
+        call SERIAL_SVD(Dip%ib(2)-Dip%ib_lim(2),DIP_Q,'uni',0)
+        !
+        ! Yambo version
+        Der_k=-cI*(matmul(DIP_S,hermitian(DIP_Q) )-Identity)/q0_shift_norm
+        ! Virk-Sipe version
+        !Der_k=-cI*(matmul((DIP_S-DIP_O),hermitian(DIP_Q) )-Identity)/q0_shift_norm
+        !
+        do i_grp=1,n_deg_grp
+          do iv=first_el(i_grp)+Dip%ib(1)-1,first_el(i_grp)+n_of_el(i_grp)-1+Dip%ib(1)-1
+            !
+            ic_min=max(iv,Dip%ib_lim(2))
+            if (l_X_terminator) ic_min=iv
+            !
+            do ic=ic_min,Dip%ib(2)
+              !
+              ! In yambo there is a band inversion in the definition of the dipoels
+              ! which results in a conjg factor
+              DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)=conjg(cI*Der_k(ic,iv))
+              !
+              !=====
+              ! <v>
+              !=====
+              if(.not.Dip%force_v_g_space) then
+                DIP_v(ir,ic,iv,ik_mem,i_sp_pol)=DIP_iR(ir,ic,iv,ik_mem,i_sp_pol)*Ev_m_Ec_KS
+                if (iv==ic) then
+                  DIP_v(ir,ic,iv,ik_mem,i_sp_pol)=DIP_v(ir,ic,iv,ik_mem,i_sp_pol)+&
+                  &    (E_shift(iq)%E(ic,ik,i_sp_pol)-Xen%E(iv,ik,i_sp_pol))/q0_shift_norm
+                endif
+                ! All the diagonal <v> dipoles are identical to the case where the evaluation is directly done in G-space
+                ! The only exception are the degenerate subspaces which in G-space have an extra contribution
+                ! To check if something on the line below here can give that
+                !if (abs(Ev_m_Ec_KS)<=Dip%Energy_threshold) then
+                !  DIP_v(ir,ic,iv,ik_mem,i_sp_pol)=DIP_v(ir,ic,iv,ik_mem,i_sp_pol)+&
+                !  &    (E_shift(iq)%E(ic,ik,i_sp_pol)-Xen%E(iv,ik,i_sp_pol))/q0_shift_norm*abs(DIP_O(ic,iv))**2
+                !endif
+              endif
+              !
+            enddo
+            !
+          enddo
+        enddo
+        !
+        call WF_free(WF_shifted)
+        !
+      enddo      ! iq / ir
+      !
+      core_io_path = trim(core_io_path_save)
+      !
+      call WF_free(WF)
+      !
+    enddo        ! ik
+    !
+  enddo          ! i_sp_pol
+  !
+  call live_timing()
+  !
+  ! CLEAN
+  !
+  call DB1_reset()
+  !
+  if (any(idir_not_done)) call DIPOLE_x_real_space(Xen,Xk,Dip,idir_not_done)
+  !
+  call timing("DIPOLE_shifted_grids",OPR="stop")
+  !
+  contains
+    !
+    subroutine DB1_reset()
+      !
+      ! Reload g_vec, dl_sop ... from db1 using _shift types
+      ! so that all type components (like occupations) are 
+      ! not rewritten.
+      !
+      core_io_path = core_io_path_save
+      io_err(1)=io_DB1_selective_scan('all',trim(core_io_path),E_shift(1),Xk_shift)
+      !
+      call set_parms(grid='  local', command='  reset')
+      !
+    end subroutine
+    !
+    subroutine set_parms(grid,command)
+      implicit none
+      character(len=7), intent(in), optional :: grid, command
+      !
+      if(      grid.eq.'  local'.and. command.eq.' define') then
+        nkibz_save   = nkibz
+        wf_ncx_save  = wf_ncx
+        wf_ng_save   = wf_ng
+        core_io_path_save = core_io_path
+#if defined _SC       
+        SC_bands_save     = SC_bands
+#endif 
+#if defined _RT
+        RT_bands_save     = RT_bands
+#endif 
+      else if( grid.eq.'shifted'.and. command.eq.' define') then
+        nkibz_shifted  = nkibz
+        wf_ncx_shifted = wf_ncx
+        wf_ng_shifted  = wf_ng
+      else if( grid.eq.'  local'.and. command.eq.'  reset') then
+        core_io_path = core_io_path_save
+        wf_ng        = wf_ng_save
+#if defined _SC       
+        SC_bands = SC_bands_save
+#endif
+#if defined _RT
+        RT_bands = RT_bands_save
+#endif
+      endif
+      return
+    end subroutine set_parms
+    !
+ end subroutine DIPOLE_shifted_grids_as_derk
diff --git a/src/dipoles/DIPOLE_spin_magnetization.F b/src/dipoles/DIPOLE_spin_magnetization.F
index 65d6824f20..eb951c14ea 100644
--- a/src/dipoles/DIPOLE_spin_magnetization.F
+++ b/src/dipoles/DIPOLE_spin_magnetization.F
@@ -86,35 +86,35 @@ subroutine Dipole_spin_magnetization(Xen,Xk,dip)
          !
          ! mx
          !
-         DIP_spin(1,ic,iv,ik_mem,2,1)=    Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(1)),WF%c(:,1,ic_wf(2)))
-         DIP_spin(1,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(2)),WF%c(:,1,ic_wf(1)))
+         DIP_spin(1,ic,iv,ik_mem,2,1)=    Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(1)),WF%r(:,1,ic_wf(2)))
+         DIP_spin(1,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(2)),WF%r(:,1,ic_wf(1)))
          !
          ! my
          !
-         DIP_spin(2,ic,iv,ik_mem,2,1)=-cI*Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(1)),WF%c(:,1,ic_wf(2)))
-         DIP_spin(2,ic,iv,ik_mem,1,1)=+cI*Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(2)),WF%c(:,1,ic_wf(1)))
+         DIP_spin(2,ic,iv,ik_mem,2,1)=-cI*Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(1)),WF%r(:,1,ic_wf(2)))
+         DIP_spin(2,ic,iv,ik_mem,1,1)=+cI*Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(2)),WF%r(:,1,ic_wf(1)))
          !
          ! mz
          !
-         DIP_spin(3,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(1)),WF%c(:,1,ic_wf(1)))
-         DIP_spin(3,ic,iv,ik_mem,2,1)=   -Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(2)),WF%c(:,1,ic_wf(2)))
+         DIP_spin(3,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(1)),WF%r(:,1,ic_wf(1)))
+         DIP_spin(3,ic,iv,ik_mem,2,1)=   -Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(2)),WF%r(:,1,ic_wf(2)))
          !
        elseif (n_spinor==2) then
          !
          ! mx
          !
-         DIP_spin(1,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(1)),WF%c(:,2,ic_wf(1))) &
-         &                             +Vstar_dot_V(fft_size,WF%c(:,2,iv_wf(1)),WF%c(:,1,ic_wf(1)))
+         DIP_spin(1,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(1)),WF%r(:,2,ic_wf(1))) &
+         &                               +Vstar_dot_V(fft_size,WF%r(:,2,iv_wf(1)),WF%r(:,1,ic_wf(1)))
          !
          ! my
          !
-         DIP_spin(2,ic,iv,ik_mem,1,1)=-cI*Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(1)),WF%c(:,2,ic_wf(1))) &
-         &                          +cI*Vstar_dot_V(fft_size,WF%c(:,2,iv_wf(1)),WF%c(:,1,ic_wf(1)))
+         DIP_spin(2,ic,iv,ik_mem,1,1)=-cI*Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(1)),WF%r(:,2,ic_wf(1))) &
+         &                            +cI*Vstar_dot_V(fft_size,WF%r(:,2,iv_wf(1)),WF%r(:,1,ic_wf(1)))
          !
          ! mz
          !
-         DIP_spin(3,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%c(:,1,iv_wf(1)),WF%c(:,1,ic_wf(1))) &
-         &                             -Vstar_dot_V(fft_size,WF%c(:,2,iv_wf(1)),WF%c(:,2,ic_wf(1)))
+         DIP_spin(3,ic,iv,ik_mem,1,1)=    Vstar_dot_V(fft_size,WF%r(:,1,iv_wf(1)),WF%r(:,1,ic_wf(1))) &
+         &                               -Vstar_dot_V(fft_size,WF%r(:,2,iv_wf(1)),WF%r(:,2,ic_wf(1)))
          !
        endif
        !
diff --git a/src/dipoles/DIPOLE_x_real_space.F b/src/dipoles/DIPOLE_x_real_space.F
index b1f44c5b1f..63efe33de4 100644
--- a/src/dipoles/DIPOLE_x_real_space.F
+++ b/src/dipoles/DIPOLE_x_real_space.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine DIPOLE_x_real_space(Xen,Xk,Dip,idir_todo)
  !
  use drivers,             ONLY:l_sc_run
@@ -23,8 +27,9 @@ subroutine DIPOLE_x_real_space(Xen,Xk,Dip,idir_todo)
  use parallel_m,          ONLY:PAR_IND_DIPk_ibz,PAR_DIPk_ibz_index,PAR_IND_DIPk_ibz_ID,&
 &                              PAR_IND_VAL_BANDS_DIP,PAR_IND_VAL_BANDS_DIP_ID,&
 &                              PAR_IND_CON_BANDS_DIP,PAR_IND_CON_BANDS_DIP_ID
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)    :: Xk
  type(levels),  intent(in)    :: Xen
@@ -94,8 +99,8 @@ subroutine DIPOLE_x_real_space(Xen,Xk,Dip,idir_todo)
              if(.not.idir_todo(idir)) cycle
              !
              DIP_tmp(idir,ic,iv) = DIP_tmp(idir,ic,iv) + &
-&                       Vstar_dot_V_omp(fft_size,WF%c(1:fft_size,i_spinor,iv_wf), &
-&                             x_cc(1:fft_size,idir,1)*WF%c(1:fft_size,i_spinor,ic_wf) )
+&                       Vstar_dot_V_omp(fft_size,WF%r(1:fft_size,i_spinor,iv_wf), &
+&                             x_cc(1:fft_size,idir,1)*WF%r(1:fft_size,i_spinor,ic_wf) )
              !
            enddo
          enddo
diff --git a/src/dipoles/DOUBLE_project.dep b/src/dipoles/DOUBLE_project.dep
index 2859c70e1f..77ec21695c 100644
--- a/src/dipoles/DOUBLE_project.dep
+++ b/src/dipoles/DOUBLE_project.dep
@@ -2,6 +2,7 @@
  Build_Overlaps_Det_EQ.o
  DIPOLE_IO.o
  DIPOLE_SC_rotate.o
+ DIPOLE_build_der_k.o
  DIPOLE_check_shifted_grids.o
  DIPOLE_covariant.o
  DIPOLE_dimensions.o
@@ -14,8 +15,8 @@
  DIPOLE_kb_pwscf_comp.o
  DIPOLE_kb_pwscf_def_dim.o
  DIPOLE_kb_sum.o
- DIPOLE_orb_magn_forCD.o
- DIPOLE_orbital_magnetization.o
+ DIPOLE_orbital_magnetization_cd.o
+ DIPOLE_orbital_magnetization_rt.o
  DIPOLE_overlaps.o
  DIPOLE_p_matrix_elements.o
  DIPOLE_rotate.o
@@ -24,5 +25,6 @@
  DIPOLE_spin_magnetization.o
  DIPOLE_x_real_space.o
  Ionic_polarization.o
+ OVERLAPS_IO.o
  POLARIZATION_output.o
 
diff --git a/src/dipoles/Ionic_polarization.F b/src/dipoles/Ionic_polarization.F
index 9aad29bfb0..1821799077 100644
--- a/src/dipoles/Ionic_polarization.F
+++ b/src/dipoles/Ionic_polarization.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !                  
+!> @brief Calculate ionic contribution to the polariation
+!
+!! @param[it]       en                  KS energies
+!! @param[out]      I_Berry_RED         Ionic polarization in reduced coord.
+!
 subroutine Ionic_polarization(I_Berry_RED,en)
  !
  ! Calculate polarization due to ions
diff --git a/src/dipoles/OVERLAPS_IO.F b/src/dipoles/OVERLAPS_IO.F
new file mode 100644
index 0000000000..f4c0933fff
--- /dev/null
+++ b/src/dipoles/OVERLAPS_IO.F
@@ -0,0 +1,153 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2016 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS AM
+!
+subroutine OVERLAPS_IO(Xk,Xen,Dip,check_read_or_write,io_err,PAR_K_scheme_kind)
+ !
+ use pars,        ONLY:SP,cI,cZERO
+ use parser_m,    ONLY:parser
+ use stderr,      ONLY:intc
+ use com,         ONLY:msg
+ use IO_m,        ONLY:io_DIP
+ use R_lattice,   ONLY:bz_samp
+ use electrons,   ONLY:levels,n_sp_pol
+ use DIPOLES,     ONLY:DIPOLE_t,covariant_obs,der_k_obs
+ use IO_int,      ONLY:io_control
+ use IO_m,        ONLY:manage_action,VERIFY,DUMP,REP,OP_WR_CL,OP_WR,OP_RD_CL,&
+&                      OP_IF_START_APP_CL_IF_END,OP_IF_START_RD_CL_IF_END
+ use parallel_int,ONLY:PP_wait
+ use parallel_m,  ONLY:master_cpu,PAR_K_scheme,PP_indexes,PP_indexes_reset
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in)    :: Xk
+ type(levels),  intent(in)    :: Xen
+ type(DIPOLE_t),intent(inout) :: Dip
+ character(*),  intent(in)    :: check_read_or_write
+ character(*),  intent(in)    :: PAR_K_scheme_kind
+ integer,       intent(out)   :: io_err
+ !
+ ! Work space
+ !
+ logical           :: write_header,read_header,check,reading,writing,IO_do_it
+ integer           :: ID_S,IO_ACTION,IO_ACT_NOW,IO_MODE,ik_mem,ik,i_sp_pol,i_fragment,iv,ic,max_count,i_count
+ integer, external :: io_Overlaps
+#if !defined _PAR_IO
+ type(PP_indexes)  :: IO_par_index
+#endif
+ !
+ if (.not.IO_dip) return
+ !
+ check  =trim(check_read_or_write)=="check"
+ reading=trim(check_read_or_write)=="read"
+ writing=trim(check_read_or_write)=="write"
+ !
+ if(.not.any((/check,reading,writing/))) call error("Wrong call to DIPOLES_IO")
+ !
+ ID_S=0
+ io_err=0
+ !
+ write_header  =  (master_cpu.and.writing)
+ read_header   =  .not.write_header 
+ !
+ if(write_header) then
+   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID_S)
+   io_err=io_Overlaps(Dip,ID_S)
+ endif
+ !
+ ! To fix problems with asynchronous I/O 
+ !
+ call PP_wait(COMM=PAR_K_scheme%comm_world)
+ !
+ IO_MODE=VERIFY
+ if(read_header.and.writing) IO_MODE=DUMP
+ !
+ if(read_header) then
+   !
+   io_err=-1
+   i_count=0
+   !
+   ! Wait until the ndb.dipoles file is available to all cores
+   ! In writing mode the loop is needed to fix for asynchronous I/O
+   !
+   if(writing)          max_count=120
+   if(reading.or.check) max_count=1
+   !
+   do while(io_err/=0)
+     call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=IO_MODE,ID=ID_S)
+     io_err=io_Overlaps(Dip,ID_S)
+     if(io_err/=0) call sleep(1)
+     i_count=i_count+1
+     if((i_count==max_count).and.io_err/=0) then
+       if(writing) call error(" [OVLPs] Header not found while writing overlaps ")
+       if(reading) call error(" [OVLPs] Header not correct or missing while reading overlaps ")
+       if(  check) call msg("s","[OVLPs] Overlaps not correct or missing. To be computed")
+       return
+     endif
+   enddo
+ endif
+ !
+ if (check)   return
+ if (writing) call PP_wait(COMM=PAR_K_scheme%comm_world)
+ !
+ if(reading) IO_ACTION=OP_IF_START_RD_CL_IF_END
+ if(writing) IO_ACTION=OP_IF_START_APP_CL_IF_END
+ !
+ IO_do_it = reading .or. (writing.and.PAR_K_scheme%COM_ibz_A2A%CPU_id==0)
+ !
+ ! DIPOLES I/O
+ !==============
+ !
+#if !defined _PAR_IO
+ ! IO parallel indexes needed below by manage_action
+ !---------------------
+ call PP_indexes_reset(IO_par_index)
+ allocate(IO_par_index%element_1D(n_sp_pol*Xk%nibz))
+ IO_par_index%element_1D=.FALSE.
+ do i_sp_pol=1,n_sp_pol
+   do ik=1,Xk%nibz
+     i_fragment=ik+(i_sp_pol-1)*Xk%nibz
+     if ( .not.PAR_K_scheme%IND_ibz%element_1D(ik)) cycle
+     IO_par_index%element_1D(i_fragment)=.TRUE.
+   enddo
+ enddo
+#endif
+ !
+#if defined _PAR_IO
+ IO_ACT_NOW=manage_action(IO_ACTION,0,0,2)
+ call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/0/),ID=ID_S,COMM=PAR_K_scheme%COM_ibz_index,DO_IT=IO_do_it)
+ io_err=io_Overlaps(Dip,ID_S)
+ IO_ACT_NOW=manage_action(IO_ACTION,1,0,2)
+#endif
+ do i_sp_pol=1,n_sp_pol
+   do ik=1,Xk%nbz
+     i_fragment=ik+(i_sp_pol-1)*Xk%nbz
+     if (.not.PAR_K_scheme%IND_bz%element_1D(ik).and.writing) cycle
+#if !defined _PAR_IO
+     if (.not.PAR_K_scheme%COM_ibz_A2A%CPU_id==0 .and.writing) cycle
+     if (writing) IO_ACT_NOW=manage_action(IO_ACTION,i_fragment,1,Xk%nbz*n_sp_pol,IO_par_index)
+     if (reading) IO_ACT_NOW=manage_action(IO_ACTION,i_fragment,1,Xk%nbz*n_sp_pol)
+#endif
+     call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/1+i_fragment/),ID=ID_S)
+     io_err=io_Overlaps(Dip,ID_S)
+     if (io_err/=0.and.IO_do_it) then
+       call error(" OVERLAPS I/O error when (i_sp_pol,ik)= ("//trim(intc(i_sp_pol))//","//trim(intc(ik))//")")
+     endif
+   enddo
+ enddo
+#if defined _PAR_IO
+ IO_ACT_NOW=manage_action(IO_ACTION,2,0,2)
+ call io_control(ACTION=IO_ACT_NOW,COM=REP,SEC=(/2+Xk%nbz*n_sp_pol/),ID=ID_S)
+ io_err=io_Overlaps(Dip,ID_S)
+#endif
+ !
+ call PP_wait(COMM=PAR_K_scheme%comm_world)
+ !
+#if !defined _PAR_IO
+ call PP_indexes_reset(IO_par_index)
+#endif
+ !
+end subroutine OVERLAPS_IO
diff --git a/src/dipoles/POLARIZATION_output.F b/src/dipoles/POLARIZATION_output.F
index fab85952ad..6516c872c9 100644
--- a/src/dipoles/POLARIZATION_output.F
+++ b/src/dipoles/POLARIZATION_output.F
@@ -5,7 +5,12 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
-subroutine POLARIZATION_output(Xen,Xk,Dip)
+!> @brief Calculate and print total polarization = electronic + ionic
+!!
+!! @param[in]       Xen                         KS energies
+!! @param[in]       Xk                          k-points sampling
+!!
+subroutine POLARIZATION_output(Xen,Xk)
  ! 
  ! Print the intrisic Polarization of the system
  !
@@ -22,7 +27,6 @@ subroutine POLARIZATION_output(Xen,Xk,Dip)
  implicit none
  type(levels)  ::Xen
  type(bz_samp) ::Xk
- type(DIPOLE_t)::Dip
  ! 
  ! Work Space
  !
diff --git a/src/dipoles/RT_project.dep b/src/dipoles/RT_project.dep
index 3c0f2dfa8a..d32711168f 100644
--- a/src/dipoles/RT_project.dep
+++ b/src/dipoles/RT_project.dep
@@ -1,7 +1,6 @@
  DIPOLE_IO.o
  DIPOLE_driver.o
  DIPOLE_g_space.o
- DIPOLE_orbital_magnetization.o
  DIPOLE_p_matrix_elements.o
  DIPOLE_setup.o
  DIPOLE_shifted_grids.o
diff --git a/src/driver/C_driver_transfer.F b/src/driver/C_driver_transfer.F
index 5480f5fa26..94e5bfeae7 100644
--- a/src/driver/C_driver_transfer.F
+++ b/src/driver/C_driver_transfer.F
@@ -7,7 +7,7 @@
 !
 subroutine C_driver_transfer()
  !
- use C_driver,       ONLY:code_version,code_revision,code_hash,&
+ use C_driver,       ONLY:code_version,code_revision,code_hash,lumen_version,&
 &                         code_libraries,get_version,get_libraries,get_running_tool,&
 &                         get_running_project,code_bin,code_tool,code_project
  !
@@ -22,6 +22,7 @@ subroutine C_driver_transfer()
  code_libraries=" "
  call get_libraries(code_libraries)
  code_hash=" "
- call get_version(code_version(1),code_version(2),code_version(3),code_revision,code_hash)
+ lumen_version=" "
+ call get_version(lumen_version,code_version(1),code_version(2),code_version(3),code_revision,code_hash)
  !
 end subroutine
diff --git a/src/driver/SC_project.dep b/src/driver/SC_project.dep
new file mode 100644
index 0000000000..a3379bb643
--- /dev/null
+++ b/src/driver/SC_project.dep
@@ -0,0 +1,2 @@
+  options_control.o
+
diff --git a/src/driver/command_line.c b/src/driver/command_line.c
index a8ed9097ec..ef1d52e1c0 100644
--- a/src/driver/command_line.c
+++ b/src/driver/command_line.c
@@ -1,16 +1,17 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
   https://www.gnu.org/software/libc/manual/html_node/Getopt-Long-Option-Example.html
   https://www.gnu.org/software/libc/manual/html_node/Getopt-Long-Options.html#Getopt-Long-Options
 
 Data Type: struct option
-This structure describes a single long option name for the sake of getopt_long. 
-The argument longopts must be an array of these structures, one for each long option. Terminate the array with an element containing all zeros.
+This structure describes a single long option name for the sake of getopt_long.
+The argument longopts must be an array of these structures, one for each long
+option. Terminate the array with an element containing all zeros.
 
 The struct option structure has these fields:
 
@@ -18,176 +19,255 @@ The struct option structure has these fields:
   This field is the name of the option. It is a string.
 
 - int has_arg
-  This field says whether the option takes an argument. It is an integer, and there are three legitimate values: no_argument, required_argument and optional_argument.
+  This field says whether the option takes an argument. It is an integer, and
+there are three legitimate values: no_argument, required_argument and
+optional_argument.
 
 - int *flag
 
 - int val
   These fields control how to report or act on the option when it occurs.
 
-  If flag is a null pointer, then the val is a value which identifies this option. 
-  Often these values are chosen to uniquely identify particular long options.
+  If flag is a null pointer, then the val is a value which identifies this
+option. Often these values are chosen to uniquely identify particular long
+options.
 
-  If flag is not a null pointer, it should be the address of an int variable which is the flag for this option. 
-  The value in val is the value to store in the flag to indicate that the option was seen.
+  If flag is not a null pointer, it should be the address of an int variable
+which is the flag for this option. The value in val is the value to store in the
+flag to indicate that the option was seen.
 
 */
+#include <driver.h>
+#include <getopt.h>
+#include <kind.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <getopt.h>
 #include <string.h>
-#include <kind.h>
-#include <driver.h>
 
-struct yambo_seed_struct command_line(int argc, char *argv[], struct options_struct opts[],  struct tool_struct t, int *use_editor, int *use_mpi, int n_options)
+struct yambo_seed_struct command_line(int argc, char *argv[],
+                                      struct options_struct opts[],
+                                      struct tool_struct t, int *use_editor,
+                                      int *use_mpi, int n_options)
 {
- int n_active,n_vars,opt=0,i_opt;
- char opt_string[100],ch[3],dummy[50];
- /* */
- yambo_seed_struct y;
- /* 
-  Pre-sets 
- */
- y.in_file = malloc(strlen(t.tool)+4);
- y.parenv_file = NULL;
- strcpy(y.in_file,t.tool);
- strcat(y.in_file,".in");
- y.in_dir=".";
- y.out_dir=".";
- y.com_dir=".";
- y.job="";
- strcpy(y.string,"");
- strcpy(opt_string,"");
- /* 
-   Number of active options
- */
- n_active=0;
- for(i_opt=0;i_opt<n_options;i_opt++) {
-  if (opts[i_opt].short_opt==0) {break;}; 
-  n_active++;
- }
- /* 
-   Long Options structure 
- */
- struct option long_options[n_active+1];
- /* */
- n_active=0;
- for(i_opt=0;i_opt<n_options;i_opt++) {
-  /**/
-  if (use_me(opts,t,i_opt)==0) continue;
-  if (opts[i_opt].short_opt==0) {break;};
-  /**/
-  long_options[n_active].name=opts[i_opt].long_opt;
-  long_options[n_active].flag=0;
-  long_options[n_active].val=opts[i_opt].short_opt;
-  /* DEBUG
-  printf ("CMD: INIT %i \n",i_opt);
-  printf ("CMD: INIT %s \n",opts[i_opt].short_desc);
-  printf ("CMD: (opts.short_opt) %c \n",opts[i_opt].short_opt);
-  */
-  sprintf(ch,"%c",opts[i_opt].short_opt);
-  strcat(opt_string,ch);
-  /* DEBUG  printf("CMD: opt_string %s\n",opt_string);*/ 
-  if (opts[i_opt].long_opt!=NULL) {
-   long_options[n_active].name=opts[i_opt].long_opt;
-   long_options[n_active].flag=0;
-  }
-  /* DEBUG printf ("CMD: (opts.short_desc) %s \n",opts[i_opt].short_desc);*/
-  /* DEBUG printf ("CMD: (opt_string (now)) %s \n",opt_string);*/
-  /* VARS */
-  n_vars=opts[i_opt].int_var+opts[i_opt].float_var+opts[i_opt].char_var;
-  if (n_vars ==0) {
-   /* DEBUG printf ("CMD (n_vars=0)\n");*/
-   long_options[n_active].has_arg=no_argument;
-  }else{
-   /* DEBUG printf ("CMD (n_vars/=0) \n");*/
-   long_options[n_active].has_arg=required_argument;
-   strcat(opt_string,":");
-  };
-  if (opts[i_opt].optional_var==1){
-   /* DEBUG printf ("CMD (opt_var) \n");*/
-   long_options[n_active].has_arg=optional_argument;
-   strcat(opt_string,":");
-  }
-  n_active++;
- };
- /* DEBUG 
- printf("before getopt %s\n",opt_string);
- */
- long_options[n_active].name=0;
- long_options[n_active].has_arg=0;
- long_options[n_active].flag=0;
- long_options[n_active].val=0;
- int long_index =0;
- while ((opt = getopt_long_only(argc, argv,opt_string,long_options, &long_index )) != -1) {
-  /* No option valid */
-  if (opt == '?') {
-    printf ("%s","\n Use -h to list the options\n");
-    exit(EXIT_FAILURE);
-  }
-  for(i_opt=0;i_opt<n_options;i_opt++) {
-   if (use_me(opts,t,i_opt)==0) continue;
-   if (opts[i_opt].short_opt==opt) {break;};
-  }
-  /* DEBUG
-  if (opt > 0) {printf ("GETOPT ouput: %c %s",opts[i_opt].short_opt,opts[i_opt].long_opt);}
-  if (optarg != NULL) {printf (" with arg %s\n", optarg);}
-  if (optarg == NULL) {printf ("\n");}
-  */
-  /* help */
-  if (strcmp(opts[i_opt].long_opt,"help")==0){
-   /*
-   printf ("OPT %s \n",optarg);
-   printf ("OPT %i \n",optind);
-   printf ("OPT %s \n",argv[optind]);
-   */
-   if (optarg == NULL && argv[optind] != NULL && argv[optind][0] != '-') {       
-    strcpy(dummy,argv[optind]);
-    ++optind;
-   }else{
-    if (optarg == NULL) sprintf(dummy,"%s","help");
-    if (optarg != NULL) sprintf(dummy,"%s",optarg);
-   }
-   /* DEBUG 
-   printf ("USAGE call %s \n",dummy);
-   */
-   usage(opts,t,dummy,n_options);
-   exit(0);
-  }
-  /* version */
-  if (strcmp(opts[i_opt].long_opt,"version")==0){
-    usage(opts,t,"version",n_options);
-    exit(0);
-  }
-  if (strcmp(opts[i_opt].long_opt,"Input")==0){y.in_file=optarg;continue;}
-  if (strcmp(opts[i_opt].long_opt,"Job")==0){y.job=optarg;continue;}
-  if (strcmp(opts[i_opt].long_opt,"Idir")==0){y.in_dir=optarg;continue;}
-  if (strcmp(opts[i_opt].long_opt,"Cdir")==0){y.com_dir=optarg;continue;}
-  if (strcmp(opts[i_opt].long_opt,"parenv")==0){y.parenv_file=optarg;continue;}
-  if (strcmp(opts[i_opt].long_opt,"Odir")==0){y.out_dir=optarg;continue;}
-  if (strcmp(opts[i_opt].long_opt,"nompi")==0){*use_mpi=-1;continue;}
-  if (strcmp(opts[i_opt].long_opt,"Quiet")==0){*use_editor=-2;continue;}
-  if (opt > 0) {
-   strcat(y.string," ");
-   strcat(y.string,opts[i_opt].yambo_string);
-  }
-  if (optarg) {
-   strcat(y.string," ");
-   strcat(y.string,optarg);
-  }
- };
- /*
-  Sizes
- */
- y.string_N=strlen(y.string);
- y.in_file_N=strlen(y.in_file);
- y.out_dir_N=strlen(y.out_dir);
- y.in_dir_N=strlen(y.in_dir);
- y.com_dir_N=strlen(y.com_dir);
- y.job_N=strlen(y.job);
- /* */
- return(y);
+    int n_active, n_vars, opt = 0, i_opt;
+    char opt_string[100], ch[3], dummy[50];
+    /* */
+    yambo_seed_struct y;
+    /*
+     Pre-sets
+    */
+    y.in_file = malloc(strlen(t.tool) + 4);
+    y.parenv_file = NULL;
+    strcpy(y.in_file, t.tool);
+    strcat(y.in_file, ".in");
+    y.in_dir = ".";
+    y.out_dir = ".";
+    y.com_dir = ".";
+    y.job = "";
+    strcpy(y.string, "");
+    strcpy(opt_string, "");
+    /*
+      Number of active options
+    */
+    n_active = 0;
+    for (i_opt = 0; i_opt < n_options; i_opt++)
+    {
+        if (opts[i_opt].short_opt == 0)
+        {
+            break;
+        };
+        n_active++;
+    }
+    /*
+      Long Options structure
+    */
+    struct option long_options[n_active + 1];
+    /* */
+    n_active = 0;
+    for (i_opt = 0; i_opt < n_options; i_opt++)
+    {
+        /**/
+        if (use_me(opts, t, i_opt) == 0)
+        {
+            continue;
+        }
+        if (opts[i_opt].short_opt == 0)
+        {
+            break;
+        };
+        /**/
+        long_options[n_active].name = opts[i_opt].long_opt;
+        long_options[n_active].flag = 0;
+        long_options[n_active].val = opts[i_opt].short_opt;
+        /* DEBUG
+        printf ("CMD: INIT %i \n",i_opt);
+        printf ("CMD: INIT %s \n",opts[i_opt].short_desc);
+        printf ("CMD: (opts.short_opt) %c \n",opts[i_opt].short_opt);
+        */
+        sprintf(ch, "%c", opts[i_opt].short_opt);
+        strcat(opt_string, ch);
+        /* DEBUG  printf("CMD: opt_string %s\n",opt_string);*/
+        if (opts[i_opt].long_opt != NULL)
+        {
+            long_options[n_active].name = opts[i_opt].long_opt;
+            long_options[n_active].flag = 0;
+        }
+        /* DEBUG printf ("CMD: (opts.short_desc) %s
+         * \n",opts[i_opt].short_desc);*/
+        /* DEBUG printf ("CMD: (opt_string (now)) %s \n",opt_string);*/
+        /* VARS */
+        n_vars =
+            opts[i_opt].int_var + opts[i_opt].float_var + opts[i_opt].char_var;
+        if (n_vars == 0)
+        {
+            /* DEBUG printf ("CMD (n_vars=0)\n");*/
+            long_options[n_active].has_arg = no_argument;
+        }
+        else
+        {
+            /* DEBUG printf ("CMD (n_vars/=0) \n");*/
+            long_options[n_active].has_arg = required_argument;
+            strcat(opt_string, ":");
+        };
+        if (opts[i_opt].optional_var == 1)
+        {
+            /* DEBUG printf ("CMD (opt_var) \n");*/
+            long_options[n_active].has_arg = optional_argument;
+            strcat(opt_string, ":");
+        }
+        n_active++;
+    };
+    /* DEBUG
+    printf("before getopt %s\n",opt_string);
+    */
+    long_options[n_active].name = 0;
+    long_options[n_active].has_arg = 0;
+    long_options[n_active].flag = 0;
+    long_options[n_active].val = 0;
+    int long_index = 0;
+    while ((opt = getopt_long_only(argc, argv, opt_string, long_options,
+                                   &long_index)) != -1)
+    {
+        /* No option valid */
+        if (opt == '?')
+        {
+            printf("%s", "\n Use -h to list the options\n");
+            exit(EXIT_FAILURE);
+        }
+        for (i_opt = 0; i_opt < n_options; i_opt++)
+        {
+            if (use_me(opts, t, i_opt) == 0)
+            {
+                continue;
+            }
+            if (opts[i_opt].short_opt == opt)
+            {
+                break;
+            };
+        }
+        /* DEBUG
+        if (opt > 0) {printf ("GETOPT ouput: %c
+        %s",opts[i_opt].short_opt,opts[i_opt].long_opt);} if (optarg != NULL)
+        {printf (" with arg %s\n", optarg);} if (optarg == NULL) {printf
+        ("\n");}
+        */
+        /* help */
+        if (strcmp(opts[i_opt].long_opt, "help") == 0)
+        {
+            /*
+            printf ("OPT %s \n",optarg);
+            printf ("OPT %i \n",optind);
+            printf ("OPT %s \n",argv[optind]);
+            */
+            if (optarg == NULL && argv[optind] != NULL &&
+                argv[optind][0] != '-')
+            {
+                strcpy(dummy, argv[optind]);
+                ++optind;
+            }
+            else
+            {
+                if (optarg == NULL)
+                {
+                    sprintf(dummy, "%s", "help");
+                }
+                if (optarg != NULL)
+                {
+                    sprintf(dummy, "%s", optarg);
+                }
+            }
+            /* DEBUG
+            printf ("USAGE call %s \n",dummy);
+            */
+            usage(opts, t, dummy, n_options);
+            exit(0);
+        }
+        /* version */
+        if (strcmp(opts[i_opt].long_opt, "version") == 0)
+        {
+            usage(opts, t, "version", n_options);
+            exit(0);
+        }
+        if (strcmp(opts[i_opt].long_opt, "Input") == 0)
+        {
+            y.in_file = optarg;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "Job") == 0)
+        {
+            y.job = optarg;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "Idir") == 0)
+        {
+            y.in_dir = optarg;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "Cdir") == 0)
+        {
+            y.com_dir = optarg;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "parenv") == 0)
+        {
+            y.parenv_file = optarg;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "Odir") == 0)
+        {
+            y.out_dir = optarg;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "nompi") == 0)
+        {
+            *use_mpi = -1;
+            continue;
+        }
+        if (strcmp(opts[i_opt].long_opt, "Quiet") == 0)
+        {
+            *use_editor = -2;
+            continue;
+        }
+        if (opt > 0)
+        {
+            strcat(y.string, " ");
+            strcat(y.string, opts[i_opt].yambo_string);
+        }
+        if (optarg)
+        {
+            strcat(y.string, " ");
+            strcat(y.string, optarg);
+        }
+    };
+    /*
+     Sizes
+    */
+    y.string_N = strlen(y.string);
+    y.in_file_N = strlen(y.in_file);
+    y.out_dir_N = strlen(y.out_dir);
+    y.in_dir_N = strlen(y.in_dir);
+    y.com_dir_N = strlen(y.com_dir);
+    y.job_N = strlen(y.job);
+    /* */
+    return (y);
 };
-
-
-
diff --git a/src/driver/driver.c b/src/driver/driver.c
index 42ec933031..52bed08a89 100644
--- a/src/driver/driver.c
+++ b/src/driver/driver.c
@@ -1,62 +1,70 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
+#include <driver.h>
+#include <kind.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-#include <kind.h>
-#include <driver.h>
-#if defined _MPI 
- #include <mpi.h>
+#if defined _MPI
+#include <mpi.h>
 #endif
-/* 
+/*
   MAIN
 */
 int main(int argc, char *argv[])
 {
- /*
-  Work Space
- */
- int np=1,pid=0,use_mpi=1,use_editor=1,n_options=200;
- /*
-  Yambo and Tool structures
- */
- yambo_seed_struct y;
- tool_struct tool;
- struct options_struct options[n_options];
- /* 
-  TOOL & Version initialization
- */
- tool=tool_init();
- /*
-  Options "maker"
- */ 
- options_maker(options,n_options);
- /*
-  Command line parsing
- */ 
- y=command_line(argc,argv,options,tool,&use_editor,&use_mpi,n_options);
- /*
-  Launcher
- */
- launcher(argc,argv,np,pid,y,&use_editor,&use_mpi);
- /* 
-   Input File
- */
- input_file(y,tool,&use_editor);
- /* 
-   CLEAN & EXIT
- ===========================================================================
- */
+    /*
+     Work Space
+    */
+    int np = 1, pid = 0, use_mpi = 1, use_editor = 1, n_options = 200;
+    /*
+     Yambo and Tool structures
+    */
+    yambo_seed_struct y;
+    tool_struct tool;
+    struct options_struct *options = calloc(n_options, sizeof(*options));
+    if (!options)
+    {
+        printf("Failed to allocate options buffer.\n");
+        exit(EXIT_FAILURE);
+    }
+    /*
+     TOOL & Version initialization
+    */
+    tool = tool_init();
+    /*
+     Options "maker"
+    */
+    options_maker(options, n_options);
+    /*
+     Command line parsing
+    */
+    y = command_line(argc, argv, options, tool, &use_editor, &use_mpi,
+                     n_options);
+    /*
+     Launcher
+    */
+    launcher(argc, argv, np, pid, y, &use_editor, &use_mpi);
+    /*
+      Input File
+    */
+    input_file(y, tool, &use_editor);
+    /*
+      CLEAN & EXIT
+    ===========================================================================
+    */
+    free(options);
 #if defined _MPI
-  if (use_mpi==1) {
-   MPI_Barrier(MPI_COMM_WORLD);
-   MPI_Finalize();
-  };
-#endif 
+    if (use_mpi == 1)
+    {
+        MPI_Barrier(MPI_COMM_WORLD);
+        MPI_Finalize();
+    };
+#endif
 }
-
diff --git a/src/driver/get_libraries.c b/src/driver/get_libraries.c
index 2f9542e66d..ddc861e158 100644
--- a/src/driver/get_libraries.c
+++ b/src/driver/get_libraries.c
@@ -1,87 +1,95 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <wrapper.h>
-#include <kind.h>
 #include <driver.h>
+#include <kind.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <wrapper.h>
 
 char *running_libraries()
 {
- int i_str,str_len,i_c;
- char strings[20][20], *c;
- i_str=0;
+    int i_str, str_len, i_c;
+    char strings[20][20], *c;
+    i_str = 0;
 #if defined _MPI
- strcpy(strings[i_str], "MPI");
+    strcpy(strings[i_str], "MPI");
 #else
- strcpy(strings[i_str], "Serial");
+    strcpy(strings[i_str], "Serial");
+#endif
+#if defined _GAMMA_ONLY
+    i_str++;
+    strcpy(strings[i_str], "GAMMA_ONLY");
 #endif
 #if defined _GPU
- i_str++;
- strcpy(strings[i_str], "GPU");
+    i_str++;
+    strcpy(strings[i_str], "GPU");
 #endif
 #if defined _CUDA
- i_str++;
- strcpy(strings[i_str], "CUDA");
+    i_str++;
+    strcpy(strings[i_str], "CUDA");
 #endif
 #if defined _CUDAF
- i_str++;
- strcpy(strings[i_str], "CUDAF");
+    i_str++;
+    strcpy(strings[i_str], "CUDAF");
 #endif
 #if defined _OPENACC
- i_str++;
- strcpy(strings[i_str], "OpenACC");
+    i_str++;
+    strcpy(strings[i_str], "OpenACC");
 #endif
 #if defined _OPENMP
- i_str++;
- strcpy(strings[i_str], "OpenMP");
+    i_str++;
+    strcpy(strings[i_str], "OpenMP");
 #endif
 #if defined _OPENMP_GPU
- i_str++;
- strcpy(strings[i_str], "OpenMP-GPU");
+    i_str++;
+    strcpy(strings[i_str], "OpenMP-GPU");
 #endif
 #if defined _SCALAPACK
- i_str++;
- strcpy(strings[i_str], "SLK");
+    i_str++;
+    strcpy(strings[i_str], "SLK");
 #endif
 #if defined _SLEPC
- i_str++;
- strcpy(strings[i_str], "SLEPC");
+    i_str++;
+    strcpy(strings[i_str], "SLEPC");
 #endif
 #if defined _PAR_IO
- i_str++;
- strcpy(strings[i_str], "HDF5_MPI_IO");
+    i_str++;
+    strcpy(strings[i_str], "HDF5_MPI_IO");
 #elif defined _HDF5_IO
- i_str++;
- strcpy(strings[i_str], "HDF5_IO");
+    i_str++;
+    strcpy(strings[i_str], "HDF5_IO");
 #elif defined _HDF5_LIB
- i_str++;
- strcpy(strings[i_str], "HDF5_LIB");
-#endif 
- str_len=0;
- for(i_c=0;i_c<=i_str;i_c++) {
-  str_len=str_len+sizeof(strings[i_c]);
- }
- c = malloc(str_len+1);
- strcpy(c,"");
- for(i_c=0;i_c<=i_str;i_c++) {
-  if (i_c>0) strcat(c,"+");
-  strcat(c,strings[i_c]);
- }
- return c;
+    i_str++;
+    strcpy(strings[i_str], "HDF5_LIB");
+#endif
+    str_len = 0;
+    for (i_c = 0; i_c <= i_str; i_c++)
+    {
+        str_len = str_len + sizeof(strings[i_c]);
+    }
+    c = malloc(str_len + 1);
+    strcpy(c, "");
+    for (i_c = 0; i_c <= i_str; i_c++)
+    {
+        if (i_c > 0)
+        {
+            strcat(c, "+");
+        }
+        strcat(c, strings[i_c]);
+    }
+    return c;
 }
 void C_FUNC(get_libraries, GET_LIBRARIES)(char *libraries)
 {
- char *c = running_libraries();
- int len = strlen(c);
- strcpy(libraries, c);
- libraries[len] = libraries[len + 1];
+    char *c = running_libraries();
+    int len = strlen(c);
+    strcpy(libraries, c);
+    libraries[len] = libraries[len + 1];
 }
-
diff --git a/src/driver/get_runlevel.c b/src/driver/get_runlevel.c
index b43c088b9f..c41efda39f 100644
--- a/src/driver/get_runlevel.c
+++ b/src/driver/get_runlevel.c
@@ -1,33 +1,48 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <stdio.h>
-#include <wrapper.h>
-#include <kind.h>
 #include <driver.h>
+#include <kind.h>
+#include <stdio.h>
 #include <string.h>
+#include <wrapper.h>
 
 char *runlevel(int *runid, int *id)
 {
- int n_options=200;
- struct options_struct options[n_options];
- options_maker(options,n_options);
- if (strcmp(options[*runid].yambo_string,"undef")==0) return "EMPTY";
- if (*id == 1) {return options[*runid].yambo_string;}
- if (*id == 2) {return options[*runid].short_desc;}
- if (*id == 3) {return options[*runid].bin;}
- if (*id == 4) {return options[*runid].no_bin;}
+    int n_options = 200;
+    struct options_struct options[n_options];
+    options_maker(options, n_options);
+    if (strcmp(options[*runid].yambo_string, "undef") == 0)
+    {
+        return "EMPTY";
+    }
+    if (*id == 1)
+    {
+        return options[*runid].yambo_string;
+    }
+    if (*id == 2)
+    {
+        return options[*runid].short_desc;
+    }
+    if (*id == 3)
+    {
+        return options[*runid].bin;
+    }
+    if (*id == 4)
+    {
+        return options[*runid].no_bin;
+    }
 }
-void C_FUNC(get_runlevel, GET_RUNLEVEL)(char *component, int *component_id, int *runlevel_id)
+void C_FUNC(get_runlevel, GET_RUNLEVEL)(char *component, int *component_id,
+                                        int *runlevel_id)
 {
- char *c = runlevel(runlevel_id,component_id);
- int len = strlen(c);
- strcpy(component, c);
- component[len] = component[len + 1];
+    char *c = runlevel(runlevel_id, component_id);
+    int len = strlen(c);
+    strcpy(component, c);
+    component[len] = component[len + 1];
 }
-
diff --git a/src/driver/get_running_project.c b/src/driver/get_running_project.c
index f800902977..e12b3670b8 100644
--- a/src/driver/get_running_project.c
+++ b/src/driver/get_running_project.c
@@ -1,37 +1,39 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <wrapper.h>
-#include <kind.h>
 #include <driver.h>
+#include <kind.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <wrapper.h>
 
 char *running_project()
 {
- tool_struct tool;
- tool=tool_init();
- char *c;
- if (tool.pj!=NULL) {
-  c = malloc(sizeof(tool.pj)+1);
-  strcpy(c,tool.pj);
- }else{
-  c = malloc(2);
-  strcpy(c,"");
- }
- return c;
+    tool_struct tool;
+    tool = tool_init();
+    char *c;
+    if (tool.pj != NULL)
+    {
+        c = malloc(sizeof(tool.pj) + 1);
+        strcpy(c, tool.pj);
+    }
+    else
+    {
+        c = malloc(2);
+        strcpy(c, "");
+    }
+    return c;
 }
 void C_FUNC(get_running_project, GET_RUNNING_TOOL)(char *code_project)
 {
- char *c = running_project();
- int len = strlen(c);
- strcpy(code_project, c);
- code_project[len] = code_project[len + 1];
+    char *c = running_project();
+    int len = strlen(c);
+    strcpy(code_project, c);
+    code_project[len] = code_project[len + 1];
 }
-
diff --git a/src/driver/get_running_tool.c b/src/driver/get_running_tool.c
index 7e392c3ddf..ef156a957e 100644
--- a/src/driver/get_running_tool.c
+++ b/src/driver/get_running_tool.c
@@ -1,32 +1,31 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <wrapper.h>
-#include <kind.h>
 #include <driver.h>
+#include <kind.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <wrapper.h>
 
 char *running_tool()
 {
- tool_struct tool;
- tool=tool_init();
- char *c;
- c = malloc(sizeof(tool.tool)+1);
- strcpy(c,tool.tool);
- return c;
+    tool_struct tool;
+    tool = tool_init();
+    char *c;
+    c = malloc(sizeof(tool.tool) + 1);
+    strcpy(c, tool.tool);
+    return c;
 }
 void C_FUNC(get_running_tool, GET_RUNNING_TOOL)(char *code_tool)
 {
- char *c = running_tool();
- int len = strlen(c);
- strcpy(code_tool, c);
- code_tool[len] = code_tool[len + 1];
+    char *c = running_tool();
+    int len = strlen(c);
+    strcpy(code_tool, c);
+    code_tool[len] = code_tool[len + 1];
 }
-
diff --git a/src/driver/get_version.c b/src/driver/get_version.c
index b0b2332fd3..da9913af06 100644
--- a/src/driver/get_version.c
+++ b/src/driver/get_version.c
@@ -1,38 +1,45 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <string.h>
-#include <stdio.h>
-#include <wrapper.h>
-#include <kind.h>
 #include <driver.h>
+#include <kind.h>
+#include <stdio.h>
+#include <string.h>
 #include <version.h>
+#include <wrapper.h>
 
-struct tool_struct versions( )
+struct tool_struct versions()
 {
- tool_struct t;
- t.version=YAMBO_VERSION;
- t.subversion=YAMBO_SUBVERSION;
- t.patchlevel=YAMBO_PATCHLEVEL;
- t.revision=YAMBO_REVISION;
- sprintf(t.hash,"%s",YAMBO_HASH);
- return(t);
+    tool_struct t;
+    t.version = YAMBO_VERSION;
+    t.subversion = YAMBO_SUBVERSION;
+    t.patchlevel = YAMBO_PATCHLEVEL;
+    t.revision = YAMBO_REVISION;
+    sprintf(t.hash, "%s", YAMBO_HASH);
+    sprintf(t.lumenver, "%s", LUMEN_VERSION);
+    return (t);
 }
-void C_FUNC(get_version, GET_VERSION)(int *version,int *subversion, int *patchlevel, int *revision, char *hash)
+void C_FUNC(get_version, GET_VERSION)(char *lumenver, int *version, int *subversion,
+                                      int *patchlevel, int *revision,
+                                      char *hash)
 {
- tool_struct t;
- t=versions();
- *version=t.version;
- *subversion=t.subversion;
- *patchlevel=t.patchlevel;
- *revision=t.revision;
- strcpy(hash, t.hash);
- int len = strlen(t.hash);
- hash[len] = hash[len + 1];
-}
+    tool_struct t;
+    t = versions();
+    *version = t.version;
+    *subversion = t.subversion;
+    *patchlevel = t.patchlevel;
+    *revision = t.revision;
+
+    strcpy(hash, t.hash);
+    int ylen = strlen(t.hash);
+    hash[ylen] = hash[ylen + 1];
 
+    strcpy(lumenver, t.lumenver);
+    int llen = strlen(t.lumenver);
+    lumenver[llen] = lumenver[llen + 1];
+}
diff --git a/src/driver/input_file.c b/src/driver/input_file.c
index 9ecd6d05bd..845050e18f 100644
--- a/src/driver/input_file.c
+++ b/src/driver/input_file.c
@@ -1,41 +1,48 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
+#include <kind.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <kind.h>
 
-void input_file(struct yambo_seed_struct y,struct tool_struct t,int *use_editor)
+void input_file(struct yambo_seed_struct y, struct tool_struct t,
+                int *use_editor)
 {
- int ttd;
- char edit_line[100]={'\0'},file_name[100]={'\0'};
- /*
-  External functions
- */
- extern int guess_winsize();
- /*
-  stdlog?
- */
- ttd=guess_winsize();
- /* */
- strcpy(edit_line,t.editor);
- strcpy(file_name,y.in_file);
- if (y.parenv_file !=NULL) {strcpy(file_name,y.parenv_file);};
- strncat(edit_line," ",1);
- strncat(edit_line,file_name,strlen(file_name));
-#if defined _yambo || defined _ypp 
- if (*use_editor == 1 && ttd>0 && strstr(t.editor,"none ")==0)
- {
-  system(edit_line);
- }else if (*use_editor == -2){ 
-  fprintf(stderr," \n%s%s %s %s\n\n",t.tool,": input file",file_name,"created");
-  exit (0);
- };
+    int ttd;
+    char edit_line[100] = {'\0'}, file_name[100] = {'\0'};
+    /*
+     External functions
+    */
+    extern int guess_winsize();
+    /*
+     stdlog?
+    */
+    ttd = guess_winsize();
+    /* */
+    strcpy(edit_line, t.editor);
+    strcpy(file_name, y.in_file);
+    if (y.parenv_file != NULL)
+    {
+        strcpy(file_name, y.parenv_file);
+    };
+    strncat(edit_line, " ", 1);
+    strncat(edit_line, file_name, strlen(file_name));
+#if defined _yambo || defined _ypp
+    if (*use_editor == 1 && ttd > 0 && strstr(t.editor, "none ") == 0)
+    {
+        system(edit_line);
+    }
+    else if (*use_editor == -2)
+    {
+        fprintf(stderr, " \n%s%s %s %s\n\n", t.tool, ": input file", file_name,
+                "created");
+        exit(0);
+    };
 #endif
 };
diff --git a/src/driver/launcher.c b/src/driver/launcher.c
index 21ae46d19a..909405da4f 100644
--- a/src/driver/launcher.c
+++ b/src/driver/launcher.c
@@ -1,133 +1,144 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
+#include <driver.h>
+#include <fortran_driver.h>
+#include <kind.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <wrapper.h>
-#include <fortran_driver.h>
-#include <kind.h>
-#include <driver.h>
 #include <yambo_driver.h>
-#if defined _MPI 
- #include <mpi.h>
+#if defined _MPI
+#include <mpi.h>
 #endif
 
-void launcher(int argc, char *argv[],int np, int pid, struct yambo_seed_struct y,int *use_editor, int *use_mpi)
+#define F90ARG_MACRO(X)                                                    \
+    F90_FUNC(X)(&np, &pid, &y.string_N, &y.in_file_N, &y.in_dir_N,         \
+                &y.out_dir_N, &y.com_dir_N, &y.job_N, y.string, y.in_file, \
+                y.in_dir, y.out_dir, y.com_dir, y.job, y.string_N,         \
+                y.in_file_N, y.in_dir_N, y.out_dir_N, y.com_dir_N, y.job_N);
+
+void launcher(int argc, char *argv[], int np, int pid,
+              struct yambo_seed_struct y, int *use_editor, int *use_mpi)
 {
- int yambo_err;
- /* 
-   Par Environments? Yes? => Return
- */
+    int yambo_err;
+    /*
+      Par Environments? Yes? => Return
+    */
 #if defined _yambo
- if (y.parenv_file !=NULL) 
- {
-  int env_editor=load_environments(y.parenv_file);
-  if (env_editor==1) 
-  {
-   *use_editor=1;
-   return;
-  };
- };
+    if (y.parenv_file != NULL)
+    {
+        int env_editor = load_environments(y.parenv_file);
+        if (env_editor == 1)
+        {
+            *use_editor = 1;
+            return;
+        };
+    };
 #endif
- /* 
-   MPI
- */
+    /*
+      MPI
+    */
 #if defined _MPI
- if (*use_mpi==1) {
-   MPI_Init(&argc,&argv);               /* starts MPI */
-   MPI_Comm_rank(MPI_COMM_WORLD, &pid); /* get current process id */
-   MPI_Comm_size(MPI_COMM_WORLD, &np);  /* get number of processes */
- };
+    if (*use_mpi == 1)
+    {
+        MPI_Init(&argc, &argv);              /* starts MPI */
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); /* get current process id */
+        MPI_Comm_size(MPI_COMM_WORLD, &np);  /* get number of processes */
+    };
 #endif
 #if defined _yambo
- /* 
-   Running the Fortran YAMBO driver 
- ===========================================================================
- */
- yambo_err=F90_FUNC(yambo)(
-#include <fortran_arguments.h>
- );
- if(yambo_err==2) exit(0); /* DB listing mode */
+    /*
+      Running the Fortran YAMBO driver
+    ===========================================================================
+    */
+    yambo_err = F90ARG_MACRO(yambo);
+    if (yambo_err == 2)
+    {
+        exit(0); /* DB listing mode */
+    }
 #endif
 #if defined _ypp
- /* 
-   Running the Fortran YPP driver
- ===========================================================================
- */
- F90_FUNC(ypp)(
-#include <fortran_arguments.h>
- );
+    /*
+      Running the Fortran YPP driver
+    ===========================================================================
+    */
+    F90ARG_MACRO(ypp);
 #endif
 #if defined _c2y
- /* 
-   Running the Fortran c2y driver
- ===========================================================================
- */
- F90_FUNC(c2y)(
-#include <fortran_arguments.h>
- );
+    /*
+      Running the Fortran c2y driver
+    ===========================================================================
+    */
+    F90ARG_MACRO(c2y);
 #endif
 #if defined _a2y
- /* 
-   Running the Fortran a2y driver
- ===========================================================================
- */
- F90_FUNC(a2y)(
-#include <fortran_arguments.h>
- );
+    /*
+      Running the Fortran a2y driver
+    ===========================================================================
+    */
+    F90ARG_MACRO(a2y);
 #endif
 #if defined _p2y
- /* 
-   Running the Fortran p2y driver 
- ===========================================================================
- */
- F90_FUNC(p2y)(
-#include <fortran_arguments.h>
- );
+    /*
+      Running the Fortran p2y driver
+    ===========================================================================
+    */
+    F90ARG_MACRO(p2y);
 #endif
 #if defined _e2y
- /* 
-   Running the Fortran p2y driver 
- ===========================================================================
- */
- F90_FUNC(e2y)(
-#include <fortran_arguments.h>
- );
+    /*
+      Running the Fortran p2y driver
+    ===========================================================================
+    */
+    F90ARG_MACRO(e2y);
 #endif
 #if defined _eph2y
- /* 
-   Running the Fortran eph2y driver 
- ===========================================================================
- */
- F90_FUNC(eph2y)(
-#include <fortran_arguments.h>
- );
+    /*
+      Running the Fortran eph2y driver
+    ===========================================================================
+    */
+    F90ARG_MACRO(eph2y);
 #endif
- /* 
-   Input file edit ?
- ===========================================================================
- */
- if ( y.in_file_N ==1 && *use_editor ==0 ) {*use_editor=1;};
- if ( y.in_file_N ==0 || y.in_file_N ==2 ) {*use_editor=0;};
- /* 
-   Error message
- ===========================================================================
- */
- if ( y.in_file_N < 0 ) 
- {
-  if (pid==0 && y.in_file_N == -1) {
-   fprintf(stderr," \n%s\n\n","yambo: cannot access CORE database (SAVE/*db1 and/or SAVE/*wf)");
-  };
-  if (pid==0 && y.in_file_N == -2) {
-   fprintf(stderr," \n%s\n\n","yambo: invalid command line options and/or build");
-  };
+    /*
+      Input file edit ?
+    ===========================================================================
+    */
+    if (y.in_file_N == 1 && *use_editor == 0)
+    {
+        *use_editor = 1;
+    };
+    if (y.in_file_N == 0 || y.in_file_N == 2)
+    {
+        *use_editor = 0;
+    };
+    /*
+      Error message
+    ===========================================================================
+    */
+    if (y.in_file_N < 0)
+    {
+        if (pid == 0 && y.in_file_N == -1)
+        {
+            fprintf(stderr, " \n%s\n\n",
+                    "yambo: cannot access CORE database (SAVE/*db1 and/or "
+                    "SAVE/*wf)");
+        };
+        if (pid == 0 && y.in_file_N == -2)
+        {
+            fprintf(stderr, " \n%s\n\n",
+                    "yambo: invalid command line options and/or build");
+        };
 #if defined _MPI
-  if (*use_mpi==1) { MPI_Abort(MPI_COMM_WORLD,1); };
-#endif 
- }
+        if (*use_mpi == 1)
+        {
+            MPI_Abort(MPI_COMM_WORLD, 1);
+        };
+#endif
+    }
 };
diff --git a/src/driver/load_environments.c b/src/driver/load_environments.c
index 875999467c..0ac2b48467 100644
--- a/src/driver/load_environments.c
+++ b/src/driver/load_environments.c
@@ -1,69 +1,74 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
+#include <kind.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <kind.h>
 
 int load_environments(char* file_name)
 {
- FILE *fp;
- char str[100];
- char* pch;
- char* token;
- char* var;
- char* value;
- fp = fopen(file_name, "r");
- if (fp) {
-  while(fgets(str, 100, fp)) {
-    pch=strchr(str,'#');
-    if (!pch) {
-      /* get the first token */
-      token=strtok(str," ");
-      /* walk through other tokens */
-      if ( token != NULL ) 
-      {
-        token = strtok(NULL," ");
-        var=token;
-        token = strtok(NULL," ");
-        value=token;
-        /* printf( " %s %s %s \n", var, value, token ); */
-      }
-      setenv(var,value,1);
+    FILE* fp;
+    char str[100];
+    char* pch;
+    char* token;
+    char* var;
+    char* value;
+    fp = fopen(file_name, "r");
+    if (fp)
+    {
+        while (fgets(str, 100, fp))
+        {
+            pch = strchr(str, '#');
+            if (!pch)
+            {
+                /* get the first token */
+                token = strtok(str, " ");
+                /* walk through other tokens */
+                if (token != NULL)
+                {
+                    token = strtok(NULL, " ");
+                    var = token;
+                    token = strtok(NULL, " ");
+                    value = token;
+                    /* printf( " %s %s %s \n", var, value, token ); */
+                }
+                setenv(var, value, 1);
+            }
+        }
+        return 0;
+    }
+    else
+    {
+        fp = fopen(file_name, "w+");
+        fputs("#\n", fp);
+        fputs("# Edit it and use with -E during runtime\n#\n", fp);
+        fputs("# CPU section (just edit, do not remove fields)\n", fp);
+        fputs("setenv YAMBO_X_q_0_CPU 1.1.1.1\n", fp);
+        fputs("setenv YAMBO_X_finite_q_CPU 1.1.1.1.1\n", fp);
+        fputs("setenv YAMBO_X_all_q_CPU 1.1.1.1.1\n", fp);
+        fputs("setenv YAMBO_BS_CPU 1.1.1\n", fp);
+        fputs("setenv YAMBO_SE_CPU 1.1.1\n", fp);
+        fputs("setenv YAMBO_RT_CPU 1.1.1.1\n", fp);
+        fputs("# Scalapack section (leave unchanged if you wish)\n", fp);
+        fputs("setenv YAMBO_X_q_0_nCPU_LinAlg_INV 1\n", fp);
+        fputs("setenv YAMBO_X_finite_q_nCPU_LinAlg_INV 1\n", fp);
+        fputs("setenv YAMBO_X_all_q_nCPU_LinAlg_INV 1\n", fp);
+        fputs("setenv YAMBO_BS_nCPU_LinAlg_INV 1\n", fp);
+        fputs("setenv YAMBO_BS_nCPU_LinAlg_DIAGO 1\n", fp);
+        fputs("# ROLEs section (leave unchanged if you wish)\n", fp);
+        fputs("setenv YAMBO_X_q_0_ROLEs g.k.c.v\n", fp);
+        fputs("setenv YAMBO_X_finite_q_ROLEs q.g.k.c.v\n", fp);
+        fputs("setenv YAMBO_X_all_q_ROLEs q.g.k.c.v\n", fp);
+        fputs("setenv YAMBO_BS_ROLEs k.eh.t\n", fp);
+        fputs("setenv YAMBO_SE_ROLEs q.qp.b\n", fp);
+        fputs("setenv YAMBO_RT_ROLEs k.b.q.qp\n", fp);
+        fclose(fp);
+        return 1;
     }
-  }
-  return 0;
- }else{
-  fp = fopen(file_name, "w+");
-  fputs("#\n",fp);
-  fputs("# Edit it and use with -E during runtime\n#\n",fp);
-  fputs("# CPU section (just edit, do not remove fields)\n",fp);
-  fputs("setenv YAMBO_X_q_0_CPU 1.1.1.1\n",fp);
-  fputs("setenv YAMBO_X_finite_q_CPU 1.1.1.1.1\n",fp);
-  fputs("setenv YAMBO_X_all_q_CPU 1.1.1.1.1\n",fp);
-  fputs("setenv YAMBO_BS_CPU 1.1.1\n",fp);
-  fputs("setenv YAMBO_SE_CPU 1.1.1\n",fp);
-  fputs("setenv YAMBO_RT_CPU 1.1.1.1\n",fp);
-  fputs("# Scalapack section (leave unchanged if you wish)\n",fp);
-  fputs("setenv YAMBO_X_q_0_nCPU_LinAlg_INV 1\n",fp);
-  fputs("setenv YAMBO_X_finite_q_nCPU_LinAlg_INV 1\n",fp);
-  fputs("setenv YAMBO_X_all_q_nCPU_LinAlg_INV 1\n",fp);
-  fputs("setenv YAMBO_BS_nCPU_LinAlg_INV 1\n",fp);
-  fputs("setenv YAMBO_BS_nCPU_LinAlg_DIAGO 1\n",fp);
-  fputs("# ROLEs section (leave unchanged if you wish)\n",fp);
-  fputs("setenv YAMBO_X_q_0_ROLEs g.k.c.v\n",fp);
-  fputs("setenv YAMBO_X_finite_q_ROLEs q.g.k.c.v\n",fp);
-  fputs("setenv YAMBO_X_all_q_ROLEs q.g.k.c.v\n",fp);
-  fputs("setenv YAMBO_BS_ROLEs k.eh.t\n",fp);
-  fputs("setenv YAMBO_SE_ROLEs q.qp.b\n",fp);
-  fputs("setenv YAMBO_RT_ROLEs k.b.q.qp\n",fp);
-  fclose(fp);
-  return 1;
- }
 };
diff --git a/src/driver/mod_C_driver.F b/src/driver/mod_C_driver.F
index 33a039e78a..64ff428a21 100644
--- a/src/driver/mod_C_driver.F
+++ b/src/driver/mod_C_driver.F
@@ -18,10 +18,12 @@ module C_driver
  character(50):: code_hash
  character(50):: code_libraries
  character(50):: code_branch
+ character(50):: lumen_version
  !
  interface  
    !
-   subroutine get_version(version,subversion,patchlevel,revision,hash)
+   subroutine get_version(lumenver,version,subversion,patchlevel,revision,hash)
+     character(*), intent(out):: lumenver
      integer     , intent(out):: version
      integer     , intent(out):: subversion
      integer     , intent(out):: patchlevel
diff --git a/src/driver/options_control.c b/src/driver/options_control.c
index 8f0982eb18..07fd05ace9 100644
--- a/src/driver/options_control.c
+++ b/src/driver/options_control.c
@@ -1,153 +1,157 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
-#include <string.h>
-#include <stdio.h>
 #include <kind.h>
+#include <stdio.h>
+#include <string.h>
 
-void options_control(struct options_struct options[],int *i_opt)
-{ 
- char *desc;
- /* 
-  Input file 
- */
- desc="Input file & Directories";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Input file";
+void options_control(struct options_struct *options, int *i_opt)
+{
+    char *desc;
+    /*
+     Input file
+    */
+    desc = "Input file & Directories";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Input file";
 #if defined _a2y || defined _e2y
- options[*i_opt].short_desc="KSS or WFK file";
+    options[*i_opt].short_desc = "KSS or WFK file";
 #endif
- options[*i_opt].short_opt='F';
- options[*i_opt].long_opt="Input"; 
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- options[*i_opt].bin="yambo ypp a2y e2y";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Input file variables verbosity";
- strcpy(options[*i_opt].long_desc[0],"<string> can be RL,kpt,sc,qp,io,gen,resp/X,ph,rt,par,nl,all");
- options[*i_opt].short_opt='V';
- options[*i_opt].long_opt="Verbosity"; 
- options[*i_opt].char_var=1;
- options[*i_opt].serial_var=1;
- options[*i_opt].yambo_string="infver";
- options[*i_opt].bin="yambo ypp";
- options[*i_opt].section=desc;
- /* 
-  Utils
- */
- desc="Utilites";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Quiet input file creation";
- options[*i_opt].short_opt='Q';
- options[*i_opt].long_opt="Quiet"; 
- options[*i_opt].serial_var=1;
- options[*i_opt].bin="yambo ypp";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Verbose (fatter) log(s)";
- options[*i_opt].long_opt="fatlog";
- options[*i_opt].yambo_string="fatlog";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Assume experienced user";
- options[*i_opt].long_opt="expuser";
- options[*i_opt].yambo_string="expuser";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Databases properties";
- options[*i_opt].short_opt='D';
- options[*i_opt].long_opt="DBlist";
- options[*i_opt].serial_var=1;
- options[*i_opt].yambo_string="dbpr";
- options[*i_opt].bin="yambo";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Walltime";
- strcpy(options[*i_opt].long_desc[0],"Format is DdHhMm with D=days, H=hours and M=minutes");
- options[*i_opt].long_opt="walltime";
- options[*i_opt].int_var=1;
- options[*i_opt].yambo_string="wallt";
- options[*i_opt].bin="yambo";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Memory";
- strcpy(options[*i_opt].long_desc[0],"The value is per processor. The value can be provided in Mb/Gb. E.g. 1Gb");
- options[*i_opt].long_opt="memory";
- options[*i_opt].int_var=1;
- options[*i_opt].yambo_string="memorylimit";
- options[*i_opt].bin="yambo";
- options[*i_opt].section=desc;
+    options[*i_opt].short_opt = 'F';
+    options[*i_opt].long_opt = "Input";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    options[*i_opt].bin = "yambo ypp a2y e2y";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Input file variables verbosity";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string> can be RL,kpt,sc,qp,io,gen,resp/X,ph,rt,par,nl,all");
+    options[*i_opt].short_opt = 'V';
+    options[*i_opt].long_opt = "Verbosity";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].yambo_string = "infver";
+    options[*i_opt].bin = "yambo ypp";
+    options[*i_opt].section = desc;
+    /*
+     Utils
+    */
+    desc = "Utilites";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Quiet input file creation";
+    options[*i_opt].short_opt = 'Q';
+    options[*i_opt].long_opt = "Quiet";
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].bin = "yambo ypp";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Verbose (fatter) log(s)";
+    options[*i_opt].long_opt = "fatlog";
+    options[*i_opt].yambo_string = "fatlog";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Assume experienced user";
+    options[*i_opt].long_opt = "expuser";
+    options[*i_opt].yambo_string = "expuser";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Databases properties";
+    options[*i_opt].short_opt = 'D';
+    options[*i_opt].long_opt = "DBlist";
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].yambo_string = "dbpr";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Walltime";
+    strcpy(options[*i_opt].long_desc[0],
+           "Format is DdHhMm with D=days, H=hours and M=minutes");
+    options[*i_opt].long_opt = "walltime";
+    options[*i_opt].int_var = 1;
+    options[*i_opt].yambo_string = "wallt";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Memory";
+    strcpy(options[*i_opt].long_desc[0],
+           "The value is per processor. The value can be provided in Mb/Gb. "
+           "E.g. 1Gb");
+    options[*i_opt].long_opt = "memory";
+    options[*i_opt].int_var = 1;
+    options[*i_opt].yambo_string = "memorylimit";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].section = desc;
 #if defined _SCALAPACK
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="ScaLapacK test";
- options[*i_opt].long_opt="slktest";
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="slktest";
- options[*i_opt].section=desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "ScaLapacK test";
+    options[*i_opt].long_opt = "slktest";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "slktest";
+    options[*i_opt].section = desc;
 #endif
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="GPU test";
- options[*i_opt].long_opt="gputest";
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="gputest";
- options[*i_opt].section=desc;
- /* 
-  Job control 
- */
- desc="Input file & Directories";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Job string";
- options[*i_opt].short_opt='J';
- options[*i_opt].long_opt="Job"; 
- options[*i_opt].char_var=1;
- options[*i_opt].yambo_string="jobstr";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Input directory";
- options[*i_opt].short_opt='I';
- options[*i_opt].long_opt="Idir"; 
- options[*i_opt].char_var=1;
- options[*i_opt].bin="yambo ypp p2y";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="I/O directory";
- options[*i_opt].short_opt='O';
- options[*i_opt].long_opt="Odir";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Communication directory";
- options[*i_opt].short_opt='C';
- options[*i_opt].long_opt="Cdir";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- /* 
-  Parallel
- */
- desc="Parallel Control";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Environment Parallel Variables file";
- options[*i_opt].short_opt='E';
- options[*i_opt].long_opt="parenv"; 
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "GPU test";
+    options[*i_opt].long_opt = "gputest";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "gputest";
+    options[*i_opt].section = desc;
+    /*
+     Job control
+    */
+    desc = "Input file & Directories";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Job string";
+    options[*i_opt].short_opt = 'J';
+    options[*i_opt].long_opt = "Job";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].yambo_string = "jobstr";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Input directory";
+    options[*i_opt].short_opt = 'I';
+    options[*i_opt].long_opt = "Idir";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].bin = "yambo ypp p2y";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "I/O directory";
+    options[*i_opt].short_opt = 'O';
+    options[*i_opt].long_opt = "Odir";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Communication directory";
+    options[*i_opt].short_opt = 'C';
+    options[*i_opt].long_opt = "Cdir";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    /*
+     Parallel
+    */
+    desc = "Parallel Control";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Environment Parallel Variables file";
+    options[*i_opt].short_opt = 'E';
+    options[*i_opt].long_opt = "parenv";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
 #if defined _MPI
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Switch off MPI support";
- options[*i_opt].long_opt="nompi";
- options[*i_opt].serial_var=1;
- options[*i_opt].section=desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Switch off MPI support";
+    options[*i_opt].long_opt = "nompi";
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].section = desc;
 #endif
 #if defined _OPENMP
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Switch off OPENMP support";
- options[*i_opt].long_opt="noopenmp";
- options[*i_opt].yambo_string="noopenmp";
- options[*i_opt].section=desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Switch off OPENMP support";
+    options[*i_opt].long_opt = "noopenmp";
+    options[*i_opt].yambo_string = "noopenmp";
+    options[*i_opt].section = desc;
 #endif
 };
diff --git a/src/driver/options_help.c b/src/driver/options_help.c
index 37a8024f6b..b8de8a57b7 100644
--- a/src/driver/options_help.c
+++ b/src/driver/options_help.c
@@ -1,33 +1,32 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
-#include <stdio.h>
 #include <kind.h>
+#include <stdio.h>
 #include <string.h>
 
-void options_help(struct options_struct options[],int *i_opt)
+void options_help(struct options_struct *options, int *i_opt)
 {
- char *desc="Help & version";
- /*
-  Help(s) 
- */
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="<string> can be an option (e.g. -h optics)";
- options[*i_opt].short_opt='h';
- options[*i_opt].long_opt="help";
- options[*i_opt].serial_var=1;
- options[*i_opt].optional_var=1;
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Code version & libraries";
- options[*i_opt].long_opt="version"; 
- options[*i_opt].serial_var=1;
- options[*i_opt].section=desc;
-
+    char *desc = "Help & version";
+    /*
+     Help(s)
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "<string> can be an option (e.g. -h optics)";
+    options[*i_opt].short_opt = 'h';
+    options[*i_opt].long_opt = "help";
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].optional_var = 1;
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Code version & libraries";
+    options[*i_opt].long_opt = "version";
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].section = desc;
 };
diff --git a/src/driver/options_interfaces.c b/src/driver/options_interfaces.c
index 2782a0c517..a881bb42ac 100644
--- a/src/driver/options_interfaces.c
+++ b/src/driver/options_interfaces.c
@@ -1,73 +1,76 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
-#include <stdio.h>
 #include <kind.h>
+#include <stdio.h>
 
-void options_interfaces(struct options_struct options[],int *i_opt)
+void options_interfaces(struct options_struct *options, int *i_opt)
 {
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Do not fragment the DataBases (only for serial runs)";
- options[*i_opt].long_opt="nofrag";
- options[*i_opt].short_opt='U';
- options[*i_opt].bin="p2y a2y";
- options[*i_opt].yambo_string="nodbfr";
- options[*i_opt].serial_var=1;
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Number of bands for each fragment";
- options[*i_opt].long_opt=  "fragbands";
- options[*i_opt].short_opt='b';
- options[*i_opt].bin="p2y a2y c2y";
- options[*i_opt].yambo_string="fragnb";
- options[*i_opt].int_var=1;
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Lattice constants rescaling factor";
- options[*i_opt].long_opt=  "alat_factor";
- options[*i_opt].short_opt='a';
- options[*i_opt].float_var=1;
- options[*i_opt].bin="p2y a2y c2y";
- options[*i_opt].yambo_string="alat_f";
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Force use of spatial Inv. instead of Time Rev.";
- options[*i_opt].long_opt="notr";
- options[*i_opt].short_opt='t';
- options[*i_opt].bin="p2y a2y c2y";
- options[*i_opt].yambo_string="notr";
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Force no symmetries";
- options[*i_opt].long_opt=  "nosym";
- options[*i_opt].short_opt='n';
- options[*i_opt].bin="p2y a2y c2y";
- options[*i_opt].yambo_string="nosy";
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Force no wavefunctions";
- options[*i_opt].long_opt="nowf";
- options[*i_opt].short_opt='w';
- options[*i_opt].bin="p2y a2y c2y";
- options[*i_opt].yambo_string="nowf";
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Verbose wfc I/O reporting";
- options[*i_opt].long_opt="verbio";
- options[*i_opt].short_opt='v';
- options[*i_opt].bin="p2y";
- options[*i_opt].yambo_string="verb";
- options[*i_opt].section="Interface";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="States duplication (artificial spin polarization)";
- options[*i_opt].long_opt="duplicate";
- options[*i_opt].short_opt='d';
- options[*i_opt].bin="a2y c2y";
- options[*i_opt].yambo_string="dupl";
- options[*i_opt].section="Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc =
+        "Do not fragment the DataBases (only for serial runs)";
+    options[*i_opt].long_opt = "nofrag";
+    options[*i_opt].short_opt = 'U';
+    options[*i_opt].bin = "p2y a2y";
+    options[*i_opt].yambo_string = "nodbfr";
+    options[*i_opt].serial_var = 1;
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Number of bands for each fragment";
+    options[*i_opt].long_opt = "fragbands";
+    options[*i_opt].short_opt = 'b';
+    options[*i_opt].bin = "p2y a2y c2y";
+    options[*i_opt].yambo_string = "fragnb";
+    options[*i_opt].int_var = 1;
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Lattice constants rescaling factor";
+    options[*i_opt].long_opt = "alat_factor";
+    options[*i_opt].short_opt = 'a';
+    options[*i_opt].float_var = 1;
+    options[*i_opt].bin = "p2y a2y c2y";
+    options[*i_opt].yambo_string = "alat_f";
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc =
+        "Force use of spatial Inv. instead of Time Rev.";
+    options[*i_opt].long_opt = "notr";
+    options[*i_opt].short_opt = 't';
+    options[*i_opt].bin = "p2y a2y c2y";
+    options[*i_opt].yambo_string = "notr";
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Force no symmetries";
+    options[*i_opt].long_opt = "nosym";
+    options[*i_opt].short_opt = 'n';
+    options[*i_opt].bin = "p2y a2y c2y";
+    options[*i_opt].yambo_string = "nosy";
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Force no wavefunctions";
+    options[*i_opt].long_opt = "nowf";
+    options[*i_opt].short_opt = 'w';
+    options[*i_opt].bin = "p2y a2y c2y";
+    options[*i_opt].yambo_string = "nowf";
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Verbose wfc I/O reporting";
+    options[*i_opt].long_opt = "verbio";
+    options[*i_opt].short_opt = 'v';
+    options[*i_opt].bin = "p2y";
+    options[*i_opt].yambo_string = "verb";
+    options[*i_opt].section = "Interface";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc =
+        "States duplication (artificial spin polarization)";
+    options[*i_opt].long_opt = "duplicate";
+    options[*i_opt].short_opt = 'd';
+    options[*i_opt].bin = "a2y c2y";
+    options[*i_opt].yambo_string = "dupl";
+    options[*i_opt].section = "Interface";
 };
diff --git a/src/driver/options_maker.c b/src/driver/options_maker.c
index b6979f25aa..0e70742644 100644
--- a/src/driver/options_maker.c
+++ b/src/driver/options_maker.c
@@ -1,109 +1,139 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM, AC
 
 */
-#include <stdio.h>
 #include <kind.h>
-#if defined _yambo || defined _ypp || defined _a2y || defined _p2y || defined _c2y || defined _e2y 
- #include <yambo_driver.h>
+#include <stdio.h>
+#if defined _yambo || defined _ypp || defined _a2y || defined _p2y || \
+    defined _c2y || defined _e2y
+#include <yambo_driver.h>
 #endif
-#include <string.h>
 #include <driver.h>
 #include <stdlib.h>
+#include <string.h>
 
-void options_maker(struct options_struct options[], int n_options)
+void options_maker(struct options_struct *options, int n_options)
 {
- int i_opt,i,j;
- int max_long_desc=20,non_used_short_opt[200],found;
+    int i_opt, i, j;
+    int max_long_desc = 20, non_used_short_opt[200], found;
 
- for(i_opt=0;i_opt<n_options;i_opt++) {
-  options[i_opt].long_opt=NULL;
-  options[i_opt].short_opt=0;
-  options[i_opt].short_desc=NULL;
-  for(i=0;i<max_long_desc;i++) strcpy(options[i_opt].long_desc[i],"undef");
-  options[i_opt].yambo_string="undef";
-  options[i_opt].bin="all";
-  options[i_opt].no_bin="none";
-  options[i_opt].int_var=0;
-  options[i_opt].float_var=0;
-  options[i_opt].char_var=0;
-  options[i_opt].serial_var=0;
-  options[i_opt].optional_var=0;
-  options[i_opt].section="undef";
- }
- i_opt=-1;
- /* 
-  Help(s) 
- */
- options_help(options,&i_opt);
- /* 
-  Control(s)
- */
-#if defined _yambo || defined _ypp || defined _a2y || defined _p2y || defined _c2y || defined _e2y 
- options_control(options,&i_opt);
+    for (i_opt = 0; i_opt < n_options; i_opt++)
+    {
+        options[i_opt].long_opt = NULL;
+        options[i_opt].short_opt = 0;
+        options[i_opt].short_desc = NULL;
+        for (i = 0; i < max_long_desc; i++)
+        {
+            strcpy(options[i_opt].long_desc[i], "undef");
+        }
+        options[i_opt].yambo_string = "undef";
+        options[i_opt].bin = "all";
+        options[i_opt].no_bin = "none";
+        options[i_opt].int_var = 0;
+        options[i_opt].float_var = 0;
+        options[i_opt].char_var = 0;
+        options[i_opt].serial_var = 0;
+        options[i_opt].optional_var = 0;
+        options[i_opt].section = "undef";
+    }
+    i_opt = -1;
+    /*
+     Help(s)
+    */
+    options_help(options, &i_opt);
+    /*
+     Control(s)
+    */
+#if defined _yambo || defined _ypp || defined _a2y || defined _p2y || \
+    defined _c2y || defined _e2y
+    options_control(options, &i_opt);
 #endif
 
-#if defined _yambo 
- /* 
-  Yambo
- */
- options_yambo(options,&i_opt);
- /* 
-  Projects
- */
- options_projects(options,&i_opt);
+#if defined _yambo
+    /*
+     Yambo
+    */
+    options_yambo(options, &i_opt);
+    /*
+     Projects
+    */
+    options_projects(options, &i_opt);
 #endif
 
-#if defined _ypp 
- /* 
-  Ypp
- */
- options_ypp(options,&i_opt);
+#if defined _ypp
+    /*
+     Ypp
+    */
+    options_ypp(options, &i_opt);
 #endif
 
-#if defined _a2y || defined _p2y || defined _c2y || defined _e2y 
- /* 
-  Interfaces
- */
- options_interfaces(options,&i_opt);
+#if defined _a2y || defined _p2y || defined _c2y || defined _e2y
+    /*
+     Interfaces
+    */
+    options_interfaces(options, &i_opt);
 #endif
- /* 
-  Find all short_opt alphanumerical variables not used
- */
- j=-1;
- for(i=1;i<=127;i++){
-  non_used_short_opt[i]=0;
-  if (i<=47) {continue;};
-  if (i>=58 && i<=64) {continue;};
-  if (i>=91 && i<=96) {continue;};
-  if (i>=123) {continue;};
-  found=-1;
-  for(i_opt=0;i_opt<n_options;i_opt++){
-   if (options[i_opt].short_opt == i ) { 
-    found=1; 
-    break; 
-   }
-  }
-  if (found < 0) { 
-   j++;
-   non_used_short_opt[j]=i;
-  } 
- }
-/*
- Assign those unused variables to the options without short descriptions
-*/
- j=-1;
- for(i_opt=0;i_opt<n_options;i_opt++){
-  if (options[i_opt].short_opt > 0 ) continue;
-  if (options[i_opt].short_desc==NULL) break;
-  j++;
-  options[i_opt].short_opt=non_used_short_opt[j];
-  /* DEBUG 
-  printf("Short descripton %c assigned to long var %s\n",options[i_opt].short_opt,options[i_opt].long_opt);
-  */
- } 
+    /*
+     Find all short_opt alphanumerical variables not used
+    */
+    j = -1;
+    for (i = 1; i <= 127; i++)
+    {
+        non_used_short_opt[i] = 0;
+        if (i <= 47)
+        {
+            continue;
+        };
+        if (i >= 58 && i <= 64)
+        {
+            continue;
+        };
+        if (i >= 91 && i <= 96)
+        {
+            continue;
+        };
+        if (i >= 123)
+        {
+            continue;
+        };
+        found = -1;
+        for (i_opt = 0; i_opt < n_options; i_opt++)
+        {
+            if (options[i_opt].short_opt == i)
+            {
+                found = 1;
+                break;
+            }
+        }
+        if (found < 0)
+        {
+            j++;
+            non_used_short_opt[j] = i;
+        }
+    }
+    /*
+     Assign those unused variables to the options without short descriptions
+    */
+    j = -1;
+    for (i_opt = 0; i_opt < n_options; i_opt++)
+    {
+        if (options[i_opt].short_opt > 0)
+        {
+            continue;
+        }
+        if (options[i_opt].short_desc == NULL)
+        {
+            break;
+        }
+        j++;
+        options[i_opt].short_opt = non_used_short_opt[j];
+        /* DEBUG
+        printf("Short descripton %c assigned to long var
+        %s\n",options[i_opt].short_opt,options[i_opt].long_opt);
+        */
+    }
 }
diff --git a/src/driver/options_projects.c b/src/driver/options_projects.c
index 4f7d9565fd..6f707d9109 100644
--- a/src/driver/options_projects.c
+++ b/src/driver/options_projects.c
@@ -1,152 +1,178 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
-#include <string.h>
-#include <stdio.h>
 #include <kind.h>
+#include <stdio.h>
+#include <string.h>
 
-void options_projects(struct options_struct options[],int *i_opt)
+void options_projects(struct options_struct *options, int *i_opt)
 {
- char *desc;
- int i_desc=0;
- desc="Hamiltonians & Potentials";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Self-Consistent Single-Particle Calculations";
- options[*i_opt].long_opt="sc";
- options[*i_opt].short_opt='s';
- options[*i_opt].bin="yambo_sc";
- options[*i_opt].yambo_string="scrun";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Self-Consistent Potential";
- strcpy(options[*i_opt].long_desc[0],"Hartree => <string>=h");
- strcpy(options[*i_opt].long_desc[1],"Fock    => <string>=f");
- strcpy(options[*i_opt].long_desc[2],"Coh     => <string>=coh");
- strcpy(options[*i_opt].long_desc[3],"Sex     => <string>=sex");
- strcpy(options[*i_opt].long_desc[4],"exx     => <string>=exx");
- strcpy(options[*i_opt].long_desc[5],"exxc    => <string>=exxc");
- strcpy(options[*i_opt].long_desc[6],"srpa    => <string>=srpa");
- strcpy(options[*i_opt].long_desc[7],"default => <string>=d");
- strcpy(options[*i_opt].long_desc[8],"IP      => <string>=ip");
- strcpy(options[*i_opt].long_desc[9],"LDA_X   => <string>=ldax");
- strcpy(options[*i_opt].long_desc[10],"PZ      => <string>=pz");
- strcpy(options[*i_opt].long_desc[11],"GS      => <string>=gs");
- strcpy(options[*i_opt].long_desc[12],"CVONLY  => <string>=cvonly (compute only cv collisions)");
- strcpy(options[*i_opt].long_desc[13]," ");
- strcpy(options[*i_opt].long_desc[14],"Potentials can be combined. Example: use hf for Hartree-Fock");
- options[*i_opt].long_opt="potential";
- options[*i_opt].short_opt='v';
- options[*i_opt].bin="yambo_sc yambo_rt yambo_nl";
- options[*i_opt].yambo_string="potential";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Self-Consistent Magnetic Calculations";
- options[*i_opt].long_opt="magnetic";
- options[*i_opt].bin="yambo_sc";
- options[*i_opt].yambo_string="magnetic";
- strcpy(options[*i_opt].long_desc[0],"<string>=(p)auli/(l)andau");
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Self-Consistent Static Electric Field Calculations";
- options[*i_opt].long_opt="electric";
- options[*i_opt].bin="yambo_sc";
- options[*i_opt].yambo_string="electric";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Collisions";
- options[*i_opt].long_opt="collisions";
- options[*i_opt].short_opt='e';
- options[*i_opt].bin="yambo_rt yambo_sc yambo_nl";
- options[*i_opt].yambo_string="collisions";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Electron-Phonon Hamiltonian";
- options[*i_opt].long_opt="epham";
- options[*i_opt].bin="yambo_ph";
- options[*i_opt].yambo_string="ElPhHam";
- options[*i_opt].section=desc;
+    char *desc;
+    int i_desc = 0;
+    desc = "Hamiltonians & Potentials";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Self-Consistent Single-Particle Calculations";
+    options[*i_opt].long_opt = "sc";
+    options[*i_opt].short_opt = 's';
+    options[*i_opt].bin = "yambo_sc";
+    options[*i_opt].yambo_string = "scrun";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Self-Consistent Potential";
+    strcpy(options[*i_opt].long_desc[0], "Hartree => <string>=h");
+    strcpy(options[*i_opt].long_desc[1], "Fock    => <string>=f");
+    strcpy(options[*i_opt].long_desc[2], "Coh     => <string>=coh");
+    strcpy(options[*i_opt].long_desc[3], "Sex     => <string>=sex");
+    strcpy(options[*i_opt].long_desc[4], "exx     => <string>=exx");
+    strcpy(options[*i_opt].long_desc[5], "exxc    => <string>=exxc");
+    strcpy(options[*i_opt].long_desc[6], "srpa    => <string>=srpa");
+    strcpy(options[*i_opt].long_desc[7], "default => <string>=d");
+    strcpy(options[*i_opt].long_desc[8], "IP      => <string>=ip");
+    strcpy(options[*i_opt].long_desc[9], "LDA_X   => <string>=ldax");
+    strcpy(options[*i_opt].long_desc[10], "PZ      => <string>=pz");
+    strcpy(options[*i_opt].long_desc[11], "GS      => <string>=gs");
+    strcpy(options[*i_opt].long_desc[12],
+           "CVONLY  => <string>=cvonly (compute only cv collisions)");
+    strcpy(options[*i_opt].long_desc[13],
+           "KBSE    => <string>=kbse   (compute collisions from bse)");
+    strcpy(options[*i_opt].long_desc[14], " ");
+    strcpy(options[*i_opt].long_desc[15],
+           "Potentials can be combined. Example: use hf for Hartree-Fock");
+    options[*i_opt].long_opt = "potential";
+    options[*i_opt].short_opt = 'v';
+    options[*i_opt].bin = "yambo_sc yambo_rt yambo_nl";
+    options[*i_opt].yambo_string = "potential";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Self-Consistent Magnetic Calculations";
+    options[*i_opt].long_opt = "magnetic";
+    options[*i_opt].bin = "yambo_sc";
+    options[*i_opt].yambo_string = "magnetic";
+    strcpy(options[*i_opt].long_desc[0], "<string>=(p)auli/(l)andau");
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc =
+        "Self-Consistent Static Electric Field Calculations";
+    options[*i_opt].long_opt = "electric";
+    options[*i_opt].bin = "yambo_sc";
+    options[*i_opt].yambo_string = "electric";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Collisions";
+    options[*i_opt].long_opt = "collisions";
+    options[*i_opt].short_opt = 'e';
+    options[*i_opt].bin = "yambo_rt yambo_sc yambo_nl";
+    options[*i_opt].yambo_string = "collisions";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Electron-Phonon Hamiltonian";
+    options[*i_opt].long_opt = "epham";
+    options[*i_opt].bin = "yambo_ph";
+    options[*i_opt].yambo_string = "ElPhHam";
+    options[*i_opt].section = desc;
+    *i_opt=*i_opt+1;
+    options[*i_opt].short_desc="Exciton-phonon";
+    strcpy(options[*i_opt].long_desc[0],"<string>=(o)ptical spectra,(l)ife times");
+    options[*i_opt].long_opt="excph";
+    options[*i_opt].char_var=1;
+    options[*i_opt].short_opt='m';
+    options[*i_opt].bin="yambo_ph";
+    options[*i_opt].yambo_string="excph";
+    options[*i_opt].section=desc;
 
- desc="Real-Time";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="NEQ Real-time dynamics";
- strcpy(options[*i_opt].long_desc[0],"<string>=(p)ump or probe,(pp)ump & probe, (pn) n external fields");
- options[*i_opt].char_var=1;
- options[*i_opt].long_opt="rt";
- options[*i_opt].short_opt='n';
- options[*i_opt].bin="yambo_rt";
- options[*i_opt].yambo_string="negf";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Non-linear spectroscopy";
- strcpy(options[*i_opt].long_desc[0],"<string>=(p)ump or probe,(n) non-linear optics");
- options[*i_opt].long_opt="nl";
- options[*i_opt].char_var=1;
- options[*i_opt].short_opt='u';
- options[*i_opt].bin="yambo_nl";
- options[*i_opt].yambo_string="nloptics";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="NEQ scattering kind";
- i_desc=0;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(ee):electron-electron interaction");
-#if defined _QED 
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(eh):electron-photon interaction");
+    desc = "Real-Time";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "NEQ Real-time dynamics";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(p)ump or probe,(pp)ump & probe, (pn) n external fields");
+    options[*i_opt].char_var = 1;
+    options[*i_opt].long_opt = "rt";
+    options[*i_opt].short_opt = 'n';
+    options[*i_opt].bin = "yambo_rt";
+    options[*i_opt].yambo_string = "negf";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Non-linear spectroscopy";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(p)ump or probe,(n) non-linear optics");
+    options[*i_opt].long_opt = "nl";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].short_opt = 'u';
+    options[*i_opt].bin = "yambo_nl";
+    options[*i_opt].yambo_string = "nloptics";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "NEQ scattering kind";
+    i_desc = 0;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(ee):electron-electron interaction");
+#if defined _QED
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(eh):electron-photon interaction");
 #endif
-#if defined _ELPH 
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(ep):electron-phonon interaction");
+#if defined _ELPH
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(ep):electron-phonon interaction");
 #endif
-#if defined _ELPH 
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(pe):phonon-electron interaction");
+#if defined _ELPH
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(pe):phonon-electron interaction");
 #endif
 #if defined _PHEL || defined _PHEL
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc]," ");
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"Use -scattering ee+ep/ee+pe to activate more than one kind simultaneously");
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc], " ");
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "Use -scattering ee+ep/ee+pe to activate more than one kind "
+           "simultaneously");
 #endif
- options[*i_opt].long_opt="scattering";
- options[*i_opt].short_opt='s';
- options[*i_opt].bin="yambo_rt";
- options[*i_opt].yambo_string="scattp";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Correlation kind";
- i_desc=0;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(ee):electron-electron interaction");
-#if defined _QED 
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(eh):electron-photon interaction");
+    options[*i_opt].long_opt = "scattering";
+    options[*i_opt].short_opt = 's';
+    options[*i_opt].bin = "yambo_rt";
+    options[*i_opt].yambo_string = "scattp";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Correlation kind";
+    i_desc = 0;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(ee):electron-electron interaction");
+#if defined _QED
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(eh):electron-photon interaction");
 #endif
-#if defined _ELPH 
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(ep):electron-phonon interaction");
+#if defined _ELPH
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(ep):electron-phonon interaction");
 #endif
-#if defined _ELPH 
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(pe):phonon-electron interaction");
+#if defined _ELPH
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(pe):phonon-electron interaction");
 #endif
- options[*i_opt].long_opt="correlation";
- options[*i_opt].short_opt='c';
- options[*i_opt].bin="yambo_ph yambo_qed";
- options[*i_opt].yambo_string="corrtp";
- options[*i_opt].section="Self-Energy";
- options[*i_opt].char_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Photo-Luminescence";
- options[*i_opt].long_opt="pl";
- options[*i_opt].short_opt='u';
- options[*i_opt].bin="yambo_pl";
- options[*i_opt].yambo_string="photolum";
- options[*i_opt].section=desc;
+    options[*i_opt].long_opt = "correlation";
+    options[*i_opt].short_opt = 'c';
+    options[*i_opt].bin = "yambo_ph yambo_qed";
+    options[*i_opt].yambo_string = "corrtp";
+    options[*i_opt].section = "Self-Energy";
+    options[*i_opt].char_var = 1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Photo-Luminescence";
+    options[*i_opt].long_opt = "pl";
+    options[*i_opt].short_opt = 'u';
+    options[*i_opt].bin = "yambo_pl";
+    options[*i_opt].yambo_string = "photolum";
+    options[*i_opt].section = desc;
 };
diff --git a/src/driver/options_yambo.c b/src/driver/options_yambo.c
index 12058d9e3d..d3523c797c 100644
--- a/src/driver/options_yambo.c
+++ b/src/driver/options_yambo.c
@@ -1,154 +1,163 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
-#include <string.h>
-#include <stdio.h>
 #include <kind.h>
+#include <stdio.h>
+#include <string.h>
 
-void options_yambo(struct options_struct options[],int *i_opt)
-{ 
- char *desc;
- int i_desc=0;
- desc="Self-Energy";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Hartree-Fock";
- options[*i_opt].long_opt="hf";
- options[*i_opt].short_opt='x';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="HF_and_locXC";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="GW approximation";
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=(p)PA/(m)PA/(c)HOSEX/(r)eal-axis");
+void options_yambo(struct options_struct *options, int *i_opt)
+{
+    char *desc;
+    int i_desc = 0;
+    desc = "Self-Energy";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Hartree-Fock";
+    options[*i_opt].long_opt = "hf";
+    options[*i_opt].short_opt = 'x';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "HF_and_locXC";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "GW approximation";
+    strcpy(options[*i_opt].long_desc[i_desc],
+           "<string>=(p)PA/(m)PA/(c)HOSEX/(r)eal-axis");
 #if defined _ELPH
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=fan");
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc], "<string>=fan");
 #endif
 #if defined _ELPH
- i_desc=i_desc+1;
- strcpy(options[*i_opt].long_desc[i_desc],"<string>=X");
+    i_desc = i_desc + 1;
+    strcpy(options[*i_opt].long_desc[i_desc], "<string>=X");
 #endif
- options[*i_opt].long_opt="gw0";
- options[*i_opt].short_opt='p';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="gw0";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Dyson Equation solver";
- strcpy(options[*i_opt].long_desc[0],"<string>=(g)reen [any scattering]");
- strcpy(options[*i_opt].long_desc[1],"<string>=(n)ewton [order 1]/(s)ecant [e-e scattering]");
+    options[*i_opt].long_opt = "gw0";
+    options[*i_opt].short_opt = 'p';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "gw0";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Dyson Equation solver";
+    strcpy(options[*i_opt].long_desc[0], "<string>=(g)reen [any scattering]");
+    strcpy(options[*i_opt].long_desc[1],
+           "<string>=(n)ewton [order 1]/(s)ecant [e-e scattering]");
 #if defined _PHEL
- strcpy(options[*i_opt].long_desc[2],"<string>=(n)ewton [order 2] [p-e scattering]");
+    strcpy(options[*i_opt].long_desc[2],
+           "<string>=(n)ewton [order 2] [p-e scattering]");
 #endif
- options[*i_opt].long_opt="dyson";
- options[*i_opt].short_opt='g';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="dyson";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="GoWo Quasiparticle lifetimes";
- options[*i_opt].long_opt="lifetimes";
- options[*i_opt].short_opt='l';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="life";
- options[*i_opt].section=desc;
+    options[*i_opt].long_opt = "dyson";
+    options[*i_opt].short_opt = 'g';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "dyson";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "GoWo Quasiparticle lifetimes";
+    options[*i_opt].long_opt = "lifetimes";
+    options[*i_opt].short_opt = 'l';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "life";
+    options[*i_opt].section = desc;
 
- desc="Initializations";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Initialization";
- options[*i_opt].short_opt='i';
- options[*i_opt].long_opt="setup";
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="setup";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Coulomb potential";
- options[*i_opt].long_opt="coulomb";
- options[*i_opt].short_opt='r';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="rim_cut";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Screened coulomb potential";
- options[*i_opt].long_opt="rw";
- options[*i_opt].short_opt='w';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="rim_w";
- options[*i_opt].section=desc;
+    desc = "Initializations";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Initialization";
+    options[*i_opt].short_opt = 'i';
+    options[*i_opt].long_opt = "setup";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "setup";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Coulomb potential";
+    options[*i_opt].long_opt = "coulomb";
+    options[*i_opt].short_opt = 'r';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "rim_cut";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Screened coulomb potential";
+    options[*i_opt].long_opt = "rw";
+    options[*i_opt].short_opt = 'w';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "rim_w";
+    options[*i_opt].section = desc;
 
+    desc = "Response Functions";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Linear Response optical properties";
+    strcpy(options[*i_opt].long_desc[0], "<string>=c Reciprocal-Space");
+    strcpy(options[*i_opt].long_desc[1],
+           "<string>=b for Transition-Space Bethe-Salpeter");
+    options[*i_opt].long_opt = "optics";
+    options[*i_opt].short_opt = 'o';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "optics";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Inverse Dielectric/Response Matrix";
+    options[*i_opt].long_opt = "X";
+    options[*i_opt].short_opt = 'd';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "screen";
+    options[*i_opt].section = desc;
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(s)static/(p)PA/m(PA)/(d)ynamical dielectric matrix");
+    strcpy(options[*i_opt].long_desc[1],
+           "<string>=(X) dynamical response matrix");
+    options[*i_opt].char_var = 1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Oscillator strenghts (or dipoles)";
+    options[*i_opt].long_opt = "dipoles";
+    options[*i_opt].short_opt = 'q';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "dipoles";
+    options[*i_opt].section = desc;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Kernel";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=hartree/alda/lrc/hf/sex/bsfxc/ip");
+    strcpy(options[*i_opt].long_desc[1],
+           "hf/sex only eh-space; lrc only G-space");
+    options[*i_opt].long_opt = "kernel";
+    options[*i_opt].short_opt = 'k';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "kernel";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
 
- desc="Response Functions";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Linear Response optical properties";
- strcpy(options[*i_opt].long_desc[0],"<string>=c Reciprocal-Space");
- strcpy(options[*i_opt].long_desc[1],"<string>=b for Transition-Space Bethe-Salpeter");
- options[*i_opt].long_opt="optics";
- options[*i_opt].short_opt='o';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="optics";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Inverse Dielectric/Response Matrix";
- options[*i_opt].long_opt="X";
- options[*i_opt].short_opt='d';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="screen";
- options[*i_opt].section=desc;
- strcpy(options[*i_opt].long_desc[0],"<string>=(s)static/(p)PA/m(PA)/(d)ynamical dielectric matrix");
- strcpy(options[*i_opt].long_desc[1],"<string>=(X) dynamical response matrix");
- options[*i_opt].char_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Oscillator strenghts (or dipoles)";
- options[*i_opt].long_opt="dipoles";
- options[*i_opt].short_opt='q';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="dipoles";
- options[*i_opt].section=desc;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Kernel";
- strcpy(options[*i_opt].long_desc[0],"<string>=hartree/alda/lrc/hf/sex/bsfxc");
- strcpy(options[*i_opt].long_desc[1],"hf/sex only eh-space; lrc only G-space");
- options[*i_opt].long_opt="kernel";
- options[*i_opt].short_opt='k';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="kernel";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
-
- desc="Bethe-Salpeter Equation";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="BSE solver";
+    desc = "Bethe-Salpeter Equation";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "BSE solver";
 #if defined _SLEPC && !defined _NL
- strcpy(options[*i_opt].long_desc[0],"<string>=h/d/s/(p/f)i");
+    strcpy(options[*i_opt].long_desc[0], "<string>=h/d/o/s/(p/f)i");
 #else
- strcpy(options[*i_opt].long_desc[0],"<string>=h/d/(p/f)i");
+    strcpy(options[*i_opt].long_desc[0], "<string>=h/d/o/(p/f)i");
 #endif
- strcpy(options[*i_opt].long_desc[1],"(h)aydock/(d)iagonalization");
- strcpy(options[*i_opt].long_desc[2],"(pi) perturbative inversion/ (fi) full inversion");
+    strcpy(options[*i_opt].long_desc[1], "(h)aydock");
+    strcpy(options[*i_opt].long_desc[2],
+            "(d)iagonalization/(o)ld-diagonalization");
+    strcpy(options[*i_opt].long_desc[3],
+           "(pi) perturbative inversion/ (fi) full inversion");
 #if defined _SLEPC && !defined _NL
- strcpy(options[*i_opt].long_desc[2],"(s)lepc partial diagonalization");
+    strcpy(options[*i_opt].long_desc[4], "(s)lepc partial diagonalization");
 #endif
- options[*i_opt].long_opt="Ksolver";
- options[*i_opt].short_opt='y';
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="bss";
- options[*i_opt].char_var=1;
- options[*i_opt].section=desc;
-
- desc="Total Energy";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="ACFDT Total Energy";
- options[*i_opt].long_opt="acfdt";
- options[*i_opt].bin="yambo";
- options[*i_opt].yambo_string="acfdt";
- options[*i_opt].section=desc;
+    options[*i_opt].long_opt = "Ksolver";
+    options[*i_opt].short_opt = 'y';
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "bss";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].section = desc;
 
+    desc = "Total Energy";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "ACFDT Total Energy";
+    options[*i_opt].long_opt = "acfdt";
+    options[*i_opt].bin = "yambo";
+    options[*i_opt].yambo_string = "acfdt";
+    options[*i_opt].section = desc;
 };
diff --git a/src/driver/options_ypp.c b/src/driver/options_ypp.c
index f94e8a984a..7876d920fd 100644
--- a/src/driver/options_ypp.c
+++ b/src/driver/options_ypp.c
@@ -1,211 +1,250 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
-#include <string.h>
-#include <stdio.h>
 #include <kind.h>
+#include <stdio.h>
+#include <string.h>
 
-void options_ypp(struct options_struct options[],int *i_opt)
+void options_ypp(struct options_struct *options, int *i_opt)
 {
- /* 
-  Brillouin Zone 
- */
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="BZ Grid generator";
- strcpy(options[*i_opt].long_desc[0],"<string>=(k)pt,(q)pt,(s)hifted,(h)igh symmetry,(r)andom,r(e)gular");
- options[*i_opt].long_opt="grid";
- options[*i_opt].short_opt='k';
- options[*i_opt].bin="ypp";
- options[*i_opt].char_var=1;
- options[*i_opt].yambo_string="bzgrids";
- options[*i_opt].section="Brillouin Zone";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Fine to coarse grid Map";
- options[*i_opt].long_opt="map";
- options[*i_opt].short_opt='m';
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="kpts_map";
- options[*i_opt].section="Brillouin Zone";
- /* 
-  Wannier
- */
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Wannier 90 interface";
- options[*i_opt].long_opt=  "wannier";
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="wannier";
- options[*i_opt].section="Wannier";
- /* 
-  SOC
- */
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Perturbative SOC mapping";
- options[*i_opt].long_opt="soc";
- options[*i_opt].short_opt='w';
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="WFs_SOC_map";
- options[*i_opt].section="SOC";
- /* 
-  Convertions
- */
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Remove symmetries not consistent with an external perturbation";
- options[*i_opt].long_opt=  "fixsym";
- options[*i_opt].short_opt='y';
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="fixsyms";
- options[*i_opt].section="Convertions";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Quasiparticle Databases";
- strcpy(options[*i_opt].long_desc[0],"<string>=(g)enerate-modify/(m)erge/(e)xpand");
- strcpy(options[*i_opt].long_desc[1],"         (e)xpand uses the symmetries to generate a BZ-expanded QP database");
- options[*i_opt].long_opt="qpdb";
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="QPDBs";
- options[*i_opt].section="Convertions";
- options[*i_opt].char_var=1;
-#if !defined _YPP_RT  
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="gkkp databases";
- options[*i_opt].long_opt= "gkkp";
- options[*i_opt].short_opt='g';
- options[*i_opt].bin="ypp_ph";
- options[*i_opt].yambo_string="gkkp"  ;
- options[*i_opt].section="Convertions";
- strcpy(options[*i_opt].long_desc[0],"<string>=(g)kkp,(d)ouble grid,(p)lot gkkp");
- options[*i_opt].char_var=1;
+    /*
+     Brillouin Zone
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "BZ Grid generator";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(k)pt,(q)pt,(s)hifted,(h)igh symmetry,(r)andom,r(e)gular");
+    options[*i_opt].long_opt = "grid";
+    options[*i_opt].short_opt = 'k';
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].yambo_string = "bzgrids";
+    options[*i_opt].section = "Brillouin Zone";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Fine to coarse grid Map";
+    options[*i_opt].long_opt = "map";
+    options[*i_opt].short_opt = 'm';
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "kpts_map";
+    options[*i_opt].section = "Brillouin Zone";
+    /*
+     Wannier
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Wannier 90 interface";
+    options[*i_opt].long_opt = "wannier";
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "wannier";
+    options[*i_opt].section = "Wannier";
+    /*
+     SOC
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Perturbative SOC mapping";
+    options[*i_opt].long_opt = "soc";
+    options[*i_opt].short_opt = 'w';
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "WFs_SOC_map";
+    options[*i_opt].section = "SOC";
+    /*
+     Convertions
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc =
+        "Remove symmetries not consistent with an external perturbation";
+    options[*i_opt].long_opt = "fixsym";
+    options[*i_opt].short_opt = 'y';
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "fixsyms";
+    options[*i_opt].section = "Convertions";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Quasiparticle Databases";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(g)enerate-modify/(m)erge/(e)xpand");
+    strcpy(options[*i_opt].long_desc[1],
+           "         (e)xpand uses the symmetries to generate a BZ-expanded QP "
+           "database");
+    options[*i_opt].long_opt = "qpdb";
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "QPDBs";
+    options[*i_opt].section = "Convertions";
+    options[*i_opt].char_var = 1;
+#if !defined _YPP_RT
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "gkkp databases";
+    options[*i_opt].long_opt = "gkkp";
+    options[*i_opt].short_opt = 'g';
+    options[*i_opt].bin = "ypp_ph";
+    options[*i_opt].yambo_string = "gkkp";
+    options[*i_opt].section = "Convertions";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(g)kkp,(d)ouble grid,(p)lot gkkp,(s)ingle gkkp db");
+    options[*i_opt].char_var = 1;
 #endif
- /* 
-  Plots
- */
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Excitonic State Momentum";
- options[*i_opt].long_opt= "BSiq";
- options[*i_opt].short_opt='b';
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="BSiq";
- options[*i_opt].section="Plots";
- options[*i_opt].int_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Mean Potential";
- options[*i_opt].long_opt= "potential";
- options[*i_opt].short_opt='v';
- options[*i_opt].bin="ypp_sc";
- options[*i_opt].yambo_string="MeanPot";
- options[*i_opt].section="Plots";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Electronic properties";
- strcpy(options[*i_opt].long_desc[0],"<string>=(h)artree,(f)ock,(coh),(sex),(cohsex),(exx),(exxc),(srpa),(d)ef,(ip)");
-#if defined _ELPH  
- strcpy(options[*i_opt].long_desc[0],"<string>=(w)ave,(d)ensity,(m)ag,do(s),(b)ands,(c)urrent,(e)lias");
-#elif defined _YPP_MAGNETIC 
- strcpy(options[*i_opt].long_desc[0],"<string>=(w)ave,(d)ensity,(m)ag,do(s),(b)ands,(c)urrent,angu(l)ar,(p)osition");
+    /*
+     Plots
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Excitonic State Momentum";
+    options[*i_opt].long_opt = "BSiq";
+    options[*i_opt].short_opt = 'b';
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "BSiq";
+    options[*i_opt].section = "Plots";
+    options[*i_opt].int_var = 1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Mean Potential";
+    options[*i_opt].long_opt = "potential";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(h)artree,(f)ock,(coh),(sex),(cohsex),(exx),(exxc),(srpa),"
+           "(d)ef,(ip)");
+    options[*i_opt].short_opt = 'v';
+    options[*i_opt].bin = "ypp_sc";
+    options[*i_opt].yambo_string = "MeanPot";
+    options[*i_opt].section = "Plots";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Electronic properties";
+#if defined _ELPH
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(w)ave,(d)ensity,(m)ag,do(s),(b)ands,(c)urrent,(e)lias");
+#elif defined _YPP_MAGNETIC
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(w)ave,(d)ensity,(m)ag,do(s),(b)ands,(c)urrent,angu(l)ar,("
+           "p)osition");
 #else
- strcpy(options[*i_opt].long_desc[0],"<string>=(w)ave,(d)ensity,(m)ag,do(s),(b)ands,(c)urrent");
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(w)ave,(d)ensity,(m)ag,do(s),(b)ands,(c)urrent");
 #endif
- options[*i_opt].long_opt="electron";
- options[*i_opt].short_opt='s';
- options[*i_opt].bin="ypp"; 
- options[*i_opt].yambo_string="electrons";
- options[*i_opt].section="Plots";
- options[*i_opt].char_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Excitonic properties";
- options[*i_opt].long_opt="exciton";
- options[*i_opt].short_opt='e';
- options[*i_opt].bin="ypp ypp_ph";
-#if defined _ELPH  
- strcpy(options[*i_opt].long_desc[0],"<string>=(s)ort,(sp)in,(a)mplitude,(w)ave,(i)nterpolate,");
- strcpy(options[*i_opt].long_desc[1],"         (e)lias,(g)kkp,(p)h-assisted dos");
+    options[*i_opt].long_opt = "electron";
+    options[*i_opt].short_opt = 's';
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "electrons";
+    options[*i_opt].section = "Plots";
+    options[*i_opt].char_var = 1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Excitonic properties";
+    options[*i_opt].long_opt = "exciton";
+    options[*i_opt].short_opt = 'e';
+    options[*i_opt].bin = "ypp ypp_ph";
+#if defined _ELPH
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(s)ort,(sp)in,(a)mplitude,(w)ave,(i)nterpolate,");
+    strcpy(options[*i_opt].long_desc[1],
+           "         (e)lias,(g)kkp,(p)h-assisted dos");
 #else
- strcpy(options[*i_opt].long_desc[0],"<string>=(s)ort,(sp)in,(a)mplitude,(w)ave,(i)nterpolate");
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(s)ort,(sp)in,(a)mplitude,(w)ave,(i)nterpolate");
 #endif
- options[*i_opt].yambo_string="excitons";
- options[*i_opt].section="Plots";
- options[*i_opt].char_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Dipole properties";
- options[*i_opt].long_opt="dipoles";
- options[*i_opt].bin="ypp";
- strcpy(options[*i_opt].long_desc[0],"<string>=(exc)itonic,(ip)independent-particle");
-#if defined _YPP_RT  
- strcpy(options[*i_opt].long_desc[0],"<string>=(exc)itonic,(ip)independent-particle,(m)ask");
+    options[*i_opt].yambo_string = "excitons";
+    options[*i_opt].section = "Plots";
+    options[*i_opt].char_var = 1;
+    *i_opt=*i_opt+1;
+    options[*i_opt].short_desc="Magnonic properties";
+    options[*i_opt].long_opt="magnon";
+    options[*i_opt].short_opt='z';
+    options[*i_opt].bin="ypp";
+    strcpy(options[*i_opt].long_desc[0],"<string>=(s)ort,(sp)in,(a)mplitude,(w)ave,(i)nterpolate");
+    options[*i_opt].yambo_string="magnons";
+    options[*i_opt].section="Plots";
+    options[*i_opt].char_var=1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Dipole properties";
+    options[*i_opt].long_opt = "dipoles";
+    options[*i_opt].bin = "ypp";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(exc)itonic,(ip)independent-particle");
+#if defined _YPP_RT
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(exc)itonic,(ip)independent-particle,(m)ask");
 #endif
- options[*i_opt].yambo_string="dipoles";
- options[*i_opt].section="Plots";
- options[*i_opt].char_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Free hole position in the excitonic plot";
- options[*i_opt].long_opt="freehole";
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="freehole";
- options[*i_opt].section="Plots";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Average hole/electron wavefunction";
- options[*i_opt].long_opt="avehole";
- options[*i_opt].bin="ypp";
- options[*i_opt].yambo_string="avehole";
- options[*i_opt].section="Plots";
-#if !defined _YPP_RT  
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Phononic properties";
- strcpy(options[*i_opt].long_desc[0],"<string>=(d)os,(e)lias,(a)mplitude");
- options[*i_opt].long_opt=  "phonon";
- options[*i_opt].short_opt='p';
- options[*i_opt].bin="ypp_ph";
- options[*i_opt].yambo_string="phonons";
- options[*i_opt].section="Plots";
- options[*i_opt].char_var=1;
+    options[*i_opt].yambo_string = "dipoles";
+    options[*i_opt].section = "Plots";
+    options[*i_opt].char_var = 1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Free hole position in the excitonic plot";
+    options[*i_opt].long_opt = "freehole";
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "freehole";
+    options[*i_opt].section = "Plots";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Average hole/electron wavefunction";
+    options[*i_opt].long_opt = "avehole";
+    options[*i_opt].bin = "ypp";
+    options[*i_opt].yambo_string = "avehole";
+    options[*i_opt].section = "Plots";
+#if !defined _YPP_RT
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Phononic properties";
+    strcpy(options[*i_opt].long_desc[0], "<string>=(d)os,(e)lias,(a)mplitude");
+    options[*i_opt].long_opt = "phonon";
+    options[*i_opt].short_opt = 'p';
+    options[*i_opt].bin = "ypp_ph";
+    options[*i_opt].yambo_string = "phonons";
+    options[*i_opt].section = "Plots";
+    options[*i_opt].char_var = 1;
 #endif
-/*
-  Real-Time
-*/
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Non-linear response analysis";
- options[*i_opt].long_opt="nl";
- options[*i_opt].short_opt='u';
- options[*i_opt].bin="ypp_nl";
- options[*i_opt].yambo_string="nonlinear";
- options[*i_opt].section="Real-Time";
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="Carriers database generation";
- strcpy(options[*i_opt].long_desc[0],"<string>=(e)nergy,(f)ermi");
- options[*i_opt].long_opt=  "rtdb";
- options[*i_opt].short_opt='c';
- options[*i_opt].bin="ypp_rt";
- options[*i_opt].yambo_string="RTDBs";
- options[*i_opt].section="Real-Time";
- options[*i_opt].char_var=1;
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="TD observables plot";
- options[*i_opt].long_opt="rtplot";
- options[*i_opt].short_opt='n';
- options[*i_opt].char_var=1;
- options[*i_opt].bin="ypp_rt";
- options[*i_opt].yambo_string="TDplots"; /* TDplots */
- options[*i_opt].section="Real-Time";
- strcpy(options[*i_opt].long_desc[0],"<string>=(X)response,(a)bsorption,(o)ccupations,(l)ifetimes,(d)ensity,(p)olariazion,(g)reen-function");
- strcpy(options[*i_opt].long_desc[1]," ");
- strcpy(options[*i_opt].long_desc[2],"(X) response calculates the response via the time-resolved polarization");
- strcpy(options[*i_opt].long_desc[3],"(a) absorption amends the Kubo expression with the time-dependent occupations");
- strcpy(options[*i_opt].long_desc[4],"(p) polarization evaluates the k-resolved components of the time-dependent polarization");
- strcpy(options[*i_opt].long_desc[5],"(g) evaluates the two-times Green`s function");
- *i_opt=*i_opt+1;
- options[*i_opt].short_desc="TD plot control";
- options[*i_opt].char_var=1;
- options[*i_opt].long_opt= "rtmode";
- options[*i_opt].short_opt='t';
- options[*i_opt].bin="ypp_rt";
- options[*i_opt].yambo_string="TDplotmode"; /* TDpol */
- options[*i_opt].section="Real-Time";
- strcpy(options[*i_opt].long_desc[0],"rtplot=X/a => <string>=(t)ime");
- strcpy(options[*i_opt].long_desc[1],"rtplot=o   => <string>=(b)ands,(t)ime,(e)nergy,(d)os");
- strcpy(options[*i_opt].long_desc[2],"rtplot=l   => <string>=(b)ands,(t)ime,(e)nergy");
- strcpy(options[*i_opt].long_desc[3],"rtplot=d   => <string>=(t)ime");
- strcpy(options[*i_opt].long_desc[4],"rtplot=p   => <string>=(t)ime");
+    /*
+      Real-Time
+    */
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Non-linear response analysis";
+    options[*i_opt].long_opt = "nl";
+    options[*i_opt].short_opt = 'u';
+    options[*i_opt].bin = "ypp_nl";
+    options[*i_opt].yambo_string = "nonlinear";
+    options[*i_opt].section = "Real-Time";
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "Carriers database generation";
+    strcpy(options[*i_opt].long_desc[0], "<string>=(e)nergy,(f)ermi");
+    options[*i_opt].long_opt = "rtdb";
+    options[*i_opt].short_opt = 'c';
+    options[*i_opt].bin = "ypp_rt";
+    options[*i_opt].yambo_string = "RTDBs";
+    options[*i_opt].section = "Real-Time";
+    options[*i_opt].char_var = 1;
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "TD observables plot";
+    options[*i_opt].long_opt = "rtplot";
+    options[*i_opt].short_opt = 'n';
+    options[*i_opt].char_var = 1;
+    options[*i_opt].bin = "ypp_rt";
+    options[*i_opt].yambo_string = "TDplots"; /* TDplots */
+    options[*i_opt].section = "Real-Time";
+    strcpy(options[*i_opt].long_desc[0],
+           "<string>=(X)response,(a)bsorption,(o)ccupations,(l)ifetimes,(d)"
+           "ensity,(p)olarization,(f)ields,(g)reen-function");
+    strcpy(options[*i_opt].long_desc[1], " ");
+    strcpy(options[*i_opt].long_desc[2],
+           "(X) response calculates the response via the time-resolved "
+           "polarization");
+    strcpy(options[*i_opt].long_desc[3],
+           "(a) absorption amends the Kubo expression with the time-dependent "
+           "occupations");
+    strcpy(options[*i_opt].long_desc[4],
+           "(p) polarization evaluates the k-resolved components of the "
+           "time-dependent polarization");
+    strcpy(options[*i_opt].long_desc[5],
+	   "(f) generate and manupulate fields file");
+    strcpy(options[*i_opt].long_desc[6],
+           "(g) evaluates the two-times Green`s function");
+    *i_opt = *i_opt + 1;
+    options[*i_opt].short_desc = "TD plot control";
+    options[*i_opt].char_var = 1;
+    options[*i_opt].long_opt = "rtmode";
+    options[*i_opt].short_opt = 't';
+    options[*i_opt].bin = "ypp_rt";
+    options[*i_opt].yambo_string = "TDplotmode"; /* TDpol */
+    options[*i_opt].section = "Real-Time";
+    strcpy(options[*i_opt].long_desc[0], "rtplot=X/a => <string>=(t)ime");
+    strcpy(options[*i_opt].long_desc[1],
+           "rtplot=o   => <string>=(b)ands,(t)ime,(e)nergy,(d)os");
+    strcpy(options[*i_opt].long_desc[2],
+           "rtplot=l   => <string>=(b)ands,(t)ime,(e)nergy");
+    strcpy(options[*i_opt].long_desc[3], "rtplot=d   => <string>=(t)ime");
+    strcpy(options[*i_opt].long_desc[4], "rtplot=p   => <string>=(t)ime");
 };
diff --git a/src/driver/title.c b/src/driver/title.c
index 24fed1ebf4..5928ef4bd1 100644
--- a/src/driver/title.c
+++ b/src/driver/title.c
@@ -1,33 +1,67 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
+#include <driver.h>
+#include <kind.h>
 #include <stdio.h>
 #include <string.h>
-#include <kind.h>
-#include <driver.h>
 
-void title(FILE *file_name,char *cmnt,  struct tool_struct t)
+void title_yambo(FILE *file_name, char *cmnt, struct tool_struct t)
+{
+    char *tool = running_tool();
+    char *pj = running_project();
+    char *libs = running_libraries();
+    fprintf(file_name, "%s%s\n", cmnt, " ___ __  _____  __ __  _____   _____ ");
+    fprintf(file_name, "%s%s\n", cmnt, "|   Y  ||  _  ||  Y  ||  _  \\ |  _  |");
+    fprintf(file_name, "%s%s\n", cmnt, "|   |  ||. |  ||.    ||. |  / |. |  |");
+    fprintf(file_name, "%s%s\n", cmnt, " \\   _/ |. _  ||.\\ / ||. _  \\ |. |  |");
+    fprintf(file_name, "%s%s\n", cmnt, "  |: |  |: |  ||: |  ||: |   \\|: |  |");
+    fprintf(file_name, "%s%s\n", cmnt, "  |::|  |:.|:.||:.|:.||::.   /|::.  |");
+    fprintf(file_name, "%s%s\n", cmnt, "  `--\"  `-- --\"`-- --\"`-----\" `-----\"");
+    fprintf(file_name, "%s '%s' \n", cmnt, t.desc);
+    if (strlen(pj) > 0)
+    {
+        fprintf(file_name, "%s\n%s This is      : %s(%s)", cmnt, cmnt, tool,
+                pj);
+    }
+    else
+    {
+        fprintf(file_name, "%s\n%s This is      : %s", cmnt, cmnt, tool);
+    }
+    fprintf(file_name, "%s\n%s Version      : %s ", cmnt, cmnt,
+            t.version_string);
+    fprintf(file_name, "%s\n%s Configuration: %s \n", cmnt, cmnt, libs);
+};
+
+
+void title_lumen(FILE *file_name, char *cmnt, struct tool_struct t)
 {
- char *tool = running_tool();
- char *pj   = running_project();
- char *libs = running_libraries();
- fprintf(file_name,"%s%s\n",cmnt,  " ___ __  _____  __ __  _____   _____ ");
- fprintf(file_name,"%s%s\n",cmnt,  "|   Y  ||  _  ||  Y  ||  _  \\ |  _  |");
- fprintf(file_name,"%s%s\n",cmnt,  "|   |  ||. |  ||.    ||. |  / |. |  |");
- fprintf(file_name,"%s%s\n",cmnt,  " \\   _/ |. _  ||.\\ / ||. _  \\ |. |  |");
- fprintf(file_name,"%s%s\n",cmnt,  "  |: |  |: |  ||: |  ||: |   \\|: |  |");
- fprintf(file_name,"%s%s\n",cmnt,  "  |::|  |:.|:.||:.|:.||::.   /|::.  |");
- fprintf(file_name,"%s%s\n",cmnt,  "  `--\"  `-- --\"`-- --\"`-----\" `-----\"");
- fprintf(file_name,"%s '%s' \n",cmnt,t.desc);
- if (strlen(pj)>0) {
-  fprintf(file_name,"%s\n%s This is      : %s(%s)",cmnt,cmnt,tool,pj);
- }else{
-  fprintf(file_name,"%s\n%s This is      : %s",cmnt,cmnt,tool);
- }
- fprintf(file_name,"%s\n%s Version      : %s ",cmnt,cmnt,t.version_string);
- fprintf(file_name,"%s\n%s Configuration: %s \n",cmnt,cmnt,libs);
+    char *tool = running_tool();
+    char *pj = running_project();
+    char *libs = running_libraries();
+    fprintf(file_name, "%s%s\n", cmnt, "  _ ");
+    fprintf(file_name, "%s%s\n", cmnt, " | |");
+    fprintf(file_name, "%s%s\n", cmnt, " | |     _   _ _ __ ___   ____  __ _");
+    fprintf(file_name, "%s%s\n", cmnt, " | |    | | | | '_ ' _  \\/ _  \\/ _` \\");
+    fprintf(file_name, "%s%s\n", cmnt, " | |    | | | | | | | | |  ___/ | | |");
+    fprintf(file_name, "%s%s\n", cmnt, " | |___ | |_| | | | | | |  \\__| | | |");
+    fprintf(file_name, "%s%s\n", cmnt, " \\_____/\\__,__|_| |_| |_/\\____|_| |_|");
+    fprintf(file_name, "%s%s\n", cmnt, " ");
+    fprintf(file_name, "%s '%s' \n", cmnt, t.desc);
+    if (strlen(pj) > 0)
+    {
+        fprintf(file_name, "%s\n%s This is      : %s(%s)", cmnt, cmnt, tool,
+                pj);
+    }
+    else
+    {
+        fprintf(file_name, "%s\n%s This is      : %s", cmnt, cmnt, tool);
+    }
+    fprintf(file_name, "%s\n%s Version      : %s ", cmnt, cmnt,
+            t.version_string);
+    fprintf(file_name, "%s\n%s Configuration: %s \n", cmnt, cmnt, libs);
 };
diff --git a/src/driver/tool_init.c b/src/driver/tool_init.c
index a1448892ec..7081d8b227 100644
--- a/src/driver/tool_init.c
+++ b/src/driver/tool_init.c
@@ -1,81 +1,102 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 
 */
 
-#include <stdlib.h>
+#include <driver.h>
 #include <kind.h>
-#include <string.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <wrapper.h>
-#include <tool.h>
-#include <driver.h>
 #if defined _yambo || defined _ypp
- #include <editor.h>
+#include <editor.h>
 #endif
 
-/*
- AM 29/7/2021
-
- This source is project dependent via tool.h. In order to properly compile it, then,
- I need to add fictiuous pre-compiler options
-
-*/
-#if defined _p2y || defined _a2y || defined _c2y || defined _2y
+#ifdef _yambo
+const char *tool = "yambo";
+const char *tool_desc = "A shiny pot of fun and happiness [C.D.Hogan]";
+#elif defined _ypp
+const char *tool = "ypp";
+const char *tool_desc = "Y(ambo) P(ost)/(re) P(rocessor)";
+#elif defined _a2y
+const char *tool = "a2y";
+const char *tool_desc = "A(binit) 2 Y(ambo) interface";
+#elif defined _c2y
+const char *tool = "c2y";
+const char *tool_desc = "C(pmd) 2 Y(ambo) interface";
+#elif defined _p2y
+const char *tool = "p2y";
+const char *tool_desc = "P(Wscf) 2 Y(ambo) interface";
+#elif defined _e2y
+const char *tool = "e2y";
+const char *tool_desc = "E(TSF) 2 Y(ambo) interface (0.6)";
+#else
+const char *tool = "none";
+const char *tool_desc = "Not a tool";
 #endif
 
-struct tool_struct tool_init( )
+struct tool_struct tool_init()
 {
- tool_struct t;
- t=versions();
+    tool_struct t;
+    t = versions();
 #if defined _yambo || defined _ypp
- t.editor=editor;
+    t.editor = editor;
 #else
- t.editor="vim";
+    t.editor = "vim";
 #endif
- t.tool=tool;
- t.desc=tool_desc;
- /*
-   Projects
- */ 
- char *pj=NULL;
+    t.tool = tool;
+    t.desc = tool_desc;
+    /*
+      Projects
+    */
+    char *pj = NULL;
 #if defined _YPP_ELPH || defined _ELPH
- pj="ph";
+    pj = "ph";
 #endif
 #if defined _YPP_RT || defined _RT
- pj="rt";
+    pj = "rt";
 #endif
 #if defined _YPP_SC || defined _SC
- pj="sc";
+    pj = "sc";
 #endif
 #if defined _YPP_NL || defined _NL
- pj="nl";
+    pj = "nl";
 #endif
 #if defined _YPP_FL || defined _FL
- pj="fl";
+    pj = "fl";
 #endif
 #if defined _QED
- pj="qed";
+    pj = "qed";
 #endif
 
- if (pj!=NULL) {
-  t.bin = malloc(strlen(tool) + strlen(pj) + 2);
-  strcpy(t.bin,t.tool);
-  t.pj=pj;
-  strcat(t.bin,"_");
-  strcat(t.bin,t.pj);
- }else{
-  t.bin = malloc(strlen(tool) + 1);
-  strcpy(t.bin,t.tool);
-  pj="";
-  t.pj=pj;
- }
- if (pj==NULL) pj=" ";
- sprintf(t.version_string,"%i.%i.%i Revision %i Hash %s",t.version,t.subversion,t.patchlevel,t.revision,t.hash);
- return(t);
+    if (pj != NULL)
+    {
+        t.bin = malloc(strlen(tool) + strlen(pj) + 2);
+        strcpy(t.bin, t.tool);
+        t.pj = pj;
+        strcat(t.bin, "_");
+        strcat(t.bin, t.pj);
+    }
+    else
+    {
+        t.bin = malloc(strlen(tool) + 1);
+        strcpy(t.bin, t.tool);
+        pj = "";
+        t.pj = pj;
+    }
+    if (pj == NULL)
+    {
+        pj = " ";
+    }
+    /*
+    sprintf(t.version_string, "%i.%i.%i Revision %i Hash %s", t.version,
+            t.subversion, t.patchlevel, t.revision, t.hash);
+    */
+    sprintf(t.version_string, "Fork %s", t.lumenver);
+    return (t);
 };
-
diff --git a/src/driver/usage.c b/src/driver/usage.c
index 8f7026309d..b16f00d0c2 100644
--- a/src/driver/usage.c
+++ b/src/driver/usage.c
@@ -1,131 +1,227 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
+#include <driver.h>
+#include <kind.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
-#include <kind.h>
-#include <driver.h>
 
-void usage(options_struct *options, struct tool_struct t, char *what, int n_options)
+void usage(options_struct *options, struct tool_struct t, char *what,
+           int n_options)
 {
- int i_opt,i,i_o,n_blanks,n_strings;
- char *pj   = running_project();
- char *tool = running_tool();
- char *libs = running_libraries();
- int max_long_desc=20;
- /*
-   Order
- */
- int n_orders=19,n_order_elements;
- char *order[]={
- "Help & version", /* 0 */
- "Input file & Directories", /* 1 */
- "Parallel Control", /* 2 */
- "Initializations", /* 3 */
- "Response Functions", /* 4 */
- "Self-Energy", /* 5 */
- "Bethe-Salpeter Equation", /* 6 */
- "Hamiltonians & Potentials", /* 7 */
- "Real-Time", /* 8 */
- "Surface Spectroscopy", /* 9 */ 
- "Total Energy", /* 10 */
- "Interface", /* 11 */
- "Brillouin Zone", /* 12 */
- "Convertions", /* 13 */
- "Plots", /* 14 */
- "SOC", /* 15 */
- "Utilites", /* 16 */
- "Wannier", /* 17 */
- "Model Hamiltonians", /* 18 */
- "undef", /* 19 */
- };
+    int i_opt, i, i_o, n_blanks, n_strings;
+    char *pj = running_project();
+    char *tool = running_tool();
+    char *libs = running_libraries();
+    int max_long_desc = 20;
+    /*
+      Order
+    */
+    int n_orders = 19, n_order_elements;
+    char *order[] = {
+        "Help & version",            /* 0 */
+        "Input file & Directories",  /* 1 */
+        "Parallel Control",          /* 2 */
+        "Initializations",           /* 3 */
+        "Response Functions",        /* 4 */
+        "Self-Energy",               /* 5 */
+        "Bethe-Salpeter Equation",   /* 6 */
+        "Hamiltonians & Potentials", /* 7 */
+        "Real-Time",                 /* 8 */
+        "Surface Spectroscopy",      /* 9 */
+        "Total Energy",              /* 10 */
+        "Interface",                 /* 11 */
+        "Brillouin Zone",            /* 12 */
+        "Convertions",               /* 13 */
+        "Plots",                     /* 14 */
+        "SOC",                       /* 15 */
+        "Utilites",                  /* 16 */
+        "Wannier",                   /* 17 */
+        "Model Hamiltonians",        /* 18 */
+        "undef",                     /* 19 */
+    };
 
- if (strcmp(what,"help")==0) {
+    if (strcmp(what, "help") == 0)
+    {
+        title_lumen(stderr, "", t);
 
-  title(stderr,"",t);
+        n_strings = 0;
+        for (i_opt = 0; i_opt < n_options; i_opt++)
+        {
+            if (use_me(options, t, i_opt) == 0)
+            {
+                continue;
+            }
+            n_blanks = options[i_opt].int_var * 6 +
+                       options[i_opt].float_var * 7 +
+                       options[i_opt].char_var * 9;
+            if (n_blanks > n_strings)
+            {
+                n_strings = n_blanks;
+            }
+        };
 
-  n_strings=0;
-  for(i_opt=0;i_opt<n_options;i_opt++) {
-   if (use_me(options,t,i_opt)==0) continue;
-   n_blanks=options[i_opt].int_var*6+options[i_opt].float_var*7+options[i_opt].char_var*9;
-   if (n_blanks>n_strings) n_strings=n_blanks;
-  };
+        for (i_o = 0; i_o < n_orders; i_o++)
+        {
+            if (order[i_o] == NULL)
+            {
+                continue;
+            }
+            n_order_elements = 0;
+            for (i_opt = 0; i_opt < n_options; i_opt++)
+            {
+                if (use_me(options, t, i_opt) == 0)
+                {
+                    continue;
+                }
+                if (strcmp(options[i_opt].section, order[i_o]) == 0)
+                {
+                    n_order_elements++;
+                }
+            };
+            if (n_order_elements == 0)
+            {
+                continue;
+            }
+            fprintf(stderr, "\n %s:\n", order[i_o]);
+            for (i_opt = 0; i_opt < n_options; i_opt++)
+            {
+                if (strcmp(options[i_opt].section, order[i_o]) != 0)
+                {
+                    continue;
+                }
+                if (use_me(options, t, i_opt) == 0)
+                {
+                    continue;
+                }
+                fprintf(stderr, " -%s", options[i_opt].long_opt);
+                n_blanks = 15 - strlen(options[i_opt].long_opt);
+                for (i = 1; i <= n_blanks; i++)
+                {
+                    fprintf(stderr, " ");
+                }
+                if (options[i_opt].short_opt > 57)
+                {
+                    fprintf(stderr, " (-%c)", options[i_opt].short_opt);
+                }
+                else
+                {
+                    for (i = 1; i <= 5; i++)
+                    {
+                        fprintf(stderr, " ");
+                    }
+                };
+                for (i = 1; i <= options[i_opt].int_var; i++)
+                {
+                    fprintf(stderr, " %s", "<int>");
+                };
+                for (i = 1; i <= options[i_opt].float_var; i++)
+                {
+                    fprintf(stderr, " %s", "<real>");
+                };
+                for (i = 1; i <= options[i_opt].char_var; i++)
+                {
+                    fprintf(stderr, " %s", "<string>");
+                };
+                n_blanks = n_strings + 2 - options[i_opt].int_var * 6 -
+                           options[i_opt].float_var * 7 -
+                           options[i_opt].char_var * 9;
+                for (i = 1; i <= n_blanks; i++)
+                {
+                    fprintf(stderr, " ");
+                }
+                fprintf(stderr, " :%s", options[i_opt].short_desc);
+                if (strcmp(options[i_opt].long_desc[0], "undef") != 0)
+                {
+                    fprintf(stderr, " %s%s%s", "(more with -h ",
+                            options[i_opt].long_opt, ")");
+                }
+                fprintf(stderr, "\n");
+            }
+        }
 
-  for(i_o=0;i_o<n_orders;i_o++) {
-   if (order[i_o]==NULL) continue;
-   n_order_elements=0;
-   for(i_opt=0;i_opt<n_options;i_opt++) {
-    if (use_me(options,t,i_opt)==0) continue;
-    if (strcmp(options[i_opt].section,order[i_o])==0) n_order_elements++;
-   };
-   if (n_order_elements==0) continue;
-   fprintf(stderr,"\n %s:\n",order[i_o]);
-   for(i_opt=0;i_opt<n_options;i_opt++) {
-    if (strcmp(options[i_opt].section,order[i_o])!=0) continue;
-    if (use_me(options,t,i_opt)==0) continue;
-    fprintf(stderr," -%s",options[i_opt].long_opt);
-    n_blanks=15-strlen(options[i_opt].long_opt);
-    for(i=1;i<=n_blanks;i++) fprintf(stderr," "); 
-    if (options[i_opt].short_opt>57) 
-     {fprintf(stderr," (-%c)",options[i_opt].short_opt);}
+        fprintf(stderr, "\n");
+        fprintf(stderr, "%s\n\n",
+                " Lumen developers group (http://www.lumen-code.org)");
+    }
+    else if (strcmp(what, "version") == 0)
+    {
+        if (strlen(pj) > 0)
+        {
+            fprintf(stderr, "\nThis is %s(%s) - %s - Ver. %s \n\n", tool, pj,
+                    libs, t.version_string);
+        }
+        else
+        {
+            fprintf(stderr, "\nThis is %s - %s - Ver. %s \n\n", tool, libs,
+                    t.version_string);
+        }
+    }
     else
-     {for(i=1;i<=5;i++) fprintf(stderr," ");};
-    for(i=1;i<=options[i_opt].int_var;i++)   {fprintf(stderr," %s","<int>");};
-    for(i=1;i<=options[i_opt].float_var;i++) {fprintf(stderr," %s","<real>");};
-    for(i=1;i<=options[i_opt].char_var;i++)  {fprintf(stderr," %s","<string>");};
-    n_blanks=n_strings+2-options[i_opt].int_var*6-options[i_opt].float_var*7-options[i_opt].char_var*9;
-    for(i=1;i<=n_blanks;i++) fprintf(stderr," "); 
-    fprintf(stderr," :%s",options[i_opt].short_desc);
-    if (strcmp(options[i_opt].long_desc[0],"undef")!=0) fprintf(stderr," %s%s%s","(more with -h ",options[i_opt].long_opt,")");
-    fprintf(stderr,"\n");
-   }
-  }
-
-  fprintf(stderr,"\n");
-  fprintf(stderr,"%s\n\n"," YAMBO developers group (http://www.yambo-code.eu)");
-
- }else if (strcmp(what,"version")==0) {
-  if (strlen(pj)>0) {
-   fprintf(stderr,"\nThis is %s(%s) - %s - Ver. %s \n\n",tool,pj,libs,t.version_string);
-  }else{
-   fprintf(stderr,"\nThis is %s - %s - Ver. %s \n\n",tool,libs,t.version_string);
-  }
- }else{
-  for(i_opt=0;i_opt<n_options;i_opt++) {
-   if (use_me(options,t,i_opt)==0) continue;
-   if (strcmp(options[i_opt].long_opt,what)==0) break;
-  }
-  if (i_opt==n_options) {
-   fprintf(stderr,"unrecognized option '%s'\n",what);
-   exit(0);
-  }
-  title(stderr,"",t);
-  fprintf(stderr,"\n");
-  fprintf(stderr," Long  option: %s\n",options[i_opt].long_opt);
-  if (options[i_opt].short_opt>57) fprintf(stderr," Short option: %c\n",options[i_opt].short_opt);
-  if (options[i_opt].int_var+options[i_opt].float_var+options[i_opt].char_var>0) 
-  {
-   fprintf(stderr," Variables   :");
-   for(i=1;i<=options[i_opt].int_var;i++)   {fprintf(stderr," %s","<int>");};
-   for(i=1;i<=options[i_opt].float_var;i++) {fprintf(stderr," %s","<real>");};
-   for(i=1;i<=options[i_opt].char_var;i++)  {fprintf(stderr," %s","<string>");};
-   fprintf(stderr,"\n");
-  }
-  if (strcmp(options[i_opt].long_desc[0],"undef")!=0)
-  {
-   fprintf(stderr," Description :%s",options[i_opt].short_desc);
-   for(i=0;i<max_long_desc;i++) {
-    if (strcmp(options[i_opt].long_desc[i],"undef")==0) continue;
-    fprintf(stderr,"\n              %s",options[i_opt].long_desc[i]);
-   }
-  }
-  fprintf(stderr,"\n\n");
-  fprintf(stderr,"%s\n\n"," YAMBO developers group (http://www.yambo-code.eu)");
- }
+    {
+        for (i_opt = 0; i_opt < n_options; i_opt++)
+        {
+            if (use_me(options, t, i_opt) == 0)
+            {
+                continue;
+            }
+            if (strcmp(options[i_opt].long_opt, what) == 0)
+            {
+                break;
+            }
+        }
+        if (i_opt == n_options)
+        {
+            fprintf(stderr, "unrecognized option '%s'\n", what);
+            exit(0);
+        }
+        title_lumen(stderr, "", t);
+        fprintf(stderr, "\n");
+        fprintf(stderr, " Long  option: %s\n", options[i_opt].long_opt);
+        if (options[i_opt].short_opt > 57)
+        {
+            fprintf(stderr, " Short option: %c\n", options[i_opt].short_opt);
+        }
+        if (options[i_opt].int_var + options[i_opt].float_var +
+                options[i_opt].char_var >
+            0)
+        {
+            fprintf(stderr, " Variables   :");
+            for (i = 1; i <= options[i_opt].int_var; i++)
+            {
+                fprintf(stderr, " %s", "<int>");
+            };
+            for (i = 1; i <= options[i_opt].float_var; i++)
+            {
+                fprintf(stderr, " %s", "<real>");
+            };
+            for (i = 1; i <= options[i_opt].char_var; i++)
+            {
+                fprintf(stderr, " %s", "<string>");
+            };
+            fprintf(stderr, "\n");
+        }
+        if (strcmp(options[i_opt].long_desc[0], "undef") != 0)
+        {
+            fprintf(stderr, " Description :%s", options[i_opt].short_desc);
+            for (i = 0; i < max_long_desc; i++)
+            {
+                if (strcmp(options[i_opt].long_desc[i], "undef") == 0)
+                {
+                    continue;
+                }
+                fprintf(stderr, "\n              %s",
+                        options[i_opt].long_desc[i]);
+            }
+        }
+        fprintf(stderr, "\n\n");
+        fprintf(stderr, "%s\n\n",
+                " Lumen developers group (http://www.lumen-code.org)");
+    }
 };
diff --git a/src/driver/use_me.c b/src/driver/use_me.c
index cd70202d3f..b5cb0752d2 100644
--- a/src/driver/use_me.c
+++ b/src/driver/use_me.c
@@ -1,38 +1,56 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
+#include <driver.h>
+#include <kind.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
-#include <kind.h>
-#include <driver.h>
 
-int use_me(struct options_struct options[], struct tool_struct t, int i_opt)
+int use_me(struct options_struct *options, struct tool_struct t, int i_opt)
 {
- char *pch,str[100];
- if (options[i_opt].short_desc==NULL) return 0;
- /* NOT allowed bin */
- strcpy(str,options[i_opt].no_bin);
- pch = strtok(str," ");
- while (pch != NULL)
- {
-   if (strcmp(pch,t.tool)==0) return 0;
-   if (strcmp(pch,t.bin)==0) return 0;
-   pch = strtok (NULL, " ");
- }
- /* allowed bin */
- strcpy(str,options[i_opt].bin);
- pch = strtok(str," ");
- while (pch != NULL)
- {
-   if (strcmp(pch,t.tool)==0) return 1;
-   if (strcmp(pch,t.bin)==0) return 1;
-   if (strcmp(pch,"all")==0) return 1;
-   pch = strtok (NULL, " ");
- }
+    char *pch, str[100];
+    if (options[i_opt].short_desc == NULL)
+    {
+        return 0;
+    }
+    /* NOT allowed bin */
+    strcpy(str, options[i_opt].no_bin);
+    pch = strtok(str, " ");
+    while (pch != NULL)
+    {
+        if (strcmp(pch, t.tool) == 0)
+        {
+            return 0;
+        }
+        if (strcmp(pch, t.bin) == 0)
+        {
+            return 0;
+        }
+        pch = strtok(NULL, " ");
+    }
+    /* allowed bin */
+    strcpy(str, options[i_opt].bin);
+    pch = strtok(str, " ");
+    while (pch != NULL)
+    {
+        if (strcmp(pch, t.tool) == 0)
+        {
+            return 1;
+        }
+        if (strcmp(pch, t.bin) == 0)
+        {
+            return 1;
+        }
+        if (strcmp(pch, "all") == 0)
+        {
+            return 1;
+        }
+        pch = strtok(NULL, " ");
+    }
 };
diff --git a/src/driver/winsize.c b/src/driver/winsize.c
index 5011d33f41..fa6d111468 100644
--- a/src/driver/winsize.c
+++ b/src/driver/winsize.c
@@ -1,31 +1,39 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2020 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <string.h>
 #include <stdio.h>
+#include <string.h>
 #include <sys/ioctl.h>
-#include <wrapper.h>
 #include <unistd.h>
+#include <wrapper.h>
 
 int guess_winsize()
 {
- int width;
- struct winsize ws;
- if (!isatty(2)) {width=-1;return width;}
- if( ioctl(STDERR_FILENO, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0 )
-  width = 79;
- else
-  width = ws.ws_col - 1;
- return width;
+    int width;
+    struct winsize ws;
+    if (!isatty(2))
+    {
+        width = -1;
+        return width;
+    }
+    if (ioctl(STDERR_FILENO, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0)
+    {
+        width = 79;
+    }
+    else
+    {
+        width = ws.ws_col - 1;
+    }
+    return width;
 };
 int C_FUNC(win_size, WIN_SIZE)(int *win_width)
 {
- *win_width = 0;
- *win_width = guess_winsize();
- return 0;
+    *win_width = 0;
+    *win_width = guess_winsize();
+    return 0;
 };
diff --git a/src/el-ph/.objects b/src/el-ph/.objects
index 096f6e7316..47f21ce807 100644
--- a/src/el-ph/.objects
+++ b/src/el-ph/.objects
@@ -1,6 +1,5 @@
 #if defined _ELPH && !defined _YPP_ELPH
-objs = ELPH_Sigma_c.o ELPH_Hamiltonian.o ELPH_simmetrize_and_distribute_gsqF.o Eval_Efermi_DbGd.o \
-       ELPH_gsqF_to_Sigma.o ELPH_Sigma_c_engine.o En_k_plus_q_interpolate.o get_ph_E_DbGd.o \
-       ELPH_databases_load.o ELPH_databases_check.o ELPH_databases_grids_map.o \
-       ELPH_databases_symmetrize.o ELPH_acoustic_phonon_properties.o
+EQ_objs = ELPH_Sigma_c.o ELPH_Hamiltonian.o ELPH_simmetrize_and_distribute_gsqF.o Eval_Efermi_DbGd.o \
+          ELPH_gsqF_to_Sigma.o ELPH_Sigma_c_engine.o En_k_plus_q_interpolate.o get_ph_E_DbGd.o ELPH_databases_grids_map.o
 #endif
+objs = ${EQ_objs} 
diff --git a/src/el-ph/ELPH_Hamiltonian.F b/src/el-ph/ELPH_Hamiltonian.F
index 411257ee20..ec0278e9c5 100644
--- a/src/el-ph/ELPH_Hamiltonian.F
+++ b/src/el-ph/ELPH_Hamiltonian.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_Hamiltonian(en,k,q)
  !
  use pars,          ONLY:SP,lchlen,rZERO
@@ -25,8 +29,9 @@ subroutine ELPH_Hamiltonian(en,k,q)
  use stderr,        ONLY:intc
  use QP_m,          ONLY:QP_G_damp
  use parser_m,      ONLY:parser
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::en
  type(bz_samp) ::k,q
diff --git a/src/el-ph/ELPH_Sigma_c.F b/src/el-ph/ELPH_Sigma_c.F
index fc770af711..99934cca3b 100644
--- a/src/el-ph/ELPH_Sigma_c.F
+++ b/src/el-ph/ELPH_Sigma_c.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_Sigma_c(en,k,q,qp)
  !
  ! This routine calculates the QP shifts due to el-ph intercation
@@ -32,7 +36,7 @@ subroutine ELPH_Sigma_c(en,k,q,qp)
 &                        On_Mass_Shell_approx,QP_time_order_sign
  use D_lattice,     ONLY:sop_inv,nsym,i_time_rev,sop_tab
  use R_lattice,     ONLY:qindx_S,bz_samp,RL_vol,nqibz
- use ELPH,          ONLY:elph_nb,PH_freqs_sq,GKKP,ph_modes,PH_freqs,FineGd_E_kpq_components_reset,&
+ use ELPH,          ONLY:elph_bands,PH_freqs_sq,GKKP,ph_modes,PH_freqs,FineGd_E_kpq_components_reset,&
 &                        QP_PH_n_G_bands,PH_W_debye,use_PH_DbGd,EkplusQ_mode,gkkp_db,elph_grids_are_expanded, &
 &                        elph_nQ,elph_use_q_grid,setup_k_plus_q_levels,elph_nQ_used,&
 &                        E_kpq_sh_fact,Efermi_DbGd,gsqF_energy_steps,eval_G_using_KK,PH_qpt,ID_E_kpq,ID_E_kpq_obj, &
@@ -46,8 +50,9 @@ subroutine ELPH_Sigma_c(en,k,q,qp)
 #if defined _RT
  use RT_control,    ONLY:RT_apply
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::en
  type(bz_samp) ::k,q
@@ -95,8 +100,8 @@ subroutine ELPH_Sigma_c(en,k,q,qp)
  call k_build_up_BZ_tables(k)
  call k_expand(k)
  !
- if (QP_PH_n_G_bands(2)<=0.or.QP_PH_n_G_bands(2)>elph_nb) QP_PH_n_G_bands=(/1,elph_nb/)
- if (QP_PH_n_G_bands(1)<=0.or.QP_PH_n_G_bands(1)>elph_nb) QP_PH_n_G_bands=(/1,elph_nb/)
+ if (QP_PH_n_G_bands(2)<=elph_bands(1).or.QP_PH_n_G_bands(2)>elph_bands(2)) QP_PH_n_G_bands=elph_bands
+ if (QP_PH_n_G_bands(1)<=elph_bands(1).or.QP_PH_n_G_bands(1)>elph_bands(2)) QP_PH_n_G_bands=elph_bands
  !
  call msg('r', '[GW/El-Ph] Bands range       ',(/QP_PH_n_G_bands(1),QP_PH_n_G_bands(2)/))
  if (trim(QP_solver)=='n') &
@@ -150,9 +155,9 @@ subroutine ELPH_Sigma_c(en,k,q,qp)
    do i_qp=1,qp%n_states
      Sc_W(i_qp)%n_freqs=QP_dSc_steps
      YAMBO_ALLOC(Sc_W(i_qp)%p,(Sc_W(i_qp)%n_freqs))
-     forall (i2=1:QP_dSc_steps) Sc_W(i_qp)%p(i2)=&
-&           en%E(QP_table(i_qp,1),QP_table(i_qp,3),1)+(i2-1)*QP_dSc_delta+&
-&           cmplx(0.,QP_G_damp,SP)
+     forall (i2=1:QP_dSc_steps) Sc_W(i_qp)%p(i2)=   &
+     &   en%E(QP_table(i_qp,1),QP_table(i_qp,3),1)+ &
+     &   (i2-1)*QP_dSc_delta+cmplx(0.,QP_G_damp,SP)
    enddo
    !
    gsqF_energy_steps=QP_dSc_steps
@@ -298,7 +303,7 @@ subroutine ELPH_Sigma_c(en,k,q,qp)
        !
        call setup_k_plus_q_levels(iq_db,E_random_shift)
        !
-       call QP_apply((/1,elph_nb/),GKKP%E_kpq(iq_db),k,"G",msg_fmt=' ')
+       call QP_apply(elph_bands,GKKP%E_kpq(iq_db),k,"G",msg_fmt=' ')
 #if defined _RT
        call RT_apply((/1,elph_nb/),GKKP%E_kpq(iq_db),k,"G",VERBOSE=iq_bz==1)
 #endif
@@ -413,7 +418,7 @@ subroutine ELPH_Sigma_c(en,k,q,qp)
  !
  ! Debye energy
  !
- call msg('nr','[Ph] Debye energy         ',(/PH_W_debye*HA2EV,PH_W_debye*HA2THZ/),"[ev/ThZ]")
+ call msg('nrs','[Ph] Debye energy         ',(/PH_W_debye*HA2EV,PH_W_debye*HA2THZ/),"[ev/ThZ]")
  !
  ! Here I use the Cardona-Allen g^2 F functions to evaluate their integrated value.
  ! I also calculate the full frequency dependent self-energy
diff --git a/src/el-ph/ELPH_Sigma_c_engine.F b/src/el-ph/ELPH_Sigma_c_engine.F
index ee354eb96e..909f093f68 100644
--- a/src/el-ph/ELPH_Sigma_c_engine.F
+++ b/src/el-ph/ELPH_Sigma_c_engine.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 !> @brief Calculate real and imaginary part of the electron-phonon self-energy
 !!
 !! @param[in]       i_qp                quasi-particle index
@@ -29,13 +33,14 @@ subroutine ELPH_Sigma_c_engine(i_qp,ob,ok,iq_bz,iq_ibz,iq_mem,il,elph_gkkp_sq,en
  use ELPH,          ONLY:gsqF_fan,gsqF_dw,gsqF_ca_corr,gsqF_life_bose,gsqF_life_f,GKKP,&
 &                        gsqF_energy_steps,gsqF_dw,eval_G_using_KK,elph_use_q_grid,PH_freqs,&
 &                        FAN_deltaE_treshold,DW_deltaE_treshold,PH_freqs_sq,use_PH_DbGd, &
-&                        elph_grids_are_expanded,Efermi_DbGd
+&                        elph_grids_are_expanded,Efermi_DbGd,l_GKKP_hosts_DW
  use QP_m,          ONLY:QP_table,QP_time_order_sign
  use R_lattice,     ONLY:bz_samp
  use D_lattice,     ONLY:Tel
  use functions,     ONLY:Fermi_fnc_derivative,Fermi_fnc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,      intent(in) :: i_qp
  integer,      intent(in) :: ob
@@ -167,7 +172,7 @@ subroutine ELPH_Sigma_c_engine(i_qp,ob,ok,iq_bz,iq_ibz,iq_mem,il,elph_gkkp_sq,en
    delta_E_at_gamma=GKKP%E_kpq(1)%E(ib,ik_bz,1) -GKKP%E_kpq(1)%E(ob,ik_bz,1)
  endif
  !
- eval_DW=(.not.abs(delta_E_at_gamma)<DW_deltaE_treshold)
+ eval_DW=(.not.abs(delta_E_at_gamma)<DW_deltaE_treshold).and.l_GKKP_hosts_DW
  !
  gsqF_RES_DW=cZERO
  !
diff --git a/src/el-ph/ELPH_databases_check.F b/src/el-ph/ELPH_databases_check.F
index db243a6106..16890eef44 100644
--- a/src/el-ph/ELPH_databases_check.F
+++ b/src/el-ph/ELPH_databases_check.F
@@ -5,15 +5,15 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine ELPH_databases_check(n_bands)
+subroutine ELPH_databases_check(bands)
  ! 
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:REP,OP_RD_CL
- use ELPH,           ONLY:l_GKKP_DB_exists,l_GKKP_expanded_DB_exists,elph_nb
+ use ELPH,           ONLY:l_GKKP_DB_exists,l_GKKP_expanded_DB_exists,elph_bands
  !
  implicit none
  !
- integer :: n_bands
+ integer :: bands(2)
  !
  ! Work Space
  !
@@ -24,12 +24,13 @@ subroutine ELPH_databases_check(n_bands)
  !===========
  call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),ID=ID)
  io_err=io_ELPH(ID,'gkkp')
- if (io_err==0) l_GKKP_DB_exists=n_bands<=elph_nb
+ if (io_err==0) l_GKKP_DB_exists= bands(1)>elph_bands(1) .and. bands(2)<=elph_bands(2)
  !
  call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),ID=ID)
  io_err=io_ELPH(ID,'gkkp_expanded')
- if (io_err==0) l_GKKP_expanded_DB_exists=n_bands<=elph_nb
+ if (io_err==0) l_GKKP_expanded_DB_exists= bands(1)>elph_bands(1) .and. bands(2)<=elph_bands(2)
  !
- if (all((/l_GKKP_DB_exists,l_GKKP_expanded_DB_exists/))) call error("Both expanded and un-expandend GKKP database found")
+ if (all((/l_GKKP_DB_exists,l_GKKP_expanded_DB_exists/))) &
+ & call error("Neither expanded or un-expandend GKKP database found")
  !
 end subroutine ELPH_databases_check
diff --git a/src/el-ph/ELPH_databases_grids_map.F b/src/el-ph/ELPH_databases_grids_map.F
index 13599140a4..9188dc8e01 100644
--- a/src/el-ph/ELPH_databases_grids_map.F
+++ b/src/el-ph/ELPH_databases_grids_map.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_databases_grids_map(k,q)
  !
  use drivers,        ONLY:l_phel_corr
@@ -14,8 +18,9 @@ subroutine ELPH_databases_grids_map(k,q)
 &                         l_GKKP_expanded_DB_exists,l_GKKP_DB_exists
  use vec_operate,    ONLY:v_is_zero,rlu_v_is_zero,c2a
  use zeros,          ONLY:k_rlu_zero
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::k,q
  !
@@ -54,8 +59,15 @@ subroutine ELPH_databases_grids_map(k,q)
  call k_ibz2bz(q,'i',.false.)
  do i1=1,nQ
    do i2=1,elph_nQ
-     if (l_GKKP_expanded_DB_exists) call c2a(v_in=q%ptbz(i1,:)+PH_qpt(i2,:),v_out=v,mode="ki2a")
-     if (l_GKKP_DB_exists)          call c2a(v_in=q%pt(i1,:)+PH_qpt(i2,:),v_out=v,mode="ki2a")
+     ! For abinit I may need a -1 ?!
+     !call c2a(v_in=q%ptbz(i1,:)+PH_qpt(i2,:),v_out=v,mode="ki2a")
+     ! For pwscf no need for -1 (see comment below)
+     call c2a(v_in=q%ptbz(i1,:)-PH_qpt(i2,:),v_out=v,mode="ki2a")
+     ! DS comment 2022/04/23:
+     ! - these if are useless, if l_GKKP_DB_exists the code exits before (see return)
+     ! - why the -q here? This is already fixed by DB_Q_map. The Q_io_map should use +q. See again standard case above.
+     !if (l_GKKP_expanded_DB_exists) call c2a(v_in=q%ptbz(i1,:)+PH_qpt(i2,:),v_out=v,mode="ki2a")
+     !if (l_GKKP_DB_exists)          call c2a(v_in=q%pt(i1,:)+PH_qpt(i2,:),v_out=v,mode="ki2a")
      if (rlu_v_is_zero(v,zero_=k_rlu_zero)) Q_io_map(i1)=i2
    enddo
  enddo
@@ -72,7 +84,7 @@ subroutine ELPH_databases_grids_map(k,q)
      if (rlu_v_is_zero(v,zero_=k_rlu_zero)) K_io_map(i1)=i2
    enddo
  enddo
- call k_ibz2bz(k,'i',.false.)
+ call k_ibz2bz(k,'d',.false.)
  if (any(K_io_map==0)) call error('K-points do not match')
  !
 end subroutine
diff --git a/src/el-ph/ELPH_databases_load.F b/src/el-ph/ELPH_databases_load.F
index 0c564d46bd..de8eb7ed00 100644
--- a/src/el-ph/ELPH_databases_load.F
+++ b/src/el-ph/ELPH_databases_load.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM FP
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_databases_load(GKKP_sq,E,k,q,n_bands,q_range,K_mode,SCATT_mode)
  ! 
  ! mode can be combination of
@@ -23,8 +27,9 @@ subroutine ELPH_databases_load(GKKP_sq,E,k,q,n_bands,q_range,K_mode,SCATT_mode)
  use LIVE_t,         ONLY:live_timing
  use parallel_m,     ONLY:PAR_Q_bz_index,PAR_IND_Q_bz,PAR_nQ_bz,PAR_Xk_nibz,PAR_Xk_ibz_index,PAR_IND_Xk_ibz,&
 &                         PAR_COM_Q_INDEX,PAR_COM_Xk_ibz_INDEX,PARs_PH_Q_ibz,PARs_PH_Q_ibz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  ! 
  integer,      intent(in)    :: n_bands(2),q_range(2)
  type(levels), intent(in)    :: E
diff --git a/src/el-ph/ELPH_databases_symmetrize.F b/src/el-ph/ELPH_databases_symmetrize.F
index ea8497255f..8aa0e3eff3 100644
--- a/src/el-ph/ELPH_databases_symmetrize.F
+++ b/src/el-ph/ELPH_databases_symmetrize.F
@@ -9,7 +9,7 @@ subroutine ELPH_databases_symmetrize(mode,E,k,q,ik_bz,iq,n_bands,GKKP)
  ! 
  use pars,           ONLY:SP
  use units,          ONLY:HA2EV
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,deg_threshold
  use R_lattice,      ONLY:qindx_S,bz_samp
  use ELPH,           ONLY:ph_modes,PH_freqs_sq
  use vec_operate,    ONLY:degeneration_finder
@@ -38,8 +38,10 @@ subroutine ELPH_databases_symmetrize(mode,E,k,q,ik_bz,iq,n_bands,GKKP)
    ik       =k%sstar(ik_bz,1)
    ik_m_q   =k%sstar(qindx_S(ik,iq,1),1)
    !
-   call degeneration_finder(E%E(:n_bands(2),ik,1),    n_bands(2),first_el(1,:),n_of_el(1,:),n_deg_grp(1),0.0001/HA2EV)
-   call degeneration_finder(E%E(:n_bands(2),ik_m_q,1),n_bands(2),first_el(2,:),n_of_el(2,:),n_deg_grp(2),0.0001/HA2EV)
+   call degeneration_finder(n_bands(2),first_el(1,:),n_of_el(1,:),n_deg_grp(1),&
+   &                        Er=E%E(:n_bands(2),ik,1),    deg_accuracy=deg_threshold)
+   call degeneration_finder(n_bands(2),first_el(2,:),n_of_el(2,:),n_deg_grp(2),&
+   &                        Er=E%E(:n_bands(2),ik_m_q,1),deg_accuracy=deg_threshold)
    !
    do i_g=1,n_deg_grp(1)
      do i_np=n_bands(1),n_bands(2)
@@ -72,7 +74,8 @@ subroutine ELPH_databases_symmetrize(mode,E,k,q,ik_bz,iq,n_bands,GKKP)
      ph_E(il)=sqrt(PH_freqs_sq(iq,il))
    enddo
    !
-   call degeneration_finder(ph_E,ph_modes,first_el(1,:ph_modes),n_of_el(1,:ph_modes),n_deg_grp(1),0.0001/HA2EV)
+   call degeneration_finder(ph_modes,first_el(1,:ph_modes),n_of_el(1,:ph_modes),n_deg_grp(1),&
+   &                        Er=ph_E,deg_accuracy=deg_threshold)
    !
    do i_g=1,n_deg_grp(1)
      do i_c=1,2
diff --git a/src/el-ph/ELPH_gsqF_to_Sigma.F b/src/el-ph/ELPH_gsqF_to_Sigma.F
index 4d00dcfdf6..338d5c875d 100644
--- a/src/el-ph/ELPH_gsqF_to_Sigma.F
+++ b/src/el-ph/ELPH_gsqF_to_Sigma.F
@@ -72,7 +72,7 @@ subroutine ELPH_gsqF_to_Sigma(en,q,l_GF_from_CA,Sc_W)
      !
      ! Overwrites the damping defined in FREQUENCIES_Green_Function
      !
-     forall(iw=1:QP_Sc_steps) Sc_local_W%p(iw)= real( Sc_local_W%p(iw)) + cmplx(0._SP,0.00001_SP/HA2EV)
+     forall (iw=1:QP_Sc_steps) Sc_local_W%p(iw)= real( Sc_local_W%p(iw)) + cmplx(0._SP,0.00001_SP/HA2EV)
    endif
    !
    ib   =QP_table(i_qp,1)
diff --git a/src/el-ph/ELPH_project.dep b/src/el-ph/ELPH_project.dep
index 818f59c7e4..1c2099f24d 100644
--- a/src/el-ph/ELPH_project.dep
+++ b/src/el-ph/ELPH_project.dep
@@ -1,11 +1,7 @@
  ELPH_Hamiltonian.o
  ELPH_Sigma_c.o
  ELPH_Sigma_c_engine.o
- ELPH_acoustic_phonon_properties.o
- ELPH_databases_check.o
  ELPH_databases_grids_map.o
- ELPH_databases_load.o
- ELPH_databases_symmetrize.o
  ELPH_gsqF_to_Sigma.o
  ELPH_simmetrize_and_distribute_gsqF.o
  En_k_plus_q_interpolate.o
diff --git a/src/el-ph/En_k_plus_q_interpolate.F b/src/el-ph/En_k_plus_q_interpolate.F
index 586938ef5a..9b089d040f 100644
--- a/src/el-ph/En_k_plus_q_interpolate.F
+++ b/src/el-ph/En_k_plus_q_interpolate.F
@@ -5,15 +5,30 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+!> @brief This subroutine interpolate energies at k+q for the double-grid q-points
+!!
+!! @param[in]       k                           k-points sampling
+!! @param[in]       q                           q-points sampling
+!! @param[in]       E                           KS energies
+!! @param[in]       iqbz                        q-point where energies are interpolated
+!!
+!! @param[out]      GKKP%E_dg_kpq(iqbz)%E_kpq   interpolated energies at k+q_dbgrid
+!! @param[out]      GKKP%E_dg_kpq(iqbz)%f_kpq   occupations at k+q_dbgrid
+!
+! headers
+!
+#include<y_memory.h>
+!
 subroutine En_k_plus_q_interpolate(k,q,E,iqbz)
  !
  use pars,                ONLY:pi,rZERO
  use electrons,           ONLY:levels,n_sp_pol,E_reset,spin_occ
  use R_lattice,           ONLY:bz_samp,bz_samp_reset
- use ELPH,                ONLY:elph_nb,ID_E_kpq,GKKP,FineGd_E_kpq_alloc,Efermi_DbGd
+ use ELPH,                ONLY:elph_bands,ID_E_kpq,GKKP,FineGd_E_kpq_alloc,Efermi_DbGd
  use interpolate,         ONLY:INTERPOLATION_BZ
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)   :: k,q
  type(levels) , intent(in)   :: E
@@ -92,8 +107,8 @@ subroutine En_k_plus_q_interpolate(k,q,E,iqbz)
  do ik=1,k%nibz
   do iq_fine=q%FGbz%k_range(iqbz,1)+1,q%FGbz%k_range(iqbz,2),1
     iq_idx=iq_fine-q%FGbz%k_range(iqbz,1)
-    GKKP%E_dg_kpq(iqbz)%E_kpq(:elph_nb,ik,:,iq_idx)=E_kpq%E(:elph_nb,ic,:)
-    GKKP%E_dg_kpq(iqbz)%f_kpq(:elph_nb,ik,:,iq_idx)=E_kpq%f(:elph_nb,ic,:)
+    GKKP%E_dg_kpq(iqbz)%E_kpq(elph_bands(1):elph_bands(2),ik,:,iq_idx)=E_kpq%E(elph_bands(1):elph_bands(2),ic,:)
+    GKKP%E_dg_kpq(iqbz)%f_kpq(elph_bands(1):elph_bands(2),ik,:,iq_idx)=E_kpq%f(elph_bands(1):elph_bands(2),ic,:)
     ic=ic+1
    enddo
  enddo
diff --git a/src/el-ph/Eval_Efermi_DbGd.F b/src/el-ph/Eval_Efermi_DbGd.F
index b2e5ee2fb0..39bb7cb0cb 100644
--- a/src/el-ph/Eval_Efermi_DbGd.F
+++ b/src/el-ph/Eval_Efermi_DbGd.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! This subroutine calculate the occupation for the double-grid
 ! energy levels at en(k+q) using the Fermi level calculate
 ! on the original grid
@@ -19,8 +23,9 @@ function Eval_Efermi_DbGd(k,q,en)
  use interpolate,   ONLY:INTERPOLATION_BZ
  use interfaces,    ONLY:OCCUPATIONS_Fermi
  use com,           ONLY:msg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)  :: q,k
  type(levels) :: en
diff --git a/src/el-ph/get_ph_E_DbGd.F b/src/el-ph/get_ph_E_DbGd.F
index 38d9cedd68..bc02b0032a 100644
--- a/src/el-ph/get_ph_E_DbGd.F
+++ b/src/el-ph/get_ph_E_DbGd.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 !> @brief Return phonon energy in the double-grid
 !
 ! @param[in]       q                   momenutm sampling
@@ -19,8 +23,9 @@ function get_ph_E_DbGd(il,iq_fg,iq_bz,q)
  use pars,          ONLY:SP
  use R_lattice,     ONLY:bz_samp
  use ELPH,          ONLY:PH_freqs,PH_freqs_sq
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)  :: il,iq_bz,iq_fg
  type(bz_samp), intent(in)  :: q
diff --git a/src/exc-ph/.objects b/src/exc-ph/.objects
new file mode 100644
index 0000000000..97113df687
--- /dev/null
+++ b/src/exc-ph/.objects
@@ -0,0 +1,10 @@
+#if defined _ELPH && !defined _YPP_ELPH
+EQ_objs = EXCPH_gkkp_eval.o EXCPH_gkkp_driver.o EXCPH_load_L.o EXCPH_lifetime.o EXC_dipole.o \
+	  EXCPH_eval_sat.o EXCPH_optics.o EXCPH_X_phass.o Satellite_Simple.o Satellite_DbGrid.o \
+	  EXCPH_double_grid_setup.o EXCPH_eval_renorm.o EXCPH_ave_sat_DbGrid.o EXC_occupations.o \
+          EXCPH_X_phass_from_Hpert.o ELEC_WF_phases.o
+#endif
+#if defined _YPP_ELPH
+EQ_objs = EXC_occupations.o 
+#endif
+objs = ${EQ_objs} 
diff --git a/src/exc-ph/DOUBLE_project.dep b/src/exc-ph/DOUBLE_project.dep
new file mode 100644
index 0000000000..cb8f22c982
--- /dev/null
+++ b/src/exc-ph/DOUBLE_project.dep
@@ -0,0 +1,17 @@
+ ELEC_WF_phases.o
+ EXCPH_X_phass.o
+ EXCPH_X_phass_from_Hpert.o
+ EXCPH_ave_sat_DbGrid.o
+ EXCPH_double_grid_setup.o
+ EXCPH_eval_renorm.o
+ EXCPH_eval_sat.o
+ EXCPH_gkkp_driver.o
+ EXCPH_gkkp_eval.o
+ EXCPH_lifetime.o
+ EXCPH_load_L.o
+ EXCPH_optics.o
+ EXC_dipole.o
+ EXC_occupations.o
+ Satellite_DbGrid.o
+ Satellite_Simple.o
+
diff --git a/src/exc-ph/ELEC_WF_phases.F b/src/exc-ph/ELEC_WF_phases.F
new file mode 100644
index 0000000000..0a0d87f590
--- /dev/null
+++ b/src/exc-ph/ELEC_WF_phases.F
@@ -0,0 +1,91 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): DS FP
+!
+! headers
+!
+#include<y_memory.h>
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+!! Code taken from branch phys-excitons
+!! file ypp/excitons/exciton_symmetries.f90
+!
+subroutine ELEC_WF_phases(E,k,l_min_mem,ID_phases)
+ !
+ use D_lattice,      ONLY:nsym
+ use R_lattice,      ONLY:bz_samp
+ use wave_func,      ONLY:io_WF_phases,WF_phases_b_map,WF_phases
+ use electrons,      ONLY:levels,n_sp_pol 
+ use BS,             ONLY:BS_bands
+ use IO_m,           ONLY:RD,OP_RD,DUMP,REP
+ use IO_int,         ONLY:io_control
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels),  intent(in) :: E
+ type(bz_samp), intent(in) :: k
+ integer,       intent(out):: ID_phases
+ logical,       intent(in) :: l_min_mem
+ !
+ ! Work Space
+ !
+ integer :: nsz(5)
+ integer :: io_err
+ integer :: i_sp_pol,ik,il1
+ !
+ ! Electronic part of the symmetries
+ !
+ call k_build_up_BZ_tables(k)
+ call k_ibz2bz(k,'iku',.true.)
+ call k_small_group(k)
+ !
+ call WF_phase_matrices(E,k,BS_bands,(/1,k%nibz/))
+ !
+ YAMBO_ALLOC(WF_phases_b_map,(BS_bands(1):BS_bands(2),k%nibz,n_sp_pol,2))
+ !
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1,2/),MODE=DUMP,ID=ID_phases)
+ io_err=io_WF_phases(BS_bands,0,0,0,ID_phases,(/0,0,0,0,0/))
+ !
+ if (l_min_mem) return
+ !
+ nsz(1:2)=maxval(WF_phases_b_map(:,:,:,1))
+ nsz(3)  =maxval(WF_phases_b_map(:,:,:,2))
+ nsz(4)  =nsym
+ nsz(5)  =k%nbz
+ YAMBO_ALLOC(WF_phases,(nsz(1),nsz(2),nsz(3),nsz(4),nsz(5)))
+ !
+ do i_sp_pol=1,n_sp_pol
+   do ik=1,k%nibz
+     nsz(1:2)=maxval(WF_phases_b_map(:,ik,i_sp_pol,1))
+     nsz(3)  =maxval(WF_phases_b_map(:,ik,i_sp_pol,2))
+     nsz(4)  =nsym
+     nsz(5)  =k%nstar(ik)
+     il1=1
+     if(ik>0) il1= sum(k%nstar(:ik-1))+1
+     call io_control(ACTION=RD,COM=REP,SEC=(/3/),MODE=DUMP,ID=ID_phases)
+     io_err=io_WF_phases( BS_bands,ik,0,i_sp_pol,ID_phases,nsz, &
+&    WF_phases(:nsz(1),:nsz(2),:nsz(3),:,il1:il1+nsz(5)-1) )
+   enddo
+ enddo
+ !
+end subroutine
diff --git a/src/exc-ph/ELPH_project.dep b/src/exc-ph/ELPH_project.dep
new file mode 100644
index 0000000000..cb8f22c982
--- /dev/null
+++ b/src/exc-ph/ELPH_project.dep
@@ -0,0 +1,17 @@
+ ELEC_WF_phases.o
+ EXCPH_X_phass.o
+ EXCPH_X_phass_from_Hpert.o
+ EXCPH_ave_sat_DbGrid.o
+ EXCPH_double_grid_setup.o
+ EXCPH_eval_renorm.o
+ EXCPH_eval_sat.o
+ EXCPH_gkkp_driver.o
+ EXCPH_gkkp_eval.o
+ EXCPH_lifetime.o
+ EXCPH_load_L.o
+ EXCPH_optics.o
+ EXC_dipole.o
+ EXC_occupations.o
+ Satellite_DbGrid.o
+ Satellite_Simple.o
+
diff --git a/src/exc-ph/EXCPH_X_phass.F b/src/exc-ph/EXCPH_X_phass.F
new file mode 100644
index 0000000000..a586ca3199
--- /dev/null
+++ b/src/exc-ph/EXCPH_X_phass.F
@@ -0,0 +1,418 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AC
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate X(w) including ph-assisted transitions
+!
+! @param[in]       wv                  frequencies range
+! @param[in]       EXCPH_gkkp          read from file
+! @param[in]       Bose_Temp           Bose temperature for phonons
+! @param[in]       Boltz_Temp          Boltz temperature for excitons
+!
+! @param[out]      Epsilon_ii          Phonon-assisted dielectric constant
+! @param[out]      PL_ii               Phonon-assisted luminescence
+!
+subroutine EXCPH_X_phass(wv,q,Epsilon_ii,PL_ii)
+ !
+ use frequency,     ONLY:w_samp
+ use vec_operate,   ONLY:c2a
+ use pars,          ONLY:SP,cZERO,rZERO,schlen
+ use units,         ONLY:HA2EV,HA2CMm1
+ use IO_m,          ONLY:manage_action,NONE,OP_APP_CL,OP_RD_CL,DUMP,OP_RD,RD_CL_IF_END,REP,VERIFY
+ use IO_int,        ONLY:io_control
+ use EXCPH,         ONLY:EXCPH_states,EXCPH_sum,BS_Sat_WEIGHT,EXCPH_Renorm,BS_all_E,BS_R,EXC_occ,&
+&                        BS_Sat_WEIGHT_PH_abs,BS_E_in,EXCPH_Renorm_PH_abs,Sat_free,min_EXC_E,min_EXC_E_DbGd, &
+&                        EXCPH_Renorm_PL,EXCPH_Renorm_PL_PH_abs,Renorm_alloc,Renorm_free,l_DbGd_WEIGHTs
+ use ELPH,          ONLY:use_PH_DbGd,elph_branches,FAN_deltaE_treshold,Q_io_map,PH_freqs_sq
+ use parallel_m,    ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,  ONLY:PP_redux_wait,PARALLEL_index
+ use R_lattice,     ONLY:bz_samp,bz_samp_reset
+ use D_lattice,     ONLY:Boltz_Temp
+ use functions,     ONLY:boltzman_f,bose_f
+ use LIVE_t,        ONLY:live_timing
+ use stderr,        ONLY:intc
+ use electrons,     ONLY:nel_cond
+ use parser_m,      ONLY:parser
+ use com,           ONLY:msg,of_open_close
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in)  :: q
+ type(w_samp),  intent(in)  :: wv
+ complex(SP),   intent(out) :: Epsilon_ii(wv%n_freqs,3),PL_ii(wv%n_freqs,3)
+ !
+ ! Work space
+ !
+ complex(SP)   :: pole,W,Sat_alpha_abs,Sat_alpha_emiss
+ real(SP)      :: min_E,exc_E,PH_E,occ_fact,v(3)
+ integer       :: i_alpha,i_beta,il,iq_bz,iw,io_err,ID_exc_gkkp,ID_gkkp,min_pos(1)
+ integer       :: iq_db,iq_ibz,IO_ACT
+ integer       :: elph_branches_save(2)
+ type(PP_indexes)::px
+ logical       :: l_prt_pl_q_res
+ complex(SP), allocatable :: PL_ii_q(:,:,:)
+ character(schlen) :: pl_file_name
+ integer, parameter  :: n_output=7
+ real(SP)            :: output_data(n_output)
+ character(schlen)   :: headers(n_output)
+ !
+ real(SP), allocatable  :: max_Sat_weight(:)
+ integer,  allocatable  :: max_Sat_pos(:,:)
+ real(SP), allocatable  :: max_Sat_weight_PH_abs(:)
+ integer,  allocatable  :: max_Sat_pos_PH_abs(:,:)
+ !
+ integer, external :: io_EXCPH_gkkp,io_ELPH
+ !
+ min_pos  =minloc(BS_all_E(1,:))
+ min_EXC_E=minval(BS_all_E(1,:))
+ !
+ call msg('rs','Minimum excitonic energy : ',min_EXC_E*HA2EV,' eV, at q-point :'//intc(min_pos(1)))
+ call msg('rs','FAN treshold: ',FAN_deltaE_treshold*HA2CMm1,' cm-1')
+ !
+ ! Load the Exciton-phonon elements and phonon energies
+ !
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID_exc_gkkp)
+ io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+ if(io_err/=0) call error('excph_gkkp databases not found!')
+ !
+ elph_branches_save=elph_branches
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID_gkkp)
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded no_matrix_elements')
+ if(io_err/=0) call error('gkkp_expanded databases not found!')
+ elph_branches=elph_branches_save
+ !
+ call parser('PLqres',l_prt_pl_q_res)
+ if(l_prt_pl_q_res) then
+   YAMBO_ALLOC(PL_ii_q,(wv%n_freqs,3,q%nibz))
+   PL_ii_q=cZERO
+ endif
+ !
+ ! Renormalization factors
+ !
+ call Renorm_alloc()
+ !
+ Epsilon_ii            =cZERO
+ PL_ii                 =cZERO
+ EXCPH_Renorm          =rZERO
+ EXCPH_Renorm_PH_abs   =rZERO
+ EXCPH_Renorm_PL       =rZERO
+ EXCPH_Renorm_PL_PH_abs=rZERO
+ !
+ ! Max and min satellites Energies
+ ! 
+ YAMBO_ALLOC(max_Sat_weight,(EXCPH_states(1):EXCPH_states(2)))
+ YAMBO_ALLOC(max_Sat_pos,(EXCPH_states(1):EXCPH_states(2),3))
+ YAMBO_ALLOC(max_Sat_weight_PH_abs,(EXCPH_states(1):EXCPH_states(2)))
+ YAMBO_ALLOC(max_Sat_pos_PH_abs,(EXCPH_states(1):EXCPH_states(2),3))
+ !
+ max_Sat_weight=rZERO
+ max_Sat_pos   =0
+ max_Sat_weight_PH_abs=rZERO
+ max_Sat_pos_PH_abs   =0
+ !
+ if(.not.allocated(q%k_table)) call k_build_up_BZ_tables(q)
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/wv%n_freqs/)) 
+ ! 
+ call live_timing('Satellites',px%n_of_elements(myid+1)*q%nibz)
+ !
+ do iq_ibz=1,q%nibz
+   !
+   iq_bz=q%k_table(iq_ibz,1)
+   !
+   IO_ACT=manage_action(RD_CL_IF_END,iq_bz,1,q%nbz)
+   !
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+iq_bz/),MODE=DUMP,ID=ID_exc_gkkp)
+   io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+   !
+   iq_db=Q_io_map(iq_bz)
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+iq_db/),MODE=DUMP,ID=ID_gkkp)
+   io_err=io_ELPH(ID_gkkp,'gkkp_expanded no_matrix_elements')
+   !
+   ! Add the contribution of q-point transition to the renormalization factors
+   ! for all excionts i_alpha
+   !
+   call EXCPH_eval_sat(iq_bz,q)
+   !
+   if(use_PH_DbGd.and.l_DbGd_WEIGHTs)  call EXCPH_ave_sat_DbGrid(iq_bz,q)
+   !
+   call Find_Max_Sat()
+   !
+   call EXCPH_eval_renorm(iq_bz,q,EXCPH_Renorm,EXCPH_Renorm_PH_abs,EXCPH_Renorm_PL,EXCPH_Renorm_PL_PH_abs)
+   !
+   do iw=1,wv%n_freqs
+     !
+     if (.not.px%element_1D(iw)) cycle
+     !
+     W=wv%p(iw)
+     !
+     do i_alpha=EXCPH_states(1),EXCPH_states(2)
+       !
+       ! Satellites for the alpha exciton
+       !
+       do il=elph_branches(1),elph_branches(2)
+         !
+         iq_db=Q_io_map(iq_bz)
+         ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+         !
+         do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+           !
+           if(.not.use_PH_DbGd) then
+             !      
+             ! Calcualte the Satellite on the course q-grid
+             !
+             call Satellite_Simple(iq_bz,q,il,i_alpha,i_beta,W,Sat_alpha_abs,Sat_alpha_emiss)
+             !
+           else
+             !      
+             ! Use the double-grid to average denominators
+             !
+             call Satellite_DbGrid(iq_bz,q,il,i_alpha,i_beta,W,Sat_alpha_abs,Sat_alpha_emiss)
+             !
+           endif
+           !
+           ! Add the satellites to the dielectric constant and luminescence
+           !
+           Epsilon_ii(iw,1)=Epsilon_ii(iw,1)+Sat_alpha_abs*BS_R(i_alpha)*q%weights(iq_ibz) !/real(q%nbz,SP)
+           Epsilon_ii(iw,3)=Epsilon_ii(iw,3)+Sat_alpha_abs*BS_R(i_alpha)*q%weights(iq_ibz) !/real(q%nbz,SP)
+           !
+           PL_ii(iw,1)     =PL_ii(iw,1)+(W+2.*ph_E)**2*Sat_alpha_emiss*BS_R(i_alpha)*q%weights(iq_ibz) !/real(q%nbz,SP)
+           PL_ii(iw,3)     =PL_ii(iw,3)+(W+2.*ph_E)**2*Sat_alpha_emiss*BS_R(i_alpha)*q%weights(iq_ibz) !/real(q%nbz,SP)
+           !
+           if(l_prt_pl_q_res) then
+              PL_ii_q(iw,1,iq_ibz)     =PL_ii_q(iw,1,iq_ibz)+(W+2.*ph_E)**2*Sat_alpha_emiss*BS_R(i_alpha)*q%weights(iq_ibz) !/real(q%nbz,SP)
+              PL_ii_q(iw,3,iq_ibz)     =PL_ii_q(iw,3,iq_ibz)+(W+2.*ph_E)**2*Sat_alpha_emiss*BS_R(i_alpha)*q%weights(iq_ibz) !/real(q%nbz,SP)
+           endif
+           !
+         enddo
+         !
+       enddo
+       !
+     enddo   
+     !
+     call live_timing(steps=1)
+     !
+   enddo   
+   !
+ enddo   
+ !
+ call live_timing( )
+ !
+ call Sat_report()
+ !
+ call msg('sr','Renormalization factors at finite temperature [Absorption]:')
+ call msg('sr','        [ph-emission]   [ph-absorption]')
+ do i_alpha=EXCPH_states(1),EXCPH_states(2)
+   call msg('sr','Exciton '//trim(intc(i_alpha))//': ',(/EXCPH_Renorm(i_alpha),EXCPH_Renorm_PH_abs(i_alpha)/))
+ enddo
+ call msg('sr','Renormalization factors at finite temperature [Emission]:')
+ call msg('sr','        [ph-emission]   [ph-absorption]')
+ do i_alpha=EXCPH_states(1),EXCPH_states(2)
+   call msg('sr','Exciton '//trim(intc(i_alpha))//': ',(/EXCPH_Renorm_PL(i_alpha),EXCPH_Renorm_PL_PH_abs(i_alpha)/))
+ enddo
+ !
+ ! The term without satellites
+ !
+ min_E=min_EXC_E
+ if(use_PH_DbGd) min_E=min_EXC_E_DbGd
+ !
+ call live_timing('Epsilon',wv%n_freqs)
+ do iw=1,wv%n_freqs
+   !
+   if (.not.px%element_1D(iw)) cycle   
+   !
+   W=wv%p(iw)
+   !
+   do i_alpha=EXCPH_states(1),EXCPH_states(2)
+     !
+     pole =BS_E_in(i_alpha)
+     Epsilon_ii(iw,1:2)=Epsilon_ii(iw,1:2)+(1._SP-EXCPH_Renorm(i_alpha)-EXCPH_Renorm_PH_abs(i_alpha))*BS_R(i_alpha)/(W-pole)
+     !
+     ! [CA] The pole of luminescence should be the pole of BS_E_in 
+     !      but I think this is not consistent with the Boltzman weight, I use always Lout.
+     ! [FP] Luminescence in RS (detailed balance) assumption requires Lin=Lout.
+     !      Maybe put a check/logical enforcing this for PL? 
+     !
+     exc_E=BS_E_in(i_alpha)-min_E
+     pole =BS_E_in(i_alpha)          
+     !
+     if(nel_cond/=rZERO) then
+       occ_fact=EXC_occ(i_alpha,1)
+     else
+       occ_fact=boltzman_f(exc_E)
+     endif
+     !
+     PL_ii(iw,1:2)     =PL_ii(iw,1:2)  + W**3*(1._SP-EXCPH_Renorm_PL(i_alpha)-EXCPH_Renorm_PL_PH_abs(i_alpha))*BS_R(i_alpha) &
+&                                      /(W-pole)*occ_fact
+     !
+   enddo
+   !
+   call live_timing(steps=1)
+   !
+ enddo
+ !
+ call live_timing( )
+ !
+ !
+ ! Eps(w)=1._SP - X(w) in agreement with definition in src/bse/
+ !
+ !
+ do iw=1,wv%n_freqs
+   if (.not.px%element_1D(iw)) cycle   
+   W=wv%p(iw)
+   Epsilon_ii(iw,:)=1._SP-Epsilon_ii(iw,:)
+   PL_ii(iw,:)=-W*PL_ii(iw,:)
+   if(l_prt_pl_q_res) then
+     PL_ii_q(iw,:,:)=-W*PL_ii_q(iw,:,:)
+   endif
+   !
+ enddo
+ !
+ if(l_prt_pl_q_res) then
+   call k_ibz2bz(q,'a',.true.)
+   ! Write on disk
+   do iq_ibz=1,q%nibz
+     !
+!     iq_ibz = q%sstar(iq_bz,1)
+     if(.not.use_PH_DbGd.and.abs(BS_all_E(EXCPH_sum(1),iq_ibz)-min_EXC_E)>3.0*Boltz_Temp)  then
+       call msg('s','Too weak signal at q-point '//intc(iq_bz)//' skipped ')
+       cycle
+     endif
+     !
+     pl_file_name ='pl_bse_resolved_q'//intc(iq_ibz)
+!     iq_ibz = q%sstar(iq_bz,1)
+     !
+     call of_open_close(pl_file_name,'ot')
+     !
+     call msg('o pl_bse','# EXCITON PHONON')    
+     call msg('o pl_bse','# Exciton_states    :',EXCPH_states,INDENT=0)    
+     call msg('o pl_bse','# EXciton sum       :',EXCPH_sum,INDENT=0)    
+     call msg('o pl_bse','#')
+     headers(1)='E[eV]'
+     headers(2:n_output)=(/'Im(pl_ph )','Re(pl_ph )','Im(pl_nos)','Re(pl_nos)','Re(pl_sat)','Im(pl_sat)'/)
+     call msg('o pl_bse','#',headers,INDENT=0,USE_TABS=.TRUE.)
+     call msg('o pl_bse','#')
+     call msg('o pl_bse','# iq_ibz = '//intc(iq_ibz))
+     call msg('o pl_bse','# q_pt_bz(rlu)   = ',q%pt(iq_ibz,:))
+!     call msg('o pl_bse','# q_pt_bz(rlu)   = ',q%ptbz(iq_bz,:))
+     !
+     ! Write output on file
+     !
+     do iw=1,wv%n_freqs
+       !
+       ! Luminescence
+       !
+       output_data=(/real(wv%p(iw))*HA2EV,aimag(PL_ii_q(iw,1,iq_ibz)),real(PL_ii_q(iw,1,iq_ibz)),          &
+&               aimag(PL_ii_q(iw,2,iq_ibz)),real(PL_ii_q(iw,2,iq_ibz)),aimag(PL_ii_q(iw,3,iq_ibz)),real(PL_ii_q(iw,3,iq_ibz))/)
+       call msg('o pl_bse','',output_data,INDENT=-2,USE_TABS=.true.)
+       !
+     enddo
+     !
+     ! Close files
+     !
+     call of_open_close(pl_file_name)
+   enddo
+   !
+ endif
+ ! 
+ !
+ call PP_redux_wait(Epsilon_ii)
+ call PP_redux_wait(PL_ii) 
+ if(l_prt_pl_q_res) call PP_redux_wait(PL_ii_q) 
+ !
+ ! Free
+ !
+ call PP_indexes_reset(px)
+ call Sat_free()
+ call Renorm_free()
+ !
+ contains
+ !
+ subroutine Find_Max_Sat()
+   implicit none
+   !
+   ! Work Space
+   !
+   integer  :: i1
+   real(SP) :: max_Sat(EXCPH_states(1):EXCPH_states(2))
+   real(SP) :: max_Sat_PH_abs(EXCPH_states(1):EXCPH_states(2))
+   !      
+   ! Find the maximum weight of the satellites
+   ! 
+   do i1=EXCPH_states(1),EXCPH_states(2)
+     max_Sat(i1)       =maxval(abs(BS_Sat_WEIGHT(:,:,i1)))
+     max_Sat_PH_abs(i1)=maxval(abs(BS_Sat_WEIGHT_PH_abs(:,:,i1)))
+   enddo
+   !
+   do i1=EXCPH_states(1),EXCPH_states(2)
+     if(max_Sat_weight(i1)<max_Sat(i1)) then
+        max_Sat_weight(i1)=max_Sat(i1)
+        max_Sat_pos(i1,1) =iq_bz
+        max_Sat_pos(i1,2:3) =maxloc(abs(BS_Sat_WEIGHT(:,:,i1)))
+     endif
+     !
+     if(max_Sat_weight_PH_abs(i1)<max_Sat_PH_abs(i1)) then
+        max_Sat_weight_PH_abs(i1)=max_Sat_PH_abs(i1)
+        max_Sat_pos_PH_abs(i1,1) =iq_bz
+        max_Sat_pos_PH_abs(i1,2:3) =maxloc(abs(BS_Sat_WEIGHT_PH_abs(:,:,i1)))
+      endif
+    enddo
+    !
+ end subroutine Find_Max_Sat
+ !
+ subroutine Sat_report()
+   use stderr,        ONLY:intc
+   use units,         ONLY:HA2EV
+   use ELPH,          ONLY:ph_freqs_sq
+   implicit none
+   !
+   ! work space
+   !
+   character(schlen) :: message
+   real(SP)          :: ph_E
+   integer           :: iq_bz,i1,iq_ibz
+   !
+   call msg('sr',' Max satellite weight: ')
+   do i1=EXCPH_states(1),EXCPH_states(2)
+     iq_bz  = max_Sat_pos(i1,1) 
+     write(*,*) 'passo qui ',iq_bz
+     iq_db  =Q_io_map(iq_bz)
+     iq_ibz =q%sstar(iq_bz,1)
+     ph_E=sqrt(abs(ph_freqs_sq(iq_db,max_Sat_pos(i1,2))))
+     write(message,*) 'Exciton '//trim(intc(i1))//' max(W_sat)=',max_Sat_weight(i1)
+     call msg('sr',message)
+     write(message,*) '   for iq = '//intc(max_Sat_pos(i1,1))//' il = '//intc(max_Sat_pos(i1,2))//'  ib = ' &
+&                                     //intc(max_Sat_pos(i1,3)),' w(iq,il) = ',ph_E*HA2EV*1000._SP,'meV'
+     call msg('sr',message)
+     write(message,*) '   E(in) = ',real(BS_E_in(i1))*HA2EV, ' E(out)= ',BS_all_E(max_Sat_pos(i1,3),iq_ibz)*HA2EV
+     call msg('sr',message)
+   enddo
+   !
+   call msg('sr',' Max satellite weight (phonon absorption): ')
+   do i1=EXCPH_states(1),EXCPH_states(2)
+      iq_bz = max_Sat_pos_PH_abs(i1,1) !  q%k_table(max_Sat_pos(i1,1), 1)
+      iq_db =Q_io_map(iq_bz)
+      iq_ibz=q%sstar(iq_bz,1)
+      ph_E=sqrt(abs(ph_freqs_sq(iq_db,max_Sat_pos_PH_abs(i1,2))))
+      write(message,*) 'Exciton '//trim(intc(i1))//' max(W_sat)=',max_Sat_weight_PH_abs(i1)
+      call msg('sr',message)
+      write(message,*) '   for iq = '//intc(max_Sat_pos_PH_abs(i1,1))//' il = '//intc(max_Sat_pos_PH_abs(i1,2))//'  ib = ' &
+&                                     //intc(max_Sat_pos_PH_abs(i1,3)),' w(iq,il) = ',ph_E*HA2EV*1000._SP,'meV'
+     call msg('sr',message)
+     write(message,*) '   E(in) = ',real(BS_E_in(i1))*HA2EV, ' E(out)= ',BS_all_E(max_Sat_pos(i1,3),iq_ibz)*HA2EV
+     call msg('sr',message)
+     enddo
+    !
+ end subroutine Sat_report
+ !
+end subroutine EXCPH_X_phass
+
diff --git a/src/exc-ph/EXCPH_X_phass_from_Hpert.F b/src/exc-ph/EXCPH_X_phass_from_Hpert.F
new file mode 100644
index 0000000000..5cc7bfed4d
--- /dev/null
+++ b/src/exc-ph/EXCPH_X_phass_from_Hpert.F
@@ -0,0 +1,209 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): FP AC
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate PL(w) satellites including ph-assisted transitions
+!
+! @param[in]       wv                  frequencies range
+! @param[in]       EXCPH_gkkp          read from file
+! @param[in]       Bose_Temp           Bose temperature for phonons
+! @param[in]       Boltz_Temp          Boltz temperature for excitons
+!
+! @param[out]      PL_ii               Satellites of phonon-assisted luminescence
+!
+! This is only for testing and so far only works for
+! - photon emission
+! - phonon emission
+! - satellites only (no main peak renormalisation)
+! - no double grid
+! 
+subroutine EXCPH_X_phass_from_Hpert(wv,q,PL_ii)
+ !
+ use frequency,     ONLY:w_samp
+ use vec_operate,   ONLY:c2a
+ use pars,          ONLY:pi,SP,cZERO,rZERO,schlen
+ use units,         ONLY:HA2EV,HA2CMm1
+ use IO_m,          ONLY:manage_action,NONE,OP_APP_CL,OP_RD_CL,DUMP,OP_RD,RD_CL_IF_END,REP,VERIFY
+ use IO_int,        ONLY:io_control
+ use EXCPH,         ONLY:EXCPH_states,EXCPH_sum,BS_all_E,BS_R,EXC_occ,EXCPH_Gkkp,&
+&                        BS_E_in,Sat_free,min_EXC_E,BS_R_left,BS_Sat_E_PH_abs,BS_Sat_E
+ use ELPH,          ONLY:ph_modes,elph_branches,FAN_deltaE_treshold,Q_io_map,PH_freqs_sq
+ use parallel_m,    ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,  ONLY:PP_redux_wait,PARALLEL_index
+ use R_lattice,     ONLY:bz_samp,bz_samp_reset,d3k_factor,bare_qpg
+ use functions,     ONLY:boltzman_f,bose_f
+ use LIVE_t,        ONLY:live_timing
+ use stderr,        ONLY:intc
+ use electrons,     ONLY:spin_occ
+ use parser_m,      ONLY:parser
+ use com,           ONLY:msg,of_open_close
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in)  :: q
+ type(w_samp),  intent(in)  :: wv
+ complex(SP),   intent(out) :: PL_ii(wv%n_freqs,3)
+ !
+ ! Work space
+ !
+ complex(SP)   :: pole,W,Sat_emission,Sat_absorption,EXCPH_gaux,BS_aux
+ real(SP)      :: min_E,exc_E,ph_E,bose_factor,boltzm_factor,E_alpha,E_beta
+ integer       :: i_alpha,i_beta,il,iq_bz,iw,io_err,ID_exc_gkkp,ID_gkkp,min_pos(1)
+ integer       :: iq_db,iq_ibz,IO_ACT
+ integer       :: elph_branches_save(2)
+ type(PP_indexes)::px
+ complex(SP),allocatable :: BS_Sat_AMPLITUDE(:,:), BS_Sat_AMPLITUDE_PH_abs(:,:)
+ real(SP),allocatable :: BS_Sat_WEIGHT_sum(:,:), BS_Sat_WEIGHT_PH_abs_sum(:,:)
+ logical :: l_no_matrix_elements
+ !
+ integer, external :: io_EXCPH_gkkp,io_ELPH
+ !
+ min_pos  =minloc(BS_all_E(1,:))
+ min_EXC_E=minval(BS_all_E(1,:))
+ !
+ call msg('rs','Minimum excitonic energy : ',min_EXC_E*HA2EV,' eV, at q-point :'//intc(min_pos(1)))
+ call msg('rs','FAN treshold: ',FAN_deltaE_treshold*HA2CMm1,' cm-1')
+ !
+ ! Load the Exciton-phonon elements and phonon energies
+ !
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID_exc_gkkp)
+ io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+ if(io_err/=0) call error('excph_gkkp databases not found!')
+ !
+ elph_branches_save=elph_branches
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID_gkkp)
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded no_matrix_elements')
+ if(io_err/=0) call error('gkkp_expanded databases not found!')
+ elph_branches=elph_branches_save
+ !
+ ! Allocate modified workspace variables
+ !
+ YAMBO_ALLOC(BS_Sat_WEIGHT_sum,(ph_modes,EXCPH_sum(1):EXCPH_sum(2)))
+ YAMBO_ALLOC(BS_Sat_WEIGHT_PH_abs_sum,(ph_modes,EXCPH_sum(1):EXCPH_sum(2)))
+ YAMBO_ALLOC(BS_Sat_AMPLITUDE,(ph_modes,EXCPH_sum(1):EXCPH_sum(2)))
+ YAMBO_ALLOC(BS_Sat_AMPLITUDE_PH_abs,(ph_modes,EXCPH_sum(1):EXCPH_sum(2)))
+ !
+ BS_Sat_WEIGHT_sum            =rZERO 
+ BS_Sat_WEIGHT_PH_abs_sum     =rZERO 
+ BS_Sat_AMPLITUDE             =cZERO 
+ BS_Sat_AMPLITUDE_PH_abs      =cZERO 
+ PL_ii                        =cZERO
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/wv%n_freqs/)) 
+ ! 
+ call live_timing('Satellites',px%n_of_elements(myid+1)*q%nbz)
+ !
+ do iq_bz=1,q%nbz
+   !
+   IO_ACT=manage_action(RD_CL_IF_END,iq_bz,1,q%nbz)
+   !
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+iq_bz/),MODE=DUMP,ID=ID_exc_gkkp)
+   io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+   !
+   iq_db=Q_io_map(iq_bz)
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+iq_db/),MODE=DUMP,ID=ID_gkkp)
+   io_err=io_ELPH(ID_gkkp,'gkkp_expanded no_matrix_elements')
+   !
+   ! Add the contribution of q-point transition to the renormalization factors
+   ! for all excionts i_alpha
+   !
+   do iw=1,wv%n_freqs
+     !
+     if (.not.px%element_1D(iw)) cycle
+     !
+     W=wv%p(iw) 
+     !
+     Sat_emission                 =cZERO
+     Sat_absorption               =cZERO
+     !
+     do il=elph_branches(1),elph_branches(2) 
+       !  
+       do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+         !
+         do i_alpha=EXCPH_states(1),EXCPH_states(2)
+           !
+           iq_ibz=q%sstar(iq_bz,1)
+           !
+           iq_db=Q_io_map(iq_bz)
+           ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+           !
+           if(ph_E<FAN_deltaE_treshold) cycle
+           !
+           bose_factor=bose_f(ph_E)
+           boltzm_factor=boltzman_f(BS_all_E(i_beta,iq_ibz)-min_EXC_E)
+           !
+           E_alpha=BS_E_in(i_alpha)
+           E_beta =BS_all_E(i_beta,iq_ibz)
+           !
+           BS_Sat_E       (il,i_beta,i_alpha)=E_beta-E_alpha+ph_E
+           BS_Sat_E_PH_abs(il,i_beta,i_alpha)=E_beta-E_alpha-ph_E
+           pole=E_beta-ph_E
+           !
+           EXCPH_gaux=EXCPH_Gkkp(il,i_beta,i_alpha)
+           BS_aux = BS_R_left(i_alpha)
+           if(l_no_matrix_elements) EXCPH_gaux=(1._SP,0._SP)
+           !
+           if(abs(BS_Sat_E(il,i_beta,i_alpha))>FAN_deltaE_treshold) then
+             BS_Sat_AMPLITUDE(il,i_beta)=BS_Sat_AMPLITUDE(il,i_beta)+&
+&                                        BS_aux*EXCPH_gaux/abs(BS_Sat_E(il,i_beta,i_alpha))
+           endif
+           !
+           if(abs(BS_Sat_E_PH_abs(il,i_beta,i_alpha))>FAN_deltaE_treshold) then
+             BS_Sat_AMPLITUDE_PH_abs(il,i_beta)=BS_Sat_AMPLITUDE_PH_abs(il,i_beta)+&
+&                                               BS_aux*EXCPH_gaux/abs(BS_Sat_E_PH_abs(il,i_beta,i_alpha))
+           endif
+           !
+         enddo ! i_alpha
+         !
+         BS_Sat_WEIGHT_sum(il,i_beta)        = abs(BS_Sat_AMPLITUDE(il,i_beta))**2._SP
+         BS_Sat_WEIGHT_PH_abs_sum(il,i_beta) = abs(BS_Sat_AMPLITUDE_PH_abs(il,i_beta))**2._SP
+         !
+         ! Phonon emission in luminescence [photon emission / phonon emission]
+         !
+         Sat_emission=Sat_emission+&
+&                     BS_Sat_WEIGHT_PH_abs_sum(il,i_beta)/(W-cmplx(pole))/pole*(1._SP+bose_factor)*boltzm_factor
+         !
+       enddo ! i_beta
+       !
+     enddo ! i_l  
+     !
+     PL_ii(iw,1)     =PL_ii(iw,1)+(W+2.*ph_E)**2*Sat_emission/real(q%nbz,SP)
+     PL_ii(iw,3)     =PL_ii(iw,3)+(W+2.*ph_E)**2*Sat_emission/real(q%nbz,SP)
+     !
+     call live_timing(steps=1)
+     !
+   enddo ! iw   
+   !
+ enddo ! i_q_bz
+ !
+ call live_timing( )
+ !
+ do iw=1,wv%n_freqs
+   if (.not.px%element_1D(iw)) cycle   
+   W=wv%p(iw)
+   PL_ii(iw,:)=-PL_ii(iw,:)*real(spin_occ,SP)/(2._SP*pi)**3*d3k_factor*4._SP*pi/bare_qpg(1,1)**2
+ enddo
+ !
+ call PP_redux_wait(PL_ii) 
+ !
+ ! Free
+ !
+ YAMBO_FREE(BS_Sat_WEIGHT_sum)
+ YAMBO_FREE(BS_Sat_WEIGHT_PH_abs_sum)
+ YAMBO_FREE(BS_Sat_AMPLITUDE)
+ YAMBO_FREE(BS_Sat_AMPLITUDE_PH_abs)
+ !
+ call PP_indexes_reset(px)
+ call Sat_free()
+ !
+end subroutine EXCPH_X_phass_from_Hpert
+
diff --git a/src/exc-ph/EXCPH_ave_sat_DbGrid.F b/src/exc-ph/EXCPH_ave_sat_DbGrid.F
new file mode 100644
index 0000000000..751d78a662
--- /dev/null
+++ b/src/exc-ph/EXCPH_ave_sat_DbGrid.F
@@ -0,0 +1,98 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP
+!
+! headers
+!
+#include<y_memory.h>
+!
+subroutine EXCPH_ave_sat_DbGrid(iq_bz,q)
+ !
+ use pars,           ONLY:SP,cZERO,rZERO,cI
+ use R_lattice,      ONLY:bz_samp
+ use ELPH,           ONLY:elph_branches,FAN_deltaE_treshold,Q_io_map,PH_freqs,PH_freqs_sq
+ use EXCPH,          ONLY:BS_Sat_E,BS_Sat_E_PH_abs,BS_Sat_WEIGHT,BS_Sat_WEIGHT_PH_abs,EXCPH_deltaE_treshold, &
+&                         BS_E_in,EXCPH_Gkkp_sq,EXCPH_sum,EXCPH_states,L_damping,EXC_E_DbGd
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,       intent(in) :: iq_bz
+ type(bz_samp), intent(in) :: q
+ !
+ ! Work space
+ !
+ integer     :: i_alpha,i_beta,il,iq_db,iE_fg,iq_fg,nq_around
+ real(SP)    :: ph_E,weight_dbgd,weight_dbgd_PH_abs,E_alpha,E_beta
+ complex(SP) :: W_DG,W_DG_PH_abs
+ real(SP), external :: get_ph_E_DbGd
+ !
+ ! Calculate satellites weights and renormalization factors without double-grid
+ !
+ do i_alpha=EXCPH_states(1),EXCPH_states(2)
+   !
+   E_alpha =BS_E_in(i_alpha)
+   !
+   do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+     !
+     do il=elph_branches(1),elph_branches(2)
+       !
+       W_DG=cZERO
+       W_DG_PH_abs=cZERO
+       !
+       weight_dbgd        =rZERO
+       weight_dbgd_PH_abs =rZERO
+       !
+       ! Renormalize satellite weight with double-grid
+       !
+       do iq_fg=q%FGbz%k_range(iq_bz,1),q%FGbz%k_range(iq_bz,2)
+         !
+         if(iq_fg==q%FGbz%k_range(iq_bz,1)) then
+           !      
+           iq_db=Q_io_map(iq_bz)
+           ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+           !
+           ! If there is the double-grid I read the gamma phonons from there
+           ! because they include the LO-TO splitting and the acustic sum rule
+           !
+           if(iq_bz==1) ph_E=PH_freqs%FG%E(il,1,1)
+           !
+         else
+           iE_fg=q%FGbz%E_map(iq_fg)                      ! Map w(q) from BZ to IBZ
+           ph_E =PH_freqs%FG%E(il,iE_fg,1)                ! w(q_db) phonon energy in the D-Grid
+         endif
+         !
+         ! Excitonic energies in the double-grid
+         !
+         E_beta  =EXC_E_DbGd(i_beta,iq_fg)
+         !
+         if(abs(E_beta-E_alpha+ph_E)>EXCPH_deltaE_treshold) then
+           W_DG        =W_DG + 1._SP/(E_beta-E_alpha+ph_E+cI*L_damping)
+           weight_dbgd =weight_dbgd+1._SP
+         endif
+         !  
+         if(abs(E_beta-E_alpha-ph_E)>EXCPH_deltaE_treshold) then
+           W_DG_PH_abs        =W_DG_PH_abs       + 1._SP/(E_beta-E_alpha-ph_E+cI*L_damping)
+           weight_dbgd_PH_abs =weight_dbgd_PH_abs+1._SP
+         endif
+         ! 
+       enddo
+       !
+       if(weight_dbgd/=rZERO) then
+         W_DG       =W_DG       *1._SP/weight_dbgd       *abs(BS_Sat_E(il,i_beta,i_alpha))
+         BS_Sat_WEIGHT(il,i_beta,i_alpha)       =BS_Sat_WEIGHT(il,i_beta,i_alpha)       *abs(W_DG)**2
+       endif
+       !
+       if(weight_dbgd_PH_abs/=rZERO) then
+         W_DG_PH_abs=W_DG_PH_abs*1._SP/weight_dbgd_PH_abs*abs(BS_Sat_E_PH_abs(il,i_beta,i_alpha))
+         BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)=BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)*abs(W_DG_PH_abs)**2
+       endif
+       !
+     enddo  ! il
+   enddo  ! i_beta
+ enddo ! i_alpha
+ !
+end subroutine
diff --git a/src/exc-ph/EXCPH_double_grid_setup.F b/src/exc-ph/EXCPH_double_grid_setup.F
new file mode 100644
index 0000000000..03a1e0dcb5
--- /dev/null
+++ b/src/exc-ph/EXCPH_double_grid_setup.F
@@ -0,0 +1,98 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AC
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate phonon-assisted absorption and emission (see PRL 122, 187401 )
+!
+subroutine EXCPH_double_grid_setup(q)
+ !       
+ use pars,           ONLY:schlen,SP,rZERO
+ use vec_operate,    ONLY:c2a
+ use units,          ONLY:HA2EV
+ use electrons,      ONLY:nel_cond
+ use R_lattice,      ONLY:bz_samp,bz_samp_reset
+ use IO_int,         ONLY:io_control
+ use IO_m,           ONLY:manage_action,OP_RD,REP,RD_CL_IF_END,DUMP
+ use ELPH,           ONLY:PH_freqs,use_PH_DbGd,ph_modes,E_kpq_sh_fact
+ use interpolate,    ONLY:INTERPOLATE_is_serial,INTERPOLATION_coefficients,INTERP_obj,INTERP_shell_factor,&
+&                        INTERPOLATE_is_quiet,INTERPOLATION_BZ
+ use EXCPH,          ONLY:min_EXC_E_DbGd,ID_EXC,ID_EXC_obj,EXC_E_DbGd,EXCPH_sum,BS_all_E, &
+&                         EXC_occ,ID_occ,ID_occ_obj,EXC_occ_DbGd,q_DbGd,min_pos_E_DbGd
+ use com,            ONLY:msg
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in) :: q
+ !
+ ! Work space
+ !
+ integer  :: io_err,ID_DG
+ real(SP) :: v_tmp(3)
+ character(schlen) :: q_point
+ integer, external   :: io_Double_Grid
+ !
+ ! Fill the PH_freqs array
+ ! This structure will contains the phonon frequencies and the
+ ! double-grid phonon frequencies calculated with matdyn.x
+ !
+ PH_freqs%nk=q%nibz
+ PH_freqs%nb=ph_modes
+ YAMBO_ALLOC(PH_freqs%E,(q%nibz,ph_modes,1))
+ !
+ ! Check if double-grid is present and use it
+ ! load the double-grid phonon energies in PH_freqs
+ !
+ call io_control(ACTION=OP_RD,COM=REP,MODE=DUMP,SEC=(/1,2/),ID=ID_DG)
+ io_err=io_Double_Grid(PH_freqs,q,ID_DG,'phonons')
+ !
+ if(io_err/=0) return
+ !     
+ use_PH_DbGd=.TRUE.
+ !
+ call msg('sr','Using PH-double-grid ')
+ call msg('rs','Double grid number of q-points ',q%FGbz%N)
+ !
+ call msg('sr','EXC_E(k+q) calculated using a smooth Fourier interpolation')
+ INTERPOLATE_is_serial=.TRUE.
+ INTERPOLATE_is_quiet =.TRUE.
+ INTERP_shell_factor  =E_kpq_sh_fact
+ ID_EXC_obj=1
+ ID_EXC    =1
+ call INTERPOLATION_BZ_setup(q)
+ INTERP_obj(ID_EXC_obj)%what="EXC_E(Q)"
+ call INTERPOLATION_coefficients(R1D=BS_all_E(:EXCPH_sum(2),:),k=q,NK=q%nibz,ID=ID_EXC,ID_obj=ID_EXC_obj)
+ !
+ call bz_samp_reset(q_DbGd)
+ q_DbGd%nibz=q%FGbz%N
+ YAMBO_ALLOC(q_DbGd%pt,(q_DbGd%nibz,3))
+ q_DbGd%pt=q%FGbz%pt
+ !
+ YAMBO_ALLOC(EXC_E_DbGd,(EXCPH_sum(2),q_DbGd%nibz))
+ call INTERPOLATION_BZ(K=q_DbGd,NK=q_DbGd%nibz,ID=ID_EXC,R1D=EXC_E_DbGd)
+ !
+ min_EXC_E_DbGd=minval(EXC_E_DbGd(1,:))
+ min_pos_E_DbGd=minloc(EXC_E_DbGd(1,:))
+ call c2a(v_in=q_DbGd%pt(min_pos_E_DbGd(1),:),v_out=v_tmp,mode="ki2a")
+ call msg('rs','Minimum excitonic energy with DbGd: ',min_EXC_E_DbGd*HA2EV,'eV')
+ write(q_point,'(3f12.6,a)') v_tmp(:),' [rlu] '
+ call msg('rs','Minimum q-point in the DbGrid : '//q_point)
+ !
+ if(nel_cond/=rZERO) then
+   ID_occ_obj=2
+   ID_occ    =2
+   INTERP_obj(ID_occ_obj)%what="EXC_occ(Q)"
+   call INTERPOLATION_coefficients(R1D=EXC_occ(:EXCPH_sum(2),:),k=q,NK=q%nibz,ID=ID_occ,ID_obj=ID_occ_obj)
+   YAMBO_ALLOC(EXC_occ_DbGd,(EXCPH_sum(2),q_DbGd%nibz))
+   call INTERPOLATION_BZ(K=q_DbGd,NK=q_DbGd%nibz,ID=ID_occ,R1D=EXC_occ_DbGd)
+   call msg('rs','Excitonic occupation interpolated on the Double-Grid')
+ endif
+ !
+end subroutine
diff --git a/src/exc-ph/EXCPH_eval_renorm.F b/src/exc-ph/EXCPH_eval_renorm.F
new file mode 100644
index 0000000000..6884be87de
--- /dev/null
+++ b/src/exc-ph/EXCPH_eval_renorm.F
@@ -0,0 +1,76 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP CA 
+!
+! headers
+!
+#include<y_memory.h>
+!
+!
+! @param[in]       iq_bz               iq_bz index 
+! @param[in]       q                   bz sampling
+!
+! @param[out]      R                   renormalization factor
+! @param[out]      R_PH_abs            renormalization factor for phonon-absorption
+! @param[out]      R_PL                luminescence renormalization factor
+! @param[out]      R_PL_PH_abs         luminescence renormalization factor for phonon-absorption
+!
+subroutine EXCPH_eval_renorm(iq_bz,q,R,R_PH_abs,R_PL,R_PL_PH_abs)
+ !
+ use pars,           ONLY:SP,cZERO,rZERO
+ use R_lattice,      ONLY:bz_samp
+ use functions,      ONLY:bose_f
+ use ELPH,           ONLY:elph_branches,PH_freqs_sq,FAN_deltaE_treshold,Q_io_map
+ use EXCPH,          ONLY:BS_Sat_WEIGHT,BS_Sat_WEIGHT_PH_abs,EXCPH_sum,EXCPH_states
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,       intent(in)    :: iq_bz
+ type(bz_samp), intent(in)    :: q
+ real(SP),      intent(inout) :: R(EXCPH_states(2)),   R_PH_abs(EXCPH_states(2))
+ real(SP),      intent(inout) :: R_PL(EXCPH_states(2)),R_PL_PH_abs(EXCPH_states(2))
+ !
+ ! Work space
+ !
+ integer  :: i_alpha,i_beta,il,iq_db
+ real(SP) :: ph_E,N_bose
+ !
+ ! Add the contribution of the iq statellites to the renormalization factors
+ !
+ do il=elph_branches(1),elph_branches(2)
+   !
+   iq_db=Q_io_map(iq_bz)
+   ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+   if(ph_E<FAN_deltaE_treshold) cycle
+   !
+   N_bose=bose_f(ph_E)
+   !
+   do i_alpha=EXCPH_states(1),EXCPH_states(2)
+     do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+       !
+       ! Phonon emission in absorption [photon absorption / phonon emission]
+       !
+       R(i_alpha)          = R(i_alpha) + BS_Sat_WEIGHT(il,i_beta,i_alpha)*(1._SP+N_bose)/real(q%nbz,SP)
+       !
+       ! Phonon absorption in luminescence [photon emission / phonon absorption]
+       !
+       R_PL_PH_abs(i_alpha) = R_PL_PH_abs(i_alpha) + BS_Sat_WEIGHT(il,i_beta,i_alpha)*(1._SP+N_bose)/real(q%nbz,SP)
+       !
+       ! Phonon absorption in absorption [photon absorption / phonon absorption]
+       !
+       R_PH_abs(i_alpha)    = R_PH_abs(i_alpha) + BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)*N_bose/real(q%nbz,SP)
+       !
+       ! Phonon emission in luminescence [photon emission / phonon emission]
+       !
+       R_PL(i_alpha)        = R_PL(i_alpha) + BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)*N_bose/real(q%nbz,SP)
+       !
+     enddo
+   enddo
+   !
+ enddo
+ !
+end subroutine
diff --git a/src/exc-ph/EXCPH_eval_sat.F b/src/exc-ph/EXCPH_eval_sat.F
new file mode 100644
index 0000000000..d739d1168a
--- /dev/null
+++ b/src/exc-ph/EXCPH_eval_sat.F
@@ -0,0 +1,70 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP
+!
+! headers
+!
+#include<y_memory.h>
+!
+!
+subroutine EXCPH_eval_sat(iq_bz,q)
+ !
+ use pars,           ONLY:SP,cZERO,rZERO
+ use R_lattice,      ONLY:bz_samp
+ use D_lattice,      ONLY:nsym
+ use ELPH,           ONLY:elph_branches,PH_freqs_sq,FAN_deltaE_treshold,Q_io_map
+ use EXCPH,          ONLY:BS_Sat_E,BS_Sat_E_PH_abs,BS_Sat_WEIGHT,BS_Sat_WEIGHT_PH_abs, &
+&                         BS_all_E,BS_E_in,EXCPH_Gkkp_sq,EXCPH_sum,EXCPH_states,DB_Q_map, &
+&                         l_no_matrix_elements,EXCPH_deltaE_treshold
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,       intent(in) :: iq_bz
+ type(bz_samp), intent(in) :: q
+ !
+ ! Work space
+ !
+ integer  :: i_alpha,i_beta,il,iq_db,iq_ibz
+ real(SP) :: ph_E,E_alpha,E_beta,EXCPH_gaux
+ !
+ ! Calculate satellites weights and renormalization factors without double-grid
+ !
+ iq_ibz=q%sstar(iq_bz,1)
+ !
+ BS_Sat_WEIGHT       =rZERO
+ BS_Sat_WEIGHT_PH_abs=rZERO
+ !
+ do il=elph_branches(1),elph_branches(2)
+   !
+   iq_db=Q_io_map(iq_bz)
+   ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+   if(ph_E<FAN_deltaE_treshold) cycle
+   !
+   do i_alpha=EXCPH_states(1),EXCPH_states(2)
+     do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+       !
+       E_alpha=BS_E_in(i_alpha)
+       E_beta =BS_all_E(i_beta,iq_ibz)
+       !
+       BS_Sat_E       (il,i_beta,i_alpha)=E_beta-E_alpha+ph_E
+       BS_Sat_E_PH_abs(il,i_beta,i_alpha)=E_beta-E_alpha-ph_E
+       !
+       EXCPH_gaux=EXCPH_Gkkp_sq(il,i_beta,i_alpha)
+       if(l_no_matrix_elements) EXCPH_gaux=1._SP
+       !
+       if(abs(BS_Sat_E(il,i_beta,i_alpha))>EXCPH_deltaE_treshold) &
+&         BS_Sat_WEIGHT(il,i_beta,i_alpha)       =EXCPH_gaux/abs(BS_Sat_E(il,i_beta,i_alpha))**2
+       !
+       if(abs(BS_Sat_E_PH_abs(il,i_beta,i_alpha))>EXCPH_deltaE_treshold) &
+&         BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)=EXCPH_gaux/abs(BS_Sat_E_PH_abs(il,i_beta,i_alpha))**2
+       !
+     enddo
+   enddo
+   !
+ enddo
+ !
+end subroutine
diff --git a/src/exc-ph/EXCPH_gkkp_driver.F b/src/exc-ph/EXCPH_gkkp_driver.F
new file mode 100644
index 0000000000..668211dcd4
--- /dev/null
+++ b/src/exc-ph/EXCPH_gkkp_driver.F
@@ -0,0 +1,314 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP AC DS
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate exciton-phonon matrix elements and satellites renormalizations
+!!
+!! @param[in]       E                quasi-particle index
+!! @param[in]       k                q-index in the BZ
+!! @param[in]       q                q-index in distributed array
+!! @param[in]       X                phonon-branch index
+!!
+!! @param[out]      EXCPH_gkkp                  Exciton-phonon matrix elements
+!! @param[out]      EXCPH_gkkp_sq               Exciton-phonon matrix elements squared
+!
+subroutine EXCPH_gkkp_driver(E,k,q,X)
+ !
+ use pars,           ONLY:SP,cZERO,rZERO,schlen
+ use electrons,      ONLY:levels
+ use R_lattice,      ONLY:bz_samp,qindx_B
+ use D_lattice,      ONLY:nsym
+ use parser_m,       ONLY:parser
+ use X_m,            ONLY:X_t
+ use BS_solvers,     ONLY:BS_mat,BSS_eh_table,BSS_eh_table_m1,BSS_alloc,BSS_free,BSS_n_eig!,BS_H_dim,BSS_n_eig
+ !use BS,             ONLY:BSqpts,BS_H_dim,BS_bands
+ use ELPH,           ONLY:elph_branches,l_GKKP_expanded_DB_exists!,Q_io_map
+ use IO_m,           ONLY:NONE,OP_APP_CL,OP_RD_CL,DUMP,OP_RD,RD_CL_IF_END,OP_WR_CL,deliver_IO_error_message, &
+&                         REP,VERIFY,WR_CL,OP_WR,WR_CL_IF_END,manage_action
+ use IO_int,         ONLY:io_control
+ use EXCPH,          ONLY:BS_mat_in,BS_mat_out,BSS_eh_table_in,EXCPH_gkkp,EXCPH_gkkp_sq,BS_E_in,BS_E,BSS_eh_table_m1_in,&
+&                         EXCPH_states,EXCPH_sum,k_plus_q_table,build_Q_map,EXCPH_q, &
+&                         EXCPH_gkkp_alloc,EXCPH_gkkp_free,l_const_elph,l_abs_elph,l_abs_exc,l_elec_only,l_hole_only, &
+&                         n_exc_in,n_exc_out,BSE_alloc,BSE_free,BSE_in_alloc,BSE_out_alloc,BSE_in_free
+ use LIVE_t,         ONLY:live_timing
+ use timing_m,       ONLY:timing
+ use com,            ONLY:msg
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels)  ::E
+ type(bz_samp) ::k,q
+ type(X_t)     ::X
+ !
+ ! Work Space
+ !
+ integer, external     :: io_ELPH,io_EXCPH_gkkp,io_RIM
+ integer, allocatable  :: S_index(:)
+ integer  :: ID_phases
+ integer  :: ID_gkkp,ID_exc_gkkp,io_err,io_ID,IO_ACT
+ integer  :: il,iq_bz,iq,i_star,iq_s,elph_branches_save(2)!,iq_db
+ logical  :: l_min_mem
+ ! Debug
+ !integer :: ik
+ !open(1, file='K_grid_bz.dat')
+ !
+ call section('*','Excitonic gkkp')
+ !======================================
+ !
+ ! Define n_exc_in
+ !
+ n_exc_in= EXCPH_states(2)-EXCPH_states(1)+1
+ n_exc_out=EXCPH_sum(2)   -EXCPH_sum(1)+1
+ !
+ ! Check if Lout and Lin are present
+ ! and read their kind
+ ! 
+ ! Load RIM if present
+ !
+ call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1/),MODE=DUMP,ID=io_ID)
+ io_err=io_RIM(io_ID)
+ !
+ if(io_err==0) call msg('sr','RIM found and loaded')
+ !
+ call EXCPH_load_L(1,X,'check','Lout')
+ call EXCPH_load_L(1,X,'check','Lin')
+ !
+ call parser('ConstElph',l_const_elph)
+ call parser('AbsElph',  l_abs_elph)
+ call parser('AbsExc',   l_abs_exc)
+ call parser('ElectronContributionOnly', l_elec_only)
+ call parser('HoleContributionOnly', l_hole_only)
+ if (l_const_elph) call warning('Constant electron-phonon coupling')
+ if(l_abs_elph) call warning('Absolute value of electron-phonon coupling ')
+ if(l_abs_exc)  call warning('Absolute value of exciton wave functions  ')
+ if(l_elec_only) call warning('Only electron contribution to the exciton-phonon coupling')
+ if(l_hole_only) call warning('Only hole contribution to the exciton-phonon coupling')
+ !
+ elph_branches_save  =elph_branches
+ call io_control(ACTION=OP_RD_CL,COM=REP,MODE=DUMP,SEC=(/1/),ID=ID_gkkp)
+ ! Here we read the fully k-expanded and q-expanded matrix elements from the SAVE. 
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded')
+ elph_branches =elph_branches_save
+ !
+ ! We build the q-phonon / q-yambo map only in the case without symmetries,
+ ! where we expect the indices to differ.
+ if(io_err==0) then
+   l_GKKP_expanded_DB_exists=.true.
+   call ELPH_databases_grids_map(k,q)
+   call k_ibz2bz(q,"i",.false.)
+   call build_Q_map(q)
+   call k_ibz2bz(q,"d",.false.)
+ endif
+ !
+ ! Check if exciton-phonon matrix elements have been already calculated
+ ! and are consistent with the Lin and Lout specified in input
+ !
+ call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID_exc_gkkp)
+ io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+ !
+ if(io_err==0) return
+ if(io_err/=0) call warning("[EXCPH] Exc-ph matrix elements not correct or missing. To be computed")
+ !
+ call timing('EXCPH_gkkp',OPR='start')
+ !
+ call k_build_up_BZ_tables(k)
+ call k_build_up_BZ_tables(q)
+ !
+ l_min_mem=.false.
+ !
+ ! Pre-requisite: calculate electronic phases (needed to rotate the excitonic wavefunctions)
+ if(nsym>1) call ELEC_WF_phases(E,k,l_min_mem,ID_phases)
+ !
+ call EXCPH_load_L(1,X,'check','Lin') ! At present the Lin can be only gamma iq=1
+ !
+ call BSS_alloc()
+ !
+ call BSE_alloc(k,q)
+ !
+ call EXCPH_load_L(1,X,'load','Lin')
+ !
+ ! Alloc BS_in variables
+ ! 
+ call BSE_in_alloc(k)  ! computed at the BSE step
+ call BSE_out_alloc()  ! rotated via unitary transformations
+ !
+ ! Sorting index is trivial, no sorting done in exc-ph.
+ allocate(S_index(BSS_n_eig))
+ do il=1,BSS_n_eig
+   S_index(il)=il
+ enddo
+ !
+ ! Allocate EXCPH_gkkp
+ !
+ call EXCPH_gkkp_alloc()
+ !
+ ! K plus q table
+ !
+ call build_k_plus_q_table()
+ !
+ ! BSS_eh_table_m1
+ !
+ call build_inverse_BS_eh_table_from_BSS_table(k%nbz)
+ !
+ ! Save the Lin in information
+ ! 
+ BS_mat_in         =BS_mat(:,EXCPH_states(1):EXCPH_states(2))
+ !
+ BSS_eh_table_in   =BSS_eh_table
+ BSS_eh_table_m1_in=BSS_eh_table_m1
+ BS_E_in           =BS_E
+ !
+ ! Open gkkp_expanded
+ !
+ elph_branches_save  =elph_branches
+ call io_control(ACTION=OP_RD,COM=REP,MODE=VERIFY,SEC=(/1/),ID=ID_gkkp)
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded')
+ call deliver_IO_error_message(io_err,"GKKP")
+ elph_branches =elph_branches_save
+ !
+ if (io_err/=0) call error('El-Ph database not found')
+ !
+ ! Write headers
+ !
+ call io_control(ACTION=OP_WR,SEC=(/1/),ID=ID_exc_gkkp)
+ io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+ !
+ ! Get q-point coordinates in the q-BZ
+ YAMBO_ALLOC(EXCPH_q,(q%nbz,3))
+ call get_excph_q_momenta()
+ !
+ ! Main q-loop
+ !
+ !call k_ibz2bz(k,"i",.false.)
+ call live_timing('Excitonic Gkkp',q%nbz*(elph_branches(2)-elph_branches(1)+1))
+ !
+ !open(47, file = 'BS_mat_q.dat')
+ !write (47,*) "# BS_mat_q rotated: each q, each exciton state alpha, all transitions" 
+ !
+ do iq=1,q%nibz
+   !
+   if(iq>1) call EXCPH_load_L(iq,X,'check','Lout')
+   call EXCPH_load_L(iq,X,'load','Lout')
+   !
+   do i_star=1,q%nstar(iq)
+     !
+     iq_s=q%star(iq,i_star)
+     iq_bz = q%k_table(iq,iq_s)
+     !
+     BS_mat_out=cZERO
+     ! Here we obtain the rotated BS_mat_out at iq_bz
+     call EXC_WF_symm_qpoint(k,q,EXCPH_states(1),n_exc_out,iq,iq_s,1,ID_phases,S_index,BS_mat_out,l_min_mem)
+     !
+     EXCPH_gkkp          =cZERO
+     EXCPH_gkkp_sq       =rZERO
+     !
+     do il=elph_branches(1),elph_branches(2)
+       !
+       call EXCPH_gkkp_eval(iq_s,iq_bz,il,k,q,ID_gkkp) ! Calculation of the exc-ph matrix element at iq_bz
+       !
+       call live_timing(steps=1)
+       !
+     enddo
+     !
+     !iq_db=Q_io_map(iq_bz)
+     !if (iq_db/=iq_bz) write(*,*) "WARNING: iq_db/=iq_bz",iq_db,iq_bz
+     !
+     ! I/O
+     !=====
+     IO_ACT=manage_action(WR_CL_IF_END,iq_bz,1,q%nbz)
+     call io_control(ACTION=IO_ACT,SEC=(/iq_bz+1/),ID=ID_exc_gkkp)
+     io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+     !
+   enddo
+   !
+ enddo
+ !
+ !close(47)
+ !
+ call live_timing( )
+ !
+ call BSS_free()
+ call BSE_free()
+ call BSE_in_free()
+ call EXCPH_gkkp_free()
+ !
+ ! Report all relevant data of the exciton-phonon coupling
+ !
+ YAMBO_FREE(EXCPH_q)
+ ! 
+ call timing('EXCPH_gkkp',OPR='stop')
+ !
+ return
+ !
+ contains
+   !
+   subroutine get_excph_q_momenta()
+     !    
+     integer iq,i_star,iq_s,iq_bz
+     !
+     call k_ibz2bz(q,"i",.false.)
+     !
+     do iq=1,q%nibz
+       !
+       do i_star=1,q%nstar(iq)
+         !
+         iq_s=q%star(iq,i_star)
+         iq_bz = q%k_table(iq, iq_s)
+         !
+         EXCPH_q(iq_bz,:) = q%ptbz(iq_bz,:)
+         !
+       enddo
+     enddo
+     !
+     call k_ibz2bz(q,"d",.false.)
+     !
+   end subroutine get_excph_q_momenta
+   !
+   subroutine build_k_plus_q_table()
+     !      
+     use parallel_int,        ONLY:PARALLEL_global_indexes
+     use R_lattice,           ONLY:bse_scattering
+     !
+     !integer, external ::qindx_B_init,qindx_B_close,io_QINDX 
+     !integer :: qindx_ID,qindx_ID_frag,qindx_tmp(2),ID,io_err
+     integer, external ::io_QINDX 
+     integer :: ID,io_err
+     integer :: ik_bz,iq_bz,ip_bz
+     !
+     if(.not.bse_scattering) call error(' Please run setup with BSEscatt flag ')
+     !
+     call PARALLEL_SETUP_K_scheme("Kdef")
+     !
+     ! here qindx_B is allocated
+     call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/5/),ID=ID)
+     io_err=io_QINDX(k,q,ID,'minus_q')
+     if (io_err/=0) then
+       call msg('s',"Missing k/q scattering database")
+       call error("Please run a setup with the BSEscatt flag activated")
+     endif
+     !
+     call live_timing('K plus q table',k%nbz)
+     do ik_bz=1,k%nbz
+       call live_timing(steps=1)
+       IQ_bz_loop: do iq_bz=1,q%nbz
+         do ip_bz=1,k%nbz
+           if (qindx_B(ik_bz,ip_bz,1)==iq_bz) then
+             k_plus_q_table(ik_bz,iq_bz)=ip_bz
+             cycle IQ_bz_loop
+           endif
+         enddo
+       enddo IQ_bz_loop
+     enddo
+     call live_timing( )
+     !
+   end subroutine build_k_plus_q_table
+   !
+end subroutine
diff --git a/src/exc-ph/EXCPH_gkkp_eval.F b/src/exc-ph/EXCPH_gkkp_eval.F
new file mode 100644
index 0000000000..6ae058379b
--- /dev/null
+++ b/src/exc-ph/EXCPH_gkkp_eval.F
@@ -0,0 +1,196 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP DS
+!
+! headers
+!
+#include<y_memory.h>
+!
+! GKKP formulation following the F. Paleari PhD thesis (Pg. 109, Eq : 4.25)
+! The exciton phonon matrix elements are defined as <q=Q,S'|dV_{Q}|q=0,S>
+! These is for phonon absorption
+!
+subroutine EXCPH_gkkp_eval(iq_s,iq_bz,il,k,q,ID_gkkp)
+ !
+ use pars,           ONLY:SP,cZERO,rZERO,cONE
+ !use D_lattice,      ONLY:sop_inv,sop_tab
+ use R_lattice,      ONLY:bz_samp!,qindx_C
+ use BS_solvers,     ONLY:io_BSS_diago,BSS_eh_table_m1,BSS_eh_table,BSS_free
+ use IO_m,           ONLY:NONE,OP_APP_CL,OP_RD_CL,DUMP,OP_RD,RD_CL_IF_END,OP_WR_CL, &
+&                         deliver_IO_error_message,manage_action,RD
+ use IO_int,         ONLY:io_control
+ use BS,             ONLY:BS_H_dim,BS_bands
+ use ELPH,           ONLY:ph_freqs_sq,elph_branches,FAN_deltaE_treshold,GKKP,Q_io_map,K_io_map
+ use EXCPH,          ONLY:BS_mat_in,BS_mat_out,EXCPH_gkkp,EXCPH_gkkp_sq,Xi,BSS_eh_table_in,&
+&                         EXCPH_states,EXCPH_sum,k_plus_q_table,EXCPH_kthresh,&
+&                         l_const_elph,l_abs_elph,l_abs_exc,l_elec_only,l_hole_only
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp) ::k,q
+ integer, intent(in)    :: iq_s,iq_bz,il!,iq
+ integer, intent(inout) :: ID_gkkp
+ !
+ ! Work Space
+ !
+ integer                 :: io_err,i_H,i_Hp,i_alpha,iq_db,ik_db,&
+&                           iv,ic,ik_bz,ivp,icp,i_beta,ik_plus_iq_bz,IO_ACT, &
+&                           iv_in,ic_in,ik_bz_in,i_sp_c,i_sp_v,i_sp_c_in,i_sp_v_in
+ real(SP)                :: ph_E
+ complex(SP)             :: elph_gkkp,EXC_out,EXC_in
+ integer, external       :: io_ELPH
+ !
+ ! Loop scheme: 
+ !  for each (iq_bz, il) from gkkp_driver:
+ !  -> i_beta  ->i_H
+ !             ->ivp [Calculate Xi(1), sum over ivp, hole contribution]
+ !             ->icp [Calculate Xi(2), sum over icp, elec contribution]
+ !             ->i_alpha ->i_H [Calculate Gkkp(iq,il;i_beta,i_alpha), sum over i_H]
+ Xi=cZERO
+ !
+ elph_gkkp=cONE
+ !
+ !
+ ! elph_gkkp(:,ib,ob,ik_bz) = < ib k | dV_ql | ob k - q >
+ !
+ ! with k and q in the BZ
+ !
+ ! El-ph rotated or nosym. The wfc gauge and rotation phases must match WF_phases. 
+ !
+ iq_db=Q_io_map(iq_bz)
+ IO_ACT=RD
+ if(il==elph_branches(2)) IO_ACT=manage_action(RD_CL_IF_END,iq_bz,1,q%nbz)
+ !
+ call io_control(ACTION=IO_ACT,SEC=(/iq_db+1/),ID=ID_gkkp)
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded')
+ if(io_err/=0) call error(" Error reading gkkp_expanded") 
+ !
+ ph_E=sqrt(abs(ph_freqs_sq(iq_db,il)))
+ !
+ if (ph_E<FAN_deltaE_treshold) return
+ !
+ do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+   !
+   Xi=cZERO
+   !
+   !write(iq_bz*1000+i_beta,*) iq_bz,i_beta,iq_s
+   !
+   do i_H=1,BS_H_dim
+     !
+     ! K = (ik_bz,iv,ic)
+     !
+     ik_bz = BSS_eh_table(i_H,1)
+     iv    = BSS_eh_table(i_H,2)
+     ic    = BSS_eh_table(i_H,3)
+     i_sp_c= BSS_eh_table(i_H,4)
+     i_sp_v= BSS_eh_table(i_H,5)
+     !
+     ! only sum k-points up to the shell with radius EXCPH_kthresh (debug only)
+     if (EXCPH_kthresh>0._SP) then
+       if (norm2(k%ptbz(ik_bz,:)) > EXCPH_kthresh ) cycle
+     endif
+     !
+     !ik_minus_iq_bz = qindx_C(ik_bz,iq_bz,1)      ! K_bz-Q_bz
+     !
+     ik_plus_iq_bz  = k_plus_q_table(ik_bz,iq_bz) ! K_bz+Q_bz
+     !
+     ! 1=DOWN (hole contribution)
+     !========
+     !
+     do ivp=BS_bands(1),BS_bands(2)
+       !
+       if (l_elec_only) exit
+       !
+       i_Hp=BSS_eh_table_m1(ik_bz,ivp,ic,i_sp_c,i_sp_v)
+       if (i_Hp==0) cycle
+       !
+       if (.not.l_const_elph) then
+         ik_db=K_io_map(ik_bz)
+         if(.not.l_abs_elph) elph_gkkp=    GKKP%dVc(il,iv,ivp,ik_db,1) /sqrt(2._SP*ph_E)
+         if(     l_abs_elph) elph_gkkp=abs(GKKP%dVc(il,iv,ivp,ik_db,1))/sqrt(2._SP*ph_E)
+       endif
+       !
+       if (     l_abs_exc) EXC_OUT=  abs(BS_mat_out(i_Hp,i_beta))
+       if (.not.l_abs_exc) EXC_OUT=conjg(BS_mat_out(i_Hp,i_beta))
+       !
+       Xi(i_H,1)=Xi(i_H,1)+elph_gkkp*EXC_OUT
+       !
+     enddo
+     !
+     ! 2=UP (electron contribution)
+     !======
+     !
+     do icp=BS_bands(1),BS_bands(2)
+       !
+       if (l_hole_only) exit
+       !
+       i_Hp=BSS_eh_table_m1(ik_plus_iq_bz,iv,icp,i_sp_c,i_sp_v)
+       if (i_Hp==0) cycle
+       if (.not.l_const_elph) then
+         ik_db=K_io_map(ik_plus_iq_bz)
+         if(.not.l_abs_elph) elph_gkkp=    GKKP%dVc(il,icp,ic,ik_db,1) /sqrt(2._SP*ph_E)
+         if(     l_abs_elph) elph_gkkp=abs(GKKP%dVc(il,icp,ic,ik_db,1))/sqrt(2._SP*ph_E)
+       endif
+       !
+       if (     l_abs_exc) EXC_OUT=  abs(BS_mat_out(i_Hp,i_beta))
+       if (.not.l_abs_exc) EXC_OUT=conjg(BS_mat_out(i_Hp,i_beta)) 
+       !
+       Xi(i_H,2)=Xi(i_H,2)+elph_gkkp*exc_out 
+       !
+     enddo
+     !
+   enddo
+   !
+   ! Gkkp m.e.
+   !=============
+   !
+   do i_alpha=EXCPH_states(1),EXCPH_states(2)
+     !
+     do i_H=1,BS_H_dim
+       !
+       ! K = (ik_bz,iv,ic)
+       !
+       ik_bz = BSS_eh_table(i_H,1)
+       iv    = BSS_eh_table(i_H,2)
+       ic    = BSS_eh_table(i_H,3)
+       i_sp_c= BSS_eh_table(i_H,4)
+       i_sp_v= BSS_eh_table(i_H,5)
+       !
+       ! K_in = (ik_bz,iv,ic)
+       !
+       ik_bz_in = BSS_eh_table_in(i_H,1)
+       iv_in    = BSS_eh_table_in(i_H,2)
+       ic_in    = BSS_eh_table_in(i_H,3)
+       i_sp_c_in= BSS_eh_table_in(i_H,4)
+       i_sp_v_in= BSS_eh_table_in(i_H,5)
+       !
+       if (ik_bz /= ik_bz_in) call error(" Wrong k correspondence")
+       if (iv    /= iv_in   ) call error(" Wrong iv correspondence")
+       if (ic    /= ic_in   ) call error(" Wrong ic correspondence")
+       if (i_sp_c/= i_sp_c_in) call error(" Wrong i_sp_c correspondence")
+       if (i_sp_v/= i_sp_v_in) call error(" Wrong i_sp_v correspondence")
+       !
+       ! EXC_IN: "Left" part of exciton-phonon coupling. The "right" part is its conjugate
+       !
+       if (     l_abs_exc) EXC_IN=  abs(BS_mat_in(i_H,i_alpha))
+       if (.not.l_abs_exc) EXC_IN=      BS_mat_in(i_H,i_alpha)
+       !
+       ! FIX ME ! Any reason why we use hole-electron instead of the standard elec-hole?
+       !
+       EXCPH_Gkkp(il,i_beta,i_alpha)=EXCPH_Gkkp(il,i_beta,i_alpha)+EXC_IN*(Xi(i_H,1)-Xi(i_H,2))
+       !
+     enddo
+   enddo
+   !
+ enddo !Loop i_beta
+ !
+ ! Compute the squared couplings (to be removed)
+ !
+ forall (i_alpha=EXCPH_states(1):EXCPH_states(2),i_beta=EXCPH_sum(1):EXCPH_sum(2)) &
+ &  EXCPH_Gkkp_sq(il,i_beta,i_alpha)=REAL(EXCPH_Gkkp(il,i_beta,i_alpha),SP)**2 + AIMAG(EXCPH_Gkkp(il,i_beta,i_alpha))**2
+ !
+end subroutine
diff --git a/src/exc-ph/EXCPH_lifetime.F b/src/exc-ph/EXCPH_lifetime.F
new file mode 100644
index 0000000000..57407c1afe
--- /dev/null
+++ b/src/exc-ph/EXCPH_lifetime.F
@@ -0,0 +1,506 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP AC
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate exciton non-radiative life-time induced by phonon-scattering
+!!
+subroutine EXCPH_lifetime(k,q,X)
+ !
+ use units,          ONLY:HA2EV,HA2CMm1
+ use pars,           ONLY:SP,cZERO,rZERO,schlen,pi
+ use electrons,      ONLY:levels
+ use R_lattice,      ONLY:bz_samp
+ use parser_m,       ONLY:parser
+ use com,            ONLY:msg,of_open_close
+ use X_m,            ONLY:X_t
+ use IO_int,         ONLY:io_control
+ use IO_m,           ONLY:manage_action,OP_RD,REP,RD_CL_IF_END,DUMP
+ use EXCPH,          ONLY:EXCPH_sum,EXCPH_states,BS_E,BS_all_E,BS_E_in,L_damping,n_exc_in, &
+&                         EXC_LifeTime,min_EXC_E,EXCPH_Gkkp_sq,DB_Q_map,l_no_matrix_elements, &
+&                         RESIDUALS_and_EIGENVALUES_alloc,RESIDUALS_and_EIGENVALUES_free,alphaQ, &
+&                         l_EXCPH_offdiago,EXCPH_Gkkp,EXC_LineWidth 
+ use ELPH,           ONLY:use_PH_DbGd,elph_branches,FAN_deltaE_treshold,Q_io_map
+ use BS_solvers,     ONLY:deg_exc_thrshld
+ use vec_operate,    ONLY:degeneration_finder,iku_v_norm,iku_v_2D_norm
+ use frequency,      ONLY:w_samp
+ use stderr,         ONLY:intc
+ use parallel_m,     ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index
+ use LIVE_t,         ONLY:live_timing
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels)  ::E
+ type(bz_samp) ::k,q
+ type(X_t)     ::X
+ !
+ ! Work space
+ !
+ integer  :: ID_exc_gkkp,ID_gkkp,io_err,IO_ACT
+ integer, external :: io_EXCPH_gkkp,io_ELPH
+ integer  :: iq,iq_s,iq_bz,i_alpha,j_alpha,i_beta,il,i_star,iq_db,igrp
+ integer  :: elph_branches_save(2)
+ integer  :: min_pos(1),first_exc_grp(n_exc_in),n_of_exc_grp(n_exc_in),n_deg_grp
+ logical  :: l_keep_deg,l_extend_output
+ real(SP) :: ExcPhM,EXC_life_average,EXC_line_average,EXC_energy_average,Pole
+ complex(SP) :: ExcPhW
+ real(SP) , allocatable :: EXC_life_of_q(:,:),EXC_pole_of_q(:,:)
+ type(PP_indexes)::px
+ !
+ ! Initialization
+ call parser('NoMatrxEl',l_no_matrix_elements)
+ call parser('KeepDeg',  l_keep_deg)
+ call parser('ExtendOutput', l_extend_output)
+ call parser('ExcPhOffDiago', l_EXCPH_offdiago)
+ !
+ call section('*','EXCPH-LifeTime')
+ !
+ call k_build_up_BZ_tables(k)
+ call k_build_up_BZ_tables(q)
+ !
+ call k_ibz2bz(q,"i",.false.)
+ !
+ ! Load excitonic energies 
+ !
+ call EXCPH_load_L(1,X,'check','Lout')
+ !
+ ! Allocations 
+ !
+ call RESIDUALS_and_EIGENVALUES_alloc(q)
+ if (l_extend_output) then
+   allocate(EXC_life_of_q(EXCPH_states(1):EXCPH_states(2),q%nbz))
+   allocate(EXC_pole_of_q(EXCPH_states(1):EXCPH_states(2),q%nbz))
+   EXC_life_of_q=rZERO
+   EXC_pole_of_q=rZERO
+ endif
+ !
+ ! Load excitonic energies 
+ !
+ call EXCPH_load_L(1,X,'eigenvalues','Lin')
+ BS_E_in(:)=BS_E(EXCPH_states(1):EXCPH_states(2)) ! L_in can be different from L_out 
+ !
+ do iq=1,q%nibz
+   call EXCPH_load_L(iq,X,'eigenvalues','Lout')
+   BS_all_E(:,iq)=real(BS_E(:)) ! I suppose the BSE eigenvalues are real
+ enddo
+ !
+ ! Apply 2D stretching if requested
+ if(alphaQ/=rZERO) then
+   call msg('rs','Excitonic bands 2D stretching :',alphaQ)
+   do iq=1,q%nibz
+     BS_all_E(:,iq)=BS_all_E(:,iq)+alphaQ*iku_v_2D_norm(q%pt(iq,:3))
+   enddo
+ endif
+ !
+ ! Here we find the degenerations of the first n_exc_in values of BS_all_E at gamma
+ ! and we set their energies to exactly the same value (average). This is to
+ ! improve the quality of the interpolation at gamma in the case of double grid 
+ ! (try to avoid unphysical splitting of degenerate states) 
+ !
+ call degeneration_finder(n_exc_in,first_exc_grp,n_of_exc_grp,n_deg_grp,&
+ &                       Er=BS_all_E(EXCPH_states(1):EXCPH_states(2),1),&
+ &                 deg_accuracy=deg_exc_thrshld,Include_single_values=.true.)
+ !
+ do igrp=1,n_deg_grp
+   j_alpha=first_exc_grp(igrp)+EXCPH_states(1)-1
+   i_alpha=first_exc_grp(igrp)+n_of_exc_grp(igrp)+EXCPH_states(1)-2
+   EXC_energy_average=sum(BS_all_E(j_alpha:i_alpha,1))/real(n_of_exc_grp(igrp),SP)
+   BS_all_E(j_alpha:i_alpha,1)=EXC_energy_average
+ enddo
+ !
+ !
+ ! Load the Exciton-phonon elements and phonon energies
+ !
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID_exc_gkkp)
+ io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+ if(io_err/=0) call error('excph_gkkp databases not found!')
+ !
+ elph_branches_save = elph_branches
+ call io_control(ACTION=OP_RD,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID_gkkp)
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded no_matrix_elements')
+ elph_branches = elph_branches_save
+ if(io_err/=0) call error('gkkp_expanded databases not found!')
+ !
+ ! Info on excitons
+ ! 
+ min_pos  =minloc(BS_all_E(1,:))
+ min_EXC_E=minval(BS_all_E(1,:))
+ !
+ call msg('rs','Minimum excitonic energy : ',min_EXC_E*HA2EV,' eV, at q-point :'//intc(min_pos(1)))
+ call msg('rs','FAN treshold: ',FAN_deltaE_treshold*HA2CMm1,' cm-1')
+ call msg('rs','Self-energy broadening : ',L_damping*HA2EV*1000._SP,'meV')
+ if (l_EXCPH_offdiago) then
+   call msg('rs','Interband transitions (off-diagonal self-energy) included.')
+ endif
+ !
+ ! Turn on double-grid if presents
+ call EXCPH_double_grid_setup(q)
+ !
+ ! Calculate the life-times
+ !
+ YAMBO_ALLOC(EXC_LifeTime,(EXCPH_states(1):EXCPH_states(2)))
+ EXC_LifeTime=rZERO
+ if (l_EXCPH_offdiago) then
+   YAMBO_ALLOC(EXC_LineWidth,(EXCPH_states(1):EXCPH_states(2)))
+   EXC_LineWidth=rZERO
+ endif
+ !
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/q%nbz/))
+ !
+ call live_timing('Lifetimes',px%n_of_elements(myid+1))
+ !
+ do iq=1,q%nibz
+ do i_star=1,q%nstar(iq)
+   !
+   iq_s=q%star(iq,i_star)
+   iq_bz = q%k_table(iq, iq_s)
+   !
+   if (.not.px%element_1D(iq_bz)) cycle
+   !
+   IO_ACT=manage_action(RD_CL_IF_END,iq_bz,1,q%nbz,PAR_INDEX=px)
+   !
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+iq_bz/),MODE=DUMP,ID=ID_exc_gkkp)
+   io_err=io_EXCPH_gkkp(ID_exc_gkkp)
+   if(io_err/=0) call error('excph_gkkp databases not found!')
+   !
+   !iq_db=DB_Q_map(iq_bz)
+   iq_db=Q_io_map(iq_bz)
+   call io_control(ACTION=IO_ACT,COM=REP,SEC=(/1+iq_db/),MODE=DUMP,ID=ID_gkkp)
+   io_err=io_ELPH(ID_gkkp,'gkkp_expanded no_matrix_elements')
+   if(io_err/=0) call error('gkkp_expanded databases not found!')
+   !
+   do i_alpha=EXCPH_states(1),EXCPH_states(2)
+     !
+     do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+       !
+       do il=elph_branches(1),elph_branches(2)
+         !
+         ! Intraband terms only (lifetimes)
+         ExcPhM=EXCPH_Gkkp_sq(il,i_beta,i_alpha)
+         ! Including interband terms (linewidths)
+         if (l_EXCPH_offdiago) then
+           !
+           ExcPhW=cZERO
+           do j_alpha=EXCPH_states(1),EXCPH_states(2)
+             !
+             ExcPhW=ExcPhW+EXCPH_Gkkp(il,i_beta,j_alpha)*conjg(EXCPH_Gkkp(il,i_beta,i_alpha))
+             !
+           enddo
+           !
+         endif
+         !
+         if(l_no_matrix_elements) ExcPhM=1._SP 
+         !
+         if(.not.use_PH_DbGd) Pole=Simple_pole()
+         if(use_PH_DbGd)      Pole=DbGrid_pole()
+         !
+         EXC_LifeTime(i_alpha)=EXC_LifeTime(i_alpha)+ExcPhM*Pole
+         if (l_EXCPH_offdiago) then
+           EXC_LineWidth(i_alpha)=EXC_LineWidth(i_alpha)+REAL(ExcPhW,SP)*Pole
+         endif
+         if (l_extend_output) then
+           EXC_life_of_q(i_alpha,iq_bz)=EXC_life_of_q(i_alpha,iq_bz)+ExcPhM*Pole
+           EXC_pole_of_q(i_alpha,iq_bz)=EXC_pole_of_q(i_alpha,iq_bz)+Pole
+         endif
+         ! DS Debug <
+         ! write(100+i_alpha,*) i_alpha,EXC_LifeTime(i_alpha)
+         ! DS Debug >
+         !
+       enddo ! i_l
+       !
+     enddo ! i_beta
+     !
+   enddo ! i_alpha
+   !
+   call live_timing(steps=1)
+   !
+ enddo ! i_start
+ enddo ! iq
+ !
+ call live_timing( )
+ !
+ call PP_redux_wait(EXC_LifeTime)
+ if (l_EXCPH_offdiago) then
+   call PP_redux_wait(EXC_LineWidth)
+ endif
+ if (l_extend_output) then
+   call PP_redux_wait(EXC_life_of_q)
+   call PP_redux_wait(EXC_pole_of_q)
+ endif
+ !
+ call PP_indexes_reset(px)
+ !
+ ! Average lifetimes over degenerate groups (use input energies BS_E_in)
+ !
+ call degeneration_finder(n_exc_in,first_exc_grp,n_of_exc_grp,n_deg_grp,&
+ &                    Er=real(BS_E_in(EXCPH_states(1):EXCPH_states(2))),&
+ &                 deg_accuracy=0.001/HA2EV,Include_single_values=.true.)
+ !
+ do igrp=1,n_deg_grp
+   j_alpha=first_exc_grp(igrp)+EXCPH_states(1)-1
+   i_alpha=first_exc_grp(igrp)+n_of_exc_grp(igrp)+EXCPH_states(1)-2
+   EXC_life_average=sum(EXC_LifeTime(j_alpha:i_alpha))/real(n_of_exc_grp(igrp),SP)
+   EXC_LifeTime(j_alpha:i_alpha)=EXC_life_average
+   if (l_EXCPH_offdiago) then
+     EXC_line_average=sum(EXC_LineWidth(j_alpha:i_alpha))/real(n_of_exc_grp(igrp),SP) 
+     EXC_LineWidth(j_alpha:i_alpha)=EXC_line_average
+   endif
+ enddo
+ !
+ ! Convert to final units          
+ EXC_LifeTime=EXC_LifeTime*2._SP*pi/real(q%nbz,SP)
+ if (l_EXCPH_offdiago) EXC_LineWidth=EXC_LineWidth*2._SP*pi/real(q%nbz,SP)
+ !
+ ! Print output and write on file
+ !
+ call print_and_write()
+ !
+ if (l_extend_output) then
+   call print_LifeTimes_of_q("L")
+   call print_LifeTimes_of_q("P")
+ endif
+ !
+ ! Deallocations
+ !
+ YAMBO_FREE(EXC_LifeTime)
+ if (l_EXCPH_offdiago) then
+   YAMBO_FREE(EXC_LineWidth)
+ endif
+ call RESIDUALS_and_EIGENVALUES_free()
+ if (l_extend_output) then
+   deallocate(EXC_life_of_q)
+ endif
+ !
+ contains
+  !
+  function Simple_pole()
+    use EXCPH,          ONLY:min_EXC_E,DB_Q_map
+    use ELPH,           ONLY:FAN_deltaE_treshold,PH_freqs_sq,Q_io_map
+    use functions,      ONLY:bose_f,boltzman_f,Lorentzian_func
+    use units,          ONLY:HA2EV
+    implicit none
+    !
+    real(SP) :: Simple_pole
+    real(SP) :: E_alpha,E_beta
+    real(SP) :: ph_E,N_bose,F_boltz
+    integer  :: iq_db
+    !
+    Simple_pole=rZERO
+    !
+    E_alpha=real(BS_E_in(i_alpha),SP)
+    E_beta =BS_all_E(i_beta,iq)
+    !
+    iq_db=Q_io_map(iq_bz)
+    ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+    !
+    if(ph_E<FAN_deltaE_treshold) return
+    !
+    ! Remove degenerate states E(alpha,iq_in)=E(beta,iq_in) [currently iq_in=1]
+    !
+    if(iq_db==1.and.abs(E_alpha-E_beta)<FAN_deltaE_treshold .and. .not.l_keep_deg) return
+    ! even if keeping deg states, still remove the one where i_alpha=i_beta
+    if(iq_db==1.and.i_alpha==i_beta .and. l_keep_deg) return
+    ! [FP] This removes resonances, I think it is not correct
+    !if(abs(E_alpha-E_beta-ph_E)<FAN_deltaE_treshold.or.abs(E_alpha-E_beta+ph_E)<FAN_deltaE_treshold) return
+    !
+    N_bose  =bose_f(ph_E)
+    !
+    F_boltz=boltzman_f(E_beta-min_EXC_E)  ! Bose function? Boltzman function?
+    !
+    Simple_pole=(1._SP+N_bose+F_boltz)*Lorentzian_func(E_alpha-E_beta-ph_E,L_damping) &
+&                    +(N_bose-F_boltz)*Lorentzian_func(E_alpha-E_beta+ph_E,L_damping)
+    !
+    ! DEBUG <
+    ! write(200+i_alpha,*) E_alpha*HA2EV,E_beta*HA2EV,ph_E*HA2EV
+    ! DEBUG >
+    !
+  end function Simple_Pole
+  !
+  function DbGrid_pole()
+    use EXCPH,          ONLY:min_EXC_E_DbGd,EXC_E_DbGd
+    use ELPH,           ONLY:FAN_deltaE_treshold,PH_freqs,PH_freqs_sq,Q_io_map
+    use functions,      ONLY:bose_f,boltzman_f,Lorentzian_func
+    implicit none
+    !
+    real(SP) :: DbGrid_pole
+    real(SP) :: E_alpha,E_beta
+    real(SP) :: ph_E,N_bose,F_boltz
+    integer  :: nq_around,iq_fg,iE_fg
+    real(SP) :: weight_dbgd
+    real(SP), external :: get_ph_E_DbGd
+    !
+    DbGrid_pole=rZERO
+    !
+    E_alpha=real(BS_E_in(i_alpha),SP)
+    !
+    nq_around  =q%FGbz%k_range(iq_bz,2)-q%FGbz%k_range(iq_bz,1) 
+    weight_dbgd=1._SP/real(nq_around+1._SP,SP)                  
+    !
+    do iq_fg=q%FGbz%k_range(iq_bz,1),q%FGbz%k_range(iq_bz,2)
+      !
+      !
+      if(iq_fg==q%FGbz%k_range(iq_bz,1)) then
+        !
+        ! Here we get original points for coarse grids
+        ! (partially counters inaccuracies in the interpolation in the exc.
+        ! case)
+        !
+        iq_db=Q_io_map(iq_bz) ! This mapping is because sometimes the phonon q_bz differs from the exciton one in yambo
+        ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+        !
+        E_beta=BS_all_E(i_beta,iq)  ! Getting this in the IBZ for the equivalent iq to iq_bz
+        !
+        ! If there is the double-grid I read the gamma phonons from there
+        ! because they include the LO-TO splitting and the acustic sum rule
+        !
+        if(iq_bz==1) ph_E=PH_freqs%FG%E(il,1,1)
+        !
+      else
+        iE_fg=q%FGbz%E_map(iq_fg)                ! Map w(q) from BZ to IBZ
+        ph_E =PH_freqs%FG%E(il,iE_fg,1)          ! w(q_db) phonon energy in the D-Grid
+        !
+        E_beta=EXC_E_DbGd(i_beta,iq_fg)          ! E(q_bz) exciton energy in the D-Grid
+      endif
+      !
+      if(ph_E<FAN_deltaE_treshold) cycle
+      !
+      ! Remove degenerate states E(alpha,iq_in)=E(beta,iq_fg(iq_in)) [currently iq_in=1]
+      !
+      if(iq==1.and.abs(E_alpha-E_beta)<FAN_deltaE_treshold .and. .not.l_keep_deg) return
+      ! even if keeping deg states, still remove the one where i_alpha=i_beta
+      if(iq==1.and.i_alpha==i_beta .and. l_keep_deg) return
+      ! [FP] This removes resonances, I think it is not correct
+      !if(abs(E_alpha-E_beta-ph_E)<FAN_deltaE_treshold.or.abs(E_alpha-E_beta+ph_E)<FAN_deltaE_treshold) return
+      !
+      N_bose  =bose_f(ph_E)
+      F_boltz=boltzman_f(E_beta-min_EXC_E_DbGd)  ! Bose function? Boltzman function?
+      !
+      DbGrid_pole=DbGrid_pole+(1._SP+N_bose+F_boltz)*Lorentzian_func(E_alpha-E_beta-ph_E,L_damping) &
+&                                  +(N_bose-F_boltz)*Lorentzian_func(E_alpha-E_beta+ph_E,L_damping)
+      !
+    enddo
+    !
+    DbGrid_pole=DbGrid_pole*weight_dbgd
+    !
+  end function DbGrid_pole
+  !
+  subroutine print_and_write()
+    use pars,           ONLY:schlen
+    use OUTPUT,         ONLY:OUTPUT_driver
+    use stderr,         ONLY:real2ch,intc,log2ch
+    !
+    implicit none
+    !
+    character(schlen) :: what='QP'
+    !
+    call OUTPUT_driver(what,action="reset")
+    call OUTPUT_driver(what,action="open")
+    !
+    call OUTPUT_driver(what,ORDER=(/"Eo"/))
+    call OUTPUT_driver(what,ORDER=(/"Width [me"/))
+    !
+    call msg('rs','Exciton lifetimes:')
+    !
+    call msg('o '//what,'#')    
+    call msg('o '//what,'# === Exciton LifeTimes === ')    
+    call msg('o '//what,'#')    
+    call msg('o '//what,'# Exciton Sum                  : '//trim(intc(EXCPH_sum(1)))//' - '//trim(intc(EXCPH_sum(2))))    
+    call msg('o '//what,'# Exciton States               : '//trim(intc(EXCPH_states(1)))//' - '//trim(intc(EXCPH_states(2))))    
+    call msg('o '//what,'# Self-Energy broadening       :'//trim(real2ch(L_damping*HA2EV*1000._SP))//' meV')    
+    call msg('o '//what,'# FAN threshold                 :'//trim(real2ch(FAN_deltaE_treshold*HA2EV*1000._SP))//' meV')    
+    call msg('o '//what,'# PH Double-Grid                : '//log2ch(use_PH_DbGd))    
+    !
+    do i_alpha=EXCPH_states(1),EXCPH_states(2)
+      !
+      call OUTPUT_driver(what,TITLES=(/"Band   "/),I_VALUES=(/i_alpha/))
+      call OUTPUT_driver(what,TITLES=(/"E"/),R_VALUES=(/real(BS_E_in(i_alpha))/),UNIT="eV")
+      call OUTPUT_driver(what,TITLES=(/"Life^-1"/),R_VALUES=(/EXC_LifeTime(i_alpha)/),UNIT="meV")
+      if (l_EXCPH_offdiago) then
+        call OUTPUT_driver(what,TITLES=(/"Width"/),R_VALUES=(/EXC_LineWidth(i_alpha)/),UNIT="meV")
+      endif
+     
+      call OUTPUT_driver(what,action="write")
+      !
+      call msg('rs','Exciton ['//trim(intc(i_alpha))//'] Lifetime: ',EXC_LifeTime(i_alpha)*HA2EV*1000._SP,'meV')
+      !
+    enddo  
+    !
+    call OUTPUT_driver(what,action="close")
+    !
+  end subroutine 
+  !
+  subroutine print_LifeTimes_of_q(MODE)
+    ! MODE="L"[ife]:
+    !   - print q-map of lifetimes for each exc_in state for q-space plot
+    !     We print l(q)/sum_q l(q)
+    ! MODE="P"[ole]:
+    !   - Print value of the Pole [F_occ*delta(...)] for each exc_in state for q-space plot
+    !     We print sum_{beta,mu} Pole_{beta,mu}(q)
+    !
+    use vec_operate,   ONLY:iku_v_norm
+    use com,           ONLY:of_open_close
+    !use YPPm,          ONLY:K_transform
+    !
+    implicit none
+    !
+    character(1) :: MODE
+    character(schlen), allocatable :: headings(:)
+    character(schlen)              :: file_name
+    real(SP), allocatable          :: values(:)
+    real(SP)                       :: tmp_q(3)
+    !
+    allocate(headings(n_exc_in+4))
+    YAMBO_ALLOC(values,(n_exc_in+4))
+    values=rZERO
+    !
+    headings(1)=" |q| (a.u.) "
+    do i_alpha=EXCPH_states(1),EXCPH_states(2)
+      headings(i_alpha+1)=' e'//intc(i_alpha)
+      !
+      if (MODE=="L") then
+        EXC_life_of_q(i_alpha,:)=EXC_life_of_q(i_alpha,:)/SUM(EXC_life_of_q(i_alpha,:))
+      endif
+      !
+    enddo
+    headings(n_exc_in+2:n_exc_in+4)=&
+&           (/"q_x (rlu)","q_y (rlu)","q_z (rlu)"/)
+    !
+    if (MODE=="L") file_name="LifeTimes_of_q"
+    if (MODE=="P") file_name="Resonance_of_q"
+    !
+    call of_open_close(trim(file_name),'ot')
+    call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
+    call msg('o '//trim(file_name),"#")
+    !
+    do iq=1,q%nibz
+    do i_star=1,q%nstar(iq)
+      !
+      iq_s=q%star(iq,i_star)
+      iq_bz = q%k_table(iq, iq_s)
+      !
+      if (iq_bz>1) values(1)=iku_v_norm(q%ptbz(iq_bz,:3))
+      !
+      tmp_q = q%ptbz(iq_bz,:3)
+      !
+      !call K_transform(tmp_q,'iku')
+      if (MODE=="L") values(1+1:n_exc_in+1)=EXC_life_of_q(:,iq_bz)
+      if (MODE=="P") values(1+1:n_exc_in+1)=EXC_pole_of_q(:,iq_bz)
+      values(n_exc_in+2:n_exc_in+4)=tmp_q
+      call msg('o '//trim(file_name),' ',values,INDENT=0,USE_TABS=.true.)
+      !
+    enddo
+    enddo
+    call of_open_close(trim(file_name))
+    !
+  end subroutine print_LifeTimes_of_q
+  !
+end subroutine
diff --git a/src/exc-ph/EXCPH_load_L.F b/src/exc-ph/EXCPH_load_L.F
new file mode 100644
index 0000000000..150db6ed92
--- /dev/null
+++ b/src/exc-ph/EXCPH_load_L.F
@@ -0,0 +1,112 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP
+!
+! headers
+!
+#include<y_memory.h>
+!
+! --- Theory ---
+! (5) Figure out rotations/symmetry of exciton eigenvectors and EPC m.e.
+! - FP
+!
+subroutine EXCPH_load_L(iq_bse,X,what,Ltype)
+ !
+ use pars,           ONLY:lchlen
+ use BS_solvers,     ONLY:io_BSS_diago,BS_mat,BSS_n_eig,BSS_free,BSS_write_eig_2_db
+ use IO_m,           ONLY:NONE,OP_APP_CL,OP_RD_CL,DUMP,OP_RD,RD_CL_IF_END,OP_WR_CL,deliver_IO_error_message
+ use R_lattice,      ONLY:bz_samp
+ use BS,             ONLY:BS_H_dim,BSE_L_kind,BS_res_ares_n_mat,BS_K_dim
+ use EXCPH,          ONLY:BS_E,BS_R_left,BS_R_right,EXCPH_states,EXCPH_sum,Lout_path,L_kind_in,L_kind_out
+ use IO_int,         ONLY:io_control
+ use stderr,         ONLY:intc
+ use X_m,            ONLY:X_t
+ use com,            ONLY:jobstr,msg
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ ! 
+ type(X_t)     ::X
+ type(bz_samp) ::k,q
+ integer, intent(in) :: iq_bse     
+ character(*)        :: what,Ltype
+ !
+ ! Work space
+ !
+ integer :: ID_BS,io_err
+ character(lchlen) :: jobstr_save
+ logical :: l_read_residuals
+ !
+ if(index(Ltype,'Lout')>0.and.Lout_path/='none') then
+   jobstr_save=jobstr
+   jobstr     =trim(Lout_path)
+ endif
+ !
+ l_read_residuals=.FALSE.
+ if(index(what,'residuals')>0) l_read_residuals=.TRUE.
+ !
+ if(index(what,'check')>0) then
+   !
+   call io_control(ACTION=OP_RD_CL,COM=NONE,MODE=DUMP,SEC=(/1/),ID=ID_BS)
+   io_err=io_BSS_diago(iq_bse,1,ID_BS,X)
+   if(io_err/=0) call error(" Error checking "//trim(Ltype)//" for q = "//trim(intc(iq_bse)))
+   !
+   !if (BS_res_ares_n_mat/=1) call error("Excitonic Gkkp: anti-resonant part not implemented")
+   !
+   if(.not.BSS_write_eig_2_db) call error("BSS diago DB does not contain the eigenstates")
+   !
+   BS_H_dim=BS_K_dim(1)
+   !
+   if(EXCPH_states(2)>BSS_n_eig)  &
+&     call error("Number of exc. states in the excphon coupling must         <= number of BSE eigenstates")
+   !
+   if(EXCPH_sum(2)>BS_H_dim)      &
+&     call error("Number of virtual exc. states in the excphon coupling must <= size of the exc. Hamiltonian")
+   !
+   if(iq_bse==1) then
+     call msg('rs','Found '//trim(Ltype)//' with dimension: '//trim(intc(BSS_n_eig))//' x '//trim(intc(BS_H_dim)))
+     if(index(Ltype,'Lout')>0) then
+       L_kind_out=BSE_L_kind
+       call msg('rs','L kind out: '//trim(L_kind_out))
+     elseif(index(Ltype,'Lin')>0) then
+       L_kind_in =BSE_L_kind
+       call msg('rs','L kind  in: '//trim(L_kind_in))
+     endif
+   endif
+   !
+ elseif(index(what,'eigenvalues')>0) then
+   !
+   ! Read eigenvalues and optionally residuals
+   !
+   call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/2/),ID=ID_BS)
+   if(l_read_residuals) then
+      io_err=io_BSS_diago(iq_bse,1,ID_BS,X,bsE=BS_E,bsRr=BS_R_right,bsRl=BS_R_left)
+   else
+      io_err=io_BSS_diago(iq_bse,1,ID_BS,X,bsE=BS_E)
+   endif
+   if(io_err/=0) call error(" Error loading "//trim(Ltype)//" for q = "//trim(intc(iq_bse)))
+   !
+ elseif(index(what,'load')>0) then
+   !      
+   ! Read BSE eigenvectors and eigenvalues and optionally residuals
+   !
+   call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/2,3/),ID=ID_BS)
+   if(l_read_residuals) then
+      io_err=io_BSS_diago(iq_bse,1,ID_BS,X,bsE=BS_E,BS_mat=BS_mat,bsRr=BS_R_right,bsRl=BS_R_left)
+   else
+      io_err=io_BSS_diago(iq_bse,1,ID_BS,X,bsE=BS_E,BS_mat=BS_mat)
+   endif
+   if(io_err/=0) call error(" Error loading "//trim(Ltype)//" for q = "//trim(intc(iq_bse)))
+   !
+ else
+   call error(" Wrong call to the EXCPH_load_L function! ")
+ endif
+ !
+ if(index(Ltype,'Lout')>0.and.Lout_path/='none') then
+   jobstr     =jobstr_save
+ endif
+ !
+end subroutine EXCPH_load_L
diff --git a/src/exc-ph/EXCPH_optics.F b/src/exc-ph/EXCPH_optics.F
new file mode 100644
index 0000000000..c67fbf8a51
--- /dev/null
+++ b/src/exc-ph/EXCPH_optics.F
@@ -0,0 +1,264 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM FP AC
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate phonon-assisted absorption and emission (see PRL 122, 187401 )
+!
+subroutine EXCPH_optics(E,k,q,X,wv)
+ !
+ use units,          ONLY:HA2EV
+ use pars,           ONLY:SP,cZERO,schlen,rZERO
+ use vec_operate,    ONLY:iku_v_2D_norm,c2a
+ use electrons,      ONLY:levels,nel_cond,E_reset
+ use R_lattice,      ONLY:bz_samp,bare_qpg,d3k_factor
+ use D_lattice,      ONLY:Boltz_Temp
+ use X_m,            ONLY:X_t
+ use com,            ONLY:msg,of_open_close
+ use EXCPH,          ONLY:EXCPH_sum,EXCPH_states,L_kind_in,L_kind_out,BS_E,BS_all_E,BS_E_in,   &
+&                         BS_R_left,BS_R_right,BS_R,BSE_alloc,BSE_free,EXC_occ,alphaQ,q_DbGd,  &
+&                         RESIDUALS_and_EIGENVALUES_free, RESIDUALS_and_EIGENVALUES_alloc,EXC_E_DbGd,  &
+&                         l_DbGd_PH_only,l_DbGd_WEIGHTs,l_no_matrix_elements,Sat_alloc,Sat_free, &
+&                         min_EXC_E_DbGd,min_pos_E_DbGd
+ use ELPH,           ONLY:use_PH_DbGd,ph_modes
+ use BS_solvers,     ONLY:BSS_eh_table,BSS_alloc,BSS_free
+ use BS,             ONLY:BSE_prop,BS_dip_size
+ use frequency,      ONLY:w_samp
+ use parser_m,       ONLY:parser
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels)  ::E
+ type(bz_samp) ::k,q
+ type(X_t)     ::X
+ type(w_samp)  :: wv 
+ !
+ ! Work space
+ !
+ complex(SP), allocatable :: PL_ii(:,:),Epsilon_ii(:,:)
+ integer, parameter  :: n_output=7
+ real(SP)            :: output_data(n_output),E_fermi_h,E_fermi_e,v_tmp(3)
+ character(schlen)   :: headers(n_output)
+ integer             :: iw,iq,i_alpha,max_filling,EXC_n_sum,i_beta
+ integer, allocatable:: EXC_indexes(:)
+ character(schlen)   :: dbgd_string
+ character(schlen) :: q_point 
+ type(levels)        :: E_QF
+ logical :: l_sat_from_Hpert
+ !
+ call section('*','EXCPH-Optics')
+ !
+ ! Load excitonic energies 
+ !
+ if(L_kind_in/=L_kind_out) &
+&     call warning(' Lkind_in /= Lkind_out this can generate inconsistencies in the luminescence! ')
+ !
+ call EXCPH_load_L(1,X,'check','Lout')
+ !
+ ! Allocations 
+ !
+ call RESIDUALS_and_EIGENVALUES_alloc(q)
+ !
+ call Sat_alloc()
+ !
+ ! Load excitonic energies and residuals
+ !
+ BSE_prop='abs'
+ BS_dip_size=1
+ !
+ call K_observables()
+ !
+ call BSS_alloc()
+ !
+ call BSE_alloc(k,q)
+ !
+ call EXCPH_load_L(1,X,'load residuals','Lin')
+ BS_E_in(:)=BS_E(EXCPH_states(1):EXCPH_states(2)) ! L_in can be different from L_out 
+ !
+ ! Recalculate excitonic dipole if required
+ !
+ call EXC_dipole(k,E,q,X)
+ !
+ ! Excitonic occupation
+ !
+ if(nel_cond/=rZERO) then
+   call msg('rs','Excitonic quasi-fermi distribution interpolation')
+   call E_reset(E_QF)
+   call OCCUPATIONS_Quasi_Fermi(E,k,E_QF,E_fermi_h,E_fermi_e)
+   EXC_n_sum=EXCPH_sum(2)-EXCPH_sum(1)+1
+   YAMBO_ALLOC(EXC_occ,(EXC_n_sum,q%nibz))
+   YAMBO_ALLOC(EXC_indexes,(EXC_n_sum))
+   if(.not.allocated(q%k_table))  call k_build_up_BZ_tables(q)   
+   if(EXCPH_sum(1)/=1) call error("This part of the code does not work for ELPhExcSum(1)/=1")
+   do i_beta=EXCPH_sum(1),EXCPH_sum(2)
+      EXC_indexes(i_beta)=i_beta 
+   enddo
+   if(Boltz_Temp/=rZERO) call warning('Excitonic Occupation, EXCTemp ignored, use ElecTemp!')
+ endif
+ !
+ do iq=1,q%nibz
+   if(nel_cond/=rZERO) then
+     call EXCPH_load_L(iq,X,'load','Lout')
+     call EXC_occupations(E_QF,k,q,iq,EXC_n_sum,EXC_indexes,EXC_occ(:,iq))
+   else
+     call EXCPH_load_L(iq,X,'eigenvalues','Lout')
+   endif
+     BS_all_E(:,iq)=real(BS_E(:)) ! I suppose the BSE eigenvalues are real
+ enddo
+ !
+ call parser('NoMatrxEl',l_no_matrix_elements)
+ if(l_no_matrix_elements) call warning('No exc-ph matrix elements, just ph-assisted dos')
+ !  
+ ! Turn on double-grid if presents
+ call EXCPH_double_grid_setup(q)
+ !
+ dbgd_string=''
+ !
+ call msg('rs','Double-grid                  :',use_PH_DbGd)
+ !
+ if(use_PH_DbGd) then
+   dbgd_string='DbGd'
+   max_filling=maxval(q%FGbz%N_mapped)+1  ! max number of tilde q around each q-point include the q itself
+   !
+   ! Check double-grid flags
+   !
+   call parser('DbGdWEIGHTs',l_DbGd_WEIGHTs)
+   call parser('DbGdOnlyPh', l_DbGd_PH_only)
+   !
+   call msg('rs','Double-grid for satellites   :',l_DbGd_WEIGHTs)
+   call msg('rs','Double-grid only for phonons :',l_DbGd_PH_only) 
+   !
+ endif
+ !
+ ! Frequencies and damping setup
+ call FREQUENCIES_setup(wv)
+ YAMBO_ALLOC(Epsilon_ii,(wv%n_freqs,3))
+ YAMBO_ALLOC(PL_ii,(wv%n_freqs,3))
+ !
+ ! Band structure distortion
+ !
+ if(alphaQ/=rZERO) then
+   call msg('rs','Excitonic bands 2D stretching :',alphaQ)
+   do iq=1,q%nibz
+      ! In a general case here we should use iku_v_norm
+      ! we modified to the iku_v_2D_norm for the AB-hBN case
+      ! one can restore the original one
+      BS_all_E(:,iq)=BS_all_E(:,iq)+alphaQ*iku_v_2D_norm(q%pt(iq,:3))
+   enddo
+   if(use_PH_DbGd) then
+      do iq=1,q_DbGd%nibz
+         EXC_E_DbGd(:,iq)=EXC_E_DbGd(:,iq)+alphaQ*iku_v_2D_norm(q_DbGd%pt(iq,:3))
+      enddo
+       min_EXC_E_DbGd=minval(EXC_E_DbGd(1,:))
+       min_pos_E_DbGd=minloc(EXC_E_DbGd(1,:))
+       call c2a(v_in=q_DbGd%pt(min_pos_E_DbGd(1),:),v_out=v_tmp,mode="ki2a")
+       call msg('rs','NEW!!! Minimum excitonic energy with DbGd: ',min_EXC_E_DbGd*HA2EV,'eV')
+       write(q_point,'(3f12.6,a)') v_tmp(:),' [rlu] '
+       call msg('rs','NEW!!!! Minimum q-point in the DbGrid : '//q_point)
+   endif
+ endif
+ !
+ call parser('SatFromHPert',l_sat_from_Hpert)
+ if (l_sat_from_Hpert) then
+   !
+   ! This option only computes phonon-assisted emission satellites
+   ! using perturbation-theory formula from excitonic Hamiltonian
+   ! (i.e. |sum_in BS_R (in) G (in,out)|^2 instead of sum_in BS_R(in)**2! G(in,out)**2 )
+   !
+   call warning(' Calculating only modified satellites for PL')
+   call EXCPH_X_phass_from_Hpert(wv,q,PL_ii)
+   !
+ else
+   !
+   ! Calculate X(w) including phonon-assisted transitions as
+   ! derived from exciton-phonon self-energy
+   !
+   call EXCPH_X_phass(wv,q,Epsilon_ii,PL_ii)
+   !
+ endif
+ !
+! call X_setup_file_names(1,'bse ','phass',dbgd_string) 
+ !
+ call print_output()
+ !
+ ! Deallocations
+ !
+ call RESIDUALS_and_EIGENVALUES_free()
+ call Sat_free()
+ call BSS_free()
+ YAMBO_FREE(Epsilon_ii)
+ YAMBO_FREE(PL_ii)
+ !
+ call FREQUENCIES_reset(wv,"all")
+ !
+ contains
+   !
+   subroutine print_output()
+!     use X_output,      ONLY:X_write_messages_before_headers,X_write_q_plus_G
+     implicit none
+     !
+     character(schlen) :: eps_file_name,pl_file_name
+     !
+     eps_file_name='eps_bse_ph_ass'
+     pl_file_name ='pl_bse_ph_ass'
+     if(use_PH_DbGd) then
+       eps_file_name='eps_bse_ph_ass_dbgd'
+       pl_file_name ='pl_bse_ph_ass_dbgd'
+     endif
+     !
+     call of_open_close(eps_file_name,'ot')
+     call of_open_close(pl_file_name,'ot')
+     !
+!     call X_write_messages_before_headers(1,.FALSE.,.FALSE.,'r ')
+     !
+!     call X_write_q_plus_G(1,(/0._SP,0._SP,0._SP/),1)
+     !
+     call msg('o eps_bse pl_bse','# EXCITON PHONON')    
+     call msg('o eps_bse pl_bse','# Exciton_states    :',EXCPH_states,INDENT=0)    
+     call msg('o eps_bse pl_bse','# EXciton sum       :',EXCPH_sum,INDENT=0)    
+     if(use_PH_DbGd) then
+       call msg('o eps_bse pl_bse','# Double-grid for satellites   :',l_DbGd_WEIGHTs,INDENT=0)    
+       call msg('o eps_bse pl_bse','# Double-grid only for phonons :',l_DbGd_PH_only,INDENT=0)    
+     endif
+     call msg('o eps_bse pl_bse','#')
+     headers(1)='E[eV]'
+     headers(2:n_output)=(/'Im(eps_ph )','Re(eps_ph )','Im(eps_nos)','Re(eps_nos)','Re(eps_sat)','Im(eps_sat)'/)
+     call msg('o eps_bse','#',headers,INDENT=0,USE_TABS=.TRUE.)
+     !
+     headers(2:n_output)=(/'Im(pl_ph )','Re(pl_ph )','Im(pl_nos)','Re(pl_nos)','Re(pl_sat)','Im(pl_sat)'/)
+     call msg('o pl_bse','#',headers,INDENT=0,USE_TABS=.TRUE.)
+     call msg('o eps_bse pl_bse','#')
+     !
+     ! Write output on file
+     !
+     do iw=1,wv%n_freqs
+       !
+       ! Eps
+       !
+       output_data=(/real(wv%p(iw))*HA2EV,aimag(Epsilon_ii(iw,1)),real(Epsilon_ii(iw,1)), &
+&             aimag(Epsilon_ii(iw,2)),real(Epsilon_ii(iw,2)),aimag(Epsilon_ii(iw,3)),real(Epsilon_ii(iw,3))/)
+       call msg('o eps_bse','',output_data,INDENT=-2,USE_TABS=.true.)
+       !
+       ! Luminescence
+       !
+       output_data=(/real(wv%p(iw))*HA2EV,aimag(PL_ii(iw,1)),real(PL_ii(iw,1)),          &
+&               aimag(PL_ii(iw,2)),real(PL_ii(iw,2)),aimag(PL_ii(iw,3)),real(PL_ii(iw,3))/)
+       call msg('o pl_bse','',output_data,INDENT=-2,USE_TABS=.true.)
+       !
+     enddo
+     !
+     ! Close files
+     !
+     call of_open_close(eps_file_name)
+     call of_open_close(pl_file_name)
+     !
+   end subroutine print_output
+   !
+end subroutine
diff --git a/src/exc-ph/EXC_dipole.F b/src/exc-ph/EXC_dipole.F
new file mode 100644
index 0000000000..4f9038e5fe
--- /dev/null
+++ b/src/exc-ph/EXC_dipole.F
@@ -0,0 +1,113 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2021 the YAMBO team
+!
+! Authors (see AUTHORS file for details): AM FP AC DS
+!
+! headers
+!
+#include<y_memory.h>
+!
+!> @brief Calculate exciton-dipole
+!!
+subroutine EXC_dipole(Xk,E,q,X)
+ !
+ use pars,           ONLY:pi,SP,rZERO
+ use com,            ONLY:msg
+ use stderr,         ONLY:intc
+ use R_lattice,      ONLY:d3k_factor,bare_qpg,bz_samp
+ use DIPOLES,        ONLY:DIPOLE_t,DIPOLES_reset
+ use electrons,      ONLY:levels,spin_occ
+ use EXCPH,          ONLY:EXC_q0,BS_R_left,BS_R_right,BS_R,EXCPH_states,BS_E_in
+ use vec_operate,    ONLY:v_norm
+ use X_m,            ONLY:X_t
+ use BS,             ONLY:BS_bands,BS_T_grp,BSE_prop,BS_dip_size,BS_nT_grps
+ use BS_solvers,     ONLY:BS_mat,BS_H_dim,BSS_n_eig,BSS_Vnl_included,BSS_alloc,BSS_free
+ use parallel_int,   ONLY:PARALLEL_global_indexes,PP_redux_wait,PARALLEL_index,PARALLEL_WF_distribute,PARALLEL_WF_index
+ use parallel_m,     ONLY:PAR_IND_WF_b_and_k
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels),  intent(in) :: E
+ type(bz_samp), intent(in) :: Xk,q
+ type(X_t),     intent(in) :: X
+ !
+ ! Work Space
+ !
+ integer :: i_alpha,i_BS_mat,VB(2),CB(2)
+ type(DIPOLE_t) :: DIPs
+ !
+ if(any(abs(EXC_q0(:))>epsilon(1.))) then
+   !
+   call DIPOLES_reset(DIPs)
+   !
+   DIPs%q0=EXC_q0/v_norm(EXC_q0)
+   call msg('rs','Excitonic dipole along the direction : ', DIPs%q0)
+   !
+   DIPs%ib          =BS_bands(1:2)
+   DIPs%ib_lim(1)   =E%nbf(1)
+   DIPs%ib_lim(2)   =E%nbf(1)+1
+   !   
+   !  Unfortunatelly BSS_Vnl_included and DIPs%computed 
+   !  are not saved by the BS_solves I assume .TRUE. and 'R V P'
+   !
+   DIPs%Vnl_included=.TRUE. 
+   DIPs%computed    ='R V P'
+   !
+   call msg('s','Dipole bands range : ',BS_bands)
+   call msg('s','Dipole Vnl included:  ',BSS_Vnl_included)
+   !
+   ! ... PARALLEL setup
+   !
+   call PARALLEL_global_indexes(E,Xk,q," ",RESET=.TRUE.)
+   call PARALLEL_global_indexes(E,Xk,q,"Response_T_space",X=X)
+   !
+   ! ... EH space (no parallelization)
+   call K_Transitions_dimensions(1,E,Xk,DIPs,VB,CB)
+   !
+   !... Parallel distribution
+   call PARALLEL_global_Response_T_transitions(Xk)
+   !
+   ! ... EH space (with parallelization) and Transition groups build-up
+   call K_Transitions_setup(1,E,Xk,DIPs,VB,CB)
+   !
+   !... Blocks build-up
+   call K_blocks()
+   !
+   ! Fill BSS_dipole_opt to be done
+   call K_dipoles(1,E,Xk,DIPs)
+   !
+   ! I deallocate BSS_eh_table to force the code
+   ! to reconstruct all elements
+   ! 
+   call BSS_free() 
+   !
+   call K_components_folded_in_serial_arrays(1)
+   !
+   ! Recalculate the residuals
+   !
+   i_BS_mat=1
+   call K_diago_R_res("opt1",i_BS_mat,BS_E_in,BSS_n_eig,0,BS_R_right,BS_mat)
+   BS_R_left =conjg(BS_R_right)
+   !
+ else
+   !      
+   call msg('rs','Use dipole of the Lin') 
+   !
+ endif
+ !
+ ! Calculate the full residual
+ !
+ BS_R=rZERO
+ do i_alpha=EXCPH_states(1),EXCPH_states(2)
+   BS_R(i_alpha)=real(abs(BS_R_left(i_alpha)*BS_R_right(i_alpha)),SP)
+ enddo
+ !
+ ! This formula can be wrong in presence of the 2D-cutoff
+ BS_R=BS_R*real(spin_occ,SP)/(2._SP*pi)**3*d3k_factor*4._SP*pi/bare_qpg(1,1)**2
+ !
+ call PARALLEL_global_indexes(E,Xk,q,"Response_T_space",X=X,RESET=.TRUE.)
+ !
+end subroutine
diff --git a/src/exc-ph/EXC_occupations.F b/src/exc-ph/EXC_occupations.F
new file mode 100644
index 0000000000..e0b4423717
--- /dev/null
+++ b/src/exc-ph/EXC_occupations.F
@@ -0,0 +1,81 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): CA
+!
+! headers
+!
+#include<y_memory.h>
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+! Excitonic occupation, following PRB 99, 081109(R) 2019
+!
+subroutine EXC_occupations(E_QF,k,q,iq_ibz,EXC_n_states,EXC_indexes,EXC_occ)
+ !
+ use pars,           ONLY:SP,cZERO,rZERO,cONE
+ use electrons,      ONLY:levels,spin_occ
+ use R_lattice,      ONLY:qindx_C
+ use R_lattice,      ONLY:bz_samp
+ use BS,             ONLY:BS_H_dim,BS_bands
+ use BS_solvers,     ONLY:BSS_eh_table,BS_mat
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(levels), intent(in)  :: E_QF
+ type(bz_samp),intent(in)  :: k,q
+ integer,      intent(in)  :: EXC_n_states,iq_ibz
+ integer,      intent(in)  :: EXC_indexes(EXC_n_states)
+ real(SP),     intent(out) :: EXC_occ(EXC_n_states)
+ !
+ ! Work space
+ !
+ integer                   :: i_H,iv,ic,i_l,i_c,ik_bz,ik_ibz,ik_minus_iq_bz,ik_minus_iq_ibz,iq_bz
+ real(SP)                  :: el_occ
+ !
+ EXC_occ=rZERO
+ !
+ do i_H=1,BS_H_dim
+   !
+   ! K = (ik_bz,iv,ic)
+   !
+   ik_bz  = BSS_eh_table(i_H,1)
+   ik_ibz = k%sstar(ik_bz,1)
+   !
+   iv    = BSS_eh_table(i_H,2)
+   ic    = BSS_eh_table(i_H,3)
+   !
+   ! In the BSE at finite q we connect (v, k-q) ==> (c, k)
+   !
+   iq_bz = q%k_table(iq_ibz,1)
+   ik_minus_iq_bz  = qindx_C(ik_bz,iq_bz,1)      ! K_bz-Q_bz
+   ik_minus_iq_ibz = k%sstar(ik_minus_iq_bz,1)
+   !
+   el_occ=E_qf%f(ic,ik_ibz,1)*(spin_occ-E_qf%f(iv,ik_minus_iq_ibz,1))
+   !
+   do i_c=1,EXC_n_states
+      i_l=EXC_indexes(i_c)
+      EXC_occ(i_c)=EXC_occ(i_c)+abs(BS_mat(i_H,i_l))**2*el_occ
+   enddo
+   !
+ enddo
+ !
+end subroutine
diff --git a/src/exc-ph/Satellite_DbGrid.F b/src/exc-ph/Satellite_DbGrid.F
new file mode 100644
index 0000000000..4662b67db3
--- /dev/null
+++ b/src/exc-ph/Satellite_DbGrid.F
@@ -0,0 +1,184 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AC FP
+!
+! headers
+!
+#include<y_memory.h>
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+!> @brief Calculate the phonon-assisted satellite for absorption and emission
+!         using double-grid
+!
+! @param[in]       iq_bz               q-point
+! @param[in]       il                  phonon branch
+! @param[in]       i_alpha             incoming exciton
+! @param[in]       i_beta              outgoing exciton
+! @param[in]       W                   photon energy and damping factor
+!
+! @param[out]      Sat_absorption      Satellite in absorption
+! @param[out]      Sat_emission        Satellite in emission
+!!
+subroutine Satellite_DbGrid(iq_bz,q,il,i_alpha,i_beta,W,Sat_absorption,Sat_emission)
+ !
+ use frequency,     ONLY:w_samp
+ use R_lattice,     ONLY:bz_samp
+ use units,         ONLY:HA2EV
+ use electrons,     ONLY:nel_cond
+ use pars,          ONLY:SP,cZERO,rZERO
+ use EXCPH,         ONLY:BS_all_E,BS_E_in,min_EXC_E_DbGd,EXC_E_DbGd,l_DbGd_PH_only,BS_Sat_E_PH_abs, &
+&                        BS_Sat_WEIGHT,BS_Sat_WEIGHT_PH_abs,min_EXC_E,l_no_matrix_elements,         &
+&                        EXC_occ,EXC_occ_DbGd                         
+ use ELPH,          ONLY:PH_freqs,FAN_deltaE_treshold,PH_freqs_sq,Q_io_map
+ use functions,     ONLY:bose_f,boltzman_f
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,       intent(in)  :: il,iq_bz,i_alpha,i_beta
+ complex(SP),   intent(in)  :: W
+ complex(SP),   intent(out) :: Sat_absorption, Sat_emission
+ type(bz_samp), intent(in)  :: q
+ !
+ ! Work Space
+ !
+ real(SP)    :: PH_E,E_alpha,boltzm_factor,weight_dbgd,E_beta,BS_ave_W,BS_ave_W_PH_abs,bose_factor
+ complex(SP) :: BS_Sat_E_DbGd,BS_Sat_E_DbGd_PH_abs
+ integer     :: nq_around,iq_fg,iE_fg,iq_ibz,iq_db
+ complex(SP) :: pole
+ real(SP), external :: get_ph_E_DbGd
+ !
+ !
+ BS_ave_W       =BS_Sat_WEIGHT(il,i_beta,i_alpha)
+ BS_ave_W_PH_abs=BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)
+ !
+ if(l_no_matrix_elements) then
+   BS_ave_W       =1._SP
+   BS_ave_W_PH_abs=1._SP
+ endif
+ !
+ Sat_absorption=cZERO
+ Sat_emission  =cZERO
+ !
+ E_alpha =BS_E_in(i_alpha)
+ !
+ weight_dbgd=rZERO
+ !
+ do iq_fg=q%FGbz%k_range(iq_bz,1),q%FGbz%k_range(iq_bz,2),1
+   !
+   if(iq_fg==q%FGbz%k_range(iq_bz,1)) then
+     !      
+     iq_db=Q_io_map(iq_bz)
+     ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))      
+     !
+     ! If there is the double-grid I read the gamma phonons from there
+     ! because they include the LO-TO splitting and the acustic sum rule
+     !
+     if(iq_bz==1) ph_E=PH_freqs%FG%E(il,1,1) 
+     !
+   else
+     iE_fg=q%FGbz%E_map(iq_fg)                      ! Map w(q) from BZ to IBZ
+     ph_E =PH_freqs%FG%E(il,iE_fg,1)                ! w(q_db) phonon energy in the D-Grid
+   endif
+   !
+   if(ph_E<FAN_deltaE_treshold) cycle
+   !
+   weight_dbgd=weight_dbgd+1._SP
+   !
+   bose_factor=bose_f(ph_E)
+   !
+   if(l_DbGd_PH_only) then
+     !      
+     ! Excitonic energies in the course grid
+     !
+     iq_ibz = q%sstar(iq_bz,1)
+     E_beta  =BS_all_E(i_beta,iq_ibz)
+     !
+     if(nel_cond/=rZERO) then
+       boltzm_factor=EXC_occ(i_beta,iq_ibz)
+     else
+       boltzm_factor=boltzman_f(E_beta-min_EXC_E)
+     endif
+     !
+   else
+     !      
+     ! Excitonic energies in the fine-grid
+     !
+     E_beta  =EXC_E_DbGd(i_beta,iq_fg)
+     !
+     ! Boltzman factor for the luminescence
+     !
+     if(nel_cond/=rZERO) then
+       boltzm_factor=abs(EXC_occ_DbGd(i_beta,iq_fg)) ! Fourier interpolation can generate negative occupations I take abs()
+     else
+       boltzm_factor=boltzman_f(E_beta-min_EXC_E_DbGd)
+     endif
+     !
+   endif
+   !
+   ! Evaluate BS_Sat energies in the double-grid
+   !
+   BS_Sat_E_DbGd       =E_beta-BS_E_in(i_alpha)+ph_E
+   BS_Sat_E_DbGd_PH_abs=E_beta-BS_E_in(i_alpha)-ph_E
+   !
+   pole=E_alpha+BS_Sat_E_DbGd
+   !
+   ! Bose factor already included in BS_Sat_W_DG and BS_Sat_W_DG_PH_abs
+   !
+   ! Phonon emission in absorption
+   !
+   Sat_absorption=Sat_absorption+BS_ave_W/(W-pole)*(1._SP+bose_factor)
+   !
+   ! Phonon emission in luminescence
+   !
+   Sat_emission  =Sat_emission  +BS_ave_W/(W-pole)*bose_factor*boltzm_factor
+   !
+   pole=E_alpha+BS_Sat_E_DbGd_PH_abs
+   !
+   ! Phonon absorption in absorption
+   !
+   Sat_absorption=Sat_absorption+BS_ave_W_PH_abs/(W-pole)*bose_factor
+   ! 
+   ! Phonon absorption in luminescence
+   !
+   Sat_emission=Sat_emission    +BS_ave_W_PH_abs/(W-pole)*(1._SP+bose_factor)*boltzm_factor
+   !
+ enddo
+ !
+ ! Average on the q%FGbz points around iq_bz including the point calculated in Satellite_Simple
+ !
+ if(weight_dbgd/=rZERO) then
+   !
+   weight_dbgd=1._SP/weight_dbgd 
+   Sat_emission  =Sat_emission*weight_dbgd
+   Sat_absorption=Sat_absorption*weight_dbgd
+   !
+ else
+   !
+   ! Case for acustic phonons close to Gamma
+   !   
+   Sat_emission  =cZERO
+   Sat_absorption=cZERO
+   !
+ endif
+ !
+end subroutine Satellite_DbGrid
diff --git a/src/exc-ph/Satellite_Simple.F b/src/exc-ph/Satellite_Simple.F
new file mode 100644
index 0000000000..f4af3996cb
--- /dev/null
+++ b/src/exc-ph/Satellite_Simple.F
@@ -0,0 +1,111 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AC
+!
+! headers
+!
+#include<y_memory.h>
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+!> @brief Calculate the phonon-assisted satellite for absorption and emission
+!         using double-grid
+!
+! @param[in]       iq_bz               q-point in the BZ
+! @param[in]       il                  phonon branch
+! @param[in]       i_alpha             incoming exciton
+! @param[in]       i_beta              outgoing exciton
+! @param[in]       W                   photon energy and damping factor
+!
+! @param[out]      Sat_absorption      Satellite in absorption
+! @param[out]      Sat_emission        Satellite in emission
+!!
+subroutine Satellite_Simple(iq_bz,q,il,i_alpha,i_beta,W,Sat_absorption,Sat_emission)
+ !
+ use frequency,     ONLY:w_samp
+ use units,         ONLY:HA2CMm1
+ use electrons,     ONLY:nel_cond
+ use R_lattice,     ONLY:bz_samp
+ use D_lattice,     ONLY:nsym
+ use pars,          ONLY:SP,cZERO,rZERO
+ use EXCPH,         ONLY:BS_Sat_E,BS_Sat_WEIGHT,BS_all_E,BS_Sat_E_PH_abs,BS_Sat_WEIGHT_PH_abs,BS_E_in, &
+&                        min_EXC_E,DB_Q_map,EXC_occ
+ use functions,     ONLY:bose_f,boltzman_f
+ use units,         ONLY:HA2CMm1
+ use ELPH,          ONLY:PH_freqs_sq,FAN_deltaE_treshold,Q_io_map
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,       intent(in)  :: il,iq_bz,i_alpha,i_beta
+ complex(SP),   intent(in)  :: W
+ complex(SP),   intent(out) :: Sat_absorption, Sat_emission
+ type(bz_samp)              :: q
+ !
+ ! Work Space
+ !
+ real(SP)    :: PH_E,bose_factor,boltzm_factor
+ complex(SP) :: pole,E_alpha
+ integer     :: iq_db,iq_ibz
+ !
+ Sat_absorption=cZERO
+ Sat_emission  =cZERO
+ !
+ ! Bose function for the phonons
+ !
+ !iq_db=DB_Q_map(iq_bz)
+ iq_db=Q_io_map(iq_bz)
+ ph_E=sqrt(abs(PH_freqs_sq(iq_db,il)))
+ !
+ if(ph_E<FAN_deltaE_treshold) return
+ !
+ bose_factor=bose_f(ph_E)
+ !
+ E_alpha=BS_E_in(i_alpha)
+ !
+ ! Boltzman factor for the luminescence
+ !
+ iq_ibz = q%sstar(iq_bz,1)
+ boltzm_factor=boltzman_f(BS_all_E(i_beta,iq_ibz)-min_EXC_E)
+ !
+ if(nel_cond/=rZERO) boltzm_factor=EXC_occ(i_beta,iq_ibz)
+ !
+ pole=E_alpha+BS_Sat_E(il,i_beta,i_alpha)
+ !
+ ! Phonon emission in absorption [photon absorption / phonon emission]
+ !
+ Sat_absorption=Sat_absorption+BS_Sat_WEIGHT(il,i_beta,i_alpha)/(W-pole)*(1._SP+bose_factor)
+ !
+ ! Phonon absorption in luminescence [photon emission / phonon absorption]
+ !
+ Sat_emission=Sat_emission    +BS_Sat_WEIGHT(il,i_beta,i_alpha)/(W-pole)*bose_factor*boltzm_factor
+ !
+ pole=E_alpha+BS_Sat_E_PH_abs(il,i_beta,i_alpha)
+ !
+ ! Phonon absorption in absorption [photon absorption / phonon absorption]
+ !
+ Sat_absorption=Sat_absorption+BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)/(W-pole)*bose_factor 
+ ! 
+ ! Phonon emission in luminescence [photon emission / phonon emission]
+ !
+ Sat_emission=Sat_emission   +BS_Sat_WEIGHT_PH_abs(il,i_beta,i_alpha)/(W-pole)*(1._SP+bose_factor)*boltzm_factor
+ !
+end subroutine Satellite_Simple
diff --git a/src/exc-ph/YPP_ELPH_project.dep b/src/exc-ph/YPP_ELPH_project.dep
new file mode 100644
index 0000000000..d828f38222
--- /dev/null
+++ b/src/exc-ph/YPP_ELPH_project.dep
@@ -0,0 +1,2 @@
+ EXC_occupations.o
+
diff --git a/src/hamiltonian/Apply_W_field.F b/src/hamiltonian/Apply_W_field.F
index 782d6df894..a8ced57be1 100644
--- a/src/hamiltonian/Apply_W_field.F
+++ b/src/hamiltonian/Apply_W_field.F
@@ -9,7 +9,7 @@ subroutine Apply_W_field(A_input,Wk,H_nl_sc)
  !
  use pars,           ONLY:SP,cZERO
  use units,          ONLY:SPEED_OF_LIGHT
- use hamiltonian,    ONLY:H_ref_bands
+ use hamiltonian,    ONLY:H_ref_bands,H_ref_bands_frozen
  use fields,         ONLY:gauge_field
  !
  implicit none
@@ -20,16 +20,19 @@ subroutine Apply_W_field(A_input,Wk,H_nl_sc)
  !
  ! Work Space
  !
- integer :: i1,id
+ integer :: i1,i2,id
  complex(SP) :: E_vec_pot(3)
  !
  E_vec_pot=-A_input%vecpot_vel/SPEED_OF_LIGHT
  !
- !$omp parallel do default(shared), private(i1,id)
+ !$omp parallel do default(shared), private(i1,i2,id)
  do i1=H_ref_bands(1),H_ref_bands(2)
-   do id=1,3
-     H_nl_sc(i1,H_ref_bands(1):H_ref_bands(2))=H_nl_sc(i1,H_ref_bands(1):H_ref_bands(2))-         &
-     &                                               E_vec_pot(id)*Wk(id,i1,H_ref_bands(1):H_ref_bands(2))
+   if ( H_ref_bands_frozen(i1)==1 ) cycle
+   do i2=H_ref_bands(1),H_ref_bands(2)
+     if ( H_ref_bands_frozen(i2)==1 ) cycle
+     do id=1,3
+       H_nl_sc(i1,i2)=H_nl_sc(i1,i2)+E_vec_pot(id)*Wk(id,i1,i2)
+     enddo
    enddo
  enddo
  !$omp end parallel do
diff --git a/src/hamiltonian/Bare_Hamiltonian.F b/src/hamiltonian/Bare_Hamiltonian.F
index bea95ad214..fc8f172984 100644
--- a/src/hamiltonian/Bare_Hamiltonian.F
+++ b/src/hamiltonian/Bare_Hamiltonian.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DV DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine Bare_Hamiltonian(E,Xk,k)
  !
  use pars,           ONLY:cZERO
@@ -30,7 +34,9 @@ subroutine Bare_Hamiltonian(E,Xk,k)
 #if defined _SC
  use drivers,        ONLY:l_sc_magnetic
 #endif
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(levels)             :: E
  type(bz_samp)            :: Xk,k
diff --git a/src/hamiltonian/Berry_polarization_NEQ.F b/src/hamiltonian/Berry_polarization_NEQ.F
index a70e42f3d8..cb1c6884d8 100644
--- a/src/hamiltonian/Berry_polarization_NEQ.F
+++ b/src/hamiltonian/Berry_polarization_NEQ.F
@@ -13,7 +13,7 @@ subroutine Berry_polarization_NEQ(Xen,Xk,i_sp_pol,V_bands,Polarization)
  use R_lattice,      ONLY:bz_samp,k_map,nXkbz
  use hamiltonian,    ONLY:H_ref_bands
  use QP_m,           ONLY:QP_nk
- use electric,       ONLY:All_S_det,Sm1_plus,Sm1_minus,l_res_symms
+ use electric,       ONLY:All_S_det,Sm1_plus,Sm1_minus,l_no_res_symms
  use parallel_m,     ONLY:PAR_IND_Xk_bz,PAR_COM_Xk_ibz_INDEX
  use parallel_int,   ONLY:PP_redux_wait
 #if defined _TIMING
@@ -37,9 +37,11 @@ subroutine Berry_polarization_NEQ(Xen,Xk,i_sp_pol,V_bands,Polarization)
  call timing('NL Berry Pol NEQ',OPR='start')
 #endif
  !
+!$OMP WORKSHARE  
  Sm1_minus=cZERO
  Sm1_plus =cZERO
  All_S_det=cZERO
+!$OMP END WORKSHARE  
  !
  if (i_sp_pol==1) Polarization=cZERO
  !
@@ -72,7 +74,7 @@ subroutine Berry_polarization_NEQ(Xen,Xk,i_sp_pol,V_bands,Polarization)
  !
  ! Apply residual symmetries
  !
- if(nsym>1.and.l_res_symms) then
+ if(nsym/=1.and..not.l_no_res_symms) then
    Pol_symm=cZERO
    do is=1,nsym/(i_time_rev+1) ! Time reversal is not present 
      Pol_symm(:)=Pol_symm(:)+matmul(dl_sop(:,:,is),Polarization)
diff --git a/src/hamiltonian/Build_Overlaps_det_NEQ.F b/src/hamiltonian/Build_Overlaps_det_NEQ.F
index 746f19a003..17113e8e35 100644
--- a/src/hamiltonian/Build_Overlaps_det_NEQ.F
+++ b/src/hamiltonian/Build_Overlaps_det_NEQ.F
@@ -126,6 +126,8 @@ subroutine Build_overlaps_det_NEQ(en,Xk,i_sp,ikbz,V_bands,Sm1_plus,Sm1_minus,S_d
        !
        ! Matrix core
        !
+       ! Sm1(k,k-dk)_{n,m} =sum_{i,j} V^\dag(k)_(n,i) S^\dag(k-dk,k)_{i,j} V(k-dk)_(j,n)
+       !
        !$omp parallel do default(shared), private(i2,vtmp)
        do i2=H_ref_bands(1),en%nbf(i_sp)
          call M_by_V('C',H_ref_bands(2),DIP_S(:,:,id+(istep-1)*3,ikm,i_sp),V_bands(:,i2,ikm_ibz),vtmp)
diff --git a/src/hamiltonian/Build_W_operator.F b/src/hamiltonian/Build_W_operator.F
index 402527cd41..4b358c0a78 100644
--- a/src/hamiltonian/Build_W_operator.F
+++ b/src/hamiltonian/Build_W_operator.F
@@ -34,13 +34,16 @@ subroutine Build_W_operator(en,Xk,ik,i_sp,V_bands,Wk)
  !
  integer :: i1,i2,im,id,id_red
  integer :: ikbz,istep,max_step
+ !
  complex(SP) :: Wop(H_ref_bands(1):H_ref_bands(2),H_ref_bands(1):H_ref_bands(2),2)
  complex(SP) :: V_tilde(en%nbf(i_sp),H_ref_bands(1):H_ref_bands(2),2)
  complex(SP) :: delta_V_tilde(en%nbf(i_sp),H_ref_bands(1):H_ref_bands(2))
  complex(SP) :: V_tmp(H_ref_bands(1):H_ref_bands(2),en%nbf(i_sp))
- complex(SP) :: E_vec_pot(3),alpha
+ complex(SP) :: alpha
  !
+!$OMP PARALLEL WORKSHARE  
  Wk=cZERO
+!$OMP END PARALLEL WORKSHARE  
  !
  ikbz=Xk%k_table(ik,1)
  !
@@ -83,7 +86,9 @@ subroutine Build_W_operator(en,Xk,ik,i_sp,V_bands,Wk)
      ! <\phi_i | e^{iGr} | \phi_j >
      ! It works in principle but it requires big supercells
      !
+!$OMP PARALLEL WORKSHARE  
      Wop(:,:,:)=cZERO !DIP_S(H_ref_bands(1):,H_ref_bands(1):,id_red,ik,i_sp)*cI/(4._SP*pi)   
+!$OMP END PARALLEL WORKSHARE  
      !
    endif    
    !
diff --git a/src/hamiltonian/Build_tilde_vbands.F b/src/hamiltonian/Build_tilde_vbands.F
index 736d581932..1ee9160b77 100644
--- a/src/hamiltonian/Build_tilde_vbands.F
+++ b/src/hamiltonian/Build_tilde_vbands.F
@@ -5,6 +5,9 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! V_tilde(:,:,1) is calculated at  q+delta
+! V_tilde(:,:,2) is calculated at  q-delta
+!      
 subroutine Build_tilde_vbands(en,Xk,ikbz,i_sp,istep,id_red,V_bands,V_tilde)
  !
  ! Build the \tilde Valence bands as defined in Eq. 53 of
@@ -37,6 +40,8 @@ subroutine Build_tilde_vbands(en,Xk,ikbz,i_sp,istep,id_red,V_bands,V_tilde)
  complex(SP) :: tmp_Vp(H_ref_bands(1):H_ref_bands(2),en%nbf(i_sp)),tmp_Vm(H_ref_bands(1):H_ref_bands(2),en%nbf(i_sp))
  !
  V_tilde=cZERO
+ tmp_Vm =cZERO
+ tmp_Vp =cZERO
  nbf=en%nbf(i_sp)
  !
  if(k_map%max_kdir(id_red)==1) return
@@ -72,11 +77,11 @@ subroutine Build_tilde_vbands(en,Xk,ikbz,i_sp,istep,id_red,V_bands,V_tilde)
  !
  call M_by_M_omp('N','N',H_ref_nbands,nbf,H_ref_bands(2),cONE,  &
 &  DIP_S(H_ref_bands(1):,:,id_red+(istep-1)*3,ikbz,i_sp),H_ref_nbands, &
-&  V_bands(:,:nbf,ikp_ibz),H_ref_bands(2),cZERO,tmp_Vp(H_ref_bands(1):,:en%nbf(i_sp)),H_ref_nbands)
+&  V_bands(:,:nbf,ikp_ibz),H_ref_bands(2),cZERO,tmp_Vp(H_ref_bands(1):,:nbf),H_ref_nbands)
  !
  call M_by_M_omp('C','N',H_ref_nbands,nbf,H_ref_bands(2),cONE,  &
 &  DIP_S(:,H_ref_bands(1):,id_red+(istep-1)*3,ikm ,i_sp),H_ref_bands(2), &
-&  V_bands(:,:nbf,ikm_ibz),H_ref_bands(2),cZERO,tmp_Vm(H_ref_bands(1):,:en%nbf(i_sp)),H_ref_nbands)
+&  V_bands(:,:nbf,ikm_ibz),H_ref_bands(2),cZERO,tmp_Vm(H_ref_bands(1):,:nbf),H_ref_nbands)
  !
  ! Eq. (53) in PRB 69, 085106 (2004), derived in appendix (C), see eq. C(11)
  !
diff --git a/src/hamiltonian/MAG_A_phase.F b/src/hamiltonian/MAG_A_phase.F
index 4e9548a6af..841bd2a96e 100644
--- a/src/hamiltonian/MAG_A_phase.F
+++ b/src/hamiltonian/MAG_A_phase.F
@@ -41,8 +41,8 @@ subroutine MAG_A_phase()
  call WF_load(WF,0,1,(/ib_ref,ib_ref/),(/ik_ref,ik_ref/),(/is_ref,is_ref/),title='-MAG_GAUGE',impose_free_and_alloc=.TRUE.)
  !
  iwf=WF%index(ib_ref,ik_ref,is_ref)
- wf_r2(:)=r2_cyl(:)*WF%c(:,is_ref,iwf)
- r2_ref=real(Vstar_dot_V(fft_size,WF%c(:,is_ref,iwf),wf_r2))
+ wf_r2(:)=r2_cyl(:)*WF%r(:,1,iwf)
+ r2_ref=real(Vstar_dot_V(fft_size,cmplx(WF%r(:,1,iwf),kind=SP),wf_r2))
  !
  n_loops=int(r2_ref*Bz/(2*SPEED_OF_LIGHT)+0.5)
  !
diff --git a/src/hamiltonian/MAG_Hamiltonian.F b/src/hamiltonian/MAG_Hamiltonian.F
index 17f0d2cfa2..5e34dc3f4b 100644
--- a/src/hamiltonian/MAG_Hamiltonian.F
+++ b/src/hamiltonian/MAG_Hamiltonian.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine MAG_Hamiltonian()
  !
  ! WARNING: you need to put you system at the corner of the supercell <---
@@ -54,8 +58,9 @@ subroutine MAG_Hamiltonian()
  use wrapper,        ONLY:Vstar_dot_V
  use parallel_m,     ONLY:PAR_IND_QP,HEAD_QP_cpu
  use QP_m,           ONLY:QP_table,QP_n_states
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Dummies
  !
@@ -194,16 +199,16 @@ subroutine MAG_Hamiltonian()
              if(H_magn_x_on) H_tmp(:,j_spinor,j_spinor)= H_paramagn_x(:)*wf_x(:,j_spinor,ip_wf)
              if(H_magn_y_on) H_tmp(:,j_spinor,j_spinor)= H_tmp(:,j_spinor,j_spinor)+H_paramagn_y(:)*wf_y(:,j_spinor,ip_wf)
              if(H_magn_z_on) H_tmp(:,j_spinor,j_spinor)= H_tmp(:,j_spinor,j_spinor)+H_paramagn_z(:)*wf_z(:,j_spinor,ip_wf)
-             H_tmp(:,j_spinor,j_spinor)=H_tmp(:,j_spinor,j_spinor)+H_diamagn(:)*WF%c(:,j_spinor,ip_wf)
+             H_tmp(:,j_spinor,j_spinor)=H_tmp(:,j_spinor,j_spinor)+H_diamagn(:)*WF%r(:,j_spinor,ip_wf)
            endif
            !
            do i_spinor=1,n_spinor
              !
              if (MAG_pauli) H_tmp(:,i_spinor,j_spinor)= H_tmp(:,i_spinor,j_spinor) &
-             &                                          +H_pauli(:,i_spinor,j_spinor,i_sp_pol)*WF%c(:,j_spinor,ip_wf)
+             &                                          +H_pauli(:,i_spinor,j_spinor,i_sp_pol)*WF%r(:,j_spinor,ip_wf)
              !
              Hzero(ib,ibp,ik,i_sp_pol)= Hzero(ib,ibp,ik,i_sp_pol)+&
-&              Vstar_dot_V(fft_size,WF%c(:,i_spinor,i_wf),H_tmp(:,i_spinor,j_spinor))
+&              Vstar_dot_V(fft_size,cmplx(WF%r(:,i_spinor,i_wf),kind=SP),H_tmp(:,i_spinor,j_spinor))
              !
            enddo
            !
diff --git a/src/hamiltonian/MAG_common_build_A.F b/src/hamiltonian/MAG_common_build_A.F
index 7ab58c4d79..610afa7411 100644
--- a/src/hamiltonian/MAG_common_build_A.F
+++ b/src/hamiltonian/MAG_common_build_A.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine MAG_common_build_A(build)
  !
  !
@@ -41,7 +45,9 @@ subroutine MAG_common_build_A(build)
 &                         A_magn_x,A_magn_y,A_magn_z,gauge_factors,       &
 &                         phase_trick,phase_radius,n_loops
  use vec_operate,    ONLY:v_norm
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  ! Input variables
  logical   :: build
diff --git a/src/hamiltonian/Pseudo_Hamiltonian.F b/src/hamiltonian/Pseudo_Hamiltonian.F
index 2ccf2169f0..ca42aa244e 100644
--- a/src/hamiltonian/Pseudo_Hamiltonian.F
+++ b/src/hamiltonian/Pseudo_Hamiltonian.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine Pseudo_Hamiltonian(Xk,Xen,l_equilibrium)
  !
  ! This subroutine computes the non local part of the pseudo potential V^nl using the KB form factors.
@@ -45,8 +49,9 @@ subroutine Pseudo_Hamiltonian(Xk,Xen,l_equilibrium)
  use parallel_m,    ONLY:PAR_G_k_range
  use real_time,     ONLY:Ho_plus_sigma,H_pseudo_eq
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: Xk
  type(levels)  :: Xen
@@ -173,7 +178,7 @@ subroutine Pseudo_Hamiltonian(Xk,Xen,l_equilibrium)
          !
          if (iwf==0) cycle
          !
-         kbv_ks_space(ib)=V_dot_V(fft_size*n_spinor,WF%c(:,:,iwf),kbv_gauged)
+         kbv_ks_space(ib)=V_dot_V(fft_size*n_spinor,cmplx(WF%r(:,:,iwf),kind=SP),kbv_gauged(:,:))
          !
          do ibp=H_ref_bands(1),ib
            !
diff --git a/src/hamiltonian/Pseudo_KB_FFT.F b/src/hamiltonian/Pseudo_KB_FFT.F
index fe33649c41..86575ce5ec 100644
--- a/src/hamiltonian/Pseudo_KB_FFT.F
+++ b/src/hamiltonian/Pseudo_KB_FFT.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine Pseudo_KB_FFT(kbv,kbv_real_space,kpt,pp_range,npp,i_atom,is,ia,ik)
  !
  use drivers,       ONLY:l_real_time,l_sc_magnetic
@@ -13,8 +17,9 @@ subroutine Pseudo_KB_FFT(kbv,kbv_real_space,kpt,pp_range,npp,i_atom,is,ia,ik)
  use wave_func,     ONLY:wf_ncx,wf_igk,wf_nc_k
  use FFT_m,         ONLY:fft_size,fft_dim,fft_g_table,fft_norm,fftw_plan
  use D_lattice,     ONLY:atom_pos,x_cc,a,n_atoms
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,     intent(in)  :: npp,pp_range(2),i_atom,is,ia,ik
  complex(SP), intent(in)  :: kbv(wf_ncx,n_spinor,4,npp)
diff --git a/src/hamiltonian/Pseudo_KB_G_to_R_space.F b/src/hamiltonian/Pseudo_KB_G_to_R_space.F
index 4232aebc97..ec3a0c7b52 100644
--- a/src/hamiltonian/Pseudo_KB_G_to_R_space.F
+++ b/src/hamiltonian/Pseudo_KB_G_to_R_space.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine Pseudo_KB_G_to_R_space(Xk,Xen)
  !
  use pars,          ONLY:SP
@@ -22,8 +26,9 @@ subroutine Pseudo_KB_G_to_R_space(Xk,Xen)
  use timing_m,      ONLY:timing
  use parallel_int,  ONLY:PP_wait
  use parallel_m,    ONLY:master_cpu,PAR_IND_Xk_ibz_ID,PAR_IND_Xk_ibz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: Xk
  type(levels)  :: Xen
@@ -39,7 +44,7 @@ subroutine Pseudo_KB_G_to_R_space(Xk,Xen)
  !
  ! I / O
  !
- integer                 :: IO_ACT_NOW,ID,ID_R,io_err(3),io_KB_pwscf_err,io_KB_abinit_err,io_Vnl_err,io_KB_real_space_err
+ integer                 :: IO_ACT_NOW,ID,ID_R,io_err(2),io_KB_pwscf_err,io_KB_abinit_err,io_KB_real_space_err
  integer, external       :: io_KB_abinit,io_KB_pwscf,io_KB_real_space
  !
  !
@@ -51,7 +56,6 @@ subroutine Pseudo_KB_G_to_R_space(Xk,Xen)
  !
  io_KB_abinit_err= io_err(1)
  io_KB_pwscf_err = io_err(2)
- io_Vnl_err     = io_err(3)
  !
  if(io_KB_abinit_err/=0.and.io_KB_pwscf_err/=0) return
  !
diff --git a/src/hamiltonian/Pseudo_KB_gauge_factor.F b/src/hamiltonian/Pseudo_KB_gauge_factor.F
index e7a5953e61..fc72863f94 100644
--- a/src/hamiltonian/Pseudo_KB_gauge_factor.F
+++ b/src/hamiltonian/Pseudo_KB_gauge_factor.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine Pseudo_KB_gauge_factor(gauge_factor)
  !
  use pars,          ONLY:SP
@@ -20,8 +24,9 @@ subroutine Pseudo_KB_gauge_factor(gauge_factor)
  use drivers,       ONLY:l_real_time
  use fields,        ONLY:A_tot
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  complex(SP)             :: gauge_factor(fft_size,n_atoms)
  !
diff --git a/src/hamiltonian/V_Hartree.F b/src/hamiltonian/V_Hartree.F
index 37e5c6c9f8..a3ff103f71 100644
--- a/src/hamiltonian/V_Hartree.F
+++ b/src/hamiltonian/V_Hartree.F
@@ -35,11 +35,11 @@ subroutine V_Hartree(rho,vhr)
  !
  call timing('V_Hartree',OPR='start')
  !
-!$OMP WORKSHARE  
+!$OMP PARALLEL WORKSHARE  
  vhg   = (0._DP,0._DP)
  vhtmp = (0._DP,0._DP)
  rhog  = rho
-!$OMP END WORKSHARE  
+!$OMP END PARALLEL WORKSHARE  
  !
  ! FFT call: rho in reciprocal space
  !
@@ -73,9 +73,9 @@ subroutine V_Hartree(rho,vhr)
  call fft_3d(vhtmp,fft_dim,1)
 #endif
  !
-!$OMP WORKSHARE  
+!$OMP PARALLEL WORKSHARE  
  vhr(:,1) = real(vhtmp,SP)
- !$OMP END WORKSHARE  
+!$OMP END PARALLEL WORKSHARE  
  if(n_spin==2) vhr(:,2)=vhr(:,1)
  !
  call timing('V_Hartree',OPR='stop')
diff --git a/src/hamiltonian/V_qp_basis_to_H.F b/src/hamiltonian/V_qp_basis_to_H.F
index 294f12abe0..18d5ad7b69 100644
--- a/src/hamiltonian/V_qp_basis_to_H.F
+++ b/src/hamiltonian/V_qp_basis_to_H.F
@@ -47,7 +47,7 @@ subroutine V_qp_basis_to_H(ik,i_sp_pol,V,H)
      if (.not.(PAR_IND_QP%element_1D(i1))) cycle
    endif
    if (l_par_RT) then
-     if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1)-1 )) cycle
+     if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1) )) cycle
    endif
    !
    ! In the RT simulation the bands QP_table matrix is not ordered
diff --git a/src/hamiltonian/V_real_space_to_H.F b/src/hamiltonian/V_real_space_to_H.F
index d879b7c202..845e9ab01f 100644
--- a/src/hamiltonian/V_real_space_to_H.F
+++ b/src/hamiltonian/V_real_space_to_H.F
@@ -53,8 +53,10 @@ subroutine V_real_space_to_H(ik,i_sp_pol,H,WF,V_kind,V,Vxc)
  V_mat=cZERO
  if(index(V_kind,"xc") >0) call Build_V_xc_mat(Vxc,V_mat)
  if(index(V_kind,"def")>0) then
-   forall(i_spinor=1:n_spinor) V_mat(:,i_sp_pol+i_spinor-1,i_sp_pol+i_spinor-1)= &
-&         V_mat(:,i_sp_pol+i_spinor-1,i_sp_pol+i_spinor-1)+V(:,i_sp_pol+i_spinor-1)
+   do i_spinor=1,n_spinor
+     i_spin=i_sp_pol+i_spinor-1
+     V_mat(:,i_spin,i_spin)=V_mat(:,i_spin,i_spin)+V(:,i_spin)
+   enddo
  endif
  !
  PAR_IND_local=.true.
@@ -88,7 +90,7 @@ subroutine V_real_space_to_H(ik,i_sp_pol,H,WF,V_kind,V,Vxc)
    !
    do ib=H_ref_bands(1),H_ref_bands(2)
      do ibp=ib,H_ref_bands(2)
-       PAR_IND_local(ib,ibp)=PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1)-1)
+       PAR_IND_local(ib,ibp)=PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1) )
      enddo
    enddo
    !
@@ -112,11 +114,11 @@ subroutine V_real_space_to_H(ik,i_sp_pol,H,WF,V_kind,V,Vxc)
      i_spin=i_sp_pol+i_spinor-1
      do j_spinor=1,n_spinor
        j_spin=i_sp_pol+j_spinor-1
-       c_ws(:,i_spinor)=c_ws(:,i_spinor)+V_mat(:,i_spin,j_spin)*WF%c(:,j_spinor,i_wfp)
+       c_ws(:,i_spinor)=c_ws(:,i_spinor)+V_mat(:,i_spin,j_spin)*WF%r(:,j_spinor,i_wfp)
      enddo
    enddo
    !
-   H(ib,ibp) = H(ib,ibp)+Vstar_dot_V_omp(fft_size*n_spinor,WF%c(:,:,i_wf),c_ws(:,:))
+   H(ib,ibp) = H(ib,ibp)+Vstar_dot_V_omp(fft_size*n_spinor,cmplx(WF%r(:,:,i_wf),kind=SP),c_ws(:,:))
    !
    if(ib==ibp) H(ib,ib) =real(H(ib,ib),SP)
    if(ib/=ibp) H(ibp,ib)=conjg(H(ib,ibp))
diff --git a/src/hamiltonian/Vgrad_real_space_to_H.F b/src/hamiltonian/Vgrad_real_space_to_H.F
index b125dd026c..b072375bcc 100644
--- a/src/hamiltonian/Vgrad_real_space_to_H.F
+++ b/src/hamiltonian/Vgrad_real_space_to_H.F
@@ -43,7 +43,7 @@ subroutine Vgrad_real_space_to_H(ik,H,WF,V_grad)
  do ib=H_ref_bands(1),H_ref_bands(2)
    do ibp=ib,H_ref_bands(2)
    !
-   if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1)-1 )) cycle
+   if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1) )) cycle
    !
    ! <i_wf|V|ip_wf>
    !
@@ -66,7 +66,7 @@ subroutine Vgrad_real_space_to_H(ik,H,WF,V_grad)
    enddo
    !
    i_wf = WF%index(ib,ik,1)
-   H(ib,ibp) = 0.25_SP*Vstar_dot_V(fft_size*n_spinor,WF%c(:,:,i_wf),c_ws(:,:))
+   H(ib,ibp) = 0.25_SP*Vstar_dot_V(fft_size*n_spinor,cmplx(WF%r(:,:,i_wf),kind=SP),c_ws(:,:))
    !
    if(ib==ibp) H(ib,ib) = real(H(ib,ib ),SP)
    if(ib/=ibp) H(ibp,ib)=conjg(H(ib,ibp)   )
diff --git a/src/hamiltonian/WF_and_dipole_dimensions.F b/src/hamiltonian/WF_and_dipole_dimensions.F
index 6189135ca2..b4b66f0b6e 100644
--- a/src/hamiltonian/WF_and_dipole_dimensions.F
+++ b/src/hamiltonian/WF_and_dipole_dimensions.F
@@ -13,7 +13,7 @@ subroutine WF_and_dipole_dimensions(X,Xk)
  use X_m,          ONLY:X_t
  use wave_func,    ONLY:wf_ng
  use QP_m,         ONLY:QP_ng_Sx,QP_ng_SH,QP_ng_Vxc,QP_n_G_bands,COHSEX_use_empties
- use R_lattice,    ONLY:qindx_S,G_m_G,bz_samp,qindx_S_max_Go
+ use R_lattice,    ONLY:qindx_S,G_m_G_maxval,bz_samp,qindx_S_max_Go
  use interfaces,   ONLY:eval_G_minus_G
  use hamiltonian,  ONLY:H_ref_bands,WF_Go_indx,WF_G_max,l_sc_XC_is_local
 #if defined _RT || defined _NL
@@ -61,7 +61,7 @@ subroutine WF_and_dipole_dimensions(X,Xk)
      !
      X(2)%ng=eval_G_minus_G(X(2)%ng,0)
      !
-     WF_G_max=max(QP_ng_Sx,maxval(G_m_G))
+     WF_G_max=max(QP_ng_Sx,G_m_G_maxval)
      if (l_real_time.and..not.(l_sc_exx.or.l_sc_srpa)) WF_G_max=wf_ng
      !
      ! [07/07/2016] Modified these two lines to avoid the annoying QP_n_G_bands in the input.
@@ -80,12 +80,19 @@ subroutine WF_and_dipole_dimensions(X,Xk)
    !
  endif
  !
- ! The following lines should be unified moving eval_HARTREE and eval_DFT 
- ! inside the hamoltonian module and using them also with yambo_sc
+ ! Instead of setting a WF_G_max, it would be better to directly load the WFs with the proper cutoff 
  !
 #if defined _RT || defined _NL
+ !
+ ! If I'm running TD-HARTREE with lower cutoff, I likely to not put the Vxc cutoff in input
+ ! The DFT subroutines are called just to evaluate the equilibrium term with QP_ng_Vxc at the defaul (=mximum) 
+ ! since WF_load is done with QP_ng_SH this makes the code crash. This prevents the code from crashing
+ ! 
+ if (eval_HARTREE.and..not.eval_DFT) QP_ng_Vxc=QP_ng_SH
+ !
  if (eval_HARTREE.or.eval_energy.or.l_velocity_gauge_corr.or..not.l_RT_is_WFfree) WF_G_max=max(QP_ng_SH, WF_G_max)
  if (eval_DFT    .or.eval_energy.or.l_velocity_gauge_corr) WF_G_max=max(QP_ng_Vxc,WF_G_max)
+ !
 #endif
  !
 #if defined _SC
diff --git a/src/hamiltonian/XC_potentials.F b/src/hamiltonian/XC_potentials.F
index 772f133d07..9cca704b3c 100644
--- a/src/hamiltonian/XC_potentials.F
+++ b/src/hamiltonian/XC_potentials.F
@@ -16,7 +16,7 @@ subroutine XC_potentials(POTENTIALS_string)
  use xc_functionals, ONLY:XC_LDA_X,XC_LDA_C_PZ,XC_EXCHANGE_CORRELATION,XC_EXCHANGE,&
 &                         GS_xc_KIND,GS_xc_FUNCTIONAL
  use collision_ext,  ONLY:COLLISIONS_have_HARTREE,COLLISIONS_HXC_local,COLLISIONS_HXC_MB,COLLISIONS_HXC_use_TDDFT,&
-&                         COLLISIONS_CV_only
+&                         COLLISIONS_CV_only,COLLISIONS_from_BSE
  use electrons,      ONLY:n_full_bands,n_met_bands
  use stderr,         ONLY:STRING_split,STRING_same
  !
@@ -55,6 +55,11 @@ subroutine XC_potentials(POTENTIALS_string)
      if (.not.index(POTENTIALS_string,'CVONLY')>0) POTENTIALS_string=trim(POTENTIALS_string)//" CVONLY"
    endif
    !
+   if (STRING_same(trim(potentials(i_c)),'KBSE')) then
+    COLLISIONS_from_BSE=.true.
+    if (.not.index(POTENTIALS_string,'KBSE')>0) POTENTIALS_string=trim(POTENTIALS_string)//" KBSE"
+  endif
+  !
    if (STRING_same(trim(potentials(i_c)),'IP')) then
      ! This means Hartree and V_xc are frozed: IP=KS-IP
      l_sc_ip=.true.
@@ -159,6 +164,8 @@ subroutine XC_potentials(POTENTIALS_string)
      call warning("NULL SC potential! ")
    else if (STRING_same(trim(potentials(i_c)),'CVONLY')) then
      call warning("Only CV components included ")
+   else if (STRING_same(trim(potentials(i_c)),'KBSE')) then
+     call warning("Loading BSE kernel instead of computing collisions ")
    else
      call error("Unknown SC potential! ")
    endif
diff --git a/src/interface/.objects b/src/interface/.objects
index 51c0b18547..8d5ea6f74e 100644
--- a/src/interface/.objects
+++ b/src/interface/.objects
@@ -7,5 +7,5 @@ RT_SC_objects= INIT_read_command_line_potentials.o
 #endif
 objs = INIT_C_driver_runlevels.o INIT_load.o INIT.o INIT_read_command_line.o INIT_check_databases.o INIT_activate.o INIT_read_command_line_corr_scatt.o \
        $(RT_SC_objects) \
-       INIT_QP.o INIT_report_and_log_files.o INIT_input_file.o \
+       INIT_QP.o INIT_report_and_log_files.o INIT_input_file.o INIT_fields.o \
        INIT_q_points.o INIT_barriers.o INIT_QP_ctl_load.o INIT_QP_ctl_switch.o $(RT_objects)
diff --git a/src/interface/DOUBLE_project.dep b/src/interface/DOUBLE_project.dep
index de74fe71b9..d5653f7beb 100644
--- a/src/interface/DOUBLE_project.dep
+++ b/src/interface/DOUBLE_project.dep
@@ -9,6 +9,7 @@
  INIT_activate.o
  INIT_barriers.o
  INIT_check_databases.o
+ INIT_fields.o
  INIT_input_file.o
  INIT_load.o
  INIT_q_points.o
diff --git a/src/interface/INIT.F b/src/interface/INIT.F
index cb0631e264..3c097eb435 100644
--- a/src/interface/INIT.F
+++ b/src/interface/INIT.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function INIT(en,q,k,X,Xw,Dip,instr,lnstr,CLOSE_Gs,FINALIZE)
  !
  ! INIT =-2  : Unsupported Runlevel(s) combination
@@ -15,9 +19,10 @@ integer function INIT(en,q,k,X,Xw,Dip,instr,lnstr,CLOSE_Gs,FINALIZE)
  !
  use drivers,        ONLY:infile_editing,l_em1s,l_acfdt,l_HF_and_locXC,l_col_cut,l_alda_fxc,l_bs_fxc,l_optics,l_bse,l_bss,&
 &                         l_chi,l_cohsex,l_life,l_rpa_IP,l_td_hartree,l_tddft,list_dbs,l_td_hf,l_setup,l_sc_run,l_rim,l_rim_w,&
-&                         l_real_time,l_ppa,l_lrc_fxc,l_gw0,l_elel_corr,l_elel_scatt,l_elph_corr,l_td_sex,&
+&                         l_real_time,l_ppa,l_lrc_fxc,l_gw0,l_elel_corr,l_elel_scatt,l_elph_corr,l_td_sex,l_EXCPH_gkkp,&
 &                         l_elph_Hamiltonian,l_elph_scatt,l_elphoton_corr,l_elphoton_scatt,l_em1d,l_eval_collisions,&
-&                         l_sc_magnetic,l_sc_electric,l_dipoles,l_pf_fxc,l_nl_optics,l_mpa,l_phel_corr,l_X,l_screen
+&                         l_sc_magnetic,l_sc_electric,l_dipoles,l_pf_fxc,l_nl_optics,l_mpa,l_phel_corr,l_X,l_screen,&
+&                         l_EXCPH_optics,l_EXCPH_lifetime
 #if defined _SCALAPACK
  use drivers,        ONLY:l_slk_test
 #endif
@@ -60,8 +65,9 @@ integer function INIT(en,q,k,X,Xw,Dip,instr,lnstr,CLOSE_Gs,FINALIZE)
  use com,            ONLY:of_yaml_IDs
  use yaml_output,    ONLY:yaml_close_stream
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)    ::en
  type(bz_samp)   ::q,k
@@ -351,6 +357,9 @@ subroutine LOCAL_from_runlevels_to_logicals()
        l_elph_Hamiltonian=runlevel_is_on('ElPhHam')
        l_elph_corr=runlevel_is_on('el_ph_corr')
        l_phel_corr=runlevel_is_on('ph_el_corr')
+       l_EXCPH_optics    =runlevel_is_on('ExcPhOptics')
+       l_EXCPH_lifetime  =runlevel_is_on('ExcPhLifeT')
+       l_EXCPH_gkkp      =runlevel_is_on('ExcGkkp')
 #endif
 #if defined  _QED
        l_elphoton_corr=runlevel_is_on('el_photon_corr')
@@ -410,6 +419,12 @@ subroutine LOCAL_setup_before_input_IO()
      endif
 #if defined _ELPH
      elph_nQ_used=elph_nQ
+     if(l_EXCPH_optics.or.l_EXCPH_lifetime) then
+       l_EXCPH_gkkp=.true.
+       call initactivate(1,'ExcGkkp') 
+     endif
+     if(l_EXCPH_optics)                     call initactivate(1,'ExcPhOptics') 
+     if(l_EXCPH_lifetime)                   call initactivate(1,'ExcPhLifeT') 
 #endif
 #if defined _PHEL
      PH_Q=(/1,elph_nQ/)
diff --git a/src/interface/INIT_DephMatrix.F b/src/interface/INIT_DephMatrix.F
index 1c5e804982..5f8d5e7c24 100644
--- a/src/interface/INIT_DephMatrix.F
+++ b/src/interface/INIT_DephMatrix.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INIT_DephMatrix()
  !
  ! Dephasing Matrix -> input file
@@ -12,8 +16,9 @@ subroutine INIT_DephMatrix()
  use pars,           ONLY:SP
  use parser_m,       ONLY:PARSER_array
  use real_time,      ONLY:RT_INPUT_Deph_limits
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
diff --git a/src/interface/INIT_QP.F b/src/interface/INIT_QP.F
index 3bf4b4ed76..38feedd268 100644
--- a/src/interface/INIT_QP.F
+++ b/src/interface/INIT_QP.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INIT_QP(comment)
  !
  ! QP_state -> input file
@@ -13,8 +17,9 @@ subroutine INIT_QP(comment)
  use QP_m,      ONLY:QP_nk,QP_nb,QP_state
  use parser_m,  ONLY:PARSER_array
  use it_tools,  ONLY:check_verbosity
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*) ::comment
  ! 
diff --git a/src/interface/INIT_activate.F b/src/interface/INIT_activate.F
index a9c046beb0..d9baeb2131 100644
--- a/src/interface/INIT_activate.F
+++ b/src/interface/INIT_activate.F
@@ -9,13 +9,14 @@ subroutine INIT_activate()
  !
  use BS_solvers,     ONLY:BSS_mode
  use QP_m,           ONLY:QP_solver,COHSEX_use_empties
- use stderr,         ONLY:intc,STRING_match
+ use stderr,         ONLY:STRING_match
  use global_XC,      ONLY:WF_exx_fraction,WF_exx_screening
  use drivers,        ONLY:l_em1s,l_acfdt,l_HF_and_locXC,l_col_cut,l_alda_fxc,l_bs_fxc,l_optics,l_bse,l_bss,&
 &                         l_chi,l_cohsex,l_life,l_rpa_IP,l_td_hf,l_setup,l_sc_run,l_rim,l_rim_w,&
 &                         l_real_time,l_ppa,l_mpa,l_lrc_fxc,l_gw0,l_elel_corr,l_elel_scatt,l_elph_corr,l_td_sex,&
 &                         l_elph_Hamiltonian,l_elph_scatt,l_elphoton_corr,l_elphoton_scatt,l_em1d,l_eval_collisions,&
-&                         l_sc_magnetic,l_sc_electric,l_dipoles,l_pf_fxc,l_nl_optics,l_phel_corr,l_nl_p_and_p,l_X
+&                         l_sc_magnetic,l_sc_electric,l_dipoles,l_pf_fxc,l_nl_optics,l_phel_corr,l_nl_p_and_p, &
+&                         l_EXCPH_gkkp,l_EXCPH_optics,l_EXCPH_lifetime,l_X
 #if defined _SC || defined _RT
  use hamiltonian,    ONLY:H_potential
 #endif
@@ -42,7 +43,7 @@ subroutine INIT_activate()
  !
  !Common
  !
- call initactivate(1,'StdoHash Nelectro ElecTemp BoseTemp OccTresh NLogCPUs DBsIOoff DBsFRAGpm EvalMagn MEM_tresh DegFix')
+ call initactivate(1,'StdoHash Nelectro NelCond ElecTemp BoseTemp OccTresh NLogCPUs DBsIOoff DBsFRAGpm EvalMagn MEM_tresh DegFix')
  !
  !FFT
  !
@@ -96,6 +97,10 @@ subroutine INIT_activate()
    !call CPU_activate("SLK")
  endif
  !
+#if defined _PAR_IO
+ call CPU_activate("BZINDX")
+#endif
+ !
 #if defined _SC
  if (l_eval_collisions .or. l_sc_run) then
    call CPU_activate("SE")
@@ -151,7 +156,7 @@ subroutine INIT_activate()
  !
  !Col CUTOFF 
  !
- if (l_col_cut) call initactivate(1,'CUTGeo CUTBox CUTRadius CUTCylLen CUTwsGvec CUTCol_test')
+ if (l_col_cut) call initactivate(1,'CUTGeo CUTBox CUTRadius CUTwsGvec CUTCol_test EpsEnv')
  !
  !XX
  !
@@ -227,8 +232,8 @@ subroutine INIT_activate()
    call initactivate(1,'BSENGexx ALLGexx')
    call initactivate(1,'Qdirection QShiftOrder')
    !
-   if(l_td_hf.or.l_td_sex) call initactivate(1,'BSENGBlk')
-   if(l_td_sex)            call initactivate(1,'WehDiag WehCpl')
+   if(l_td_hf.or.l_td_sex) call initactivate(1,'BSENGBlk WehCpl')
+   if(l_td_sex)            call initactivate(1,'WehDiag')
    if(l_alda_fxc) then
      call initactivate(1,'BSENGfxc FxcLibxc FxcMode UseNLCC')
      if( (WF_exx_fraction>0.) .or. (WF_exx_screening>0.) ) call initactivate(1,'BSENGBlk WehDiag WehCpl')
@@ -261,11 +266,11 @@ subroutine INIT_activate()
    call initactivate(1,'EvPolarization FrSndOrd')
 #endif
    !
-   call initactivate(1,'PL_weights')
+   call initactivate(1,'PL_weights EXCTemp')
    !
    call initactivate(1,'DrudeWBS Reflectivity')
    call initactivate(1,'BoseCut ShiftedPaths')
-   call initactivate(1,'BSEQptR BSEBands BSKCut BSKIOmode BSEEhEny BSehWind NoDiagSC')
+   call initactivate(1,'BSEQptR BSEBands BSEFrozenBands BSKCut BSKIOmode BSEEhEny BSehWind NoDiagSC')
    !
 #if defined _RT
    call initactivate(1,'RTOccMode ForceEqTrans')
@@ -289,7 +294,7 @@ subroutine INIT_activate()
    !
    if (index(BSS_mode,'i')/=0)  call initactivate(1,'BSSInvMode BSEPSInvTrs BSSInvPFratio BSSInvKdiag')
    if (index(BSS_mode,'d')/=0.or.index(BSS_mode,'s')/=0.or.index(BSS_mode,'o')/=0)  call initactivate(1,'WRbsWF BSSPertWidth')
-   if (index(BSS_mode,'d')/=0)  call initactivate(1,'BSSydiaLib BSSNEig BSSEnRange BSSFirstEig')
+   if (index(BSS_mode,'d')/=0)  call initactivate(1,'BSSldiaLib BSSNEig BSSEnRange BSSFirstEig')
    if (index(BSS_mode,'h')/=0)  call initactivate(1,'BSHayTrs BSHayTer BSHayItrIO BSHayItrMAX')
    if (index(BSS_mode,'s')/=0)  then
      call initactivate(1,'BSSNEig BSSEnTarget BSSSlepcApproach BSSSlepcPrecondition BSSSlepcExtraction')
@@ -370,7 +375,7 @@ subroutine INIT_activate()
 #if defined _QED
    if (l_elphoton_corr) then
      if (l_gw0) then
-       call initactivate(1,'FFTGvecs RandQpts QEDRLvcs GbndRnge GDamping dScStep DysSolver') 
+       call initactivate(1,'FFTGvecs RandQptsSphe QEDRLvcs GbndRnge GDamping dScStep DysSolver') 
        if (trim(QP_solver)=="g") then
          call initactivate(1,'GEnSteps GEnRnge GEnMode GDmRnge GreenFTresh GreenF2QP') 
        else
@@ -382,21 +387,25 @@ subroutine INIT_activate()
      endif
    endif
 #endif
+   !
+   if (l_HF_and_locXC.or.l_elel_corr) then
+     call INIT_QP_ctl_switch('G')
+#if defined _RT
+     call INIT_RT_ctl_switch('G')
+#endif
+   endif
    !
    if (l_elel_corr) then
      !
      call INIT_QP_ctl_switch('X')
-     call INIT_QP_ctl_switch('G')
-     !
 #if defined _RT
      call INIT_RT_ctl_switch('X')
-     call INIT_RT_ctl_switch('G')
 #endif
      !
      call initactivate(1,'BoseCut ShiftedPaths')
      !
      if (l_gw0) then
-       if (.not.l_cohsex.or.COHSEX_use_empties) call initactivate(1,'GbndRnge') 
+       if ((.not.l_cohsex).or.COHSEX_use_empties) call initactivate(1,'GbndRnge') 
        if (.not.l_cohsex.and.trim(QP_solver)/='g') call initactivate(1,'GDamping') 
        if (.not.l_cohsex) call initactivate(1,'dScStep') 
        if (.not.l_elphoton_corr) then
@@ -439,7 +448,7 @@ subroutine INIT_activate()
    if (l_elph_corr) then
      call INIT_QP_ctl_switch('G')
      call initactivate(1,'DysSolver')
-     call initactivate(1,'GphBRnge FANdEtresh DWdEtresh ElPhModes GDamping dScStep ExtendOut ElPhRndNq RandQpts')
+     call initactivate(1,'GphBRnge FANdEtresh DWdEtresh ElPhModes GDamping dScStep ExtendOut ElPhRndNq RandQptsSphe')
      call initactivate(1,'GkkpDB WRgFsq NewtDchk OnMassShell')
    endif
    if (trim(QP_solver)=="g".and.l_elph_corr) then
@@ -451,7 +460,7 @@ subroutine INIT_activate()
    !
  endif
  !
- if(l_alda_fxc.and.any((/l_em1s,l_em1d,l_acfdt,l_ppa,l_mpa,l_cohsex,l_gw0/)) ) call initactivate(1,'FxcGRLc')
+ if(l_alda_fxc.and.any((/l_em1s,l_em1d,l_acfdt,l_ppa,l_mpa,l_cohsex,l_gw0/)) ) call initactivate(1,'FxcGRLc RhoMapThresh')
  if( l_lrc_fxc.and.any((/l_em1s,l_em1d,l_acfdt,l_ppa,l_mpa,l_cohsex,l_gw0/)) ) call initactivate(1,'LRC_alpha LRC_beta')
  !
  ! El-Ph: Frohlich Hamiltonian
@@ -462,13 +471,24 @@ subroutine INIT_activate()
    call initactivate(1,'ElPhHBRnge ElPhModes ElPhHKpt GDamping REStresh')
  endif
  !
+ ! Exciton-phonon Self-energy
+ !
+ if (l_EXCPH_gkkp)     call initactivate(1,'ELPhExcStates ELPhExcSum LoutPath FANdEtresh EkpqShFact LDamping')
+ if (l_EXCPH_gkkp)     call initactivate(1,'ConstElph AbsElph AbsExc HoleContributionOnly ElectronContributionOnly MaxKShells')
+ if (l_EXCPH_lifetime) call initactivate(1,'BoseTemp ElPhModes EXCTemp AlphaQ ExcPhOffDiago')
+ if (l_EXCPH_lifetime) call initactivate(1,'NoMatrxEl KeepDeg ')
+ if (l_EXCPH_optics)  then
+   call initactivate(1,'EXCTemp AlphaQ ElPhModes PLqres DbGdOnlyPh DbGdWEIGHTs NoMatrxEl EnRngeXd DmRngeXd ETStpsXd')
+   call initactivate(1,'EXCPHdEtresh DbGdOnlyPh DbGdWEIGHTs NoMatrxEl EnRngeXd DmRngeXd ETStpsXd')
+ endif
+ !
 #endif
  !
 #if defined _SC || defined _RT
  !
  ! Collisions
  !
- if (l_eval_collisions) call initactivate( 0,'COLLBands')
+ if (l_eval_collisions) call initactivate( 0,'COLLBands COLLHoleBands COLLElecBands COLLBseMem')
  !
  if ( (l_eval_collisions.or.l_real_time).and.l_elel_scatt ) call initactivate(1,'PlasmaPerc')
  !
@@ -535,16 +555,16 @@ subroutine INIT_activate()
    call INIT_RT_ctl_switch('G')
    !
    call initactivate(1,'RTskipImposeN RTeeImposeE InducedField')
-   call initactivate(1,'Gauge VelGaugeCorr VelGaugeDiam PfromJ RTUpdateSOC RTUpdateE SaveGhistory')
+   call initactivate(1,'Gauge VelGaugeCorr VelGaugeNoDiam PfromJ RTgradk RTUpdateSOC RTUpdateE SaveGhistory')
    call initactivate(1,'RTEqScatt RTImpForMet RTzeroTempRef RTskipPHabs RTEvalEnergy RTEvalEntropy')
    call initactivate(1,'dTupdateTime dTupdateTimeSet dTupdateJump dTupdateTresh dT_MAX dT_SET')
    if (l_elel_scatt .or. l_elph_scatt .or. l_elphoton_scatt) then
      call initactivate(1,'LifeExtrapolation LifeExtrapSteps LifeFitTemp RelaxTimeApprox RTAtemp RTAchem LifeFitTemp')
    endif
-   if (l_elph_scatt) call initactivate(1,'MemTresh RandQpts RT_T_evol ElPhModes UseDebyeE')
+   if (l_elph_scatt) call initactivate(1,'MemTresh RandQptsSphe RT_T_evol ElPhModes UseDebyeE')
    if (l_elel_scatt) call initactivate( 1,'EERimPerc')
    !
-   call initactivate(1,'RTBands Integrator GrKind TwoAlph RTehEny ScattTresh')
+   call initactivate(1,'RTBands RTHoleBands RTElecBands Integrator GrKind TwoAlph RTehEny ScattTresh')
    call initactivate(1,'RADLifeTime RADmagnific PhLifeTime DephCVonly DephEThresh DephTRange')
    call initactivate(1,'RTstep NEsteps NETime DipoleEtresh SPINprojected')
    call initactivate(1,'IOtime IOCachetime')
@@ -562,18 +582,20 @@ subroutine INIT_activate()
  !
 #if defined _NL
  if (l_nl_optics) then
-   call initactivate(1,'FFTGvecs NLBands NLverbosity NLstep NLtime NLintegrator NLCorrelation NLLrcAlpha')
-   call initactivate(1,'TestOSCLL UseSymm')
+   call initactivate(1,'FFTGvecs NLBands NLHoleBands NLElecBands NLverbosity NLstep NLtime NLintegrator NLCorrelation NLLrcAlpha')
+   call initactivate(1,'TestOSCLL NoUseSymm VelGaugeNoDiam')
    if(.not.l_nl_p_and_p) call initactivate(1,'NLEnRange NLEnSteps NLrotaxis NLAngSteps NLMaxAng')
+   if(     l_nl_p_and_p) call initactivate(1,'IOtime IOCachetime SaveVbhistory FLOrder')
    call initactivate(1,'NLDamping RADLifeTime UseDipoles FrSndOrd NoComprCOLL COLLCut EvalCurrent InducedField') 
-   call initactivate(1,'FrPolPerdic TestOSCLL Gauge RADLifeTime HARRLvcs EXXRLvcs CORRLvcs') 
+   call initactivate(1,'TestOSCLL Gauge RADLifeTime HARRLvcs EXXRLvcs CORRLvcs') 
    call init_QP_ctl_switch('G')
+   !
+   do i1=1,n_ext_fields
+     call Afield_activate(i1)
+   enddo
+   !
  endif
  !
- do i1=1,n_ext_fields
-   call Afield_activate(i1)
- enddo
- !
 #endif
  !
 #if defined _SC
@@ -595,26 +617,6 @@ subroutine INIT_activate()
  !
  contains
    !
-#if defined _RT || defined _NL
-   !
-   subroutine Afield_activate(i1)
-     integer     ::i1
-     character(6)::field
-     field='Field'//trim(intc(i1))
-     call initactivate(1,field//'_Freq')
-     call initactivate(1,field//'_NFreqs')
-     call initactivate(1,field//'_DFreq')
-     call initactivate(1,field//'_Int')
-     call initactivate(1,field//'_Width')
-     call initactivate(1,field//'_FWHM')
-     call initactivate(1,field//'_kind')
-     call initactivate(1,field//'_pol')
-     call initactivate(1,field//'_Dir')
-     call initactivate(1,field//'_Dir_circ')
-     call initactivate(1,field//'_Tstart')
-   end subroutine
-   !
-#endif
    !
    subroutine CPU_activate(what)
      character(*) :: what 
diff --git a/src/interface/INIT_barriers.F b/src/interface/INIT_barriers.F
index 703e444bac..a72ad694a2 100644
--- a/src/interface/INIT_barriers.F
+++ b/src/interface/INIT_barriers.F
@@ -115,7 +115,7 @@ subroutine INIT_barriers()
    goto 1
  endif
  if (gw0_mpa) then
-   on_runlevels='dyson gw0 mpa em1d el_el_corr el_ph_corr HF_and_locXC'
+   on_runlevels='dyson gw0 mpa em1d el_el_corr el_ph_corr HF_and_locXC rim_w'
    call switch_off_runlevel('all',except=trim(on_runlevels)//' '//trim(always_on_runlevels))
    goto 1
  endif
diff --git a/src/interface/INIT_check_databases.F b/src/interface/INIT_check_databases.F
index 759b1dfce1..ca4ab98a7c 100644
--- a/src/interface/INIT_check_databases.F
+++ b/src/interface/INIT_check_databases.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<collisions.h>
+#include<y_memory.h>
+!
 subroutine INIT_check_databases(X,Xbsk,qp,Xxcw,Xw,q,k,en,en_sc,Dip,lnstr,instr,INIT,&
 &                               io_X_err,io_BS_err,io_BS_Fxc_err,io_QINDX_err,&
 &                               io_ELPH_err,io_SC_E_err)
@@ -30,11 +35,13 @@ subroutine INIT_check_databases(X,Xbsk,qp,Xxcw,Xw,q,k,en,en_sc,Dip,lnstr,instr,I
 # endif
 #endif
  !
- ! Define io_COLLs_header to handle either serial or parallel I/O
+ ! Inside collisions.h, io_COLLs_header is defined as either
+ ! - io_COLLISIONS        (serial I/O) or
+ ! - io_COLLISIONS_header (parallel I/O case)
  !
-#include<collisions.h>
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(X_t)        :: X(5),Xbsk
  type(QP_t)       :: qp
@@ -77,7 +84,7 @@ subroutine INIT_check_databases(X,Xbsk,qp,Xxcw,Xw,q,k,en,en_sc,Dip,lnstr,instr,I
  integer, external :: io_SC_components,io_RT_components
 #endif
 #if defined _ELPH 
- integer, external :: io_ELPH
+ integer, external :: io_ELPH,io_EXCPH_gkkp
 #endif 
  !
  ! BASICAL DATABASES
@@ -120,7 +127,7 @@ subroutine INIT_check_databases(X,Xbsk,qp,Xxcw,Xw,q,k,en,en_sc,Dip,lnstr,instr,I
  ! kindx
  !
  call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1/),MODE=DUMP,ID=io_ID)
- io_QINDX_err=io_QINDX(k,q,io_ID) 
+ io_QINDX_err=io_QINDX(k,q,io_ID,'minus_q') 
  if (io_QINDX_err==0) call initactivate(-1,'IkSigLim IkXLim')
  !
  ! If the GROT/KINDX DBs are not present, reset to setup run
@@ -269,6 +276,8 @@ subroutine INIT_check_databases(X,Xbsk,qp,Xxcw,Xw,q,k,en,en_sc,Dip,lnstr,instr,I
  io_ELPH_err(1)=io_ELPH(io_ID,'gkkp')
  call io_control(ACTION=OP_RD_CL,COM=NONE,MODE=DUMP,SEC=(/1/),ID=io_ID)
  io_ELPH_err(2)=io_ELPH(io_ID,'gkkp_expanded')
+ call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1/),MODE=DUMP,ID=io_ID)
+ io_err=io_EXCPH_gkkp(io_ID,'gkkp') 
 #endif
  !
  ! RT 
diff --git a/src/interface/INIT_fields.F b/src/interface/INIT_fields.F
new file mode 100644
index 0000000000..9fa24580ea
--- /dev/null
+++ b/src/interface/INIT_fields.F
@@ -0,0 +1,119 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM DS
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+#if defined _RT || defined _NL
+!
+subroutine Afield_activate(i1)
+     use stderr,         ONLY:intc
+     use drivers,        ONLY:l_nl_p_and_p
+     !
+     implicit none
+     !
+     integer, intent(in)     ::i1
+     !
+     character(6)::field
+     field='Field'//trim(intc(i1))
+     call initactivate(1,field//'_Freq')
+     call initactivate(1,field//'_Int')
+     call initactivate(1,field//'_Width')
+     call initactivate(1,field//'_Chirp')
+     call initactivate(1,field//'_FWHM')
+     call initactivate(1,field//'_kind')
+     call initactivate(1,field//'_pol')
+     call initactivate(1,field//'_Dir')
+     call initactivate(1,field//'_Dir_circ')
+     call initactivate(1,field//'_Tstart')
+end subroutine
+!
+!
+subroutine Afield(defs,i_field)
+   !
+   use it_tools,      ONLY:it
+   use pars,          ONLY:SP
+   use drivers,       ONLY:infile_editing
+   use C_driver,      ONLY:code_bin
+   use fields,        ONLY:Efield,n_ext_fields
+   use units,         ONLY:AU2KWCMm2
+   use parser_m,      ONLY:parser
+   use stderr,        ONLY:intc
+   use it_m,          ONLY:E_unit,I_unit,Time_unit,V_real_time, &
+   &                       initmode,initdefs,V_nl_optics
+   !
+   implicit none
+   !
+   integer, intent(in):: i_field
+   type(initdefs)     :: defs
+   !
+   ! Workspace
+   !
+   character(10)      :: name
+   logical            :: is_def
+   integer	      :: V_field
+   !
+   name='Field'//trim(intc(i_field))
+   !
+   V_field=0  ! no verbosity
+#if defined _NL
+   if(i_field==3)  V_field=V_nl_optics
+#endif
+
+   !
+   call it(defs,trim(name)//'_Freq',            '[RT '//trim(name)//'] Frequency',&
+&          Efield(i_field)%frequency,unit=E_unit,verb_level=V_field)
+   call it(defs,trim(name)//'_Int',             '[RT '//trim(name)//'] Intensity',&
+&          Efield(i_field)%intensity,unit=I_unit,verb_level=V_field)
+   call it(defs,trim(name)//'_Width',           '[RT '//trim(name)//'] Width',&
+&          Efield(i_field)%width,unit=Time_unit(1),verb_level=V_field)
+   call it(defs,trim(name)//'_Chirp',           '[RT '//trim(name)//'] Width',&
+&          Efield(i_field)%chirp,unit=Time_unit(1),verb_level=V_real_time)
+   call it(defs,trim(name)//'_FWHM',            '[RT '//trim(name)//'] Full Width at Half Maximum (overwrite width if set)',&
+&          Efield(i_field)%FWHM,unit=Time_unit(1),verb_level=V_real_time)
+   call it(defs,trim(name)//'_kind',            '[RT '//trim(name)//'] Kind(SIN|COS|RES|ANTIRES|GAUSS|DELTA|QSSIN)',&
+&          Efield(i_field)%ef_name,verb_level=V_field)
+   call it(defs,trim(name)//'_pol',             '[RT '//trim(name)//'] Pol(linear|circular)',&
+&          Efield(i_field)%ef_pol,verb_level=V_field)
+   call it(defs,trim(name)//'_Dir',             '[RT '//trim(name)//'] Versor',&
+&          Efield(i_field)%versor,verb_level=V_field)
+   call it(defs,trim(name)//'_Dir_circ',        '[RT '//trim(name)//'] Versor_circ',&
+&          Efield(i_field)%versor_circ,verb_level=V_real_time)
+#if defined _NL
+   call it(defs,trim(name)//'_Tstart',          '[RT '//trim(name)//'] Initial Time',&
+&          Efield(i_field)%t_initial,unit=Time_unit(1),verb_level=V_real_time)
+#else
+   call it(defs,trim(name)//'_Tstart',          '[RT '//trim(name)//'] Initial Time',&
+&          Efield(i_field)%t_initial,unit=Time_unit(1),verb_level=V_field)
+#endif
+   !
+   if (initmode==4.and..not.infile_editing) then
+     !
+     call parser(trim(name)//'_FWHM',is_def)
+     if (     is_def) Efield(i_field)%width=Efield(i_field)%FWHM/(2._SP*sqrt(2._SP*log(2._SP)))
+     if (.not.is_def) Efield(i_field)%FWHM=Efield(i_field)%width*(2._SP*sqrt(2._SP*log(2._SP)))
+     !
+     if (trim(Efield(i_field)%ef_name)/='none'.and.index(code_bin,'ypp')==0) n_ext_fields=n_ext_fields+1
+     !
+   endif
+   !
+end subroutine
+!
+#endif
diff --git a/src/interface/INIT_load.F b/src/interface/INIT_load.F
index c2fb198625..2a86f55157 100644
--- a/src/interface/INIT_load.F
+++ b/src/interface/INIT_load.F
@@ -9,7 +9,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  use drivers,       ONLY:infile_editing
  use pars,          ONLY:schlen,SP
- use electrons,     ONLY:levels,nel,filled_tresh
+ use electrons,     ONLY:levels,nel,filled_tresh,nel_cond
  use zeros,         ONLY:G_mod_zero
  use frequency,     ONLY:w_samp
  use it_tools,      ONLY:it
@@ -27,16 +27,17 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 &                        QP_G_er,QP_G_ir,QP_G_dr,QP_Sc_steps,QP_G_solver,&
 &                        QP_dSc_delta,QP_G_Zoom_treshold,GF_energy_range_mode
  use LIVE_t,        ONLY:nhash
- use wave_func,     ONLY:wf_ng
+ use wave_func,     ONLY:wf_ng,wf_ng_1st_BZ,rho_map_thresh,WF_load_mode
  use D_lattice,     ONLY:Tel,non_periodic_directions,molecule_position,Bose_Temp
+ use D_lattice,     ONLY:Boltz_Temp
  use R_lattice,     ONLY:ng_closed,QP_states_k,nXkibz,k_GRIDS_string,RIM_W_ng,rimw_type,&
-&                        bz_samp,RIM_ng,RIM_epsm1,RIM_id_epsm1_reference,&
-&                        RIM_n_rand_pts,cyl_ph_radius,box_length,cyl_length,cut_geometry,ws_cutoff
- use BS,            ONLY:BSE_mode,BSE_prop,BSK_mode,BS_eh_en,BS_eh_win,BS_q,BS_bands,&
+&                        bz_samp,RIM_ng,RIM_epsm1,eps_env,RIM_id_epsm1_reference,&
+&                        RIM_n_rand_pts,RIM_sphe_n_rand_pts,cyl_ph_radius,box_length,cut_geometry,ws_cutoff
+ use BS,            ONLY:BSE_mode,BSE_prop,BSK_mode,BS_eh_en,BS_eh_win,BS_q,BS_bands,BS_bands_frozen_ch,&
 &                        BS_n_g_W,BS_n_g_exch,BS_n_g_fxc,BSE_L_kind,BS_K_cutoff,BSK_IO_mode,BSE_dipole_geometry
  use BS_solvers,    ONLY:BSS_mode,Haydock_threshold,Haydock_iterIO,BSS_n_freqs,BSS_target_E,BSS_trange_E,&
-&                        BSS_dr,BSS_er,BSS_E_dir,BSS_Q_dir,BSS_damp_reference,BSS_inversion_mode,BSS_n_eig_Input,&
-&                        BSS_Wd,K_INV_EPS,K_INV_PL,BSS_n_eig,BSS_first_eig,Haydock_iterMAX,BSS_ydiago_solver
+&                        BSS_dr,BSS_er,BSS_E_dir,BSS_Q_dir,BSS_damp_reference,BSS_inversion_mode,&
+&                        BSS_Wd,K_INV_EPS,K_INV_PL,BSS_n_eig_Input,BSS_first_eig,Haydock_iterMAX,BSS_ldiago_solver
 #if defined _SLEPC && !defined _NL
  use BS_solvers,    ONLY:BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_tol,BSS_slepc_maxit,&
  &                       BSS_slepc_precondition,BSS_slepc_approach,BSS_slepc_mpd,BSS_slepc_matrix_format
@@ -51,8 +52,8 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  use QP_m,          ONLY:QP_QED_ng
 #endif
 #if defined _SC || defined _RT || defined _QED
- use hamiltonian,   ONLY:H_potential
- use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands
+ use hamiltonian,   ONLY:H_potential,H_ref_bands_frozen
+ use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands,COLL_bands_frozen,COLL_bands_frozen_ch
 #endif
  use QP_m,          ONLY:SC_E_threshold
 #if defined _SC
@@ -60,28 +61,28 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  use SC,            ONLY:SC_iterations,SC_up_W_iters,&
 &                        SC_rho_threshold,SC_cycle_mixing,&
 &                        SC_bands,SC_preconditioning,OEP_approx,&
-&                        SC_neq_kind,SC_mu,SC_last_coh_it
+&                        SC_neq_kind,SC_mu,SC_last_coh_it,&
+&                        SC_bands_frozen,SC_bands_frozen_ch
 #endif
 #if defined _RT
- use RT_control,    ONLY:CARR_RT_IO_t,OBS_RT_IO_t,OUTPUT_RT_IO_t,Gless_RESTART_RT_IO_t,     &
-&                        CACHE_OBS_INTERVAL_time_INPUT,RT_BSE_Occ_Mode,Gless_RT_IO_t
- use stderr,        ONLY:intc
+ use RT_control,    ONLY:CARR_RT_IO_t,OBS_RT_IO_t,OUTPUT_RT_IO_t,Gless_RESTART_RT_IO_t,Floquet_order,&
+&                        CACHE_OBS_INTERVAL_time_INPUT,RT_BSE_Occ_Mode,Gless_RT_IO_t,Vbands_RT_IO_t
  use RT_occupations,ONLY:RT_RTA_temp,RT_RTA_chem
  use RT_lifetimes,  ONLY:RT_life_extrap_times_INPUT
  use real_time,     ONLY:RT_step, Integrator_name, RAD_LifeTime, Phase_LifeTime, &
-&                        NE_tot_time,RT_bands,RT_deph_range,                     &
+&                        NE_tot_time,RT_bands,RT_bands_frozen_ch,RT_deph_range,  &
 &                        RT_ave_dephasing,RT_eh_en,RT_scatt_tresh,Gr_kind,       &
 &                        NE_MEM_treshold,NE_time_step_update_jump_INPUT,         &
 &                        two_alpha,DbGd_EE_percent,NE_time_step_update_jump,     &
 &                        NE_initial_time_step_update,NE_step_update_treshold,    &
-&                        RT_MAX_step,RAD_magnification,  &
+&                        RT_MAX_step,RAD_magnification,RT_bands_frozen,          &
 &                        RT_deph_deg_thresh,RT_step_manual_prefactor,NE_time_step_update_manual
- use fields,        ONLY:Efield,n_ext_fields_max,n_ext_fields
+ use fields,        ONLY:n_ext_fields_max,n_ext_fields
  use plasma,        ONLY:PLASMA_redux_percent
 #endif
 #if defined _NL
  use nl_optics,     ONLY:NL_bands,NL_damping,NL_correlation,NL_er,n_frequencies,NL_LRC_alpha, &
-&                        NL_verb_name,n_angles,NL_rot_axis,max_angle 
+&                        NL_verb_name,n_angles,NL_rot_axis,NL_bands_frozen,NL_bands_frozen_ch,max_angle
 #endif 
 #if defined _NL | defined _SC
  use fields,        ONLY:Efield
@@ -92,6 +93,9 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 #if defined _ELPH 
  use ELPH,          ONLY:QP_PH_n_G_bands,elph_nQ_used,elph_Ham_ik,elph_Ham_bands,elph_branches,&
 &                               RES_tresh,FAN_deltaE_treshold,DW_deltaE_treshold,E_kpq_sh_fact,gkkp_db
+ use EXCPH,         ONLY:EXCPH_states,EXCPH_sum,Lout_path,L_damping,EXC_q0,alphaQ, &
+&                        EXCPH_deltaE_treshold,EXCPH_kthresh
+ use D_lattice,     ONLY:Boltz_Temp
 #endif
 #if defined _OPENMP
  use openmp,        ONLY:n_threads_X,n_threads_SE,n_threads_SX,n_threads_RT,n_threads_DIP,n_threads_K,&
@@ -120,6 +124,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  ! Work Space
  !
+ integer  :: wf_ng_closed
 #if defined _RT
  integer  :: i_field
 #endif
@@ -162,6 +167,9 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 #endif
 #if defined _ELPH 
  call it('r',defs,'el_ph_corr',    '[R] Electron-Phonon Correlation')   
+ call it('r',defs,'ExcGkkp',       '[R][EXCPH] Exciton-Phonon Matrix Elelements')
+ call it('r',defs,'ExcPhOptics',   '[R][EXCPH] Exciton-Phonon Optics')
+ call it('r',defs,'ExcPhLifeT',    '[R][EXCPH] Exciton-Phonon Life-Times')
 #endif
 #if defined _PHEL 
  call it('r',defs,'ph_el_corr',    '[R] Phonon-Electron Correlation')   
@@ -179,16 +187,32 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call CPU_structure_load( )
 #endif
  !
- !================
- ! GPL VARIABLES
- !================ 
+ ! Cutoff on the density
+ ! ng_closed (cutoff on the density) should never be lower than wf_ng (cutoff on the WFs)
+ ! Indeed, the two cutofff should be correlated. However, it is not in the code
+ ! Here there is a simple check, just in case
+ call it(defs,'MaxGvecs','[INI] Max number of G-vectors planned to use',ng_closed,unit=G_unit,verb_level=V_RL)
+ if (ng_closed<wf_ng) then
+   wf_ng=ng_closed
+   wf_ng_1st_BZ=ng_closed
+ endif
+ !
+ ! Here the it interface closes the value of wf_ng when initmode==2
+ ! Such operation is pointless, because WFs are defined w.r.t. spheres centered at k
+ ! The lowering of the cutoff for the WFs should be done accordingly.
+ ! Moreover, if the cutoff is not lowered, the closing may increase wf_ng
+ ! The if below is coded to prevent such issue
+ wf_ng_closed=wf_ng
+ call it(defs,'FFTGvecs','[FFT] Plane-waves',wf_ng_closed,G_unit,verb_level=V_RL)
+ if (wf_ng_closed<wf_ng) then
+   wf_ng=wf_ng_closed
+   wf_ng_1st_BZ=wf_ng_closed
+ endif
  !
  ! Basics
  !
  call it(defs,'StdoHash','[IO] Live-timing Hashes',nhash,verb_level=V_io)
- call it(defs,'MaxGvecs','[INI] Max number of G-vectors planned to use',ng_closed,unit=G_unit,verb_level=V_RL)
  call it(defs,'Gthresh' ,'[INI] Accuracy for energy shell separation [-1. automatic]',G_mod_zero,verb_level=V_RL)
- call it(defs,'FFTGvecs','[FFT] Plane-waves',wf_ng,G_unit,verb_level=V_RL) 
  call it(defs,'NonPDirs','[X/BSS] Non periodic chartesian directions (X,Y,Z,XY...)',non_periodic_directions,verb_level=V_resp)
  call it(defs,'MolPos',  '[X/BSS] Molecule coord in supercell, 0.5 is the middle',molecule_position,verb_level=V_resp)
  call it(defs,'K_grids', '[KPT] Select the grids (X=response, S=sigma, C=collisions, B=bse) [default X S C B]',&
@@ -196,18 +220,24 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'IkSigLim','[KPT] QP K-points indices range',QP_states_k,verb_level=V_kpt)
  call it(defs,'IkXLim',  '[KPT] X grid last k-point index',nXkibz,verb_level=V_kpt)
  call it(defs,'Nelectro','Electrons number',nel,verb_level=V_general)
+ call it(defs,'NelCond' ,'Number of electrons in conduction (Quasi-Fermi distribution)',nel_cond,verb_level=V_general)
  call it(defs,'ElecTemp','Electronic Temperature',Tel,T_unit,verb_level=V_general)
  call it(defs,'OccTresh','Occupation treshold (metallic bands)',filled_tresh,verb_level=V_general)
+#if defined _ELPH 
+ call it(defs,'BoseTemp','Bosonic Temperature',Bose_Temp,T_unit)
+#else
  call it(defs,'BoseTemp','Bosonic Temperature',Bose_Temp,T_unit,verb_level=V_general)
- call it(defs,'BoseCut', '[BOSE] Finite T Bose function cutoff',bose_E_cut,verb_level=V_general)
+#endif
+ call it(defs,'EXCTemp', 'Excitonic Temperature (for luminescence spectra and EXCPH)',Boltz_Temp,T_unit)
+ call it(defs,'BoseCut', 'Finite T Bose function cutoff',bose_E_cut,verb_level=V_general)
 #if defined _ELPH 
  call it('f',defs,'BSEscatt', '[KPT] Compute extended k/q scatering',verb_level=V_kpt)
- call it(defs,'ElPhRndNq',  '[ELPH] Read random Q-points',elph_nQ_used,verb_level=V_ph) 
+call it(defs,'ElPhRndNq',  '[ELPH] Read random Q-points',elph_nQ_used,verb_level=V_ph) 
  call it(defs,'EkpqShFact', '[ELPH] E(k+q) Interpolation shell factor (used only with double-grid)',E_kpq_sh_fact,verb_level=V_ph)
 #endif
  call it('f',defs,'WFbuffIO','[IO] Wave-functions buffered I/O',verb_level=V_io) 
  call it('f',defs,'NoDiagSC','New setup for non-diagonal supercells',verb_level=V_general) 
- call it('f',defs,'DegFix','Force the code to impose the energy levels to respect their degeneracy') 
+ call it('f',defs,'DegFix','Force the code to impose the energy levels to respect their degeneracy',verb_level=V_general) 
  !
  ! Parallel Setup
  !
@@ -233,6 +263,8 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'DBsFRAGpm', &
 &             '[IO] Space-separated list of +DB to FRAG and -DB to NOT FRAG. DB=(DIP,X,W,HF,COLLS,K,BS,QINDX,RT,ELPH,SC,ALL)',&
 &             DBs_FRAG_control_string,verb_level=V_io)
+ call it(defs,"WF_load_mode",'[WF-IO] Default distribution mode ("all"/"on-the-fly")',&
+&            WF_load_mode,verb_level=V_io)
  !
  ! S_xc
  !
@@ -293,9 +325,11 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'BSEmod',  '[BSE] resonant/retarded/coupling',BSE_mode,case="a")
  call it(defs,'Lkind',   '[BSE,X] bar(default)/full/tilde',BSE_L_kind,verb_level=V_resp)
  call it(defs,'BSEBands','[BSK] Bands range',BS_bands)
+ call it(defs,'BSEFrozenBands','[BSK] Bands expluded from BSE construction',BS_bands_frozen_ch,verb_level=V_resp)
  call it(defs,'BSENGBlk','[BSK] Screened interaction block size [if -1 uses all the G-vectors of W(q,G,Gp)]',BS_n_g_W,G_unit)
  call it(defs,'BSENGexx','[BSK] Exchange components',BS_n_g_exch,G_unit)
  call it(defs,'BSENGfxc','[BSK] Fxc components', BS_n_g_fxc,G_unit,verb_level=V_resp)
+ call it(defs,'RhoMapThresh','[BSK] Density cutoff for <Fxc> integrals region',rho_map_thresh,verb_level=V_resp)
  call it(defs,'FxcMode', '[TDDFT] ("G-XXX" or "R-XXX" with XXX=def/full/cut_Gmax/cut_GmGp") ',FXC_mode,verb_level=V_resp)
  call it(defs,'BSEEhEny','[BSK] Electron-hole energy range',BS_eh_en,E_unit,verb_level=V_resp)
  call it(defs,'BSKCut',  '[BSK] Cutoff on the BSE Kernel, 0=full 1=none',BS_K_cutoff,verb_level=V_resp)
@@ -305,9 +339,9 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it('f',defs,'NoCondSumRule' ,'[BSE/X] Do not impose the conductivity sum rule in velocity gauge',verb_level=V_resp)
  call it('f',defs,'MetDamp' ,      '[BSE] Define '//slash//'w+=sqrt('//slash//'w*('//slash//'w+i'//slash//'eta))',verb_level=V_resp)
  call it(defs,'BSSmod',    '[BSS] (h)aydock/(d)iagonalization/(s)lepc/(i)nversion/(t)ddft`',BSS_mode,protect=.FALSE.)
- call it(defs,'BSEprop',   '[BSS] Can be any among abs/jdos/kerr/asymm/anHAll/magn/dich/photolum/esrt',BSE_prop)
+ call it(defs,'BSEprop',   '[BSS] Can be any among abs/jdos/kerr/asymm/anHAll/magn/dich/photolum/esrt/MEspin/MEorb',BSE_prop)
  call it(defs,'BSEdips',   '[BSS] Can be "trace/none" or "xy/xz/yz" to define off-diagonal rotation plane',BSE_dipole_geometry)
- call it(defs,'BSSydiaLib','[BSS] Library used with ydiago solver `(s)calapack/(e)lpa`',BSS_ydiago_solver)
+ call it(defs,'BSSldiaLib','[BSS] Library used with ldiago solver `(s)calapack/(e)lpa`',BSS_ldiago_solver)
  call it(defs,'BSSInvMode','[BSS] Inversion solver modality `(f)ull/(p)erturbative`',BSS_inversion_mode)
  call it(defs,'BSSInvPFratio','[BSS] Inversion solver. Ratio between the number of frequencies solved pert/full',&
 &             K_INV_EPS%PERT_FULL_ratio)
@@ -392,6 +426,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'IDEm1Ref','[RIM] Dielectric matrix reference component 1(x)/2(y)/3(z)',&
 &                         RIM_id_epsm1_reference,verb_level=V_RL)
  call it(defs,'RandQpts','[RIM] Number of random q-points in the BZ',RIM_n_rand_pts)
+ call it(defs,'RandQptsSphe','[RIM] Number of random q-points in the BZ',RIM_sphe_n_rand_pts)
  call it(defs,'RandGvec','[RIM] Coulomb interaction RS components',RIM_ng,G_unit)
  call it('f',defs,'QpgFull', '[F RIM] Coulomb interaction: Full matrix',verb_level=V_RL)
  !
@@ -401,10 +436,10 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  ! CUTOFF
  !
+ call it(defs,'EpsEnv',   '[COL] Static Inverse dielectric matrix of enviroment',eps_env,verb_level=V_RL)
  call it(defs,'CUTGeo',   '[CUT] Coulomb Cutoff geometry: box/cylinder/sphere/ws/slab X/Y/Z/XY..',cut_geometry)
  call it(defs,'CUTBox',   '[CUT] [au] Box sides',box_length)
  call it(defs,'CUTRadius','[CUT] [au] Sphere/Cylinder radius',cyl_ph_radius)
- call it(defs,'CUTCylLen','[CUT] [au] Cylinder length',cyl_length)
  call it(defs,'CUTwsGvec','[CUT] WS cutoff: number of G to be modified',ws_cutoff)
  call it('f',defs,'CUTCol_test','[CUT] Perform a cutoff test in R-space',verb_level=V_RL)
  !
@@ -420,6 +455,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  ! Real Time dynamics
  !
  call it(defs,'RTBands',    '[RT] Bands',RT_bands)
+ call it(defs,'RTFrozenBands','[RT] Frozen Bands list',RT_bands_frozen_ch,Verb_level=V_real_time)
  !
  ! This call is a temporary fix due to the multiple definition of energy ranges.
  ! The call is needed to align the different values based on the USER definition.
@@ -450,11 +486,16 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it(defs,'Integrator',  '[RT] Integrator. Use keywords space separated  ( "EULER/EXPn/INV" "SIMPLE/RK2/RK4/HEUN" "RWA") ',&
 &              Integrator_name)
  !
+#if defined _NL
+ IO_times=(/CARR_RT_IO_t%INTERVAL_time_INPUT,Vbands_RT_IO_t%INTERVAL_time_INPUT,OUTPUT_RT_IO_t%INTERVAL_time_INPUT/)
+#elif defined _RT
  IO_times=(/CARR_RT_IO_t%INTERVAL_time_INPUT,Gless_RESTART_RT_IO_t%INTERVAL_time_INPUT,OUTPUT_RT_IO_t%INTERVAL_time_INPUT/)
- call it(defs,'IOtime',      '[RT] Time between two consecutive I/O (CARRIERs - GF - OUTPUT)',IO_times,unit=Time_unit(1))
+#endif
+ call it(defs,'IOtime',      '[RT] Time between two consecutive I/O (CARRIERs - GF/WF - OUTPUT)',IO_times,unit=Time_unit(1))
  CARR_RT_IO_t%INTERVAL_time_INPUT             =IO_times(1)
  Gless_RESTART_RT_IO_t%INTERVAL_time_INPUT    =IO_times(2)
  Gless_RT_IO_t%INTERVAL_time_INPUT            =IO_times(2)
+ Vbands_RT_IO_t%INTERVAL_time_INPUT           =IO_times(2)
  OUTPUT_RT_IO_t%INTERVAL_time_INPUT           =IO_times(3)
  !
  IO_times(1:2)=(/CACHE_OBS_INTERVAL_time_INPUT,OBS_RT_IO_t%INTERVAL_time_INPUT/)
@@ -471,6 +512,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it('f',defs,'RTEvalEnergy',  '[RT] Energy variation computed on the fly',Verb_level=V_real_time)
  call it('f',defs,'RTEvalEntropy', '[RT] Entropy variation computed on the fly',Verb_level=V_real_time)
  call it('f',defs,'SaveGhistory',  '[RT] Save the history of the green function',Verb_level=V_real_time)
+ call it('f',defs,'RTgradk',       '[RT] Compute the term grad_k(rho).E',Verb_level=V_real_time)
  !
  ! ... updates ...
  call it('f',defs,'RTUpdateSOC',     '[RT] Update the SOC interaction',Verb_level=V_real_time)
@@ -523,6 +565,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  ! Non-linear Optics
  !
  call it(defs,'NLBands',      '[NL] Bands range',NL_bands)
+ call it(defs,'NLFrozenBands','[NL] Frozen Bands list',NL_bands_frozen_ch,Verb_level=V_nl_optics)
  call it(defs,'NLverbosity',  '[NL] Verbosity level (low | high)',NL_verb_name) 
  call it(defs,'NLstep',       '[NL] Time step length',RT_step,unit=Time_unit(1),Verb_level=V_nl_optics)
  call it(defs,'NLtime',       '[NL] Simulation Time',NE_tot_time,unit=Time_unit(1))
@@ -540,9 +583,12 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  call it('f',defs,'FrSndOrd','[NL] Force second order in Covariant Dipoles',verb_level=V_nl_optics)
  call it('f',defs,'NoComprCOLL','[NL] Load collisions in double-precision and do not remove small elements (default false)',&
 &  verb_level=V_nl_optics)
- call it('f',defs,'UseSymm','[NL] Use full-symms for dipoles and residual-symms for polar/current (def false)', &
+ call it('f',defs,'NoUseSymm','[NL] Do not use residual-symms for polar/current (def false)', &
 &  verb_level=V_nl_optics)
  call it('f',defs,'EvalCurrent','[NL] Evaluate the current')
+ call it('f',defs,'SaveVbhistory',  '[RT] Save the history of the wave-functions function',Verb_level=V_nl_optics)
+ call it(defs,'FLOrder',  '[NL] Fourier Order of Floquet Analysis',Floquet_order, Verb_level=V_nl_optics)
+! call it('f',defs,'SaveVbfloquet',  '[RT] Save wave-functions for Floquet Analysis',Verb_level=V_nl_optics)
  !
 #endif
  !
@@ -552,7 +598,7 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  if (initmode==4.and..not.infile_editing) n_ext_fields=0
  do i_field=1,n_ext_fields_max
-   call Afield(i_field)
+   call Afield(defs,i_field)
  enddo
  !
 #endif
@@ -647,16 +693,15 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  !
  call it(defs,'DipBands',       '[DIP] Bands range for dipoles',Dip%ib,verb_level=V_resp)
  call it(defs,'DipQpts',        '[DIP] Qpts range for dipoles',Dip%iq,verb_level=V_resp)
- call it(defs,'DipoleEtresh',   '[DIP] Treshold in the definition of R=P/deltaE',Dip%Energy_treshold,&
+ call it(defs,'DipoleEtresh',   '[DIP] Treshold in the definition of R=P/deltaE',Dip%Energy_threshold,&
 &                                                                               verb_level=V_resp,unit=E_unit)
- call it(defs,'DipApproach',    '[DIP] [G-space v/R-space x/Covariant/Shifted grids]',Dip%approach,verb_level=V_resp)
+ call it(defs,'DipApproach',    '[DIP] [G-space v/R-space x/Covariant/Shifted grids/derk]',Dip%approach,verb_level=V_resp)
  call it(defs,'DipComputed',    '[DIP] [default R P V; extra P2 Spin Orb]',Dip%computed,verb_level=V_resp)
  call it(defs,'ShiftedPaths',   '[DIP] Shifted grids paths (separated by a space)',grid_paths,verb_level=V_resp)
  call it('f',defs,'DipPDirect', '[DIP] Directly compute <v> also when using other approaches for dipoles',verb_level=V_resp)
  call it('f',defs,'DipBandsALL','[DIP] Compute all bands range, not only valence and conduction',verb_level=V_resp)
 #if defined _NL || defined _ELECTRIC
  call it('f',defs,'EvPolarization','[DIP] Evaluate Polarization (require DipApproach=Covariant)',verb_level=V_resp)
- call it('f',defs,'FrPolPerdic','[DIP] Force periodicity of polarization respect to the external field')
 #endif
 #if defined _RT
  call it('f',defs,'SPINprojected', '[DIP] Project the spin dipoles in the c/v channels ',verb_level=V_real_time)
@@ -667,9 +712,37 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 #if defined _ELPH 
  call it(defs,'GphBRnge',  '[ELPH] G[W] bands range',QP_PH_n_G_bands)
  call it(defs,'ElPhModes', '[ELPH] Phonon modes included',elph_branches)
- call it(defs,'FANdEtresh','[ELPH] Energy treshold for Fan denominator',FAN_deltaE_treshold,verb_level=V_ph,unit=E_unit)
+ call it(defs,'FANdEtresh','[ELPH] Energy treshold for Fan denominator',FAN_deltaE_treshold,unit=E_unit)
  call it(defs,'DWdEtresh', '[ELPH] Energy treshold for DW denominator',DW_deltaE_treshold,verb_level=V_ph,unit=E_unit)
  call it(defs,'GkkpDB',    '[ELPH] GKKP database (gkkp | gkkp_expanded | genFroh )',gkkp_db,verb_level=V_ph)
+ !
+ call it(defs,'ELPhExcStates', '[EXCPH] Incoming (external) exciton states',EXCPH_states)
+ call it(defs,'ELPhExcSum',    '[EXCPH] Outgoing (virtual) exciton states',EXCPH_sum)
+ call it(defs,'LoutPath',      '[EXCPH] Path of the outgoing L',Lout_path)
+ call it(defs,'AlphaQ'       ,'[EXCPH] Excitonic band structure 2D distortion',alphaQ)
+ call it(defs,'EXCLongDr'     ,'[EXCPH] Electric field versor for the excitons',EXC_q0)
+ !
+ call it('f',defs,'DbGdOnlyPh'   , '[EXCPH] Use Double-Grid only for phonon energies (do not interpolate excitons)')
+ call it('f',defs,'ExtendOutput', '[EXCPH] Print additional output with q-resolved lifetimes and resonances')
+ call it('f',defs,'DbGdWEIGHTs', '[EXCPH] Use Double-grid also for satellite weights and renormalization')
+ call it('f',defs,'PLqres', '[EXCPH] Write contribution from each q-point')
+ !
+ ! Testing only zone <
+ call it('f',defs,'ConstElph' , '[EXCPH] Set all electron-phonon matrix elements to one (for testing purpose)')
+ call it('f',defs,'NoMatrxEl' , '[EXCPH] Set all  exciton-phonon matrix elements to one (for testing purpose)')
+ call it('f',defs,'KeepDeg'   , '[EXCPH] Keep degenerate states (for testing purpose)')
+ !
+ call it('f',defs,'AbsElph', '[EXCPH] Module of elph matrix elements (for testing purpose)')
+ call it('f',defs,'AbsExc',  '[EXCPH] Module of excitonic WFs (for testing purpose)')
+ call it('f',defs,'HoleContributionOnly', '[EXCPH] Hole channel only (for testing purpose)')
+ call it('f',defs,'ElectronContributionOnly',  '[EXCPH] Elec channel only (for testing purpose)')
+ call it(defs,'MaxKShells', '[EXCPH] K-sum up to a max shell within k-radius in IKU (for testing purpose)',EXCPH_kthresh)
+ ! Testing only zone >
+ !
+ call it(defs,'LDamping', '[EXCPH] Damping of exc-ph self-energy',L_damping,E_unit)
+ call it(defs,'EXCPHdEtresh','[ELPH] Energy treshold for exc-ph denominator',EXCPH_deltaE_treshold,unit=E_unit)
+ call it('f',defs,'ExcPhOffDiago', '[EXCPH] Exciton-Phonon off-diagonal self-energy')
+ !
  call it(defs,'ElPhHBRnge','[ELPH] Hamiltonian bands range',elph_Ham_bands)
  call it(defs,'ElPhHKpt',  '[ELPH] Hamiltonian k-point',elph_Ham_ik)
  call it(defs,'REStresh',  '[ELPH] Residual treshold to report in output files',RES_tresh)
@@ -711,14 +784,15 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
  ! SC and RT common
  !
  call it(defs,'COLLBands',   '[COLL] Bands for the collisions',COLL_bands)
+ call it(defs,'COLLFrozenBands','[COLL] Frozen Bands list',COLL_bands_frozen_ch,Verb_level=V_sc)
  !
- call it(defs,'HXC_Potential', '[SC] SC HXC Potential',H_potential,protect=.FALSE.,case="A")
- call it(defs,'COLLCut',    '[SC,RT] Cutoff on the collisions, 0=full(default =0.0005)',COLLISIONS_cutoff,verb_level=V_sc)
+ call it(defs,'HXC_Potential',  '[SC] SC HXC Potential',H_potential,protect=.FALSE.,case="A")
+ call it(defs,'COLLCut',        '[SC,RT] Cutoff on the collisions, 0=full 1=none (default =0.0005)',&
+ &                                                                   COLLISIONS_cutoff,verb_level=V_sc)
+ call it('f',defs,'COLLBseMem', '[SC,RT] Minimize memory use in bse kernel conversion',verb_level=V_sc)
  !
 #endif
  !
-#if defined _RT
-#endif
  !
 #if defined _SC
  call it('f',defs,'OEPItSolver','[SC] Iterative solution instead of inversion of OEP')
@@ -726,60 +800,14 @@ subroutine INIT_load(defs,en,q,k,X,Xw,Dip)
 #endif
  !
  if (initmode==4) then
+   !
+   call LEVELS_frozen(en)
    call LEVELS_mirror("MIRROR_q",en,X,Dip,l_USER_fields)
    call LEVELS_mirror("MIRROR_bands",en,X,Dip,l_USER_fields)
  endif
  ! 
  contains 
    !
-#if defined _RT || defined _NL
-   !
-   subroutine Afield(i_field)
-     use units,    ONLY  :AU2KWCMm2
-     use parser_m, ONLY  : parser
-     character(10)      :: name
-     integer            :: i_field
-     logical            :: is_def
-     !
-     name='Field'//trim(intc(i_field))
-     !
-     call it(defs,trim(name)//'_Freq',            '[RT '//trim(name)//'] Frequency',&
-&            Efield(i_field)%frequency,unit=E_unit)
-     call it(defs,trim(name)//'_NFreqs',            '[RT '//trim(name)//'] Frequency',&
-&            Efield(i_field)%n_frequencies)
-     call it(defs,trim(name)//'_DFreq',           '[RT '//trim(name)//'] Frequency step',&
-&            Efield(i_field)%W_step,unit=E_unit,verb_level=V_real_time)
-     call it(defs,trim(name)//'_Int',             '[RT '//trim(name)//'] Intensity',&
-&            Efield(i_field)%intensity,unit=I_unit)
-     call it(defs,trim(name)//'_Width',           '[RT '//trim(name)//'] Width',&
-&            Efield(i_field)%width,unit=Time_unit(1))
-     call it(defs,trim(name)//'_FWHM',            '[RT '//trim(name)//'] Full Width at Half Maximum (overwrite width if set)',&
-&            Efield(i_field)%FWHM,unit=Time_unit(1),verb_level=V_real_time)
-     call it(defs,trim(name)//'_kind',            '[RT '//trim(name)//'] Kind(SIN|SOFTSIN| see more on src/modules/mod_fields.F)',&
-&            Efield(i_field)%ef_name)
-     call it(defs,trim(name)//'_pol',             '[RT '//trim(name)//'] Pol(linear|circular)',&
-&            Efield(i_field)%ef_pol)
-     call it(defs,trim(name)//'_Dir',             '[RT '//trim(name)//'] Versor',&
-&            Efield(i_field)%versor)
-     call it(defs,trim(name)//'_Dir_circ',        '[RT '//trim(name)//'] Versor_circ',&
-&            Efield(i_field)%versor_circ,verb_level=V_real_time)
-     call it(defs,trim(name)//'_Tstart',          '[RT '//trim(name)//'] Initial Time',&
-&            Efield(i_field)%t_initial,unit=Time_unit(1))
-     !
-     if (initmode==4.and..not.infile_editing) then
-       !
-       call parser(trim(name)//'_FWHM',is_def)
-       if (     is_def) Efield(i_field)%width=Efield(i_field)%FWHM/(2._SP*sqrt(2._SP*log(2._SP)))
-       if (.not.is_def) Efield(i_field)%FWHM=Efield(i_field)%width*(2._SP*sqrt(2._SP*log(2._SP)))
-       !
-       if (trim(Efield(i_field)%ef_name)/='none') n_ext_fields=n_ext_fields+1
-       !
-     endif
-     !
-   end subroutine
-   !
-#endif
-   !
 #if defined _MPI 
    !
    subroutine CPU_structure_load()
diff --git a/src/interface/INIT_q_points.F b/src/interface/INIT_q_points.F
index 1593ab87d5..e8c1516dea 100644
--- a/src/interface/INIT_q_points.F
+++ b/src/interface/INIT_q_points.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INIT_q_points( )
  !
  ! This routine:
@@ -17,12 +21,14 @@ subroutine INIT_q_points( )
  use pars,        ONLY:SP,schlen,DP
  use it_tools,    ONLY:check_verbosity
  use R_lattice,   ONLY:nqibz,q_pt,bz_samp,q_source
+ use D_lattice,   ONLY:nsym
 #if defined _ELPH
  use ELPH,        ONLY:PH_qpt,elph_use_q_grid,elph_nQ,elph_grids_are_expanded
 #endif
  use parser_m,    ONLY:PARSER_array
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
@@ -50,7 +56,14 @@ subroutine INIT_q_points( )
    q_source="is read from the El-Ph databases"
    nqibz=elph_nQ
    YAMBO_ALLOC(q_pt,(nqibz,3))
+   ! here I read from s.ph and I need to put a -1
    q_pt(:,:)=-real(PH_qpt(:,:),SP)
+ else if(allocated(PH_qpt).and.elph_use_q_grid.and.elph_grids_are_expanded.and.nsym==1) then
+   q_source="is read from the El-Ph databases expanded"
+   nqibz=elph_nQ
+   YAMBO_ALLOC(q_pt,(nqibz,3))
+   ! In the ndb.gkkp the -q was already imposed
+   q_pt(:,:)=real(PH_qpt(:,:),SP)
  endif
 #endif
  !
@@ -108,6 +121,6 @@ integer function get_e_p_q_point(i_q,q_in)
      endif
      call io_disconnect(ID)
      !
-   end function
+   end function get_e_p_q_point
    !
 end subroutine
diff --git a/src/interface/INIT_read_command_line.F b/src/interface/INIT_read_command_line.F
index 30ab9e718f..920aa1c8a2 100644
--- a/src/interface/INIT_read_command_line.F
+++ b/src/interface/INIT_read_command_line.F
@@ -337,6 +337,7 @@ subroutine INIT_read_command_line(rstr,init_)
        V_is_defined=INIT_read_command_line_potentials(V_string,"f")
        V_is_defined=INIT_read_command_line_potentials(V_string,"h")
        V_is_defined=INIT_read_command_line_potentials(V_string,"cvonly")
+       V_is_defined=INIT_read_command_line_potentials(V_string,"kbse")
        !
      endif
 #endif
@@ -357,6 +358,7 @@ subroutine INIT_read_command_line(rstr,init_)
        V_is_defined=INIT_read_command_line_potentials(V_string,"f")
        V_is_defined=INIT_read_command_line_potentials(V_string,"h")
        V_is_defined=INIT_read_command_line_potentials(V_string,"cvonly")
+       V_is_defined=INIT_read_command_line_potentials(V_string,"kbse")
        !
      endif
 #endif
@@ -403,6 +405,20 @@ subroutine INIT_read_command_line(rstr,init_)
      endif
 #endif
      !
+#if defined _ELPH
+     !
+     ! Exciton-phonon
+     !
+     if ( trim(rstr_piece(i1)) == 'excph' ) then
+       !
+       call initactivate(1,'ExcGkkp')
+       if ( trim(rstr_piece(i1+1)) == 'o'   ) call initactivate(1,'ExcPhOptics') 
+       if ( trim(rstr_piece(i1+1)) == 'l'   ) call initactivate(1,'ExcPhLifeT')  
+       !
+     endif
+     !
+#endif
+
 #if defined _NL
      !
      ! NL
@@ -426,8 +442,9 @@ subroutine INIT_read_command_line(rstr,init_)
      ! COLLISIONS
      !
      if (runlevel_is_on('collisions')) then
-       if ( l_elel_scatt.or.trim(H_potential)=='COH+SEX'.or.&
-&               trim(H_potential)=='COH'.or.trim(H_potential)=='SEX' ) call initactivate(1,'em1s')
+       if ( l_elel_scatt.or.index(H_potential,'SEX')>0 .or.&
+&                           index(H_potential,'COH')>0 ) call initactivate( 1,'em1s')
+       if ( index(H_potential,'KBSE')>0 )                call initactivate(-1,'em1s') 
      endif
      !
 #endif
diff --git a/src/interface/INIT_read_command_line_potentials.F b/src/interface/INIT_read_command_line_potentials.F
index c5b45b8b06..9c5b1f930a 100644
--- a/src/interface/INIT_read_command_line_potentials.F
+++ b/src/interface/INIT_read_command_line_potentials.F
@@ -47,7 +47,9 @@ logical function INIT_read_command_line_potentials(string,potential_user_string)
      H_potential=trim(H_potential)//"IP"
    case('cvonly')
      H_potential=trim(H_potential)//"CVONLY"
- end select
+   case('kbse')
+     H_potential=trim(H_potential)//"KBSE"
+  end select
  !
  string=STRING_remove(string,trim(potential_user_string),replace=" ")
  !
diff --git a/src/interface/NL_project.dep b/src/interface/NL_project.dep
index 762c81e5ad..aecba3ab0f 100644
--- a/src/interface/NL_project.dep
+++ b/src/interface/NL_project.dep
@@ -1,5 +1,6 @@
  INIT.o
  INIT_activate.o
+ INIT_fields.o
  INIT_load.o
  INIT_read_command_line.o
 
diff --git a/src/interface/RT_project.dep b/src/interface/RT_project.dep
index a91a355ec5..2dcbc4fc2c 100644
--- a/src/interface/RT_project.dep
+++ b/src/interface/RT_project.dep
@@ -4,6 +4,7 @@
  INIT_RT_ctl_switch.o
  INIT_activate.o
  INIT_check_databases.o
+ INIT_fields.o
  INIT_load.o
  INIT_read_command_line.o
  INIT_read_command_line_potentials.o
diff --git a/src/interpolate/INTERPOLATION_BZ.F b/src/interpolate/INTERPOLATION_BZ.F
index 8333d183d2..3060090a43 100644
--- a/src/interpolate/INTERPOLATION_BZ.F
+++ b/src/interpolate/INTERPOLATION_BZ.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_BZ(K,NK,ID,R1D,R2D,C1D,C2D,E)
  use pars,        ONLY:SP
  use R_lattice,   ONLY:bz_samp
  use electrons,   ONLY:levels
  use interpolate, ONLY:interpls,REAL1D,REAL2D,CMPLX1D,CMPLX2D
  use timing_m,    ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)              :: K
  integer,       intent(in)              :: ID,NK
diff --git a/src/interpolate/INTERPOLATION_BZ_coeff.F b/src/interpolate/INTERPOLATION_BZ_coeff.F
index 2a3848e9ef..9edb722737 100644
--- a/src/interpolate/INTERPOLATION_BZ_coeff.F
+++ b/src/interpolate/INTERPOLATION_BZ_coeff.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_BZ_coeff(Xk,VALUEs,engre,nvalues,Nk,ID)
  !
  ! Interpolation scheme (PRB 38 p2721)
@@ -24,8 +28,9 @@ subroutine INTERPOLATION_BZ_coeff(Xk,VALUEs,engre,nvalues,Nk,ID)
  use LIVE_t,            ONLY:live_timing
  use parallel_m,        ONLY:PP_indexes,PP_indexes_reset,myid,PAR_build_index,ncpu
  use parallel_int,      ONLY:PP_redux_wait,PARALLEL_index 
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)  :: nvalues
  integer,       intent(in)  :: Nk,ID
diff --git a/src/interpolate/INTERPOLATION_BZ_setup.F b/src/interpolate/INTERPOLATION_BZ_setup.F
index db87409580..4cd509b804 100644
--- a/src/interpolate/INTERPOLATION_BZ_setup.F
+++ b/src/interpolate/INTERPOLATION_BZ_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_BZ_setup(Xk)
  !
  ! Code inspired by BolzTraP
@@ -19,8 +23,9 @@ subroutine INTERPOLATION_BZ_setup(Xk)
  use vec_operate,       ONLY:sort
  use matrix_operate,    ONLY:m3inv
  use com,               ONLY:msg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: Xk
  !
diff --git a/src/interpolate/INTERPOLATION_coefficients.F b/src/interpolate/INTERPOLATION_coefficients.F
index 923190f820..81fee6287a 100644
--- a/src/interpolate/INTERPOLATION_coefficients.F
+++ b/src/interpolate/INTERPOLATION_coefficients.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_coefficients(R1D,R2D,C1D,C2D,E,k,Nk,ID,ID_obj)
  !
  ! Notice that input/output k-points for the interpolation are in iku units
@@ -16,8 +20,9 @@ subroutine INTERPOLATION_coefficients(R1D,R2D,C1D,C2D,E,k,Nk,ID,ID_obj)
  use electrons,         ONLY:levels
  use interpolate,       ONLY:interpls,REAL1D,REAL2D,CMPLX1D,CMPLX2D,nshells,INTERP_FineGd
  use interpolate_tools, ONLY:get_ID
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)           :: k
  real(SP),      intent(in), optional :: R1D(:,:),R2D(:,:,:)
diff --git a/src/interpolate/INTERPOLATION_driver_do.F b/src/interpolate/INTERPOLATION_driver_do.F
index 70236c3978..2a84f3661b 100644
--- a/src/interpolate/INTERPOLATION_driver_do.F
+++ b/src/interpolate/INTERPOLATION_driver_do.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_driver_do(operation,ID,OUT_k,OUT_E,FineGrid)
  !
  ! INTREPOLATION_driver_do at present interpolates both to the Coarse grid and to the Fine grid
@@ -27,8 +31,9 @@ subroutine INTERPOLATION_driver_do(operation,ID,OUT_k,OUT_E,FineGrid)
 &                               l_INTERP_f,l_INTERP_E,l_INTERP_dE,INTERP_FineGd,&
 &                               l_integral_respect,OUT_k_nk
  use IO_int,               ONLY:IO_and_Messaging_switch
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*),  intent(in)              :: operation
  integer,       intent(in)              :: ID
diff --git a/src/interpolate/INTERPOLATION_driver_end.F b/src/interpolate/INTERPOLATION_driver_end.F
index 13d5d7ded8..bb5559bcd7 100644
--- a/src/interpolate/INTERPOLATION_driver_end.F
+++ b/src/interpolate/INTERPOLATION_driver_end.F
@@ -5,13 +5,18 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_driver_end(ID)
  !
  use interpolate_tools,    ONLY:INTERPOLATION_reset
  use interpolate,          ONLY:INTERPOLATE_msg_fmt,INTERPOLATE_initialized,&
 &                               lattice_vectors,INTERP_obj,max_interpls,int_sop,NN_nk_exact
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(in) :: ID
  !
diff --git a/src/interpolate/INTERPOLATION_fourier.F b/src/interpolate/INTERPOLATION_fourier.F
index 81a866ed0d..b7d4def0fc 100644
--- a/src/interpolate/INTERPOLATION_fourier.F
+++ b/src/interpolate/INTERPOLATION_fourier.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INTERPOLATION_fourier(K,VALUES,engre,nvalues,NK,ID)
  !
  use pars,              ONLY:SP,pi,cI,cZERO,DP,rZERO
@@ -18,8 +22,9 @@ subroutine INTERPOLATION_fourier(K,VALUES,engre,nvalues,NK,ID)
  use LIVE_t,            ONLY:live_timing
  use parallel_m,        ONLY:PP_indexes,PP_indexes_reset,myid
  use parallel_int,      ONLY:PP_redux_wait,PARALLEL_index 
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,intent(in)        :: nvalues
  integer,intent(in)        :: NK,ID
diff --git a/src/interpolate/INTERPOLATION_nearest_k.F b/src/interpolate/INTERPOLATION_nearest_k.F
index a59e535ca1..e81b788c0b 100644
--- a/src/interpolate/INTERPOLATION_nearest_k.F
+++ b/src/interpolate/INTERPOLATION_nearest_k.F
@@ -4,6 +4,10 @@
 ! Copyright (C) 2018 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): DS AM
+!
+! headers
+!
+#include<y_memory.h>
 !                  
 subroutine INTERPOLATION_nearest_k(K_in,V_in,K_out,V_out)
  !
@@ -13,8 +17,9 @@ subroutine INTERPOLATION_nearest_k(K_in,V_in,K_out,V_out)
  use vec_operate,       ONLY:sort
  use parallel_m,        ONLY:PP_indexes,PP_indexes_reset,ncpu,myid
  use parallel_int,      ONLY:PP_redux_wait,PARALLEL_index 
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  real(SP)                :: K_in(:,:)
  real(SP)                :: K_out(:,:)
diff --git a/src/io/.objects b/src/io/.objects
index cc8b94d070..2d1f681c97 100644
--- a/src/io/.objects
+++ b/src/io/.objects
@@ -7,10 +7,10 @@ RT_objects = io_RT_components.o io_RT_components_G_lesser.o io_RT_components_hea
              variables_RT_static.o variables_RT_dynamic.o io_KB_real_space.o
 #endif
 #if defined _NL 
-NL_objects = variables_NL.o io_NL.o
+NL_objects = variables_NL.o io_NL.o io_RT_components_V_bands.o
 #endif
 #if defined _ELPH 
-ELPH_objects = variables_ELPH.o io_ELPH.o io_gFsq.o
+ELPH_objects = variables_ELPH.o io_ELPH.o io_gFsq.o io_EXCPH_gkkp.o
 #endif
 #if defined _PHEL 
 PHEL_objects = io_PH.o
@@ -20,10 +20,10 @@ COLL_objects =
 COLL_objects = io_OSCLL.o 
 #endif
 objs = ver_is_gt_or_eq.o \
-       io_Vnl.o       io_E_SOC_map.o \
+       io_E_SOC_map.o \
        io_RIM.o io_RIM_W.o    io_GROT.o     $(ELPH_objects) $(PHEL_objects) \
-       io_QINDX.o    io_QP_and_GF.o   io_X.o           io_MPA.o  \
-       io_DB1.o       io_WF.o       io_KB_abinit.o   io_KB_pwscf.o   io_HF_and_locXC.o \
+       io_QINDX.o  io_kpts.o  io_QP_and_GF.o  io_X.o  io_MPA.o  \
+       io_DB1.o io_WF.o io_WF_phases.o io_KB_abinit.o io_KB_pwscf.o io_HF_and_locXC.o \
        io_BS_Fxc.o  io_NLCC_pwscf.o \
        io_ATMPROJ_pwscf.o  io_descriptors.o \
        io_BSS_diago.o  io_BSS_Haydock.o io_BSS_invert.o io_USPP_pwscf.o \
diff --git a/src/io/DOUBLE_project.dep b/src/io/DOUBLE_project.dep
index 92284ef9ea..60f733f26b 100644
--- a/src/io/DOUBLE_project.dep
+++ b/src/io/DOUBLE_project.dep
@@ -8,6 +8,8 @@
  io_DB1_selective_scan.o
  io_Double_Grid.o
  io_ELPH.o
+ io_EXCPH_SE.o
+ io_EXCPH_gkkp.o
  io_E_SOC_map.o
  io_GROT.o
  io_HF_and_locXC.o
@@ -28,16 +30,18 @@
  io_RT_components_OBS.o
  io_RT_components_Reference.o
  io_RT_components_TIME_points.o
+ io_RT_components_V_bands.o
  io_RT_components_header.o
  io_SC_components.o
  io_USPP_pwscf.o
- io_Vnl.o
  io_WF.o
+ io_WF_phases.o
  io_X.o
  io_descriptors.o
  io_full_SYMMs.o
  io_gFsq.o
  io_header.o
+ io_kpts.o
  io_out_of_date.o
  load_SC_components.o
  variables_DIPOLES.o
diff --git a/src/io/ELPH_project.dep b/src/io/ELPH_project.dep
index c3ec8f95d3..39bf033919 100644
--- a/src/io/ELPH_project.dep
+++ b/src/io/ELPH_project.dep
@@ -1,5 +1,6 @@
  io_Double_Grid.o
  io_ELPH.o
+ io_EXCPH_gkkp.o
  io_QP_and_GF.o
  io_RT_components.o
  io_RT_components_Reference.o
diff --git a/src/io/NL_project.dep b/src/io/NL_project.dep
index 889b62b3a1..6112a00d93 100644
--- a/src/io/NL_project.dep
+++ b/src/io/NL_project.dep
@@ -1,4 +1,6 @@
  io_DB1.o
  io_NL.o
+ io_RT_components.o
+ io_RT_components_V_bands.o
  variables_NL.o
 
diff --git a/src/io/io_ATMPROJ_pwscf.F b/src/io/io_ATMPROJ_pwscf.F
index 1648e0e11f..3a9cc305d9 100644
--- a/src/io/io_ATMPROJ_pwscf.F
+++ b/src/io/io_ATMPROJ_pwscf.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_ATMPROJ_pwscf(ID)
  !
  use atom_proj
@@ -17,8 +21,9 @@ integer function io_ATMPROJ_pwscf(ID)
 &                           io_elemental,io_variable_bulk,def_variable_bulk, &
 &                           io_header,io_fragment
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,intent(in) :: ID
  ! 
diff --git a/src/io/io_BSS_Haydock.F b/src/io/io_BSS_Haydock.F
index e3319e7aa3..499886d47e 100644
--- a/src/io/io_BSS_Haydock.F
+++ b/src/io/io_BSS_Haydock.F
@@ -15,6 +15,7 @@ integer function io_BSS_Haydock(ID,iq,it,reached_treshold,mode,Af,Bf,Cf,Vnm1,Vn,
  use stderr,        ONLY:intc
  use BS,            ONLY:BS_identifier
  use BS_solvers,    ONLY:BSS_q0,Haydock_v0_mod,HAYVEC_size
+ use X_m,           ONLY:i_G_shift
  use IO_m,          ONLY:io_status,io_sec
  use IO_int,        ONLY:io_connect,io_disconnect,io_elemental,io_bulk,io_header
  use timing_m,      ONLY:timing
@@ -42,7 +43,7 @@ integer function io_BSS_Haydock(ID,iq,it,reached_treshold,mode,Af,Bf,Cf,Vnm1,Vn,
    io_BSS_Haydock=io_header(ID,XC_KIND="K_E force",GAUGE=.TRUE.,IMPOSE_GAUGE=.TRUE.)
    if (io_BSS_Haydock<0) goto 1
    !
-   call io_elemental(ID,VAR="PARS",VAR_SZ=6,MENU=0)
+   call io_elemental(ID,VAR="PARS",VAR_SZ=7,MENU=0)
    call io_elemental(ID,VAR=&
 &       " BSK Identifier                   ",I0=BS_identifier,CHECK=.TRUE.,OP=(/"=="/))
    call io_elemental(ID,VAR=&
@@ -51,12 +52,14 @@ integer function io_BSS_Haydock(ID,iq,it,reached_treshold,mode,Af,Bf,Cf,Vnm1,Vn,
 &       " Haydock reached Treshold         ",R0=reached_treshold,DB_R0=reached_treshold)
    call io_elemental(ID,VAR=&
 &       " Field direction                  ",R1=BSS_q0,CHECK=.true.,OP=(/"==","==","=="/))
+   call io_elemental(ID,VAR=&
+&       " G-shift                          ",I0=i_G_shift,CHECK=.true.,OP=(/"=="/))
    !
    call io_elemental(ID,VAR="",VAR_SZ=0,MENU=1)
    io_BSS_Haydock=io_status(ID)
    !
    if (io_BSS_Haydock<0) then
-     reached_treshold=0._SP
+     reached_treshold=100._SP
      goto 1
    endif
    !
diff --git a/src/io/io_BSS_diago.F b/src/io/io_BSS_diago.F
index d97e451490..6b42ee726d 100644
--- a/src/io/io_BSS_diago.F
+++ b/src/io/io_BSS_diago.F
@@ -7,7 +7,7 @@
 !
 integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,&
 &                             bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,&
-&                             bsR_pl,BS_mat,write_ev)
+&                             bsR_mespin,bsR_meorb,bsR_pl,BS_mat,write_ev)
  !
  use pars,          ONLY:SP,schlen,IP
  use stderr,        ONLY:intc
@@ -22,12 +22,13 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,&
  !
  implicit none
  !
- type(X_t),intent(in)        :: X_static
+ type(X_t),intent(inout)        :: X_static
  integer                        :: iq,ID,i_BS_mat
  complex(SP)        , optional  :: bsE(:)
  real(SP)   ,pointer, optional  :: BsE_corr(:,:),bsR_pl(:,:)
  complex(SP),pointer, optional  :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),&
-&                                  bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:)
+&                                  bsR_mespin(:,:),bsR_meorb(:,:),bsL_magn(:,:),&
+&                                  bsR_magn(:,:),bsRl(:),bsRr(:)
  complex(SP)        , optional  :: BS_mat(:,:)
  logical, optional              :: write_ev
  !
@@ -143,6 +144,20 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,bsRr,BsE_corr,&
      endif
    endif
    !
+   if(present(bsR_mespin)) then
+     if(associated(bsR_mespin)) then
+       call io_bulk(ID,'BS_MEspin_Residuals',VAR_SZ=(/2,BSS_n_eig,2/))
+       call io_bulk(ID,C2=bsR_mespin)
+     endif
+   endif
+   !
+   if(present(bsR_meorb)) then
+     if(associated(bsR_meorb)) then
+       call io_bulk(ID,'BS_MEorb_Residuals',VAR_SZ=(/2,BSS_n_eig,2/))
+       call io_bulk(ID,C2=bsR_meorb)
+     endif
+   endif
+   !
    if(present(bsR_pl)) then
      if(associated(bsR_pl)) then
        call io_bulk(ID,'BS_PL_Residuals',VAR_SZ=(/2,BSS_n_eig/))
diff --git a/src/io/io_BSS_invert.F b/src/io/io_BSS_invert.F
index ed43a86228..1055ca6e56 100644
--- a/src/io/io_BSS_invert.F
+++ b/src/io/io_BSS_invert.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_BSS_invert(iq,W,ID)
  !
  use units,         ONLY:HA2EV
@@ -15,7 +19,9 @@ integer function io_BSS_invert(iq,W,ID)
  use IO_m,          ONLY:io_sec,io_status,read_is_on
  use IO_int,        ONLY:io_connect,io_disconnect,io_elemental,io_bulk,io_header
  use timing_m,      ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer      :: iq
  type(w_samp) :: W
  !
diff --git a/src/io/io_BS_Fxc.F b/src/io/io_BS_Fxc.F
index c270e3421f..a4e7f49014 100644
--- a/src/io/io_BS_Fxc.F
+++ b/src/io/io_BS_Fxc.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_BS_Fxc(iq,FXC_W,ID,X)
  !
  use pars,           ONLY:schlen,IP,IP_YIO
@@ -22,7 +26,9 @@ integer function io_BS_Fxc(iq,FXC_W,ID,X)
 &                         io_elemental,def_variable_elemental,io_variable_elemental,&
 &                         io_bulk,io_header
  use descriptors,    ONLY:IO_desc,IO_desc_duplicate,IO_desc_reset
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  type(w_samp)        ::FXC_W
  integer             ::ID,iq
  type(X_t), optional ::X 
diff --git a/src/io/io_COL_CUT.F b/src/io/io_COL_CUT.F
index c1e1933587..7a3f8b6002 100644
--- a/src/io/io_COL_CUT.F
+++ b/src/io/io_COL_CUT.F
@@ -5,24 +5,33 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_COL_CUT(ID)
  !
+ use pars,           ONLY:SP
  use wave_func,      ONLY:wf_ng,cutoff_ng
  use R_lattice,      ONLY:RIM_ng,RIM_n_rand_pts,CUTOFF_plus_RIM,&
-&                         cyl_ph_radius,box_length,cyl_length,cut_geometry,&
+&                         cyl_ph_radius,box_length,cut_geometry,&
 &                         ng_closed,nqibz,bare_qpg,cut_description
  use IO_m,           ONLY:io_sec,io_status
  use IO_int,         ONLY:io_connect,io_disconnect,io_header,io_elemental,io_bulk
  use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer :: ID
  ! 
  ! Work Space
  !
+ real(SP)            ::cyl_length
  integer             ::CUT_RIM_ng,CUT_RIM_npts,cutoff_ng_db
  !
  cutoff_ng = wf_ng ! when writing, the cutoff_ng is the same as the wavefunction ng
+ cyl_length= 0._SP ! not to break compatibility with previous databases
  !
  call timing('io_COL_CUT',OPR='start')
  io_COL_CUT=io_connect(desc="cutoff",type=2,ID=ID)
diff --git a/src/io/io_DB1.F b/src/io/io_DB1.F
index 572bb79c85..1fe6bfc951 100644
--- a/src/io/io_DB1.F
+++ b/src/io/io_DB1.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 integer function io_DB1(E,k,ID)
  !
@@ -43,8 +47,9 @@ integer function io_DB1(E,k,ID)
 #if defined _RT
  use real_time,        ONLY:RT_bands
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: E
  type(bz_samp) :: k
diff --git a/src/io/io_DB1_selective_scan.F b/src/io/io_DB1_selective_scan.F
index f325fb5400..c16d360b3a 100644
--- a/src/io/io_DB1_selective_scan.F
+++ b/src/io/io_DB1_selective_scan.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 integer function io_DB1_selective_scan(what,DB1_path,E,k,db1_alat,db1_a,COM_mode)
  !
@@ -22,8 +26,9 @@ integer function io_DB1_selective_scan(what,DB1_path,E,k,db1_alat,db1_a,COM_mode
  use com,              ONLY:core_io_path
  use IO_m,             ONLY:OP_RD_CL,NONE,DUMP
  use IO_int,           ONLY:io_control,io_connect,io_disconnect,io_elemental,io_bulk
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)      :: E
  type(bz_samp)     :: k
diff --git a/src/io/io_Double_Grid.F b/src/io/io_Double_Grid.F
index 26b6d4a3d7..a43652a1a3 100644
--- a/src/io/io_Double_Grid.F
+++ b/src/io/io_Double_Grid.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_Double_Grid(E,Xk,ID,what)
  !
  use pars,           ONLY:schlen,IP_YIO,CR_YIO
@@ -19,8 +23,9 @@ integer function io_Double_Grid(E,Xk,ID,what)
 #if defined _ELPH
  use ELPH,           ONLY:EkplusQ_mode
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) :: E
  type(bz_samp):: Xk
diff --git a/src/io/io_ELPH.F b/src/io/io_ELPH.F
index ad11d2c023..d1572a6921 100644
--- a/src/io/io_ELPH.F
+++ b/src/io/io_ELPH.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_ELPH(ID,what)
  !
  use pars,           ONLY:schlen
@@ -12,14 +16,16 @@ integer function io_ELPH(ID,what)
  use IO_int,         ONLY:io_connect,io_disconnect,io_fragment,&
 &                         io_bulk,io_header
  use ELPH,           ONLY:ph_modes,elph_nb,PH_freqs_sq,l_GKKP_hosts_bare_dV,&
-&                         PH_pol_vector,GKKP,elph_grids_are_expanded,GKKP,GKKP_bare
+&                         l_GKKP_hosts_DW,PH_pol_vector,&
+&                         GKKP,elph_grids_are_expanded,GKKP,GKKP_bare
  use stderr,         ONLY:intc
  use ALLOC,          ONLY:ELPH_alloc
  use R_lattice,      ONLY:nkbz
  use D_lattice,      ONLY:n_atoms
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer      ::ID
  character(*) ::what
@@ -115,6 +121,7 @@ integer function io_ELPH(ID,what)
  call io_bulk(ID_frag,VAR="ELPH_GKKP_Q"//trim(intc(iq)),VAR_SZ=(/2,ph_modes,elph_nb,elph_nb,nkbz/) )
  call io_bulk(ID_frag,C4=GKKP%dVc(:,:,:,:,1))
  !
+ !
  if (l_GKKP_hosts_bare_dV) then
    ! 
    ! ELPH_gkkp_bare 
@@ -124,10 +131,14 @@ integer function io_ELPH(ID,what)
    !
  endif
  !
- ! ELPH_DW 
- !
- call io_bulk(ID_frag,VAR="ELPH_DW_Q"//trim(intc(iq)),VAR_SZ=(/ph_modes,elph_nb,elph_nb,nkbz/))
- call io_bulk(ID_frag,R4=GKKP%dVr(:,:,:,:,1))
+ if (l_GKKP_hosts_DW) then
+   !
+   ! ELPH_DW 
+   !
+   call io_bulk(ID_frag,VAR="ELPH_DW_Q"//trim(intc(iq)),VAR_SZ=(/ph_modes,elph_nb,elph_nb,nkbz/))
+   call io_bulk(ID_frag,R4=GKKP%dVr(:,:,:,:,1))
+   !
+ endif
  !
 1 call io_fragment_disconnect(ID,ID_frag)
  !
diff --git a/src/io/io_EXCPH_SE.F b/src/io/io_EXCPH_SE.F
new file mode 100644
index 0000000000..464477c31c
--- /dev/null
+++ b/src/io/io_EXCPH_SE.F
@@ -0,0 +1,203 @@
+!
+!        Copyright (C) 2000-2019 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): FP
+!
+! headers
+!
+#include<y_memory.h>
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+integer function io_EXCPH_SE(ID,what)
+ !
+ use pars,           ONLY:SP,schlen
+ use IO_m,           ONLY:io_sec,read_is_on,frag_ELPH,io_status
+ use IO_int,         ONLY:io_connect,io_disconnect,io_elemental,io_bulk,io_header,&
+&                         def_variable_bulk,io_variable_bulk,io_fragment
+ use EXCPH,          ONLY:EXCPH_Gkkp,EXCPH_Gkkp_sq,EXCPH_sum,EXCPH_states,&
+&                         BS_E_in,BS_Sat_E,BS_Sat_WEIGHT,BS_EXCPH_E,ph_EXCPH_E,&
+&                         BS_Sat_E_PH_abs,BS_Sat_WEIGHT_PH_abs
+ use ELPH,           ONLY:ph_modes,elph_branches
+ use stderr,         ONLY:intc
+ use R_lattice,      ONLY:nqibz
+ use timing_m,       ONLY:timing
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer      ::ID
+ character(*) ::what
+ !
+ !Work Space
+ !
+ integer                ::iq,ierr,VAR_size(4),VAR_size_sq(3),scat_type,ID_frag
+ real(SP)               ::v(3)
+ character(schlen)      ::db_name,VAR_name,dim_names(4),dim_names_sq(3)
+ logical                ::l_exc_SE
+ !
+ call timing('io_EXCPH_gkkp',OPR='start')
+ !
+ ID_frag=0
+ !
+ ! DB name
+ !
+ db_name='excph_gkkp'
+ !
+ if (index(what,'excitonic_SE')>0) l_exc_SE = .true.
+ !
+ io_EXCPH_gkkp=io_connect(desc=trim(db_name),type=2,ID=ID)
+ if (io_EXCPH_gkkp/=0) goto 1
+ !
+ if (any((/io_sec(ID,:)==1/))) then
+   !
+   scat_type = 3
+   !
+   io_EXCPH_gkkp=io_header(ID,R_LATT=.true.,KPTS=.true.,FRAG=frag_ELPH)
+   !
+   call io_elemental(ID,VAR="PARS",VAR_SZ=6,MENU=0)
+   !
+   call io_elemental(ID,&
+&       VAR=" Phonon modes           :",I0=ph_modes,CHECK=.true.,OP=(/"=="/))
+   !
+   call io_elemental(ID,&
+&       VAR=" Exciton states           :",I1=EXCPH_states,CHECK=.true.,OP=(/"=="/))
+   !
+   call io_elemental(ID,&
+&       VAR=" Exciton sum (scattered):",I1=EXCPH_sum,CHECK=.true.,OP=(/"=="/))
+   !
+   call io_elemental(ID,&
+&       VAR=" Scattering type [1=TT,2=LT,3=TL,4=LL]:",I0=scat_type,CHECK=.true.,OP=(/"=="/))
+   !
+   !
+   call io_elemental(ID,VAR="",VAR_SZ=0,MENU=1)
+   !
+   io_EXCPH_gkkp=io_status(ID)
+   if (io_EXCPH_gkkp/=0) goto 1
+   !
+   call io_bulk(ID,VAR="PH_BRANCHES",VAR_SZ=(/2/))
+   call io_bulk(ID,I1=elph_branches)
+   !
+   call io_bulk(ID,VAR="EXCITON_STATES",VAR_SZ=(/2/))
+   call io_bulk(ID,I1=EXCPH_states)
+   !
+   call io_bulk(ID,VAR="EXCITON_SUM",VAR_SZ=(/2/))
+   call io_bulk(ID,I1=EXCPH_sum)
+   !
+   call io_bulk(ID,VAR="SCATTERING_TYPE",VAR_SZ=(/1/))
+   call io_bulk(ID,I0=scat_type)
+   !
+   if (l_exc_SE) then
+     !
+     call io_bulk(ID,VAR="BS_initial_states_E",VAR_SZ=(/2,EXCPH_states(2)/))
+     call io_bulk(ID,C1=BS_E_in)
+     !
+   endif
+   !
+ endif
+ !
+ iq=maxval(io_sec(ID,:))-1
+ if (iq<=0) goto 1
+ !
+ ! Fragmentation
+ !
+ call io_fragment(ID,ID_frag,i_fragment=iq,ierr=ierr)
+ if (ierr<0.and.read_is_on(ID)) goto 1
+ !
+ ! When the DB is fragmented I allow a partial reading checking
+ ! if the fragment exists or not.
+ ! If the fragment does not exist (ierr<0) I return an error code
+ !
+ if (ierr<0.and.read_is_on(ID)) then
+   io_EXCPH_gkkp=-1
+   goto 1
+ endif
+ !
+ ! Excitonic Gkkp (and SE) fragments
+ !=========================
+ !
+ VAR_name="EXCITON_PH_GKKP_Q"//trim(intc(iq))
+ VAR_size=(/2,ph_modes,EXCPH_sum(2),EXCPH_states(2)/)
+ dim_names = [character(schlen) :: 'complex','PH_modes','N_exc_sum','N_exc_states']
+ call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size,SP,dim_names)
+ call io_variable_bulk(ID_frag,1,C3=EXCPH_Gkkp)
+ !
+ VAR_name="EXCITON_PH_GKKP_SQUARED_Q"//trim(intc(iq))
+ VAR_size_sq=(/ph_modes,EXCPH_sum(2),EXCPH_states(2)/)
+ dim_names_sq = [character(schlen) :: 'PH_modes','N_exc_sum','N_exc_states']
+ call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size_sq,SP,dim_names_sq)
+ call io_variable_bulk(ID_frag,1,R3=EXCPH_Gkkp_sq)
+ !
+ if (l_exc_SE) then
+   !
+   VAR_name="EXCITON_SATELLITE_ENERGY_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size,SP,dim_names)
+   call io_variable_bulk(ID_frag,1,C3=BS_Sat_E)
+   !
+   VAR_name="EXCITON_SATELLITE_ENERGY_phonon_absorption_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size,SP,dim_names)
+   call io_variable_bulk(ID_frag,1,C3=BS_Sat_E_PH_abs)
+   !
+   VAR_name="EXCITON_SATELLITE_WEIGHT_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size_sq,SP,dim_names_sq)
+   call io_variable_bulk(ID_frag,1,R3=BS_Sat_WEIGHT)
+   !
+   VAR_name="EXCITON_SATELLITE_WEIGHT_phonon_absorption_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size_sq,SP,dim_names_sq)
+   call io_variable_bulk(ID_frag,1,R3=BS_Sat_WEIGHT_PH_abs)
+   !
+   VAR_name="PHONON_ENERGY_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,(/VAR_size(2)/),SP,(/dim_names(2)/))
+   call io_variable_bulk(ID_frag,1,R1=ph_EXCPH_E)
+   !
+   VAR_name="EXCITON_BS_ENERGY_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,(/VAR_size(1),VAR_size(3)/),SP,(/dim_names(1),dim_names(3)/))
+   call io_variable_bulk(ID_frag,1,C1=BS_EXCPH_E)
+   !
+ endif
+ !
+1 call io_fragment_disconnect(ID,ID_frag)
+ !
+ call io_disconnect(ID=ID)
+ !
+ call timing('io_EXCPH_gkkp',OPR='stop')
+ !
+ contains
+     integer function kind_to_integers(SCATTERING_TYPE)
+         !
+         integer :: l2int
+         character(schlen), intent(in) :: SCATTERING_TYPE
+         !
+         if (trim(SCATTERING_TYPE)=='TT') l2int=1
+         !
+         if (trim(SCATTERING_TYPE)=='LT') l2int=2
+         !
+         if (trim(SCATTERING_TYPE)=='TL') l2int=3
+         !
+         if (trim(SCATTERING_TYPE)=='LL') l2int=4
+         !
+         if (trim(SCATTERING_TYPE)=='IL') l2int=5
+         !
+         if (trim(SCATTERING_TYPE)=='IT') l2int=6
+         !
+         kind_to_integers=l2int
+         !
+     end function
+     !
+end function
diff --git a/src/io/io_EXCPH_gkkp.F b/src/io/io_EXCPH_gkkp.F
new file mode 100644
index 0000000000..2928fc5b2f
--- /dev/null
+++ b/src/io/io_EXCPH_gkkp.F
@@ -0,0 +1,156 @@
+!
+!        Copyright (C) 2000-2019 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM
+!
+! headers
+!
+#include<y_memory.h>
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+integer function io_EXCPH_gkkp(ID)
+ !
+ use pars,           ONLY:SP,schlen
+ use IO_m,           ONLY:io_sec,read_is_on,frag_ELPH,io_status
+ use IO_int,         ONLY:io_connect,io_disconnect,io_elemental,io_bulk,io_header,&
+&                         def_variable_bulk,io_variable_bulk,io_fragment
+ use EXCPH,          ONLY:EXCPH_Gkkp,EXCPH_Gkkp_sq,EXCPH_sum,EXCPH_states,&
+&                         BS_EXCPH_E,ph_EXCPH_E,EXCPH_q,L_kind_in,L_kind_out,Sat_alloc, &
+&                         EXCPH_gkkp_alloc
+ use IO_int,         ONLY:io_variable_elemental,def_variable_elemental
+ use ELPH,           ONLY:ph_modes
+ use stderr,         ONLY:intc
+ use R_lattice,      ONLY:nqbz
+ use timing_m,       ONLY:timing
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer      ::ID
+ !
+ !Work Space
+ !
+ integer                :: iq,ierr,VAR_size(4),VAR_size_sq(3),ID_frag
+ character(schlen)      :: db_name,VAR_name,dim_names(4),dim_names_sq(3)
+ !
+ call timing('io_EXCPH_gkkp',OPR='start')
+ !
+ ID_frag=0
+ io_EXCPH_gkkp=0
+ !
+ ! DB name
+ !
+ db_name='excph_gkkp'
+ !
+ io_EXCPH_gkkp=io_connect(desc=trim(db_name),type=2,ID=ID,FORCE_READ_MODE=(.not.any((/io_sec(ID,:)==1/))).and.frag_ELPH)
+ if (io_EXCPH_gkkp/=0) goto 2
+ !
+ if (any((/io_sec(ID,:)==1/))) then
+   !
+   io_EXCPH_gkkp=io_header(ID,R_LATT=.true.,KPTS=.true.,FRAG=frag_ELPH)
+   !
+   call io_elemental(ID,VAR="PARS",VAR_SZ=7,MENU=0)
+   !
+   call io_elemental(ID,&
+&       VAR=" Number of qpoints (BZ)   :",I0=nqbz,CHECK=.true.,OP=(/"=="/))
+   !
+   call io_elemental(ID,&
+&       VAR=" Exciton states           :",I1=EXCPH_states,CHECK=.true.,OP=(/"==","=="/))
+   !
+   call io_elemental(ID,&
+&       VAR=" Exciton sum (scattered)  :",I1=EXCPH_sum,CHECK=.true.,OP=(/"==","=="/))
+   !
+   call io_elemental(ID,&
+&       VAR=" Phonon modes            :",I0=ph_modes,CHECK=.true.,OP=(/"=="/))
+   !
+   call io_elemental(ID,VAR="",VAR_SZ=0,MENU=1)
+   !
+   io_EXCPH_gkkp=io_status(ID)
+   if (io_EXCPH_gkkp/=0) goto 1
+   !
+   call io_bulk(ID,VAR="EXCITON_STATES",VAR_SZ=(/2/))
+   call io_bulk(ID,I1=EXCPH_states)
+   !
+   call io_bulk(ID,VAR="EXCITON_SUM",VAR_SZ=(/2/))
+   call io_bulk(ID,I1=EXCPH_sum)
+   !
+   call io_bulk(ID,VAR="PHONON_MODES",VAR_SZ=(/1/))
+   call io_bulk(ID,I0=ph_modes)
+   !
+   call def_variable_elemental(ID,"L_kind_in",1,0,0)
+   call io_variable_elemental(ID,VAR="[K] L_kind in",CH0=L_kind_in,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(ID,"L_kind_out",1,0,0)
+   call io_variable_elemental(ID,VAR="[K] L_kind out",CH0=L_kind_out,CHECK=.true.,OP=(/"=="/))
+   !
+ endif
+ !
+ io_EXCPH_gkkp=io_status(ID)
+ !
+ iq=maxval(io_sec(ID,:))-1
+ if (iq<=0) goto 2
+ !
+ ! Fragmentation
+ !
+ call io_fragment(ID,ID_frag,i_fragment=iq,ierr=ierr)
+ if (ierr<0.and.read_is_on(ID)) goto 1
+ !
+ ! When the DB is fragmented I allow a partial reading checking
+ ! if the fragment exists or not.
+ ! If the fragment does not exist (ierr<0) I return an error code
+ !
+ if (ierr<0.and.read_is_on(ID)) then
+   io_EXCPH_gkkp=-1
+   goto 1
+ endif
+ !
+ ! Excitonic Gkkp (and SE) fragments
+ !=========================
+ !
+ if (read_is_on(ID).and..not.allocated(EXCPH_gkkp)) call EXCPH_gkkp_alloc()
+ !
+ if (.not. read_is_on(ID)) then
+   VAR_name="EXCPH_Q"//trim(intc(iq))
+   call def_variable_bulk(ID_frag,trim(VAR_name),1,(/3/),SP)
+   call io_variable_bulk(ID_frag,1,R1=EXCPH_q(iq,:))
+ endif
+ !
+ VAR_name="EXCITON_PH_GKKP_Q"//trim(intc(iq))
+ VAR_size=(/2,ph_modes,EXCPH_sum(2),EXCPH_states(2)/)
+ dim_names = [character(schlen) :: 'complex','PH_modes','N_exc_sum','N_exc_states']
+ call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size,SP,dim_names)
+ call io_variable_bulk(ID_frag,1,C3=EXCPH_gkkp)
+ !
+ VAR_name="EXCITON_PH_GKKP_SQUARED_Q"//trim(intc(iq))
+ VAR_size_sq=(/ph_modes,EXCPH_sum(2),EXCPH_states(2)/)
+ dim_names_sq = [character(schlen) :: 'PH_modes','N_exc_sum','N_exc_states']
+ call def_variable_bulk(ID_frag,trim(VAR_name),1,VAR_size_sq,SP,dim_names_sq)
+ call io_variable_bulk(ID_frag,1,R3=EXCPH_gkkp_sq)
+ !
+ io_EXCPH_gkkp=io_status(ID)
+ !
+1 call io_fragment_disconnect(ID,ID_frag)
+ !
+2 call io_disconnect(ID=ID)
+ !
+ call timing('io_EXCPH_gkkp',OPR='stop')
+ !
+end function
diff --git a/src/io/io_E_SOC_map.F b/src/io/io_E_SOC_map.F
index a911870cca..2c86403f90 100644
--- a/src/io/io_E_SOC_map.F
+++ b/src/io/io_E_SOC_map.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_E_SOC_map(En,kpt,ID)
  !
  use electrons,      ONLY:levels
  use R_lattice,      ONLY:bz_samp
  use IO_m,           ONLY:read_is_on,write_is_on,file_is_present
  use IO_int,         ONLY:io_connect,io_disconnect,io_elemental,io_bulk,io_header
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) :: En
  type(bz_samp):: kpt
diff --git a/src/io/io_GROT.F b/src/io/io_GROT.F
index 43ee06c9a0..eed126e027 100644
--- a/src/io/io_GROT.F
+++ b/src/io/io_GROT.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 integer function io_GROT(io_db)
  !
@@ -16,8 +20,9 @@ integer function io_GROT(io_db)
  use IO_int,         ONLY:io_connect,io_disconnect,io_elemental,&
 &                         io_bulk,io_header,ver_is_gt_or_eq
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer :: io_db,nsym_db
  !
diff --git a/src/io/io_HF_and_locXC.F b/src/io/io_HF_and_locXC.F
index 5d64da9292..7f8b4f2abd 100644
--- a/src/io/io_HF_and_locXC.F
+++ b/src/io/io_HF_and_locXC.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_HF_and_locXC(ID)
  !
  use pars,           ONLY:SP,schlen
@@ -18,7 +22,9 @@ integer function io_HF_and_locXC(ID)
  use IO_int,         ONLY:io_connect,io_disconnect,io_header,io_elemental,io_bulk,&
 &                         ver_is_gt_or_eq
  use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer :: ID
  !
  ! Work Space
diff --git a/src/io/io_KB_abinit.F b/src/io/io_KB_abinit.F
index 4c1514f794..e71beb00e3 100644
--- a/src/io/io_KB_abinit.F
+++ b/src/io/io_KB_abinit.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_KB_abinit(ID)
  !
  use pars,             ONLY:SP
@@ -19,7 +23,9 @@ integer function io_KB_abinit(ID)
  use IO_int,           ONLY:io_connect,io_disconnect,io_fragment, &
 &                           io_elemental,io_bulk,io_header,ver_is_gt_or_eq
  use timing_m,         ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer,       intent(in) :: ID
  !
diff --git a/src/io/io_KB_pwscf.F b/src/io/io_KB_pwscf.F
index 3024548612..1e0641250d 100644
--- a/src/io/io_KB_pwscf.F
+++ b/src/io/io_KB_pwscf.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_KB_pwscf(ID)
  !
  use pars,             ONLY:SP
@@ -19,8 +23,9 @@ integer function io_KB_pwscf(ID)
 &                           pp_n_l_comp,pp_kbs,pp_kb,pp_kbd, &
 &                           PP_alloc_pwscf
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,intent(in) :: ID
  ! 
diff --git a/src/io/io_KB_real_space.F b/src/io/io_KB_real_space.F
index c21bc22c81..226d72b7f3 100644
--- a/src/io/io_KB_real_space.F
+++ b/src/io/io_KB_real_space.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_KB_real_space(ID,kbv_real_space,kbv_real_space_table)
  !
  use pars,             ONLY:SP
@@ -16,8 +20,9 @@ integer function io_KB_real_space(ID,kbv_real_space,kbv_real_space_table)
  use IO_m,             ONLY:io_sec,frag_WF
  use IO_int,           ONLY:io_connect,io_disconnect,io_fragment, &
 &                           io_elemental,io_bulk,io_header
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)    :: ID
  integer,       intent(inout) :: kbv_real_space_table(pp_kbv_dim_yambo)
diff --git a/src/io/io_MPA.F b/src/io/io_MPA.F
index f6f226284d..d76d4fde2b 100644
--- a/src/io/io_MPA.F
+++ b/src/io/io_MPA.F
@@ -5,9 +5,13 @@
 !
 ! Authors (see AUTHORS file for details): DALV AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_MPA(X,Xw,ID)
  !
- use pars,          ONLY:SP,schlen
+ use pars,          ONLY:SP,IP_YIO,schlen
  use X_m,           ONLY:X_t,X_mat,X_RLcomp_ig,MPA_desc
  use frequency,     ONLY:w_samp
  use IO_int,        ONLY:io_connect,io_disconnect,io_elemental,io_header,io_fragment,&
@@ -18,8 +22,9 @@ integer function io_MPA(X,Xw,ID)
  use stderr,        ONLY:intc
  use timing_m,      ONLY:timing
  use mpa_m,         ONLY:MPA_freqs,MPA_alloc,MPA_E_par,MPA_R_par
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(X_t)   :: X
  type(w_samp):: Xw
@@ -27,7 +32,7 @@ integer function io_MPA(X,Xw,ID)
  !
  !Work Space
  !
- integer           :: sec_size,i1,iq,io_com_save,io_err,ID_frag
+ integer           :: i1,iq,io_com_save,io_err,ID_frag
  logical           :: different_db_RL_order
  character(schlen) :: ch
  complex(SP),allocatable :: X_on_disk(:,:)
@@ -92,20 +97,18 @@ integer function io_MPA(X,Xw,ID)
  !  type(X_t)    ::X(5)  x s d p m
  !
  if (any((/io_sec(ID,:)==2*iq/)) ) then
-   !
-   sec_size=6
-   !
-   ch="FREQ_PARS_sec_iq"//trim(intc(iq))
    !
    ! Important: once the file has been opend in PAR mode all the CPUs belonging to the COMM
    !            must go through the definitions of the variables although only one is writing
    !
    io_com_save=io_com(ID_frag)
    if (iq>2) io_com(ID_frag)=NONE
-   call def_variable_elemental(ID_frag,trim(ch),sec_size,SP,1,par_io_kind='independent')
    !
+   call def_variable_elemental(ID_frag,"Qpt_index",1,IP_YIO,1,par_io_kind='independent')
    call io_variable_elemental(ID_frag,  VAR=" :: Current Q-pt index     :",I0=iq)
-   call io_variable_elemental(ID_frag,  VAR=" :: Number of poles  :",I0=Xw%n_freqs/2,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(ID_frag,"N_freqs",1,IP_YIO,1,par_io_kind='independent')
+   call io_variable_elemental(ID_frag,  VAR=" :: Number of frequencies  :",I0=Xw%n_freqs,CHECK=.true.,OP=(/"=="/))
    !
    call def_variable_elemental(ID_frag,"",0,0,1)
    io_MPA=io_status(ID_frag)
@@ -129,35 +132,15 @@ integer function io_MPA(X,Xw,ID)
    !
    ch="MPA_E_Q_"//trim(intc(iq))
    !
-   if(write_is_on(ID)) then
-#if defined _PAR_IO
-     call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP,par_io_kind='collective')
-#else
-     call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP,par_io_kind='independent')
-#endif
-     call  io_variable_bulk(ID_frag,1,C3=MPA_E_par(1)%blc(:,:,:Xw%n_freqs),IPOS=(/1,MPA_E_par(1)%rows(1),MPA_E_par(1)%cols(1),1/))
-     !
-   else if(read_is_on(ID)) then
-     call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP)
-     call  io_variable_bulk(ID_frag,1,C3=MPA_E_par(1)%blc(:,:,:Xw%n_freqs),IPOS=(/1,MPA_E_par(1)%rows(1),MPA_E_par(1)%cols(1),1/))
-   endif
+   call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,X%mpa_npoles/),SP,par_io_kind='collective')
+   call  io_variable_bulk(ID_frag,1,C3=MPA_E_par(1)%blc(:,:,:X%mpa_npoles),IPOS=(/1,MPA_E_par(1)%rows(1),MPA_E_par(1)%cols(1),1/))
    !
    ! residues @iq
    !
    ch="MPA_R_Q_"//trim(intc(iq))
    !
-   if(write_is_on(ID)) then
-#if defined _PAR_IO
-     call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP,par_io_kind='collective')
-#else
-     call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP,par_io_kind='independent')
-#endif
-     call  io_variable_bulk(ID_frag,1,C3=MPA_R_par(1)%blc(:,:,:Xw%n_freqs),IPOS=(/1,MPA_R_par(1)%rows(1),MPA_R_par(1)%cols(1),1/))
-     !
-   else if(read_is_on(ID)) then
-     call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP)
-     call  io_variable_bulk(ID_frag,1,C3=MPA_R_par(1)%blc(:,:,:Xw%n_freqs),IPOS=(/1,MPA_R_par(1)%rows(1),MPA_R_par(1)%cols(1),1/))
-   endif
+   call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,X%mpa_npoles/),SP,par_io_kind='collective')
+   call  io_variable_bulk(ID_frag,1,C3=MPA_R_par(1)%blc(:,:,:X%mpa_npoles),IPOS=(/1,MPA_R_par(1)%rows(1),MPA_R_par(1)%cols(1),1/))
    !
    if (read_is_on(ID) .and. different_db_RL_order) then 
      call error('[io_MPA] different_db_RL_order    not implemented')
@@ -189,12 +172,13 @@ integer function io_MPA(X,Xw,ID)
 !
 integer function variables_MPA(ID,local_desc,X)
  !
- use pars,          ONLY:schlen
+ use pars,          ONLY:schlen,IP_YIO
  use descriptors,   ONLY:IO_desc,IO_desc_reset
  use X_m,           ONLY:X_t
- use IO_int,        ONLY:io_elemental
+ use IO_int,        ONLY:def_variable_elemental,io_variable_elemental
  use IO_m,          ONLY:io_status
- implicit none
+ !
+ implicit  none
  !
  integer            ::ID
  type(IO_desc)      ::local_desc
@@ -205,13 +189,13 @@ integer function variables_MPA(ID,local_desc,X)
  character(schlen)   ::MPA_label
  !
  call IO_desc_reset(local_desc)
- MPA_label='MPA_'
  !
- call io_elemental(ID,VAR=trim(MPA_label)//"PARS_1",VAR_SZ=2,MENU=0)
- call io_elemental(ID,DB_I0=X%ng_db,&
-&     VAR="X matrix size         :",I0=X%ng,CHECK=.true.,OP=(/"<="/),DESCRIPTOR=local_desc)
- call io_elemental(ID,&
-&     VAR="MPA npoles            :",I0=X%mpa_npoles,MENU=0,DESCRIPTOR=local_desc)
+ call def_variable_elemental(ID,"X_ng",1,IP_YIO,1)
+ call io_variable_elemental(ID,DB_I0=X%ng_db,VAR="X matrix size :",I0=X%ng,&
+ &                           CHECK=.true.,OP=(/"<="/),DESCRIPTOR=local_desc)
+ !
+ call def_variable_elemental(ID,"MPA_npoles",1,IP_YIO,1)
+ call io_variable_elemental(ID,VAR="MPA number of npoles :",I0=X%mpa_npoles,DESCRIPTOR=local_desc)
  !
  variables_MPA=io_status(ID)
  !
diff --git a/src/io/io_NL.F b/src/io/io_NL.F
index 39ef184d39..60a7cba86d 100644
--- a/src/io/io_NL.F
+++ b/src/io/io_NL.F
@@ -5,13 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_NL(what,ID)
  !
  use pars,          ONLY:schlen,SP,IP,CR_YIO,IP_YIO
  use units,         ONLY:HA2EV, AU2KWCMm2, AUT2FS
- use nl_optics,     ONLY:E_tot_t,E_ext_t,E_ks_t,NL_P_t, &
-&                        n_frequencies,NL_J_t
- use real_time,     ONLY:NE_steps
+ use nl_optics,     ONLY:E_tot_t,E_ext_t,E_ks_t,NL_P_t,NL_J_t, &
+&                        n_frequencies,l_pump_and_probe
+ use real_time,     ONLY:NE_steps,RT_step
  use fields,        ONLY:n_ext_fields
  use RT_control,    ONLY:OBS_RT_IO_t
  use fields,        ONLY:Efield
@@ -23,8 +27,9 @@ integer function io_NL(what,ID)
 #if defined _TIMING
  use timing_m,      ONLY:timing
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,      intent(in) :: ID
  character(*), intent(in) :: what 
@@ -35,7 +40,8 @@ integer function io_NL(what,ID)
  !
  integer              :: i_fragment,ID_frag
  character(schlen)    :: VAR_name
- integer              :: i_Prb
+ integer              :: i_Prb,i_time,N_IO,last_point_IO
+ real(SP)             :: Time_IO(NE_steps)
  !
 #if defined _TIMING
  call timing('io_NL',OPR='start')
@@ -60,18 +66,27 @@ integer function io_NL(what,ID)
    ! Time variable (just for compatibility reasons)
    !
    if(write_is_on(ID)) then
-     OBS_RT_IO_t%N         =NE_steps
-     OBS_RT_IO_t%last_point=NE_steps
+     N_IO         =NE_steps
+     last_point_IO=NE_steps
+     do i_time=1,NE_steps
+       Time_IO(i_time)=real(i_time-1,SP)*RT_step
+     enddo
    endif
    !
    call io_elemental(ID,VAR="IO_TIME_N_points",VAR_SZ=1,MENU=0) 
-   call io_elemental(ID,I0=OBS_RT_IO_t%N)
+   call io_elemental(ID,I0=N_IO)
    call io_elemental(ID,VAR="IO_TIME_LAST_POINT",VAR_SZ=1,MENU=0) 
-   call io_elemental(ID,I0=OBS_RT_IO_t%last_point)
+   call io_elemental(ID,I0=last_point_IO)
    call io_elemental(ID,VAR="",VAR_SZ=0,MENU=0)
-   if(read_is_on(ID).and..not.allocated(OBS_RT_IO_t%Time)) allocate(OBS_RT_IO_t%Time(NE_steps))
-   call io_bulk(ID,VAR="IO_TIME_points",VAR_SZ=(/OBS_RT_IO_t%N/))
-   call io_bulk(ID,R1=OBS_RT_IO_t%Time)
+   call io_bulk(ID,VAR="IO_TIME_points",VAR_SZ=(/N_IO/))
+   call io_bulk(ID,R1=Time_IO)
+   !
+   if(read_is_on(ID)) then
+     OBS_RT_IO_t%N         =N_IO
+     OBS_RT_IO_t%last_point=last_point_IO
+     if(.not.allocated(OBS_RT_IO_t%Time)) allocate(OBS_RT_IO_t%Time(NE_steps))
+     OBS_RT_IO_t%Time=Time_IO
+   endif
    !
    if (io_NL/=0) goto 1
    !
@@ -116,17 +131,9 @@ integer function io_NL(what,ID)
    call io_variable_elemental(ID_frag,VAR="[FIELDs] Damping           ",TERMINATOR="[fs]",&
 &         R0=Efield(i_Prb)%FWHM,CHECK=.true.,OP=(/"=="/))
 
-   call def_variable_elemental(ID_frag,"Field_Freq_range_"//trim(intc(i_Prb)),2,SP,0)
-   call io_variable_elemental(ID_frag,VAR="[FIELDs] Frequency range   ",TERMINATOR="[eV]",&
-&         R1=Efield(i_Prb)%frequency,CHECK=.true.,OP=(/"==","=="/),UNIT=HA2EV)
-
-   call def_variable_elemental(ID_frag,"Field_Freq_steps_"//trim(intc(i_Prb)),1,IP_YIO,0)
-   call io_variable_elemental(ID_frag,VAR="[FIELDs] Frequency steps", &
-&   I0=Efield(i_Prb)%n_frequencies,CHECK=.true.,OP=(/"=="/))
-
-   call def_variable_elemental(ID_frag,"Field_Freq_step_"//trim(intc(i_Prb)),1,SP,0)
-   call io_variable_elemental(ID_frag,VAR="[FIELDs] Frequency step    ",TERMINATOR="[eV]",&
-&         R0=Efield(i_Prb)%W_step,CHECK=.true.,OP=(/"=="/),UNIT=HA2EV)
+   call def_variable_elemental(ID_frag,"Field_Freq_"//trim(intc(i_Prb)),1,SP,0)
+   call io_variable_elemental(ID_frag,VAR="[FIELDs] Frequency    ",TERMINATOR="[eV]",&
+&         R0=Efield(i_Prb)%frequency,CHECK=.true.,OP=(/"=="/),UNIT=HA2EV)
 
    call def_variable_elemental(ID_frag,"Field_Initial_time_"//trim(intc(i_Prb)),1,SP,0)
    call io_variable_elemental(ID_frag,VAR=" [FIELDs] Initial time      ",TERMINATOR="[fs]",&
@@ -134,6 +141,8 @@ integer function io_NL(what,ID)
 
  enddo
  !
+ call def_variable_elemental(ID,VAR="",VAR_SZ=0,VAR_KIND=CR_YIO,MENU=1) 
+ !
  ! Polarization
  ! 
 2 write (VAR_name,'(a,i4.4)') 'NL_P_freq_',i_fragment
diff --git a/src/io/io_OSCLL.F b/src/io/io_OSCLL.F
index dd069b708d..9e01b8ead1 100644
--- a/src/io/io_OSCLL.F
+++ b/src/io/io_OSCLL.F
@@ -4,11 +4,15 @@
 ! Copyright (C) 2018 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): CA
+!
+! headers
+!
+#include<y_memory.h>
 ! 
 integer function io_OSCLL(q,ID)
  !
  use pars,          ONLY:SP,schlen,IP_YIO
- use electrons,     ONLY:levels
+ use electrons,     ONLY:levels,n_sp_pol
  use matrix_operate,ONLY:mat_c2r,mat_r2c
  use R_lattice,     ONLY:nXkibz,bz_samp
  use QP_m,          ONLY:QP_nk
@@ -22,8 +26,9 @@ integer function io_OSCLL(q,ID)
 #if defined _TIMING
  use timing_m,      ONLY:timing
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer     ::ID
  type(bz_samp), intent(in) :: q
@@ -83,7 +88,7 @@ integer function io_OSCLL(q,ID)
  !
  if (read_is_on(ID)) then
    if(.not.allocated(OSCLL)) then
-     YAMBO_ALLOC(OSCLL,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),PAR_Xk_nibz,q%nbz,ng_oscll))
+     YAMBO_ALLOC(OSCLL,(COLL_bands(1):COLL_bands(2),COLL_bands(1):COLL_bands(2),PAR_Xk_nibz,n_sp_pol,q%nbz,ng_oscll))
    endif
  endif
  !
@@ -107,7 +112,7 @@ integer function io_OSCLL(q,ID)
      io_OSCLL=io_status(ID)
      if (io_OSCLL/=0) goto 1
      !
-     if (read_is_on(ID)) OSCLL(ib,:,ik_mem,:,ig)=disk_data
+     if (read_is_on(ID)) OSCLL(ib,:,ik_mem,i_sp_pol,:,ig)=disk_data
      !
    enddo
  enddo
diff --git a/src/io/io_PH.F b/src/io/io_PH.F
index d370830061..3e60b81301 100644
--- a/src/io/io_PH.F
+++ b/src/io/io_PH.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_PH(ID)
  !
  use units,               ONLY:HA2EV
@@ -20,8 +24,9 @@ integer function io_PH(ID)
  use IO_m,                ONLY:io_sec,io_status,read_is_on,DB_is_OK
  use RT_lifetimes,        ONLY:RT_PE_life,RT_lifetime_alloc,Life_MEM_steps
  use descriptors,         ONLY:IO_desc_reset
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer           ::ID
  character(schlen) ::QP_solver_disk
diff --git a/src/io/io_QINDX.F b/src/io/io_QINDX.F
index a7dbe3c3ad..4b32089911 100644
--- a/src/io/io_QINDX.F
+++ b/src/io/io_QINDX.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 !> @brief Read and write scattering indexes
 !
 ! io_sec=1   general parameters   
@@ -14,9 +18,9 @@
 ! io_sec=5   qindx_B          
 ! io_Sec=6   qindx_C
 !
-integer function io_QINDX(Xk,q,io_db)
+integer function io_QINDX(Xk,q,io_db,convention)
  !
- use pars,           ONLY:SP,IP,LP,IP_YIO,LP_YIO,CR_YIO
+ use pars,           ONLY:SP,IP,LP,IP_YIO,LP_YIO,CR_YIO,schlen
  use R_lattice,      ONLY:nqibz,nqbz,qindx_X,qindx_B,qindx_S,qindx_C,&
 &                         bse_scattering,QP_states_k,nXkibz,qindx_alloc,&
 &                         Xk_grid_is_uniform,bz_samp,nXkbz,coll_scattering,q_pt,&
@@ -29,19 +33,27 @@ integer function io_QINDX(Xk,q,io_db)
 &                         ver_is_gt_or_eq,io_fragment,def_variable_bulk,io_variable_bulk
  use timing_m,       ONLY:timing
  use parallel_m,     ONLY:PAR_K_scheme
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
- type(bz_samp)::q,Xk
- integer      ::io_db,i3,i2,i2_dn,i2_up,i2_mem,i2_step,IO_size
+ type(bz_samp), intent(inout) :: q
+ type(bz_samp), intent(in)    :: Xk
+ integer      , intent(in)    :: io_db
+ character(*) , intent(in)    :: convention
  !
  ! Work Space
  !
- integer      :: ioScatt,id_frag
+ integer    :: ioScatt,id_frag
+ integer    :: i3,i2,i2_dn,i2_up,i2_mem,i2_step,IO_size
+ character(schlen) :: filename
  !
  call timing('io_QINDX',OPR='start')
  !
- io_QINDX=io_connect(desc='kindx',type=1,ID=io_db)
+ if(trim(convention)=="minus_q" ) filename="kindx"     ! standard yambo convention
+ if(trim(convention)=="plus_q"  ) filename="kindx_pq"  ! QE convention
+ !
+ io_QINDX=io_connect(desc=trim(filename),type=1,ID=io_db)
  if (io_QINDX/=0) goto 1
  !
  if (any((/io_sec(io_db,:)==1/))) then
@@ -123,7 +135,7 @@ integer function io_QINDX(Xk,q,io_db)
      !
      ! Fragmentation (1): eXchange indx
      !
-     io_extension(io_db)='kindx'
+     io_extension(io_db)=trim(filename)
      call io_fragment(io_db,id_frag,i_fragment=1)
      call def_variable_bulk(id_frag,"Qindx",1,shape(qindx_X),IP_YIO)
      call  io_variable_bulk(id_frag,1,I3=qindx_X)
@@ -138,7 +150,7 @@ integer function io_QINDX(Xk,q,io_db)
      !
      ! Fragmentation (2): Self-Energy indx
      !
-     io_extension(io_db)='kindx'
+     io_extension(io_db)=trim(filename)
      call io_fragment(io_db,id_frag,i_fragment=2)
      call def_variable_bulk(id_frag,"Sindx",2,shape(qindx_S),IP_YIO)
      call  io_variable_bulk(id_frag,2,I3=qindx_S)
@@ -153,7 +165,7 @@ integer function io_QINDX(Xk,q,io_db)
      !
      ! Fragmentation (3): BSE indx
      !
-     io_extension(io_db)='kindx'
+     io_extension(io_db)=trim(filename)
      call io_fragment(io_db,id_frag,i_fragment=3)
      call def_variable_bulk(id_frag,"Bindx",3,(/nXkbz,nXkbz,2/),IP_YIO,par_io_kind='independent')
      ! 46341 is the sqare root of the maximum integer 2147483647
@@ -193,7 +205,7 @@ integer function io_QINDX(Xk,q,io_db)
      !
      ! Fragmentation (4): COLL indx
      !
-     io_extension(io_db)='kindx'
+     io_extension(io_db)=trim(filename)
      call io_fragment(io_db,id_frag,i_fragment=4)
      call def_variable_bulk(id_frag,"Cindx",4,shape(qindx_C),IP_YIO)
      call  io_variable_bulk(id_frag,4,I3=qindx_C)
@@ -207,9 +219,9 @@ integer function io_QINDX(Xk,q,io_db)
  !
 end function io_QINDX
 !
-integer function qindx_B_init(ID,ID_frag)
+integer function qindx_B_init(ID,ID_frag,convention)
  !
- use pars,       ONLY:IP,IP_YIO
+ use pars,       ONLY:IP,IP_YIO,schlen
  use R_lattice,  ONLY:nXkbz
  use IO_m,       ONLY:OP_RD,RD,io_status
  use IO_int,     ONLY:io_control,io_connect,def_variable_bulk,io_fragment
@@ -217,9 +229,15 @@ integer function qindx_B_init(ID,ID_frag)
  implicit none
  !
  integer  :: ID,ID_frag
+ character(*) , intent(in)    :: convention
+ !
+ character(schlen) :: filename
+ !
+ if(trim(convention)=="minus_q" ) filename="kindx"     ! standard yambo convention
+ if(trim(convention)=="plus_q"  ) filename="kindx_pq"  ! QE convention
  !
  call io_control(ACTION=OP_RD,ID=ID)
- qindx_B_init=io_connect(desc='kindx',type=1,ID=ID)
+ qindx_B_init=io_connect(desc=trim(filename),type=1,ID=ID)
  !
  call io_fragment(ID,ID_frag,i_fragment=3)
  call def_variable_bulk(ID_frag,"Bindx",3,(/nXkbz,nXkbz,2/),IP_YIO,par_io_kind='independent')
diff --git a/src/io/io_QP_and_GF.F b/src/io/io_QP_and_GF.F
index d9e97ef57b..8e8bd609a9 100644
--- a/src/io/io_QP_and_GF.F
+++ b/src/io/io_QP_and_GF.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_QP_and_GF(what,qp,ID)
  use pars,           ONLY:SP,schlen
  use D_lattice,      ONLY:alat
@@ -23,8 +27,9 @@ integer function io_QP_and_GF(what,qp,ID)
 #if _ELPH
  use ELPH,           ONLY:ELPH_desc,use_PH_DbGd
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(QP_t)  ::qp
  character(*)::what ! 'QP'/'G'/filename
diff --git a/src/io/io_RIM.F b/src/io/io_RIM.F
index 4c0653c87a..421fd6076e 100644
--- a/src/io/io_RIM.F
+++ b/src/io/io_RIM.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include <y_memory.h>
+!
 integer function io_RIM(ID)
  !
  use pars,           ONLY:SP
@@ -13,8 +17,9 @@ integer function io_RIM(ID)
 &                         nqibz
  use IO_m,           ONLY:io_sec,read_is_on,io_status,write_is_on
  use IO_int,         ONLY:io_connect,io_disconnect,io_header,io_elemental,io_bulk,ver_is_gt_or_eq
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  integer :: ID
  ! 
diff --git a/src/io/io_RIM_W.F b/src/io/io_RIM_W.F
index ea5decabcf..ac6c58a93b 100644
--- a/src/io/io_RIM_W.F
+++ b/src/io/io_RIM_W.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include <y_memory.h>
+!
 integer function io_RIM_W(ID,mode,Xw)
  !
  use pars,           ONLY:SP
@@ -15,8 +19,9 @@ integer function io_RIM_W(ID,mode,Xw)
  use IO_int,         ONLY:io_connect,io_disconnect,io_header,&
 &                         io_elemental,io_bulk
  use frequency,      ONLY:w_samp
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  integer :: ID
  type(w_samp) :: Xw
diff --git a/src/io/io_RT_components.F b/src/io/io_RT_components.F
index 569ae6a7b4..3c24f900a6 100644
--- a/src/io/io_RT_components.F
+++ b/src/io/io_RT_components.F
@@ -55,7 +55,7 @@ integer function io_RT_components(what,ID)
  use IO_m,           ONLY:io_sec,read_is_on,write_is_on,io_file
  use real_time,      ONLY:NE_i_time,RT_step,RT_dyn_step,RT_carriers,RT_nbands,G_MEM_steps
  use RT_control,     ONLY:RT_carriers_object,CARR_RT_IO_t,Gless_RESTART_RT_IO_t,OBS_RT_IO_t,CACHE_OBS_steps_now,&
-&                         CACHE_OBS_steps,RT_DB_file_name,Gless_RT_IO_t,&
+&                         Vbands_RT_IO_t,CACHE_OBS_steps,RT_DB_file_name,Gless_RT_IO_t,SAVE_Vb_history,SAVE_Vb_floquet,&
 &                         RT_DB_name,RT_define_the_IO_DB_identifier,RT_do_IO,RT_current_DB_identifier,RT_return_db_ID
 #if defined _PAR_IO
 #endif
@@ -73,7 +73,7 @@ integer function io_RT_components(what,ID)
  !
  ! Work Space
  character(schlen)     ::db_name
- integer               ::ik,db_kind,T_size,G_IO_steps,ID_frag
+ integer               ::ik,db_kind,T_size,G_IO_steps
  logical               ::loading_carr_before_neq
  integer, external     ::variables_RT_dynamic
  !
@@ -99,7 +99,7 @@ integer function io_RT_components(what,ID)
  !
  ! ACTION
  !========
- if (RT_do_IO("ANY_G")) then 
+ if (RT_do_IO("ANY_G").or.RT_do_IO("ANY_Vb")) then 
    io_RT_components=io_connect(desc="RT_"//trim(RT_DB_name(RT_current_DB_identifier)),type=db_kind,ID=ID)
    if (io_RT_components/=0) goto 1
  else
@@ -149,6 +149,9 @@ integer function io_RT_components(what,ID)
      io_RT_components=variables_RT_dynamic(ID)
      if (io_RT_components/=0) goto 1
    endif
+   !
+   if (RT_do_IO("V_bands"))       &
+&      call io_RT_components_TIME_points(ID,T_size,.FALSE.,(SAVE_Vb_history.or.SAVE_Vb_floquet),Vbands_RT_IO_t)
    !
    if (RT_do_IO("G_lesser"))          call io_RT_components_TIME_points(ID,T_size,.FALSE.,.TRUE.,Gless_RT_IO_t)
    !
@@ -164,15 +167,6 @@ integer function io_RT_components(what,ID)
  !
  ! 3rd SECTION
  !=============
- if (RT_do_IO("K_SECTION")) then
-#if defined _PAR_IO
-   ID_frag=ID
-#else
-   ! === deal with K-fragments ===
-   ik = max(1,maxval(io_sec(ID,:))-2)
-   call io_fragment(ID,ID_frag,i_fragment=ik)
-#endif
- endif
  !
 #if defined _ELPH_ITERATIVE
  !
@@ -182,15 +176,23 @@ integer function io_RT_components(what,ID)
    !
    ! This should be replaced by call THETA_IO
    !
-   call def_variable_bulk(ID_frag,"RT_THETA_K"//trim(intc(ik)),4,(/2,2*nqbz*ph_modes,RT_nbands,RT_nbands/),SP)
-   call  io_variable_bulk(ID_frag,4,C3=THETA_matrix(:,:,:,ik))
+   call def_variable_bulk(ID,"RT_THETA_K"//trim(intc(ik)),4,(/2,2*nqbz*ph_modes,RT_nbands,RT_nbands/),SP)
+   call  io_variable_bulk(ID,4,C3=THETA_matrix(:,:,:,ik))
  endif
  !
 #endif
  !
  ! Lesser Green Function
  !=======================
- if (RT_do_IO("ANY_G")) call io_RT_components_G_lesser(ID_frag,T_size)
+ if (RT_do_IO("ANY_G")) call io_RT_components_G_lesser(ID,T_size)
+ !
+#if defined _NL
+ !
+ ! Vbands
+ !========
+ if (RT_do_IO("ANY_Vb")) call io_RT_components_V_bands(ID,T_size)
+ !
+#endif
  !
  ! Carrier components
  !====================
@@ -212,9 +214,6 @@ integer function io_RT_components(what,ID)
  !
  ! End
  !=====
-#if !defined _PAR_IO
- if (RT_do_IO("K_SECTION")) call io_fragment_disconnect(ID,ID_frag)
-#endif
 1 call io_disconnect(ID=ID)
  call timing('io_RT_components',OPR='stop')
  !
diff --git a/src/io/io_RT_components_G_lesser.F b/src/io/io_RT_components_G_lesser.F
index 527d5cef8c..357ba4cfd7 100644
--- a/src/io/io_RT_components_G_lesser.F
+++ b/src/io/io_RT_components_G_lesser.F
@@ -8,11 +8,12 @@
 subroutine io_RT_components_G_lesser(ID,T_size)
  !
  use pars,           ONLY:SP,schlen
- use drivers,        ONLY:l_real_time
+ use drivers,        ONLY:l_real_time,l_nl_optics
  use R_lattice,      ONLY:nkibz
+ use electrons,      ONLY:n_sp_pol
  use stderr,         ONLY:intc
  use IO_int,         ONLY:def_variable_bulk,io_variable_bulk
- use IO_m,           ONLY:variable_exist,write_is_on,read_is_on,io_sec,io_unit,netcdf_call,nf90_sync
+ use IO_m,           ONLY:variable_exist,write_is_on,read_is_on,io_unit,netcdf_call,nf90_sync
  use hamiltonian,    ONLY:H_rotation
  use RT_control,     ONLY:Gless_RT_IO_t,NE_i_start_time,RT_do_IO
  use real_time,      ONLY:dG_lesser,G_MEM_steps,RT_bands,RT_nbands,RT_nk,l_RT_rotate_DM,&
@@ -28,57 +29,38 @@ subroutine io_RT_components_G_lesser(ID,T_size)
  integer, intent(in)  :: ID,T_size
  !
  logical           :: l_H_rot,l_H_sig,l_H_EQ
- integer           :: ib,ik,ikibz,i_sp_pol,i_mem,i_mem_sorted,G_IO_steps,VAR_SZ(5),IPOS(5),&
+ integer           :: ib,ik,ikibz,i_sp_pol,i_mem,i_mem_sorted,G_IO_steps,VAR_SZ(6),IPOS(6),&
 &                     PAR_G_k_range_ibz(2),k_range(2),ip,TMP_VAR_SZ
- character(schlen) :: dim_names(5),dim_name_ibz,G_var_name,Sigma_var_name,H_var_name,ROT_var_name
- complex(SP), allocatable ::Delta_Sigma(:,:,:,:)
+ character(schlen) :: dim_names(6),dim_name_ibz,G_var_name,Sigma_var_name,H_var_name,ROT_var_name
+ complex(SP), allocatable ::Delta_Sigma(:,:,:,:,:)
  !
  ! Setup
  !=======
  !
  ! Variables SIZE and strings...
  !-------------------------------
- VAR_SZ(1:4)=(/2,RT_nbands,RT_nbands,1/)
- dim_names(1:4)  =[character(schlen) :: 'complex', 'RT_nbands','RT_nbands','k_fragment']
- dim_name_ibz ='k_fragment'
- IPOS(1:4)=1
-#if defined _PAR_IO
- VAR_SZ(4)    = RT_nk
- dim_names(4) ='RT_nk'
- IPOS(4)      =PAR_G_k_range(1)
+ VAR_SZ    =(/2,RT_nbands,RT_nbands,RT_nk,n_sp_pol,G_MEM_steps/)
+ dim_names =[character(schlen) :: 'complex', 'RT_nbands','RT_nbands','RT_nk','n_sp_pol','G_MEM_steps']
+ IPOS      =(/1,1,1,PAR_G_k_range(1),1,1/)
+ !
  dim_name_ibz ='nkibz'
-#endif
- VAR_SZ(5)    = G_MEM_steps
- dim_names(5) ='G_MEM_steps'
+ !
  if (RT_do_IO("G_lesser_K_section")) then
-   VAR_SZ(5)    = NF90_UNLIMITED
-   dim_names(5) ='G_IO_time_step'
+   VAR_SZ(6)    = NF90_UNLIMITED
+   dim_names(6) ='G_IO_time_step'
  endif
  ! 
  ! Sizes...
  !----------
- i_sp_pol=1
- ik = maxval(io_sec(ID,:))-2
- k_range      =(/ik,ik/)
- G_IO_steps   =1
-#if defined _PAR_IO
  k_range      =PAR_G_k_range
  G_IO_steps   =Gless_RT_IO_t%INTERVAL_steps/nint(RT_dyn_step/RT_step)
-#endif
- ! 
+ !
  ! Variables...
  !--------------
  G_var_name="dG_lesser"
  Sigma_var_name="Delta_Sigma"
  ROT_var_name="H_rotation"
  H_var_name="H_equilibrium"
-#if !defined _PAR_IO
- G_var_name="dG_lesser_K"//trim(intc(ik))//"_SP_POL"//trim(intc(i_sp_pol))
- ikibz=RTibz%k_map(ik)
- Sigma_var_name="Delta_Sigma_K"//trim(intc(ikibz))//"_SP_POL"//trim(intc(i_sp_pol))
- H_var_name="H_equilibrium_K"//trim(intc(ikibz))//"_SP_POL"//trim(intc(i_sp_pol))
- ROT_var_name="H_rotation_K"//trim(intc(ikibz))//"_SP_POL"//trim(intc(i_sp_pol))
-#endif
  ! 
  ! H_sig/H_rot...
  !----------------
@@ -92,72 +74,61 @@ subroutine io_RT_components_G_lesser(ID,T_size)
  ! 
  ! Variable Definitions...
  !-------------------------
-#if defined _PAR_IO 
- if( NE_i_time==NE_i_start_time.or..not.l_real_time) then
-#endif
+ if( NE_i_time==NE_i_start_time.or..not.(l_real_time.or.l_nl_optics)) then
    call def_variable_bulk(ID,trim(G_var_name),4,VAR_SZ,SP,dim_names=dim_names,par_io_kind='collective',silent=.true.)
    if (l_H_sig) then
-     TMP_VAR_SZ=1
-#if defined _PAR_IO 
      TMP_VAR_SZ=nkibz
-#endif
-     call def_variable_bulk(ID,trim(Sigma_var_name),5,(/VAR_SZ(:3),TMP_VAR_SZ,VAR_SZ(5)/),SP,&
-&                           dim_names=(/dim_names(:3),dim_name_ibz,dim_names(5)/),par_io_kind='collective',silent=.true.)   
+     call def_variable_bulk(ID,trim(Sigma_var_name),5,(/VAR_SZ(:3),TMP_VAR_SZ,VAR_SZ(5:6)/),SP,&
+&                           dim_names=(/dim_names(:3),dim_name_ibz,dim_names(5:6)/),par_io_kind='collective',silent=.true.)   
    endif
-   if (l_H_rot) call def_variable_bulk(ID,trim(ROT_var_name),6,VAR_SZ(1:4),SP,&
-&                                      dim_names=dim_names(1:4),par_io_kind='collective',silent=.true.)
-   if (l_H_EQ) call def_variable_bulk(ID,trim(H_var_name),7,VAR_SZ(1:4),SP,&
-&                                     dim_names=dim_names(1:4),par_io_kind='collective',silent=.true.)
-#if defined _PAR_IO 
+   if (l_H_rot) call def_variable_bulk(ID,trim(ROT_var_name),6,VAR_SZ(1:5),SP,&
+&                                      dim_names=dim_names(1:5),par_io_kind='collective',silent=.true.)
+   if (l_H_EQ) call def_variable_bulk(ID,trim(H_var_name),7,VAR_SZ(1:5),SP,&
+&                                     dim_names=dim_names(1:5),par_io_kind='collective',silent=.true.)
  endif
-#endif
  !
  ! Variable I/O...
  !-----------------
- if (l_real_time) then
+ if (l_real_time.or.l_nl_optics) then
    do i_mem_sorted=G_MEM_steps-T_size+1,G_MEM_steps
-     if (RT_do_IO("G_lesser_K_section"))         IPOS(5)=Gless_RT_IO_t%N+(i_mem_sorted-G_MEM_steps)
-     if (RT_do_IO("G_lesser_RESTART_K_section")) IPOS(5)=         T_size+(i_mem_sorted-G_MEM_steps)
+     if (RT_do_IO("G_lesser_K_section"))         IPOS(6)=Gless_RT_IO_t%N+(i_mem_sorted-G_MEM_steps)
+     if (RT_do_IO("G_lesser_RESTART_K_section")) IPOS(6)=         T_size+(i_mem_sorted-G_MEM_steps)
      if (i_mem_sorted==G_MEM_steps  ) i_mem=i_MEM_now
      if (i_mem_sorted==G_MEM_steps-1) i_mem=i_MEM_prev
      if (i_mem_sorted==G_MEM_steps-2) i_mem=i_MEM_old
-     call io_variable_bulk(ID,4,C4=dG_lesser(:,:,k_range(1):k_range(2),i_mem:i_mem),IPOS=IPOS)
+     call io_variable_bulk(ID,4,C5=dG_lesser(:,:,k_range(1):k_range(2),:,i_mem:i_mem),IPOS=IPOS)
    enddo
  else
-   IPOS(5)=Gless_RT_IO_t%N
-   call io_variable_bulk(ID,4,C4=dG_lesser(:,:,k_range(1):k_range(2),1:1),IPOS=IPOS)
+   IPOS(6)=Gless_RT_IO_t%N
+   call io_variable_bulk(ID,4,C5=dG_lesser(:,:,k_range(1):k_range(2),:,1:1),IPOS=IPOS)
  endif
  !
  if (l_H_sig) then
    PAR_G_k_range_ibz(1)=RTibz%k_map(k_range(1))
    PAR_G_k_range_ibz(2)=RTibz%k_map(k_range(2))
-   allocate(Delta_Sigma(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range_ibz(1):PAR_G_k_range_ibz(2),1))
+   allocate(Delta_Sigma(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range_ibz(1):PAR_G_k_range_ibz(2),n_sp_pol,1))
    if(write_is_on(ID)) then
      do ikibz=PAR_G_k_range_ibz(1),PAR_G_k_range_ibz(2)
        ik=RTibz%k_range(ikibz,1)
-       if(     l_RT_RWA) Delta_Sigma(:,:,ikibz,1:1)=Ho_plus_sigma(:,:,ik,1:1)
-       if(.not.l_RT_RWA) Delta_Sigma(:,:,ikibz,1:1)=Ho_plus_sigma(:,:,ik,1:1)-H_EQ(:,:,ik,1:1)
+       if(     l_RT_RWA) Delta_Sigma(:,:,ikibz,:,1)=Ho_plus_sigma(:,:,ik,:)
+       if(.not.l_RT_RWA) Delta_Sigma(:,:,ikibz,:,1)=Ho_plus_sigma(:,:,ik,:)-H_EQ(:,:,ik,:)
      enddo
    endif
-#if defined _PAR_IO
    IPOS(4)=PAR_G_k_range_ibz(1)
-#endif
-   call io_variable_bulk(ID,5,C4=Delta_Sigma,IPOS=IPOS)
-#if defined _PAR_IO
+   call io_variable_bulk(ID,5,C5=Delta_Sigma,IPOS=IPOS)
    IPOS(4)=PAR_G_k_range(1)
-#endif
    if(read_is_on(ID)) then
      do ip=PAR_G_k_range_ibz(1),PAR_G_k_range_ibz(2)
        ikibz=RTibz%k_map(ip)
-       Ho_plus_sigma(:,:,ip,1:1)=Delta_Sigma(:,:,ikibz,1:1)
+       Ho_plus_sigma(:,:,ip,:)=Delta_Sigma(:,:,ikibz,:,1)
      enddo
    endif
    deallocate(Delta_Sigma)
  endif
  !
- if (l_H_rot) call io_variable_bulk(ID,6,C3=H_rotation(:,:,k_range(1):k_range(2),1),IPOS=IPOS(1:4))
+ if (l_H_rot) call io_variable_bulk(ID,6,C4=H_rotation(:,:,k_range(1):k_range(2),:),IPOS=IPOS(1:5))
  !
- if (l_H_EQ) call io_variable_bulk(ID,7,C3=H_EQ(:,:,k_range(1):k_range(2),1),IPOS=IPOS(1:4))
+ if (l_H_EQ) call io_variable_bulk(ID,7,C4=H_EQ(:,:,k_range(1):k_range(2),:),IPOS=IPOS(1:5))
  !
  ! In case of frequent I/O this gives a significant slowdown of the simulation
  if (G_IO_steps>=10) call netcdf_call(nf90_sync(io_unit(ID)),ID)
diff --git a/src/io/io_RT_components_OBS.F b/src/io/io_RT_components_OBS.F
index 1f8c33ef47..e4779893c5 100644
--- a/src/io/io_RT_components_OBS.F
+++ b/src/io/io_RT_components_OBS.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine io_RT_components_OBS(ID,what)
  !
  use pars,        ONLY:SP,schlen
@@ -15,8 +19,9 @@ subroutine io_RT_components_OBS(ID,what)
  use netcdf,      ONLY:NF90_UNLIMITED
  use RT_control,  ONLY:J_cache,P_cache,Ms_cache,Ml_cache,A_cache,&
 &                      CACHE_OBS_steps_now,OBS_RT_IO_t,RT_do_IO
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,      intent(in) :: ID
  character(*), intent(in) :: what
diff --git a/src/io/io_RT_components_Reference.F b/src/io/io_RT_components_Reference.F
index b32dc54960..f8f54fdaab 100644
--- a/src/io/io_RT_components_Reference.F
+++ b/src/io/io_RT_components_Reference.F
@@ -5,11 +5,16 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine io_RT_components_Reference(ID)
  !
  use pars,           ONLY:SP
  use drivers,        ONLY:l_elph_scatt
  use R_lattice,      ONLY:nqbz
+ use electrons,      ONLY:n_sp_pol
  use IO_m,           ONLY:read_is_on
  use IO_int,         ONLY:def_variable_bulk,io_variable_bulk
  use RT_lifetimes,   ONLY:RT_lifetime,RT_EE_REF_life,RT_EP_emit_REF_life,RT_EP_abs_REF_life,&
@@ -18,8 +23,9 @@ subroutine io_RT_components_Reference(ID)
 #if defined _ELPH
  use ELPH,           ONLY:elph_branches
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(in)  :: ID
  !
@@ -60,8 +66,8 @@ subroutine io_RT_components_Reference(ID)
  !
  if (allocated(G_lesser_reference)) then
    !
-   call def_variable_bulk(ID,"G_lesser_reference",4,(/2,RT_nbands,RT_nbands,RT_nk/),SP)
-   call  io_variable_bulk(ID,4,C3=G_lesser_reference)
+   call def_variable_bulk(ID,"G_lesser_reference",4,(/2,RT_nbands,RT_nbands,RT_nk,n_sp_pol/),SP)
+   call  io_variable_bulk(ID,4,C4=G_lesser_reference)
    !
  endif
  !
diff --git a/src/io/io_RT_components_TIME_points.F b/src/io/io_RT_components_TIME_points.F
index e4c4185c0c..fdc38f6367 100644
--- a/src/io/io_RT_components_TIME_points.F
+++ b/src/io/io_RT_components_TIME_points.F
@@ -5,20 +5,25 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine io_RT_components_TIME_points(ID,VAR_SZ,load_RESTART_info,UNLIMITED_time,TYP)
  !
  use pars,           ONLY:IP,SP,schlen,IP_YIO
  use drivers,        ONLY:l_real_time
  use units,          ONLY:AUT2FS
  use RT_output_m,    ONLY:RT_desc
- use RT_control,     ONLY:RT_IO_t,RT_do_IO
+ use RT_control,     ONLY:RT_IO_t,RT_do_IO,Floquet_order
  use netcdf,         ONLY:NF90_UNLIMITED
  use real_time,      ONLY:MEM_pointer,G_MEM_steps,NE_i_time,i_MEM_now,i_MEM_prev,i_MEM_old,&
 &                         RT_dyn_step,RT_step
  use IO_m,           ONLY:write_is_on,read_is_on
  use IO_int,         ONLY:def_variable_bulk,io_variable_bulk,def_variable_elemental,io_variable_elemental
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)    :: ID,VAR_SZ
  logical,       intent(in)    :: load_RESTART_info,UNLIMITED_time
@@ -40,6 +45,9 @@ subroutine io_RT_components_TIME_points(ID,VAR_SZ,load_RESTART_info,UNLIMITED_ti
 &                       DESCRIPTOR=RT_desc,UNIT=AUT2FS)
  endif
  !
+ call def_variable_elemental(ID,"IO_Floquet_order",1,IP_YIO,0) 
+ call io_variable_elemental(ID,I0=Floquet_order)
+ !
  call def_variable_elemental(ID,"IO_TIME_steps_last_nsteps",3,IP_YIO,0) 
  call io_variable_elemental(ID,I0=TYP%INTERVAL_steps)
  call io_variable_elemental(ID,I0=TYP%last_point)
@@ -66,12 +74,14 @@ subroutine io_RT_components_TIME_points(ID,VAR_SZ,load_RESTART_info,UNLIMITED_ti
  !
  IPOS=TYP%N-VAR_SZ+1
  IO_LIM=(/TYP%N-VAR_SZ+1,TYP%N/)
- if(TYP%first_IO(2).or.RT_do_IO("ANY_G").or.read_is_on(ID)) then
+ if(TYP%first_IO(2).or.RT_do_IO("ANY_G").or.RT_do_IO("ANY_Vb").or.read_is_on(ID)) then
    dim_name="IO_TIME_n_points"
    if (UNLIMITED_time) then
      call def_variable_bulk(ID,"IO_TIME_points",3,(/NF90_UNLIMITED/),SP,dim_name,silent=.true.,par_io_kind="collective")
-   else
+   else if (RT_do_IO("ANY_G")) then
      call def_variable_bulk(ID,"IO_TIME_points",3,(/G_MEM_steps/),   SP,dim_name,silent=.true.,par_io_kind="collective")
+   else if (RT_do_IO("ANY_Vb")) then
+     call def_variable_bulk(ID,"IO_TIME_points",3,(/1/),             SP,dim_name,silent=.true.,par_io_kind="collective")
    endif
  endif
  !
diff --git a/src/io/io_RT_components_V_bands.F b/src/io/io_RT_components_V_bands.F
new file mode 100644
index 0000000000..faec1554d3
--- /dev/null
+++ b/src/io/io_RT_components_V_bands.F
@@ -0,0 +1,82 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM, DS
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine io_RT_components_V_bands(ID,T_size)
+ !
+ use pars,           ONLY:SP,schlen
+ use drivers,        ONLY:l_nl_p_and_p
+ use iso_c_binding,  ONLY:C_LOC,C_F_POINTER
+ use IO_m,           ONLY:read_is_on,write_is_on
+ use IO_int,         ONLY:def_variable_bulk,io_variable_bulk
+ use electrons,      ONLY:n_sp_pol,n_full_bands
+ use RT_control,     ONLY:Vbands_RT_IO_t,NE_i_start_time,SAVE_Vb_history, SAVE_Vb_floquet
+ use real_time,      ONLY:RT_bands,RT_nbands,NE_i_time!,RT_dyn_step,RT_step
+ use nl_optics,      ONLY:V_bands
+ use netcdf,         ONLY:NF90_UNLIMITED
+ use QP_m,           ONLY:QP_nk
+ !
+ implicit none
+ !
+ integer, intent(in)  :: ID,T_size
+ !
+ integer           :: VAR_SZ(6),IPOS(6),nbf
+ character(schlen) :: dim_names(6)
+ !complex(SP),pointer :: tmp_link(:,:,:,:,:)
+ complex(SP)       :: tmp_var(RT_nbands,maxval(n_full_bands),QP_nk,n_sp_pol,1)
+ !
+ ! Setup
+ !=======
+ !
+ ! Variables SIZE and names...
+ !----------------------------
+ nbf=maxval(n_full_bands)
+ VAR_SZ(1:5)=(/2,RT_nbands,nbf,QP_nk,n_sp_pol/)
+ dim_names(1:5)  =[character(schlen) :: 'complex', 'RT_nbands','nb_full','QP_nk','n_sp_pol']
+ IPOS(1:5)=1
+ !
+ VAR_SZ(6)    = T_size
+ dim_names(6) ='V_bands_IO_time_step'
+ IPOS(6)      = Vbands_RT_IO_t%N
+ !
+  
+ ! Variable Definitions...
+ !-------------------------
+ if( NE_i_time==NE_i_start_time .or. .not.l_nl_p_and_p) then
+   if (SAVE_Vb_history.or.SAVE_Vb_floquet) VAR_SZ(6)    = NF90_UNLIMITED
+   !call def_variable_bulk(ID,"V_bands",4,VAR_SZ,SP,dim_names=dim_names,par_io_kind='collective',silent=.true.)
+   call def_variable_bulk(ID,"V_bands",4,VAR_SZ,SP,dim_names=dim_names,silent=.true.)
+   VAR_SZ(6)    = T_size
+ endif
+ !
+ ! Variable I/O...
+ !-----------------
+ ! This does not work since the variable is not contiguous
+ !call C_F_POINTER(C_LOC(V_bands(RT_bands(1):RT_bands(2),1:nbf,1:QP_nk,1:n_sp_pol)),tmp_link,VAR_SZ(2:6))
+ !call io_variable_bulk(ID,4,C5=tmp_link,IPOS=IPOS)
+ !nullify(tmp_link)
+ !
+ if (write_is_on(ID)) tmp_var(:,:,:,:,1) = V_bands(RT_bands(1):RT_bands(2),1:nbf,1:QP_nk,1:n_sp_pol)
+ call io_variable_bulk(ID,4,C5=tmp_var,IPOS=IPOS)
+ if (read_is_on(ID)) V_bands(RT_bands(1):RT_bands(2),1:nbf,1:QP_nk,1:n_sp_pol) = tmp_var(:,:,:,:,1)
+ !
+end subroutine io_RT_components_V_bands
diff --git a/src/io/io_SC_components.F b/src/io/io_SC_components.F
index 86e2ea894f..f5244c7bc4 100644
--- a/src/io/io_SC_components.F
+++ b/src/io/io_SC_components.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_SC_components(what,E_sc,ID)
  !
  ! what can be E WF V OBSERVABLES (J,P,D...) G
@@ -23,7 +27,9 @@ integer function io_SC_components(what,E_sc,ID)
  use FFT_m,          ONLY:fft_size
  use electrons,      ONLY:levels,n_sp_pol,n_spin
  use timing_m,      ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer, intent(in)      :: ID
  type(levels)             :: E_sc
@@ -162,8 +168,7 @@ integer function io_SC_components(what,E_sc,ID)
      !
      E_tmp=E_tmp+E_sc%E(SC_bands(2),:,:)
      !
-     forall (ib=SC_bands(2)+1:bands_to_load) &
-     & E_sc%E(ib,:,:)=E_sc%E(ib,:,:)+ E_tmp(:,:)
+     forall (ib=SC_bands(2)+1:bands_to_load) E_sc%E(ib,:,:)=E_sc%E(ib,:,:)+ E_tmp(:,:)
      !
      YAMBO_FREE(E_tmp)
      !
diff --git a/src/io/io_Vnl.F b/src/io/io_Vnl.F
deleted file mode 100644
index 7fbd86b7f1..0000000000
--- a/src/io/io_Vnl.F
+++ /dev/null
@@ -1,124 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2011 The Yambo Team
-!
-! Authors (see AUTHORS file for details): AM
-!
-integer function io_Vnl(Dip,Xen,ID)
- !
- use pars,          ONLY:SP,schlen
- use DIPOLES,       ONLY:DIPOLE_t
- use electrons,     ONLY:levels,n_sp_pol
- use pseudo,        ONLY:Vnl
- use R_lattice,     ONLY:nXkibz
- use matrix_operate,ONLY:mat_c2r,mat_r2c
- use IO_int,        ONLY:io_connect,io_disconnect,io_elemental,io_bulk,io_header,io_fragment
- use IO_m,          ONLY:io_sec,io_status,read_is_on,write_is_on,io_extension,frag_WF
- use global_XC,     ONLY:Dipole_WF_xc_string,loaded_WF_xc_string
- !
-#include<y_memory.h>
- !
- type(DIPOLE_t)::Dip
- type(levels)::Xen
- integer     ::ID
- !
- ! Work Space
- !
- integer :: i1,ixyz,sec_size,i_spin,ID_frag
- integer :: db_nbm(2),db_nbf(2),db_nb(2)
- character(schlen)    :: VAR_name
- real(SP),allocatable :: Vnl_disk(:,:,:) ! complex, ic, iv
- !
- io_Vnl=io_connect(desc='Vnl',type=0,ID=ID)
- !
- if (io_Vnl/=0) goto 1
- !
- if (any((/io_sec(ID,:)==1/))) then
-   !
-   io_Vnl=io_header(ID,IMPOSE_SN=.true.,FRAG=frag_WF)
-   if (io_Vnl/=0) goto 1
-   !
-   sec_size=5
-   !
-   call io_elemental(ID,VAR="PARS",VAR_SZ=sec_size,MENU=0)
-   call io_elemental(ID,DB_I1=db_nb,&
-&       VAR=" Dip band range          ",I1=Dip%ib,CHECK=.true.,OP=(/">=","<="/))
-   if (n_sp_pol==1) then
-     call io_elemental(ID,DB_I0=db_nbm(1),&
-&         VAR=" Metallic bands          ",I0=Xen%nbm(1),CHECK=.true.,OP=(/"<="/))
-     call io_elemental(ID,DB_I0=db_nbf(1),&
-&         VAR=" Filled bands            ",I0=Xen%nbf(1),CHECK=.true.,OP=(/">="/))
-   else
-     call io_elemental(ID,DB_I1=db_nbm,&
-&         VAR=" Metallic bands          ",I1=Xen%nbm,CHECK=.true.,OP=(/"<=","<="/))
-     call io_elemental(ID,DB_I1=db_nbf,&
-&         VAR=" Filled bands            ",I1=Xen%nbf,CHECK=.true.,OP=(/">=",">="/))
-   endif
-   call io_elemental(ID,&
-&       VAR=" RL vectors in the sum   ",I0=Dip%ng,WARN=.true.,OP=(/"<="/))
-   !
-   call io_elemental(ID,VAR="",VAR_SZ=0,MENU=0)
-   !
-   ! Wavefunctions xc 
-   !
-   call io_elemental(ID,VAR='WAVE_FUNC_XC',CH0="",VAR_SZ=1,MENU=0)
-   call io_elemental(ID,DB_CH0=Dipole_WF_xc_string,CH0=loaded_WF_xc_string,&
-&       VAR=' Wavefunctions          ',CHECK=.true.,OP=(/"=="/))
-   call io_elemental(ID,VAR="",VAR_SZ=0)
-   !
-   io_Vnl=io_status(ID)
-   if (io_Vnl/=0) then
-     call warning(' The commutator DB cannot be used. Try to lower ElecTemp in the input.')
-     goto 1
-   endif
- endif
- !
- ! On disk the size is Vnl(3,db_nb(2),maxval(db_nbm),nXkibz)
- !
- sec_size=3*db_nb(2)*maxval(db_nbm)
- if (any((/io_sec(ID,:)==2/))) then
-   !
-   YAMBO_ALLOC(Vnl_disk,(db_nb(2),maxval(db_nbm),2))!Allocate to exactly the sizeo on disk)
-   !
-   if(read_is_on(ID)) then
-      !
-      ! Fill an array Vnl to size required by yambo, ignore extra elements
-      !
-      YAMBO_ALLOC(Vnl,(3,Dip%ib(2),maxval(db_nbm),nXkibz,n_sp_pol))
-   endif
-   !
-   do i1=1,nXkibz
-     !
-     ! Fragmentation
-     !
-     io_extension(ID)='Vnl'
-     call io_fragment(ID,ID_frag,i_fragment=i1)
-     !
-     do ixyz=1,3
-       !
-       do i_spin=1,n_sp_pol
-         !
-         if (write_is_on(ID)) call mat_c2r(Vnl(ixyz,:,:,i1,i_spin),Vnl_disk)
-         !
-         write (VAR_name,'(3(a,i4.4))') 'Vnl_k_',i1,'_xyz_',ixyz,'_spin_',i_spin
-         call io_bulk(ID_frag,VAR=trim(VAR_name),VAR_SZ=shape(Vnl_disk))
-         call io_bulk(ID_frag,R3=Vnl_disk)
-         !
-         if (read_is_on(ID)) call mat_r2c(Vnl_disk,Vnl(ixyz,:,:,i1,i_spin))
-         !
-       enddo
-       !
-     enddo
-     !
-     call io_fragment_disconnect(ID,ID_frag)
-     !
-   enddo
-   !
-   YAMBO_FREE(Vnl_disk)
-   !
- endif
- !
-1 call io_disconnect(ID=ID)
- !
-end function
diff --git a/src/io/io_WF_phases.F b/src/io/io_WF_phases.F
new file mode 100644
index 0000000000..aa57645229
--- /dev/null
+++ b/src/io/io_WF_phases.F
@@ -0,0 +1,89 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2011 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+integer function io_WF_phases(nb,ik,istark,i_sp_pol,ID,nsz,WF_phases)
+ !
+ use R_lattice,     ONLY:nXkibz
+ use D_lattice,     ONLY:nsym
+ use electrons,     ONLY:n_sp_pol,n_max_deg
+ use pars,          ONLY:schlen,SP,SP_YIO,IP_YIO
+ use IO_m,          ONLY:io_sec,io_extension,read_is_on
+ use IO_int,        ONLY:io_connect,io_disconnect,io_fragment,def_variable_bulk,&
+&                        io_variable_bulk,io_header,def_variable_elemental,io_variable_elemental
+ use wave_func,     ONLY:WF_phases_b_map
+ use timing_m,      ONLY:timing
+ !
+ implicit none
+ !
+ integer               :: nb(2),ik,istark,i_sp_pol,ID
+ integer, intent(in)   :: nsz(5)
+ complex(SP), optional :: WF_phases(:,:,:,:,:)
+ !
+ ! Work Space
+ !
+ integer :: VAR_ID,VAR_SIZE(6),VAR_POS(6),i_fragment,ID_frag
+ character(schlen)    :: VAR_name,DIM_NAMES(6)
+ !
+ call timing('io_WF_phases',OPR='start')
+ !
+ io_extension(ID)='wf_phases'
+ !
+ io_WF_phases=io_connect(desc=trim(io_extension(ID)),type=2,ID=ID)
+ if (io_WF_phases/=0) goto 1
+ !
+ if (io_sec(ID,1)==0) goto 1
+ !
+ if (any((/io_sec(ID,:)==1/))) then
+   !
+   io_WF_phases=io_header(ID,R_LATT=.true.,WF=.true.,IMPOSE_SN=.true.,TEMP=.true.)
+   if (io_WF_phases/=0) goto 1
+   !
+   call def_variable_elemental(ID,"nbands",2,IP_YIO,0)
+   call io_variable_elemental(ID,VAR="Number of bands",I1=nb,CHECK=.true.,OP=(/"==","=="/))
+   !call io_variable_elemental(ID,VAR="Number of bands",I1=nb,CHECK=.true.,OP=(/">=","<="/))
+   !
+ endif
+ !
+ ! WF_phases_b_map
+ !
+ if (any((/io_sec(ID,:)==2/))) then
+   VAR_ID=2
+   VAR_name='WF_phases_b_map'
+   call def_variable_bulk(ID,trim(VAR_name),VAR_ID,shape(WF_phases_b_map),IP_YIO)
+   call  io_variable_bulk(ID,VAR_ID,I4=WF_phases_b_map)
+   !
+ endif
+ !
+ ! WF_phases
+ !
+ if (any((/io_sec(ID,:)==3/))) then
+   !
+   ! Fragmentation
+   !
+   i_fragment=ik+(i_sp_pol-1)*nXkibz
+   call io_fragment(ID,ID_frag,i_fragment=i_fragment)
+   !
+   VAR_ID=1
+   VAR_name='WF_phases'
+   VAR_SIZE=(/2,nsz(1),nsz(2),nsz(3),nsz(4),nsz(5)/)
+   DIM_NAMES = [character(schlen) :: 're_im', 'bands', 'bands', 'bands_grp', 'syms', 'istark']
+   !VAR_SIZE=(/2,shape(WF_phases)/)
+   VAR_POS=(/1,1,1,1,1,1/)
+   if(istark>0) VAR_POS=(/1,1,1,1,1,istark/)
+   !
+   call def_variable_bulk(ID_frag,trim(VAR_name),VAR_ID,VAR_SIZE,SP_YIO,DIM_NAMES)
+   call io_variable_bulk(ID_frag,VAR_ID,C5=WF_phases,IPOS=VAR_POS)
+   ! 
+   call io_fragment_disconnect(ID,ID_frag)
+   !
+ endif
+ !
+1 call io_disconnect(ID=ID)
+ !
+ call timing('io_WF_phases',OPR='stop')
+ !
+end function
diff --git a/src/io/io_X.F b/src/io/io_X.F
index c064af651f..84bd3d6218 100644
--- a/src/io/io_X.F
+++ b/src/io/io_X.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_X(X,Xw,ID)
  !
  use pars,          ONLY:SP,schlen
@@ -21,8 +25,9 @@ integer function io_X(X,Xw,ID)
 &                        IO_OUTDATED_DB,frag_RESPONSE,IO_NO_DATABASE,io_mode,DUMP
  use stderr,        ONLY:intc,STRING_same
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(X_t)   :: X
  type(w_samp):: Xw
@@ -224,8 +229,10 @@ integer function io_X(X,Xw,ID)
      endif
      !
      if (allocated(X_par)) then
-       call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP)
-       call  io_variable_bulk(ID_frag,1,C3=X_par(1)%blc(:,:,:Xw%n_freqs),IPOS=(/1,X_par(1)%rows(1),X_par(1)%cols(1),1/))
+       if(allocated(X_par(1)%blc)) then
+         call def_variable_bulk(ID_frag,trim(ch),1,(/2,X%ng_db,X%ng_db,Xw%n_freqs/),SP)
+         call  io_variable_bulk(ID_frag,1,C3=X_par(1)%blc(:,:,:Xw%n_freqs),IPOS=(/1,X_par(1)%rows(1),X_par(1)%cols(1),1/))
+       endif
      endif
      !
    endif
@@ -243,6 +250,7 @@ integer function io_X(X,Xw,ID)
      endif
      !
      if (allocated(X_par)) then
+      if(allocated(X_par(1)%blc)) then
        YAMBO_ALLOC(X_on_disk,(X_par(1)%rows(1):X_par(1)%rows(2),X_par(1)%cols(1):X_par(1)%cols(2)))
        do i1=1,Xw%n_freqs
          !
@@ -258,6 +266,7 @@ integer function io_X(X,Xw,ID)
      endif
      !
      YAMBO_FREE(X_on_disk)
+    endif
    endif
    !
  endif
diff --git a/src/io/io_full_SYMMs.F b/src/io/io_full_SYMMs.F
index 8b5da5703a..6cb36cc1db 100644
--- a/src/io/io_full_SYMMs.F
+++ b/src/io/io_full_SYMMs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_full_SYMMs(k,ID)
  !
  use pars,          ONLY:schlen,SP,IP,IP_YIO,LP_YIO
@@ -17,8 +21,9 @@ integer function io_full_SYMMs(k,ID)
 #if defined _TIMING
  use timing_m,      ONLY:timing
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,      intent(in) :: ID
  type(bz_samp),intent(in) :: k
@@ -31,7 +36,7 @@ integer function io_full_SYMMs(k,ID)
  !
  io_extension(ID)="full_SYMMs"
  !
- io_full_SYMMs=io_connect(desc=trim(io_extension(ID)),type=2,ID=ID)
+ io_full_SYMMs=io_connect(desc=trim(io_extension(ID)),type=1,ID=ID)
  if (io_full_SYMMs/=0) goto 1
  !
  if (any((/io_sec(ID,:)==1/))) then
diff --git a/src/io/io_gFsq.F b/src/io/io_gFsq.F
index fa6dd8a232..7b4a63aa16 100644
--- a/src/io/io_gFsq.F
+++ b/src/io/io_gFsq.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_gFsq(ID)
  !
  use pars,           ONLY:schlen
@@ -18,8 +22,9 @@ integer function io_gFsq(ID)
  use QP_m,           ONLY:QP_n_states,QP_table
  use parallel_m,     ONLY:PAR_Q_bz_index
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer      ::ID
  !
diff --git a/src/io/io_header.F b/src/io/io_header.F
index 425e8de0b5..de7f42ed6a 100644
--- a/src/io/io_header.F
+++ b/src/io/io_header.F
@@ -137,7 +137,7 @@ integer function io_header(ID,QPTS,R_LATT,WF,IMPOSE_SN,TEMP,KPTS,D_LATT,XC_KIND,
      goto 1
    endif
    if (WARN_SERIAL.and.io_com(ID)/=NONE) then 
-     call warning('Wrong serial number for '//trim(io_file(ID)))
+     call warning('Different serial number for '//trim(io_file(ID)))
    endif
  endif
  !
diff --git a/src/io/io_kpts.F b/src/io/io_kpts.F
new file mode 100644
index 0000000000..6c5694fdac
--- /dev/null
+++ b/src/io/io_kpts.F
@@ -0,0 +1,81 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2014 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+!> @brief Write info about k-points
+!
+! io_sec=1   general parameters   
+! io_sec=2   k-points
+!
+! headers
+!
+#include<y_memory.h>
+!
+integer function io_kpts(k,Xk,q,io_db)
+ !
+ use pars,           ONLY:IP_YIO
+ use R_lattice,      ONLY:bz_samp
+ use IO_m,           ONLY:io_sec,io_status
+ use IO_int,         ONLY:io_connect,io_disconnect,&
+&                         io_variable_elemental,def_variable_elemental,io_header,&
+&                         def_variable_bulk,io_variable_bulk
+ use timing_m,       ONLY:timing
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp)::k,q,Xk
+ integer      ::io_db
+ !
+ call timing('io_kpts',OPR='start')
+ !
+ io_kpts=io_connect(desc='kpts',type=1,ID=io_db)
+ if (io_kpts/=0) goto 1
+ !
+ if (any((/io_sec(io_db,:)==1/))) then
+   !
+   io_kpts=io_header(io_db,IMPOSE_SN=.true.)
+   !
+   call def_variable_elemental(io_db,"k_nibz",1,IP_YIO,0)
+   call io_variable_elemental(io_db,I0=k%nibz,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(io_db,"k_nbz",1,IP_YIO,0)
+   call io_variable_elemental(io_db,I0=k%nbz,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(io_db,"Xk_nibz",1,IP_YIO,0)
+   call io_variable_elemental(io_db,VAR="Number of kpts for polarization (ibz)",&
+   &                                         I0=Xk%nibz,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(io_db,"Xk_nbz",1,IP_YIO,0)
+   call io_variable_elemental(io_db,VAR="Number of kpts for polarization (bz)",&
+   &                                         I0=Xk%nbz,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(io_db,"q_nibz",1,IP_YIO,0)
+   call io_variable_elemental(io_db,I0=q%nibz,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(io_db,"q_nbz",1,IP_YIO,0)
+   call io_variable_elemental(io_db,I0=q%nbz,CHECK=.true.,OP=(/"=="/))
+   !
+   call def_variable_elemental(io_db,"",0,0,1)
+   !
+   io_kpts=io_status(io_db)
+   !
+   if (io_kpts/=0) goto 1
+   !
+ endif
+ !
+ if (any((/io_sec(io_db,:)==2/))) then
+   !
+   call def_variable_bulk(io_db,"Xk_sstar",1,shape(Xk%sstar),IP_YIO)
+   call  io_variable_bulk(io_db,1,I2=Xk%sstar)
+   !
+ endif
+ !
+1 call io_disconnect(ID=io_db)
+ !
+ call timing('io_kpts',OPR='stop')
+ !
+end function io_kpts
diff --git a/src/io/load_SC_components.F b/src/io/load_SC_components.F
index 298be591ce..6026b95d98 100644
--- a/src/io/load_SC_components.F
+++ b/src/io/load_SC_components.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine load_SC_components(what,E,COM_,MODE_,ik,i_sp_pol,n_bands,kind,&
 &                             xc_functional,perturbation)
  !
@@ -17,7 +21,9 @@ subroutine load_SC_components(what,E,COM_,MODE_,ik,i_sp_pol,n_bands,kind,&
  use IO_int,      ONLY:io_control
  use IO_m,        ONLY:manage_action,OP_RD_CL,RD_CL_IF_END,OP_RD,RD_CL,&
 &                      DUMP,OP_RD_CL,NONE,IO_NO_DATABASE
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  character(*)           :: what
  type(levels), optional :: E
diff --git a/src/io/variables_ELPH.F b/src/io/variables_ELPH.F
index a1a1eacdb9..ed9b85350e 100644
--- a/src/io/variables_ELPH.F
+++ b/src/io/variables_ELPH.F
@@ -5,13 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function variables_ELPH(ID,what)
  !
  use pars,           ONLY:SP
  use IO_m,           ONLY:io_status,read_is_on,write_is_on,io_mode,DUMP,IO_INCOMPATIBLE_VAR
  use IO_int,         ONLY:ver_is_gt_or_eq,io_variable_elemental,io_bulk,def_variable_elemental
- use ELPH,           ONLY:ph_modes,elph_nb,elph_nk_bz,ELPH_desc,elph_grids_are_expanded, &
-&                         QP_PH_n_G_bands,PH_W_debye,elph_nQ,PH_kpt_bz,l_GKKP_hosts_bare_dV,&
+ use ELPH,           ONLY:ph_modes,elph_bands,elph_nb,elph_nk_bz,ELPH_desc,elph_grids_are_expanded, &
+&                         QP_PH_n_G_bands,PH_W_debye,elph_nQ,PH_kpt_bz,l_GKKP_hosts_bare_dV,l_GKKP_hosts_DW,&
 &                         elph_use_q_grid,PH_qpt,elph_nQ_used,elph_Ham_bands,elph_branches
  use ALLOC,          ONLY:ELPH_alloc
  use R_lattice,      ONLY:nkbz,nqibz,q_pt
@@ -19,15 +23,16 @@ integer function variables_ELPH(ID,what)
  use vec_operate,    ONLY:rlu_v_is_zero,c2a
  use zeros,          ONLY:k_rlu_zero
  use descriptors,    ONLY:IO_desc_reset
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(in) ::ID
  character(*) ::what
  !
  !Work Space
  !
- integer                ::iq
+ integer                ::iq,NVAR
  logical                ::l_gFsq,l_gkkp
  real(SP)               ::v(3)
  !
@@ -40,20 +45,35 @@ integer function variables_ELPH(ID,what)
  !
  call IO_desc_reset(ELPH_desc)
  !
- if (.not.ver_is_gt_or_eq(ID,revision=18920)) call def_variable_elemental(ID,"PARS",6,SP,0)
- if (     ver_is_gt_or_eq(ID,revision=18920)) call def_variable_elemental(ID,"PARS",7,SP,0)
+ NVAR=6
+ if (ver_is_gt_or_eq(ID,revision=18920)) NVAR=7
+ if (ver_is_gt_or_eq(ID,revision=21550)) NVAR=9
+ call def_variable_elemental(ID,"PARS",NVAR,SP,0)
+ !
  call io_variable_elemental(ID,VAR="[ELPH] Phonon modes",I0=ph_modes,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
  call io_variable_elemental(ID,VAR="[ELPH] Q points",I0=elph_nQ,CHECK=.true.,OP=(/"=="/),&
 &                             TERMINATOR="[avail]",DESCRIPTOR=ELPH_desc)
  if (l_gFsq) call io_variable_elemental(ID,VAR="[ELPH]",I0=elph_nQ_used,CHECK=.true.,OP=(/"=="/),&
 &                                         TERMINATOR="[used]",DESCRIPTOR=ELPH_desc)
  call io_variable_elemental(ID,VAR="[ELPH] K points",I0=elph_nk_bz,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
- call io_variable_elemental(ID,VAR="[ELPH] El-PH bands",I0=elph_nb,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
+ if (ver_is_gt_or_eq(ID,revision=21550)) then
+   call io_variable_elemental(ID,VAR="[ELPH] El-PH bands",I1=elph_bands,CHECK=.true.,OP=(/"==","=="/),DESCRIPTOR=ELPH_desc)
+   if (read_is_on(ID)) elph_nb=elph_bands(2)-elph_bands(1)+1
+ else
+   call io_variable_elemental(ID,VAR="[ELPH] El-PH bands",I0=elph_nb,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
+   if (read_is_on(ID)) elph_bands=(/1,elph_nb/)
+ endif
  call io_variable_elemental(ID,VAR="[ELPH] Using the Q-grid",L0=elph_use_q_grid,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
  if (ver_is_gt_or_eq(ID,revision=18920)) then
    call io_variable_elemental(ID,VAR="[ELPH] Bare interaction",L0=l_GKKP_hosts_bare_dV,&
 &                               CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
  endif
+ if (ver_is_gt_or_eq(ID,revision=21550)) then
+   call io_variable_elemental(ID,VAR="[ELPH] Debye-Waller",L0=l_GKKP_hosts_DW,&
+&                               CHECK=.true.,OP=(/"=="/),DESCRIPTOR=ELPH_desc)
+ else
+   l_GKKP_hosts_DW=.true.
+ endif
  if (l_gFsq) then
    call def_variable_elemental(ID,"QP_nb_nk_n_states",3,SP,0)
    call io_variable_elemental(ID,I0=QP_nb)
@@ -63,8 +83,8 @@ integer function variables_ELPH(ID,what)
  call def_variable_elemental(ID,"",0,0,1)
  !
  if (io_mode(ID)==DUMP.or.write_is_on(ID)) then
-   QP_PH_n_G_bands=(/1,elph_nb/)
-   elph_Ham_bands =(/1,elph_nb/)
+   QP_PH_n_G_bands=(/elph_bands(1),elph_bands(2)/)
+   elph_Ham_bands =(/elph_bands(1),elph_bands(2)/)
    elph_branches  =(/1,ph_modes/)
  endif
  !
@@ -89,11 +109,9 @@ integer function variables_ELPH(ID,what)
    enddo
  endif
  !
- if (elph_grids_are_expanded) then
-   !
+ if (ver_is_gt_or_eq(ID,revision=21550).or.elph_grids_are_expanded) then
    call io_bulk(ID,VAR="PH_K",VAR_SZ=(/nkbz,3/) )
    call io_bulk(ID,R2=PH_kpt_bz(:,:))
-   !
  endif
  !
  variables_ELPH=io_status(ID)
diff --git a/src/io/variables_NL.F b/src/io/variables_NL.F
index 5511479d36..cb49c85cb7 100644
--- a/src/io/variables_NL.F
+++ b/src/io/variables_NL.F
@@ -15,8 +15,7 @@ integer function variables_NL(ID)
  use QP_m,          ONLY:QP_ng_SH,QP_ng_Sx
  use real_time,     ONLY:Integrator_name,NE_steps,RT_step,l_RT_induced_field,RT_bands,RAD_LifeTime
  use electric,      ONLY:l_force_SndOrd
- use fields,        ONLY:n_ext_fields
- use fields,        ONLY:Efield
+ use fields,        ONLY:n_ext_fields,Efield
  use IO_m,          ONLY:io_status
  use IO_int,        ONLY:io_variable_elemental,def_variable_elemental,ver_is_gt_or_eq
  use descriptors,   ONLY:IO_desc_reset
@@ -136,18 +135,9 @@ integer function variables_NL(ID)
 &         R0=Efield(i_Prb)%FWHM,CHECK=.true.,OP=(/"=="/),& 
 &         DESCRIPTOR=NL_desc)
 
-   call def_variable_elemental(ID,"Field_Freq_range_"//trim(intc(i_Prb)),2,SP,0)
-   call io_variable_elemental(ID,VAR="[FIELDs] Frequency range   ",TERMINATOR="[eV]",&
-&         R1=Efield(i_Prb)%frequency,CHECK=.true.,OP=(/"==","=="/),UNIT=HA2EV,& 
-&         DESCRIPTOR=NL_desc)
-
-   call def_variable_elemental(ID,"Field_Freq_steps_"//trim(intc(i_Prb)),1,IP_YIO,0)
-   call io_variable_elemental(ID,VAR="[FIELDs] Frequency steps",I0=Efield(i_Prb)%n_frequencies,CHECK=.true.,OP=(/"=="/),& 
-&         DESCRIPTOR=NL_desc)
-
-   call def_variable_elemental(ID,"Field_Freq_step_"//trim(intc(i_Prb)),1,SP,0)
-   call io_variable_elemental(ID,VAR="[FIELDs] Frequency step    ",TERMINATOR="[eV]",&
-&         R0=Efield(i_Prb)%W_step,CHECK=.true.,OP=(/"=="/),UNIT=HA2EV,& 
+   call def_variable_elemental(ID,"Field_Freq_"//trim(intc(i_Prb)),1,SP,0)
+   call io_variable_elemental(ID,VAR="[FIELDs] Frequency    ",TERMINATOR="[eV]",&
+&         R0=Efield(i_Prb)%frequency,CHECK=.true.,OP=(/"=="/),UNIT=HA2EV,& 
 &         DESCRIPTOR=NL_desc)
 
    call def_variable_elemental(ID,"Field_Initial_time_"//trim(intc(i_Prb)),1,SP,0)
diff --git a/src/io/variables_Overlaps.F b/src/io/variables_Overlaps.F
index 6b3d4b86ef..6cb6574bd1 100644
--- a/src/io/variables_Overlaps.F
+++ b/src/io/variables_Overlaps.F
@@ -23,16 +23,8 @@ integer function variables_Overlaps(Dip,ID)
  !
  call IO_desc_reset(DIP_desc)
  !
- call def_variable_elemental(ID,"Bands",2,IP_YIO,0)
- call io_variable_elemental(ID,DB_I1=Dip%db_ib,VAR="DIP band range",I1=Dip%ib,CHECK=.true.,OP=(/">=","<="/),&
-&                           DESCRIPTOR=DIP_desc)
- !
- call def_variable_elemental(ID,"Bands_limits",2,IP_YIO,0)
- call io_variable_elemental(ID,DB_I1=Dip%db_ib_lim,VAR="DIP band range limits",I1=Dip%ib_lim,CHECK=.true.,OP=(/"<=",">="/),&
-&                           DESCRIPTOR=DIP_desc)
- !
- call def_variable_elemental(ID,"Bands_ordered",1,LP_YIO,0)
- call io_variable_elemental(ID,VAR="Bands ordered",L0=Dip%bands_ordered,CHECK=.true.,OP=(/">="/),&
+ call def_variable_elemental(ID,"Bands",1,IP_YIO,0)
+ call io_variable_elemental(ID,DB_I0=Dip%db_ib(2),VAR="DIP nbands",I0=Dip%ib(2),CHECK=.true.,OP=(/"<="/),&
 &                           DESCRIPTOR=DIP_desc)
  !
  call def_variable_elemental(ID,"NG",1,IP_YIO,0)
diff --git a/src/io/variables_RT_static.F b/src/io/variables_RT_static.F
index 967d2ec737..d2f4519da9 100644
--- a/src/io/variables_RT_static.F
+++ b/src/io/variables_RT_static.F
@@ -55,16 +55,18 @@ integer function variables_RT_static(ID)
  !
  ! ELPH_vars
  !
-#if defined _ELPH 
- call def_variable_elemental(ID,VAR="ELPH_vars",VAR_SZ=5,VAR_KIND=IP_YIO,MENU=0) 
- call io_variable_elemental(ID,L0=l_elph_scatt,&
-&   VAR=" [E-P] Electron-phonon", CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc)
- call io_variable_elemental(ID,L0=l_RT_iterative_ELPH_lifetimes,&
-&   VAR=" [E-P] Iterative time evolution   ", CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc)
- call io_variable_elemental(ID, &
-&   VAR=" [E-P] Branches",I1=elph_branches,CHECK=.true.,OP=(/"==","=="/),DESCRIPTOR=RT_desc)
- call io_variable_elemental(ID, &
-&   VAR=" [E-P] GF`s Memory steps",I0=G_MEM_steps,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc)
+#if defined _ELPH
+ if ( write_is_on(ID) .or. variable_exist(ID,"ELPH_vars") ) then
+   call def_variable_elemental(ID,VAR="ELPH_vars",VAR_SZ=5,VAR_KIND=IP_YIO,MENU=0) 
+   call io_variable_elemental(ID,L0=l_elph_scatt,&
+&     VAR=" [E-P] Electron-phonon", CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc)
+   call io_variable_elemental(ID,L0=l_RT_iterative_ELPH_lifetimes,&
+&     VAR=" [E-P] Iterative time evolution   ", CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc)
+   call io_variable_elemental(ID, &
+&     VAR=" [E-P] Branches",I1=elph_branches,CHECK=.true.,OP=(/"==","=="/),DESCRIPTOR=RT_desc)
+   call io_variable_elemental(ID, &
+&     VAR=" [E-P] GF`s Memory steps",I0=G_MEM_steps,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc)
+ endif
 #endif
  !
  ! E-E_vars
@@ -157,17 +159,15 @@ integer function variables_RT_static(ID)
        call def_variable_elemental(ID,VAR="FIELD_POL"//trim(intc(i1)),VAR_SZ=1,VAR_KIND=0,MENU=0)
        call io_variable_elemental(ID,CH0=Efield(i1)%ef_pol,VAR=' [FIELDs] Polarization',DESCRIPTOR=RT_desc)
        !
-       call def_variable_elemental(ID,VAR="EXTERNAL_FIELD"//trim(intc(i1)),VAR_SZ=13,VAR_KIND=SP,MENU=0)
+       call def_variable_elemental(ID,VAR="EXTERNAL_FIELD"//trim(intc(i1)),VAR_SZ=14,VAR_KIND=SP,MENU=0)
        call io_variable_elemental(ID,R1=Efield(i1)%versor,VAR=" [FIELDs] Versor",DESCRIPTOR=RT_desc)
        call io_variable_elemental(ID,R1=Efield(i1)%versor_circ,VAR=" [FIELDs] Versor_circ",DESCRIPTOR=RT_desc)
        call io_variable_elemental(ID,VAR=" [FIELDs] Intensity",R0=Efield(i1)%intensity,&
 &                                 DESCRIPTOR=RT_desc,UNIT=AU2KWCMm2,TERMINATOR="[kWCMm2]")
-       call io_variable_elemental(ID,VAR=" [FIELDs] Damping",R0=Efield(i1)%width,DESCRIPTOR=RT_desc,UNIT=AUT2FS,TERMINATOR="[fs]")
-       call io_variable_elemental(ID,VAR=" [FIELDs] Frequency range",R1=Efield(i1)%frequency,&
+       call io_variable_elemental(ID,VAR=" [FIELDs] Width    ",R0=Efield(i1)%width,DESCRIPTOR=RT_desc,UNIT=AUT2FS,TERMINATOR="[fs]")
+       call io_variable_elemental(ID,VAR=" [FIELDs] Chirp    ",R0=Efield(i1)%chirp,DESCRIPTOR=RT_desc,UNIT=AUT2FS,TERMINATOR="[fs]")
+       call io_variable_elemental(ID,VAR=" [FIELDs] Frequency range",R0=Efield(i1)%frequency,&
 &                                 DESCRIPTOR=RT_desc,UNIT=HA2EV,TERMINATOR="[eV]")
-       call io_variable_elemental(ID,VAR=" [FIELDs] Frequency steps",I0=Efield(i1)%n_frequencies,DESCRIPTOR=RT_desc)
-       call io_variable_elemental(ID,VAR=" [FIELDs] Frequency step",R0=Efield(i1)%W_step,&
-&                                 CHECK=.true.,OP=(/"=="/),DESCRIPTOR=RT_desc,UNIT=HA2EV,TERMINATOR="[eV]")
        call io_variable_elemental(ID,R0=Efield(i1)%t_initial,VAR=' [FIELDs] Initial time',&
 &                                 DESCRIPTOR=RT_desc,UNIT=AUT2FS,TERMINATOR="[fs]")
        !                       
diff --git a/src/io_parallel/io_BS_PAR_block.F b/src/io_parallel/io_BS_PAR_block.F
index c7b98f61be..dadbe49b85 100644
--- a/src/io_parallel/io_BS_PAR_block.F
+++ b/src/io_parallel/io_BS_PAR_block.F
@@ -14,7 +14,6 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode)
  ! Each block has a status variables indicating if the matrix element was already calculated
  !
  use pars,          ONLY:IP,IPL,SP,schlen,max_io_vars_open
- use iso_c_binding, ONLY:C_LOC,C_F_POINTER
  use stderr,        ONLY:STRING_split
  use BS,            ONLY:BS_blk,BSK_IO_mode,MAX_BSK_LIN_size,n_BS_blks_min,BS_K_dim,BSK_IO_sum_value
  use timing_m,      ONLY:timing
@@ -33,7 +32,7 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode)
  integer, parameter :: n_max_tot_vars=6  ! see io_BS_PAR_init
  !
  character(schlen)   :: BSK_IO_local,BSK_IO_split(3)
- logical             :: lwrite(3),file_sync
+ logical             :: lwrite(3),file_sync,do_table,do_energ
  integer             :: x1,x2,VAR_IDs(2),ID_table,ID_energ,stat,ID_now(2)
  integer             :: n2,i1,i2,idx1,idx2,imax,i_rep(2),ilast
  integer(IPL)        :: idx
@@ -206,10 +205,13 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode)
  end select
  !
  if ((x1==x2.or.BS_blk(i_block)%size(2)==BS_K_dim(1)) .and. .not.trim(mode)=="matonly") then
-   if (  write_is_on(ID) .or. variable_exist(ID,"BS_TABLE") ) then
+   ! logicals for back compatibility
+   do_table=variable_exist(ID,"BS_TABLE").or.write_is_on(ID)
+   do_energ=variable_exist(ID,"BS_EH_ENERGIES").or.write_is_on(ID)
+   if ( do_table .or. do_energ ) then
      if (BS_blk(i_block)%mode=="C" .or. BS_blk(i_block)%mode=="A") x1=x1+BS_K_dim(1)
-     call io_variable_bulk(ID, ID_energ,  R1=BS_blk(i_block)%E,     IPOS=(/x1/) )
-     call io_variable_bulk(ID, ID_table,  I2=BS_blk(i_block)%table, IPOS=(/1,x1/) )
+     if(do_table) call io_variable_bulk(ID, ID_energ,  R1=BS_blk(i_block)%E,     IPOS=(/x1/) )
+     if(do_energ) call io_variable_bulk(ID, ID_table,  I2=BS_blk(i_block)%table, IPOS=(/1,x1/) )
    endif
  endif
  !
@@ -224,3 +226,66 @@ subroutine io_BS_PAR_block(iq,i_block,ID,mode)
  call timing('io_BS',OPR='stop')
  !
 end subroutine
+!
+!
+subroutine read_BS_PAR_collisions(iq,i_block,ID)
+ !
+ use BS,            ONLY:BS_blk
+ use timing_m,      ONLY:timing
+ use IO_int,        ONLY:io_variable_bulk
+ !
+ implicit none
+ !
+ integer,      intent(in) :: ID
+ integer,      intent(in) :: iq,i_block
+ !
+ ! Work Space
+ !
+ integer             :: x1,x2,y1,y2,VAR_IDs(2)
+ !
+ !
+ ! Get the type of block that we want to write 
+ !
+ select case(BS_blk(i_block)%mode)
+ case("R")
+   VAR_IDs=(/1,2/)
+ case("C")
+   VAR_IDs=(/3,4/)
+ case("A")
+   VAR_IDs=(/5,6/)
+ end select
+ !
+ ! Get lower bound coordinates in 2D matrix
+ x1=BS_blk(i_block)%coordinate(1)
+ x2=BS_blk(i_block)%coordinate(2)
+ !
+ y1=BS_blk(i_block)%poordinate(1)
+ y2=BS_blk(i_block)%poordinate(2)
+ !
+ call timing('read_BS_coll_row',OPR='start')
+ call io_variable_bulk(ID, VAR_IDs(1),  C2=BS_blk(i_block)%mat , IPOS=(/1,x1,x2/) )
+ call timing('read_BS_coll_row',OPR='stop')
+ call timing('read_BS_coll_col',OPR='start')
+ call io_variable_bulk(ID, VAR_IDs(1),  C2=BS_blk(i_block)%tam , IPOS=(/1,y1,y2/) )
+ call timing('read_BS_coll_col',OPR='stop')
+ !
+end subroutine read_BS_PAR_collisions
+!
+!
+subroutine read_BS_table(iq,ID)
+ !
+ use pars,          ONLY:max_io_vars_open
+ use IO_int,        ONLY:io_variable_bulk
+ use BS,            ONLY:BS_blk,BS_K_dim
+ !
+ implicit none
+ !
+ integer, intent(in) :: iq,ID
+ !
+ integer :: ID_table
+ !
+ ID_table=max_io_vars_open
+ call io_variable_bulk(ID, ID_table,  I2=BS_blk(1)%table, IPOS=(/1,1/) )
+ call io_variable_bulk(ID, ID_table,  I2=BS_blk(2)%table, IPOS=(/1,BS_K_dim(1)+1/) )
+ !
+end subroutine read_BS_table
diff --git a/src/io_parallel/io_BS_PAR_init.F b/src/io_parallel/io_BS_PAR_init.F
index 70b759fdf9..71736093d2 100644
--- a/src/io_parallel/io_BS_PAR_init.F
+++ b/src/io_parallel/io_BS_PAR_init.F
@@ -29,7 +29,7 @@ integer function io_BS_PAR_init(iq,ID,mode)
  !
  character(schlen)  :: db_name,ch_tmp,dim_names(3,n_max_vars),BSK_IO_local,BSK_IO_split(3)
  character(lchlen)  :: ch(n_max_vars,max_io_vars_open-1)
- logical           :: def_var, io_var
+ logical           :: def_var, io_var, do_table, do_energ
  integer(IPL)      :: BS_lin_size
  integer           :: n_vars, i1,i2, i_rep,i_var, BSK_n_dims,n_replica
  integer           :: chunksize(3)
@@ -139,15 +139,19 @@ integer function io_BS_PAR_init(iq,ID,mode)
    BSK_n_dims=2
    n_replica=1
    !
+   ! Resonant block
    dim_names(2:3,1) = [character(schlen) :: 'BS_K_dim', 'BS_K_dim']
    BS_IO_dim(:,1) = (/BS_K_dim(1),BS_K_dim(1)/)
    !
-   if(BS_n_eh_spaces==2) then
+   if(.not.l_BS_ares_from_res) then
+     ! Coupling block
      dim_names(2:3,2) = [character(schlen) ::  'BS_K_dim', 'BS_K_dim_A']
      BS_IO_dim(:,2) = (/BS_K_dim(1),BS_K_dim(2)/)
+     ! Anti-resonant block
      dim_names(2:3,3) = [character(schlen) :: 'BS_K_dim_A', 'BS_K_dim_A']
      BS_IO_dim(:,3) = (/BS_K_dim(2),BS_K_dim(2)/)
    else if (BS_K_coupling) then
+     ! Coupling block
      dim_names(2:3,2) = [character(schlen) :: 'BS_K_dim', 'BS_K_dim']
      BS_IO_dim(:,2) = (/BS_K_dim(1),BS_K_dim(1)/)
    endif
@@ -157,7 +161,7 @@ integer function io_BS_PAR_init(iq,ID,mode)
    chunksize(3) = YAMBO_DEF_CHUNK
    ! If BSE matrix is <YAMBO_DEF_CHUNK, we do not chunk.
    if (BS_K_dim(1) < YAMBO_DEF_CHUNK) chunksize(2:3) = 0
-   if (BS_n_eh_spaces==2 .and. BS_K_dim(2) < YAMBO_DEF_CHUNK) chunksize(2:3) = 0
+   if (.not.l_BS_ares_from_res .and. BS_K_dim(2) < YAMBO_DEF_CHUNK) chunksize(2:3) = 0
    !
  end select
  !
@@ -181,12 +185,18 @@ integer function io_BS_PAR_init(iq,ID,mode)
    enddo
  enddo
  !
- if( (trim(mode)=="full" .or. trim(mode)=="compressed_mat") .and. &
- &    ( write_is_on(ID) .or. variable_exist(ID,"BS_TABLE"))  ) then
-  dim_names(1,1)="BS_H_dim"
-  call def_variable_bulk(ID,"BS_EH_ENERGIES",max_io_vars_open-1,[BS_H_dim],SP_YIO,dim_names(1:1,1),par_io_kind='independent')
-  dim_names(1:2,1)=[character(schlen) :: "n_transition_variables","BS_H_dim"]
-   call def_variable_bulk(ID,"BS_TABLE",max_io_vars_open,[5,BS_H_dim],IP_YIO,dim_names(1:2,1),par_io_kind='independent')
+ if( (trim(mode)=="full" .or. trim(mode)=="compressed_mat") ) then
+   ! logicals for back compatibility
+   do_table=variable_exist(ID,"BS_TABLE").or.write_is_on(ID)
+   do_energ=variable_exist(ID,"BS_EH_ENERGIES").or.write_is_on(ID)
+   if ( do_energ ) then
+     dim_names(1,1)="BS_H_dim"
+     call def_variable_bulk(ID,"BS_EH_ENERGIES",max_io_vars_open-1,[BS_H_dim],SP_YIO,dim_names(1:1,1),par_io_kind='independent')
+   endif
+   if ( do_table ) then
+     dim_names(1:2,1)=[character(schlen) :: "n_transition_variables","BS_H_dim"]
+     call def_variable_bulk(ID,"BS_TABLE",max_io_vars_open,[5,BS_H_dim],IP_YIO,dim_names(1:2,1),par_io_kind='independent')
+   endif
  endif
  !
 end function io_BS_PAR_init
diff --git a/src/io_parallel/io_BS_header.F b/src/io_parallel/io_BS_header.F
index b1d97b6782..09b7342aa6 100644
--- a/src/io_parallel/io_BS_header.F
+++ b/src/io_parallel/io_BS_header.F
@@ -11,6 +11,7 @@ integer function io_BS_header(iq,X,ID,mode)
  !
  use parallel_int,  ONLY:PP_wait
  use com,           ONLY:msg
+ use drivers,       ONLY:l_eval_collisions
  use pars,          ONLY:schlen,lchlen
  use stderr,        ONLY:intc
  use X_m,           ONLY:X_t
@@ -68,8 +69,12 @@ integer function io_BS_header(iq,X,ID,mode)
  !
  ! Check if the file already contains data and it is consistent
  !
- io_BS_header=io_header(ID,QPTS=.true.,R_LATT=.true.,WF=.true.,&
+ if (l_eval_collisions) then
+   io_BS_header=io_header(ID,R_LATT=.true.,WF=.true.,CUTOFF=.true.)
+ else
+   io_BS_header=io_header(ID,QPTS=.true.,R_LATT=.true.,WF=.true.,&
 &                       IMPOSE_SN=.true.,XC_KIND="K_WF force Xs",CUTOFF=.true.,FRAG=frag_BS_K)
+ endif
  !
  io_BS_header=variables_BS(ID,iq,local_desc,.TRUE.,X=X)
  !
diff --git a/src/io_parallel/io_COLLISIONS.F b/src/io_parallel/io_COLLISIONS.F
index 27acf114e0..82b05b47d6 100644
--- a/src/io_parallel/io_COLLISIONS.F
+++ b/src/io_parallel/io_COLLISIONS.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
-integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
+! headers
+!
+#include<y_memory.h>
+!
+integer function io_COLLISIONS(ID,COLL_grp,COLL_element,i_coll_)
  !
  use pars,          ONLY:schlen,IP,SP,DP,IP_YIO,CR_YIO
  use parallel_m,    ONLY:master_cpu
  use iso_c_binding, ONLY:C_LOC,C_F_POINTER
- use collision_ext, ONLY:COLLISIONS_element,COLLISIONS_group,LAST_COLL_sync,PAR_COLL_min,COLLISIONS_load_SP
+ use collision_ext, ONLY:COLLISIONS_element,COLLISIONS_group,PAR_COLL_min,&
+&                        COLLISIONS_CV_only,COLL_bands_frozen,COLLISIONS_load_SP
  use IO_m,          ONLY:io_sec,io_status,IO_OUTDATED_DB,read_is_on,IO_mode,io_unit,&
 &                        VERIFY,io_extension,write_is_on,nf90_sync,netcdf_call,YAMBO_DEF_CHUNK
  use IO_int,        ONLY:io_connect,io_header,io_elemental,ver_is_gt_or_eq,io_fragment, &
@@ -19,13 +24,15 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
  use plasma,        ONLY:N_plasma_poles_global,PLASMA_redux_percent
  use QP_m,          ONLY:QP_ng_Sx,QP_ng_SH,SC_band_mixing,QP_QED_ng
  use X_m,           ONLY:X_desc
+ use electrons,     ONLY:n_sp_pol
  use drivers,       ONLY:l_elel_scatt,l_elphoton_scatt
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(inout)            ::ID
- type(COLLISIONS_group)            ::COLL
+ type(COLLISIONS_group)            ::COLL_grp
  type(COLLISIONS_element),optional, target :: COLL_element
  integer, intent(in)     ,optional         :: i_coll_
  !
@@ -34,7 +41,8 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
  !
  character(1)      :: TMP_VAR(1)
  character(schlen) :: db_name
- integer           :: i_coll,stat,COLL_chunksize(3)
+ integer           :: i_coll,i_coll_mem,sync_step,stat,COLL_chunksize(3),N(3)
+ integer           :: Nbv,Nbc,Nb1,Nb2,Nk
  logical           :: def1,def2,io2,io3,io4,file_sync
  !
  call timing('io_COLLISIONS',OPR='start')
@@ -45,7 +53,7 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
  if (present(COLL_element)) i_coll=COLL_element%I
  if (present(i_coll_))      i_coll=i_coll_
  !
- call COLLISIONS_dbname(COLL%kind,db_name)
+ call COLLISIONS_dbname(COLL_grp%kind,db_name)
  !
  io_COLLISIONS=io_connect(desc=trim(db_name),type=2,ID=ID)
  !
@@ -60,7 +68,25 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
  !
  if (def1) then
    call def_variable_bulk(ID,'N_COLLISIONS_STATES',1,(/1/),IP_YIO,par_io_kind='independent')
-   if (write_is_on(ID)) COLL_element%N=(COLL%D(3,2)-COLL%D(3,1)+1)*(COLL%D(2,2)-COLL%D(2,1)+1)*(COLL%D(1,2)-COLL%D(1,1)+1)
+   if (write_is_on(ID)) then
+     Nk=COLL_grp%D(3,2)-COLL_grp%D(3,1)+1
+     Nbc=COLL_grp%D(2,2)-COLL_grp%D(2,3)+1
+     Nbv=COLL_grp%D(1,4)-COLL_grp%D(1,1)+1
+     Nb2=COLL_grp%D(2,2)-COLL_grp%D(2,1)+1
+     Nb1=COLL_grp%D(1,2)-COLL_grp%D(1,1)+1
+     if (any((/COLL_bands_frozen==1/))) then
+       Nb2=count(COLL_bands_frozen(COLL_grp%D(2,1):COLL_grp%D(2,2))==0)
+       Nb1=count(COLL_bands_frozen(COLL_grp%D(1,1):COLL_grp%D(1,2))==0)
+       Nbc=count(COLL_bands_frozen(COLL_grp%D(2,3):COLL_grp%D(2,2))==0)
+       Nbv=count(COLL_bands_frozen(COLL_grp%D(1,1):COLL_grp%D(1,4))==0)
+     endif
+     if (COLLISIONS_CV_only) then
+       N=(/Nbv,Nbc,2*Nk/)
+     else
+       N=(/Nb1,Nb2,Nk/)
+     endif
+     COLL_element%N=N(1)*N(2)*N(3)*n_sp_pol
+   endif
  endif
  !
  ! Despite all processors write to file (and also define the variable), only the master fills it
@@ -71,17 +97,17 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
  !
  if (io3.or.def2) then
    !
-   if(COLL%kind==QP_SE_GW_NEQ.or.COLL%kind==QP_SE_GW_QED) then
+   if(COLL_grp%kind==QP_SE_GW_NEQ.or.COLL_grp%kind==QP_SE_GW_QED) then
      ! Real collisions
      if(def2) then
        COLL_chunksize(1:2)=YAMBO_DEF_CHUNK
        if (COLL_element%N<YAMBO_DEF_CHUNK) COLL_chunksize(1)=COLL_element%N
-       if (COLL%N<YAMBO_DEF_CHUNK)         COLL_chunksize(2)=0
-       call def_variable_bulk(ID,'COLLISIONS_v',2,(/COLL_element%N,COLL%N/),SP,&
+       if (COLL_grp%N<YAMBO_DEF_CHUNK)     COLL_chunksize(2)=0
+       call def_variable_bulk(ID,'COLLISIONS_v',2,(/COLL_element%N,COLL_grp%N/),SP,&
        &                      par_io_kind='independent',chunksize=COLL_chunksize(1:2))
      endif
      if(io3 ) then
-       call C_F_POINTER(C_LOC(COLL_element%v_r),tmp_link_real,(/COLL_element%N,1/))
+       call C_F_POINTER(C_LOC(COLL_element%v_r(:COLL_element%N)),tmp_link_real,(/COLL_element%N,1/))
        call  io_variable_bulk(ID,2,R2=tmp_link_real,IPOS=(/1,i_coll/))
        nullify(tmp_link_real)
      endif
@@ -89,14 +115,16 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
      ! Complex collisions
      if(def2) then
        COLL_chunksize(1)=2
-       COLL_chunksize(2:3)=YAMBO_DEF_CHUNK
-       if (COLL_element%N<YAMBO_DEF_CHUNK) COLL_chunksize(2)=COLL_element%N
-       if (COLL%N<YAMBO_DEF_CHUNK)         COLL_chunksize(3)=0
-       call def_variable_bulk(ID,'COLLISIONS_v',2,(/2,COLL_element%N,COLL%N/),SP,&
-       &                      par_io_kind='independent',chunksize=COLL_chunksize(1:2))
+       !COLL_chunksize(2:3)=YAMBO_DEF_CHUNK
+       !if (COLL_element%N<YAMBO_DEF_CHUNK) COLL_chunksize(2)=COLL_element%N
+       !if (COLL_grp%N<YAMBO_DEF_CHUNK)     COLL_chunksize(3)=COLL_grp%N
+       COLL_chunksize(2)=COLL_element%N
+       COLL_chunksize(3)=1 !COLL_grp%N
+       call def_variable_bulk(ID,'COLLISIONS_v',2,(/2,COLL_element%N,COLL_grp%N/),SP,&
+       &                      par_io_kind='independent',chunksize=COLL_chunksize)
      endif
      if(io3 ) then
-       call C_F_POINTER(C_LOC(COLL_element%v_c),tmp_link_complex,(/COLL_element%N,1/))
+       call C_F_POINTER(C_LOC(COLL_element%v_c(:COLL_element%N)),tmp_link_complex,(/COLL_element%N,1/))
        call  io_variable_bulk(ID,2,C2=tmp_link_complex,IPOS=(/1,1,i_coll/))
        if(SP==DP.and.COLLISIONS_load_SP) then
          COLL_element%v_c_SP=cmplx(COLL_element%v_c)
@@ -106,34 +134,38 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_)
      endif
    endif
    !
-   ! This should be used for restart like in BSE ...
-   ! if(BS_blk(i_block)%mat(i1,i2)==YAMBO_CMPLX_NO_FILL) cycle
-   ! BS_blk(i_block)%done(i1,i2)='t'
+   ! Here I could set it to "t" only when COLL_element%v_c/=0 or COLL_element%v_r/=0 ?
    if(io3.and.read_is_on(ID)) COLL_element%table='t'
    !
  endif
  !
- if (def1) call def_variable_bulk(ID,'COLLISIONS_DONE',3,(/COLL%N/),CR_YIO,par_io_kind='independent')
+ if (def1) call def_variable_bulk(ID,'COLLISIONS_DONE',3,(/COLL_grp%N/),CR_YIO,par_io_kind='independent')
  !
  if (io4) then
    if (write_is_on(ID)) then
-     COLL%IO_status(i_coll)=1
+     COLL_grp%IO_status(i_coll)=1
      TMP_VAR="t"
    endif
    call io_variable_bulk(ID,3,CH1=TMP_VAR(1:1),IPOS=[i_coll])
    if (read_is_on(ID)) then
-     COLL%IO_status(i_coll)=-1
-     if(TMP_VAR(1)=="t") COLL%IO_status(i_coll)=1
+     COLL_grp%IO_status(i_coll)=-1
+     if(TMP_VAR(1)=="t") COLL_grp%IO_status(i_coll)=1
    endif
  endif
  !
 #if defined _RT
- ! nf90_sync is performed up to LAST_COLL_sync and at steps of 5% of bse kernel
- file_sync= i_coll<=LAST_COLL_sync .and. mod(i_coll,max(1,PAR_COLL_min/20))==0 .and. index(trim(db_name),"HXC")/=0
- !
- if(write_is_on(ID).and.file_sync.and.io3) then
-   stat = nf90_sync(io_unit(ID))
-   call netcdf_call(stat,ID)
+ if (write_is_on(ID).and.io3 .and. index(trim(db_name),"HXC")/=0) then
+   !
+   ! nf90_sync is performed up to PAR_COLL_min and at steps of 5% of bse kernel
+   i_coll_mem = COLL_grp%PAR_map(i_coll)
+   sync_step  = max(1,PAR_COLL_min/20)
+   file_sync= (i_coll_mem<PAR_COLL_min .and. mod(i_coll_mem,sync_step)==0) .or. i_coll_mem==PAR_COLL_min
+   !
+   if(file_sync) then
+     stat = nf90_sync(io_unit(ID))
+     call netcdf_call(stat,ID)
+   endif
+   !
  endif
 #endif
  !
diff --git a/src/io_parallel/io_COLLISIONS_header.F b/src/io_parallel/io_COLLISIONS_header.F
index a40228cf97..d20c3ab0c7 100644
--- a/src/io_parallel/io_COLLISIONS_header.F
+++ b/src/io_parallel/io_COLLISIONS_header.F
@@ -5,14 +5,21 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_COLLISIONS_header(ID,COLL)
  !
- use pars,          ONLY:schlen,IP,IP_YIO
+ use pars,          ONLY:schlen,IP,IP_YIO,LP_YIO
  use R_lattice,     ONLY:RIM_ng,RIM_n_rand_pts
- use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands,COLLISIONS_group,COLLISIONS_CV_only
- use IO_m,          ONLY:io_status,IO_OUTDATED_DB,read_is_on
+ use collision_ext, ONLY:COLLISIONS_cutoff,COLL_bands,COLLISIONS_group,&
+&                        COLLISIONS_CV_only,COLL_bands_frozen
+ use IO_m,          ONLY:variable_exist,write_is_on,io_status,IO_OUTDATED_DB,read_is_on
  use IO_int,        ONLY:io_connect,io_header,io_elemental,ver_is_gt_or_eq, &
-&                        io_variable_bulk,def_variable_bulk,io_disconnect
+&                        io_variable_bulk,def_variable_bulk,io_disconnect,  &
+&                        def_variable_elemental,&
+&                        io_variable_elemental
  use global_XC,     ONLY:QP_SE_GW_NEQ,QP_SE_GW_QED,&
 &                        H_SE_FOCK,H_SE_COH,H_SE_SEX,H_SE_HARTREE
  use plasma,        ONLY:N_plasma_poles_global,PLASMA_redux_percent
@@ -20,7 +27,9 @@ integer function io_COLLISIONS_header(ID,COLL)
  use X_m,           ONLY:X_desc
  use drivers,       ONLY:l_elel_scatt,l_elphoton_scatt
  use timing_m,      ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer, intent(inout)            ::ID
  type(COLLISIONS_group)            ::COLL
@@ -54,7 +63,7 @@ integer function io_COLLISIONS_header(ID,COLL)
  if (COLL%kind==QP_SE_GW_NEQ) SIZE_=SIZE_+3
  if (COLL%kind==QP_SE_GW_QED) SIZE_=SIZE_+2
  if (ver_is_gt_or_eq(ID,revision=21128)) then
-   SIZE_ = SIZE_+1
+   SIZE_ = SIZE_+2
  endif
  call io_elemental(ID,VAR="PARAMETERS",VAR_SZ=SIZE_,MENU=0)
  !
@@ -139,13 +148,20 @@ integer function io_COLLISIONS_header(ID,COLL)
  !
  call io_elemental(ID,VAR="",VAR_SZ=0,MENU=1)
  !
+ if (variable_exist(ID,'COLL_bands_frozen').or.write_is_on(ID)) then
+   call def_variable_bulk(ID,"COLL_bands_frozen",1,shape(COLL_bands_frozen),LP_YIO)
+   call  io_variable_bulk(ID,1,L1=COLL_bands_frozen)
+ else
+   COLL_bands_frozen=.false.
+ endif
+ !
  ! States
  !
  if (read_is_on(ID).and..not.allocated(COLL%state)) then
    YAMBO_ALLOC(COLL%state,(COLL%N,4))
  endif
- call def_variable_bulk(ID,'COLLISIONS_STATE',1,(/COLL%N,4/),IP_YIO)
- call  io_variable_bulk(ID,1,I2=COLL%state)
+ call def_variable_bulk(ID,'COLLISIONS_STATE',2,(/COLL%N,4/),IP_YIO)
+ call  io_variable_bulk(ID,2,I2=COLL%state)
  !
  io_COLLISIONS_header=io_status(ID)
  !
diff --git a/src/io_parallel/io_DIPOLES.F b/src/io_parallel/io_DIPOLES.F
index fd2b1af055..ce0457142a 100644
--- a/src/io_parallel/io_DIPOLES.F
+++ b/src/io_parallel/io_DIPOLES.F
@@ -137,12 +137,12 @@ integer function io_DIPOLES(Dip,ID)
    if (i_fragment>=1) call  io_variable_bulk(ID,VAR_ID,C5=DIP_orbital(:,:,:,ik_mem:ik_mem,i_sp_pol:i_sp_pol,2),IPOS=DIP_ipos)
  endif
  !
- if (STRING_match(Dip%computed,"M_orb")) then
+ if (STRING_match(Dip%computed,"M_orbRT")) then
    !
    ! Orbital magnetization
    !
    VAR_ID=5
-   VAR_name='DIP_orbital'
+   VAR_name='DIP_orbital_RT'
    if (i_fragment< 1) call def_variable_bulk(ID,trim(VAR_name),VAR_ID,DIP_size,SP,par_io_kind='independent')
    if (i_fragment>=1) call  io_variable_bulk(ID,VAR_ID,C5=DIP_orbital(:,:,:,ik_mem:ik_mem,i_sp_pol:i_sp_pol,1),IPOS=DIP_ipos)
    !
@@ -183,12 +183,12 @@ integer function io_DIPOLES(Dip,ID)
    !
  endif
  !
- if (STRING_match(Dip%computed,"M_CD_orb")) then
+ if (STRING_match(Dip%computed,"M_orbCD")) then
    !
    ! Orbital magnetization
    !
    VAR_ID=9
-   VAR_name='DIP_Orbital'
+   VAR_name='DIP_Orbital_CD'
    if (i_fragment< 1) call def_variable_bulk(ID,trim(VAR_name),VAR_ID,DIP_size,SP,par_io_kind='independent')
    if (i_fragment>=1) call  io_variable_bulk(ID,VAR_ID,C5=DIP_orbital(:,:,:,ik_mem:ik_mem,i_sp_pol:i_sp_pol,1),IPOS=DIP_ipos)
    !
diff --git a/src/io_parallel/variables_BS.F b/src/io_parallel/variables_BS.F
index 648ffe53a7..4cbf404cca 100644
--- a/src/io_parallel/variables_BS.F
+++ b/src/io_parallel/variables_BS.F
@@ -44,8 +44,8 @@ integer function variables_BS(ID,iq,desc,CLOSE_the_menu,X)
  call io_variable_elemental(ID,VAR="[K] Identifier",I0=BS_identifier,DESCRIPTOR=desc)
  !
  if(iq>1) then
-   call def_variable_elemental(ID,"Q-point",3,SP,0)
-   call io_variable_elemental(ID,VAR="[K] Q-point",&
+   call def_variable_elemental(ID,"BS_Q",3,SP,0)
+   call io_variable_elemental(ID,VAR="[K] BS Q-point",&
 &                             R1=BSqpts(:,iq),CHECK=.true.,OP=(/"==","==","=="/),DESCRIPTOR=desc)
  endif
  !
@@ -173,8 +173,6 @@ integer function variables_BS(ID,iq,desc,CLOSE_the_menu,X)
    !
  endif
  !
-#if defined _PAR_IO
- !
  ! Check if BSE kernel is written in triangular form
  !
  if (write_is_on(ID).or.variable_exist(ID,"BSE_KERNEL_IO_MODE")) then
@@ -184,8 +182,6 @@ integer function variables_BS(ID,iq,desc,CLOSE_the_menu,X)
    BSK_IO_mode="2D_standard"
  endif
  !
-#endif
- !
  if (write_is_on(ID).or.variable_exist(ID,"X_kind")) then
    call def_variable_elemental(ID,"X_kind",1,CR_YIO,0)
    call io_variable_elemental(ID,VAR="[X] Response Kind",CH0=BSE_L_kind,CHECK=.true.,OP=(/"=="/),DESCRIPTOR=desc)
diff --git a/src/io_parallel/variables_BS_more.F b/src/io_parallel/variables_BS_more.F
index 137875c598..a39e129727 100644
--- a/src/io_parallel/variables_BS_more.F
+++ b/src/io_parallel/variables_BS_more.F
@@ -8,7 +8,8 @@
 integer function variables_BS_more(ID,mode)
  !
  use pars,       ONLY:IP,SP,LP,schlen,LP_YIO
- use BS,         ONLY:BS_K_cutoff,BS_K_cutoff_done,BS_max_val,l_BSE_kernel_complete
+ use BS,         ONLY:BS_K_cutoff,BS_K_cutoff_done,BS_max_val,l_BSE_kernel_complete,&
+&                     BS_kpt_bz,BS_kpt_ibz
  use IO_m,       ONLY:nf90_sync,io_unit,write_is_on,read_is_on,&
 &                     variable_exist,NF90_unlimited,netcdf_dim_size
  use IO_int,     ONLY:def_variable_elemental,io_variable_elemental,&
@@ -23,7 +24,7 @@ integer function variables_BS_more(ID,mode)
  integer           :: N_cutoff,stat
  logical           :: io_var
  real(SP)          :: BS_K_cutoff_IO(1)
- character(schlen) :: kernel_dim_name(1)
+ character(schlen) :: kernel_dim_name(1),kpt_dim_names(2)
  !
  variables_BS_more=0
  !
@@ -56,6 +57,20 @@ integer function variables_BS_more(ID,mode)
    endif
  endif
  !
+ if(write_is_on(ID)) io_var=trim(mode)=="start"
+ if( read_is_on(ID)) io_var=trim(mode)=="start".and.variable_exist(ID,"BS_K_BZ")
+ if (io_var) then
+   kpt_dim_names=[character(schlen) :: "xyz","n_kpt_bz"]
+   call def_variable_bulk(ID,"BS_K_BZ",2,shape(BS_kpt_bz),SP,kpt_dim_names)
+   call io_variable_bulk(ID,2,R2=BS_kpt_bz)
+ endif
+ if( read_is_on(ID)) io_var=trim(mode)=="start".and.variable_exist(ID,"BS_K_IBZ")
+ if (io_var) then
+   kpt_dim_names=[character(schlen) :: "xyz","n_kpt_ibz"]
+   call def_variable_bulk(ID,"BS_K_IBZ",2,shape(BS_kpt_ibz),SP,kpt_dim_names)
+   call io_variable_bulk(ID,2,R2=BS_kpt_ibz)
+ endif
+ !
  if(write_is_on(ID)) stat = nf90_sync(io_unit(ID))
  !
  variables_BS_more=io_status(ID)
diff --git a/src/io_serial/.objects b/src/io_serial/.objects
index a43913ec15..6b18a8d628 100644
--- a/src/io_serial/.objects
+++ b/src/io_serial/.objects
@@ -1,4 +1,5 @@
+COLL_objects =
 #if defined _SC || _RT || _QED
-COLL_objects = io_COLLISIONS.o
+COLL_objects = io_COLLISIONS.o 
 #endif
 objs = io_BS.o io_DIPOLES.o variables_BS.o io_Overlaps.o $(COLL_objects)
diff --git a/src/io_serial/io_BS.F b/src/io_serial/io_BS.F
index 0f873eaa17..16738e164b 100644
--- a/src/io_serial/io_BS.F
+++ b/src/io_serial/io_BS.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! Analogously to io_DIPOLES, we write the io files of BS with the
 ! information of the q-vector
 ! Just to use what is done:
@@ -25,8 +29,9 @@ integer function io_BS(iq,X,ID)
 &                        def_variable_bulk,io_variable_bulk,io_header,io_fragment
  use descriptors,   ONLY:IO_desc,IO_desc_reset,IO_desc_duplicate
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(X_t):: X 
  integer  :: iq,ID
diff --git a/src/io_serial/io_COLLISIONS.F b/src/io_serial/io_COLLISIONS.F
index 5e5e38c3e2..8af5f4802a 100644
--- a/src/io_serial/io_COLLISIONS.F
+++ b/src/io_serial/io_COLLISIONS.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_,COMM)
  !
  use pars,          ONLY:schlen,IP,SP,IP_YIO
@@ -22,7 +26,9 @@ integer function io_COLLISIONS(ID,COLL,COLL_element,i_coll_,COMM)
  use X_m,           ONLY:X_desc
  use drivers,       ONLY:l_elel_scatt,l_elphoton_scatt
  use timing_m,      ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer, intent(inout)            ::ID
  type(COLLISIONS_group)            ::COLL
diff --git a/src/io_serial/io_DIPOLES.F b/src/io_serial/io_DIPOLES.F
index 1970496e45..eae2d9093d 100644
--- a/src/io_serial/io_DIPOLES.F
+++ b/src/io_serial/io_DIPOLES.F
@@ -124,11 +124,11 @@ integer function io_DIPOLES(Dip,ID)
    !
  endif
  !
- if (STRING_match(Dip%computed,"M_orb")) then
+ if (STRING_match(Dip%computed,"M_orbRT")) then
    !
    ! Orbital magnetization
    !
-   write (VAR_name,'(3(a,i4.4))') 'DIP_orbital_k_',ik,'_spin_',i_sp_pol
+   write (VAR_name,'(3(a,i4.4))') 'DIP_orbital_RT_k_',ik,'_spin_',i_sp_pol
    call io_bulk(ID_frag,VAR=trim(VAR_name),VAR_SZ=DIP_size)
    call io_bulk(ID_frag,C3=DIP_orbital(:,:,:,ik_mem,i_sp_pol,1),IPOS=DIP_ipos)
    !
@@ -167,16 +167,16 @@ integer function io_DIPOLES(Dip,ID)
    !
  endif
  !
- if (STRING_match(Dip%computed,"M_CD_orb")) then
+ if (STRING_match(Dip%computed,"M_orbCD")) then
    !
    ! Orbital magnetization
    !
-   write (VAR_name,'(3(a,i4.4))') 'DIP_orbital_k_',ik,'_spin_',i_sp_pol
+   write (VAR_name,'(3(a,i4.4))') 'DIP_orbital_CD_k_',ik,'_spin_',i_sp_pol
    call io_bulk(ID_frag,VAR=trim(VAR_name),VAR_SZ=DIP_size)
    call io_bulk(ID_frag,C3=DIP_orbital(:,:,:,ik_mem,i_sp_pol,1),IPOS=DIP_ipos)
    !
  endif
- ! 
+ !
  call io_fragment_disconnect(ID,ID_frag)
  !
 1 call io_disconnect(ID=ID)
diff --git a/src/io_serial/variables_BS.F b/src/io_serial/variables_BS.F
index 603dc1c0ed..2c17937c73 100644
--- a/src/io_serial/variables_BS.F
+++ b/src/io_serial/variables_BS.F
@@ -12,10 +12,10 @@ integer function variables_BS(ID,iq,desc,CLOSE_the_menu,X)
  use drivers,       ONLY:l_td_hf
  use X_m,           ONLY:X_t
  use R_lattice,     ONLY:RIM_ng,RIM_n_rand_pts,cut_geometry
- use BS,            ONLY:BS_identifier,BS_K_dim,BS_bands,&
+ use BS,            ONLY:BS_identifier,BS_K_dim,BS_bands,BSE_L_kind,&
 &                        BS_K_coupling,BS_res_K_exchange,BS_res_K_corr,&
 &                        BS_n_g_exch,BS_n_g_W,BS_eh_en,BS_eh_win,BS_W_is_diagonal,&
-&                        BS_K_is_ALDA,BS_cpl_K_exchange,BSE_L_kind,&
+&                        BS_K_is_ALDA,BS_cpl_K_exchange,&
 &                        BS_cpl_K_corr,BSqpts,BS_perturbative_SOC
  use IO_m,          ONLY:io_status,variable_exist,write_is_on
  use IO_int,        ONLY:ver_is_gt_or_eq,def_variable_elemental,io_variable_elemental
diff --git a/src/linear_algebra/.objects b/src/linear_algebra/.objects
index a3c9bfe4bc..ce3436cd7f 100644
--- a/src/linear_algebra/.objects
+++ b/src/linear_algebra/.objects
@@ -9,5 +9,5 @@ SLEPC_objs = MATRIX_slepc.o
 #if defined _GPU && defined _PGI
 GPU_objs = SERIAL_lin_system_gpu.o
 #endif
-objs = LINEAR_ALGEBRA_driver.o SERIAL_SVD_inversion.o SERIAL_inversion.o SERIAL_diagonalization.o \
+objs = LINEAR_ALGEBRA_driver.o SERIAL_SVD.o SERIAL_inversion.o SERIAL_diagonalization.o \
        SERIAL_HERMITIAN_diagonalization.o SERIAL_lin_system.o $(SLK_objs) $(SLEPC_objs) $(GPU_objs)
diff --git a/src/linear_algebra/DOUBLE_project.dep b/src/linear_algebra/DOUBLE_project.dep
index 01c2bbd778..ac3bcff9d9 100644
--- a/src/linear_algebra/DOUBLE_project.dep
+++ b/src/linear_algebra/DOUBLE_project.dep
@@ -6,7 +6,7 @@
  PARALLEL_inversion.o
  PARALLEL_lin_system.o
  SERIAL_HERMITIAN_diagonalization.o
- SERIAL_SVD_inversion.o
+ SERIAL_SVD.o
  SERIAL_diagonalization.o
  SERIAL_inversion.o
  SERIAL_lin_system.o
diff --git a/src/linear_algebra/LINEAR_ALGEBRA_driver.F b/src/linear_algebra/LINEAR_ALGEBRA_driver.F
index 7f5153e3bd..84f0f5c329 100644
--- a/src/linear_algebra/LINEAR_ALGEBRA_driver.F
+++ b/src/linear_algebra/LINEAR_ALGEBRA_driver.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+#include<dev_defs.h>
+!
 subroutine LINEAR_ALGEBRA_driver(idriver,lib_in,&
 &                                M,M_slk,&
 &                                M_bse,&
@@ -33,9 +38,9 @@ subroutine LINEAR_ALGEBRA_driver(idriver,lib_in,&
  use timing_m,      ONLY:timing
  use gpu_m,         ONLY:have_gpu
  use devxlib,       ONLY:devxlib_mapped
+ use y_memory_alloc
  !
-#include<y_memory.h>
-#include<dev_defs.h>
+ implicit none
  !
  integer                    :: idriver
  integer,          optional :: lib_in
@@ -79,7 +84,7 @@ subroutine LINEAR_ALGEBRA_driver(idriver,lib_in,&
  endif
 
  !
- ! Decide which to use among LK or SLK
+ ! Decide which LA lib to use
  !
  lib_in_use=USE_LK
 #if defined _SCALAPACK 
@@ -205,9 +210,9 @@ subroutine LINEAR_ALGEBRA_driver(idriver,lib_in,&
    !
    if (idriver==SVD_INV) then
      if (present(M_slk)) then
-       call SERIAL_SVD_inversion(N,M_slk%blc(:,:,M_slk%I),SVD_digits)
+       call SERIAL_SVD(N,M_slk%blc(:,:,M_slk%I),'inv',SVD_digits)
      else
-       call SERIAL_SVD_inversion(N,M,SVD_digits)
+       call SERIAL_SVD(N,M,'inv',SVD_digits)
      endif
    endif
    !
diff --git a/src/linear_algebra/MATRIX_slepc.F b/src/linear_algebra/MATRIX_slepc.F
index 335f884426..1b9fe53970 100644
--- a/src/linear_algebra/MATRIX_slepc.F
+++ b/src/linear_algebra/MATRIX_slepc.F
@@ -21,7 +21,7 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
  ! This routine can be called from within Matrix_driver.F but should be kept in
  ! a separate source file for clarity.
  !
- use pars,          ONLY : SP,cI
+ use pars,          ONLY : SP,cI,schlen
  use units,         ONLY : HA2EV
  use com,           ONLY : msg
  use stderr,        ONLY : intc
@@ -87,6 +87,7 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
  !
  logical           :: l_precondition
  character(len=30) :: rowfmt
+ character(schlen) :: BSS_slepc_precondition_out
  !
  !
  call MatGetSize(M_slepc,Nsize,Msize,ierr)
@@ -223,7 +224,7 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
    !if (l_precondition) call PCSetType(pc,pckind,ierr)
  endif
  !
- BSS_slepc_precondition=trim(stkind)//"+"//trim(kspkind)//"+"//trim(pckind)
+ BSS_slepc_precondition_out=trim(stkind)//"+"//trim(kspkind)//"+"//trim(pckind)
  !
  if (BSS_slepc_extraction == 'ritz')              extr = EPS_RITZ
  if (BSS_slepc_extraction == 'harmonic')          extr = EPS_HARMONIC         
@@ -269,6 +270,7 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
  !Set monitor
  call EPSMonitorSet(eps,MyEPSMonitor,0,PETSC_NULL_FUNCTION,ierr)
  !
+#if PETSC_VERSION_GE(3,22,0)
  if(present(V_left).and.BSS_slepc_pseudo_herm.and.epskind==EPSKRYLOVSCHUR) then
    ! Default Shao used in case input is mispelled or not set
    epsbsetype=EPS_KRYLOVSCHUR_BSE_SHAO
@@ -283,20 +285,25 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
  !
  ! Fix the BSS_slepc_approach string
  if (epskind==EPSKRYLOVSCHUR) BSS_slepc_approach="Krylov-Schur"
+#endif
  !
  if(present(V_left)) then
+#if PETSC_VERSION_GE(3,22,0)
    if(BSS_slepc_pseudo_herm.and.epskind==EPSKRYLOVSCHUR) then
      if(epsbsetype==EPS_KRYLOVSCHUR_BSE_SHAO)         BSS_slepc_approach=trim(BSS_slepc_approach)//"+Shao" 
      if(epsbsetype==EPS_KRYLOVSCHUR_BSE_GRUNING)      BSS_slepc_approach=trim(BSS_slepc_approach)//"+Gruning" 
      if(epsbsetype==EPS_KRYLOVSCHUR_BSE_PROJECTEDBSE) BSS_slepc_approach=trim(BSS_slepc_approach)//"+ProjectedBSE"
    else
+#endif
      BSS_slepc_approach=trim(BSS_slepc_approach)//"+NonHermitian"
+#if PETSC_VERSION_GE(3,22,0)
    endif
+#endif
  endif
  !
  call msg( 'sr', '[SLEPC] Approach                          ',BSS_slepc_approach)
  !
- call msg( 'sr', '[SLEPC] Precondition method               ',BSS_slepc_precondition)
+ call msg( 'sr', '[SLEPC] Precondition method               ',BSS_slepc_precondition_out)
  call msg( 'sr', '[SLEPC] Extraction method                 ',BSS_slepc_extraction)
  call msg( 'sr', '[SLEPC] Number of requested eigenvalues   ', n_eig ) 
  !
@@ -376,12 +383,21 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
      call VecScatterEnd(vs,xup,vout,INSERT_VALUES,SCATTER_FORWARD,ierr)
      call VecScatterEnd(vs2,xdown,vout2,INSERT_VALUES,SCATTER_FORWARD,ierr)
      !
+#if PETSC_VERSION_LT(3,23,0)
      call VecGetArrayReadF90(vout,xsr,ierr)
      V_right(1:Nsize/2, i_eig+1) = cmplx(xsr,kind=SP)
      call VecRestoreArrayReadF90(vout,xsr,ierr)
      call VecGetArrayReadF90(vout2,xsr,ierr)
      V_right(Nsize/2+1:Nsize, i_eig+1) = cmplx(xsr,kind=SP)
      call VecRestoreArrayReadF90(vout2,xsr,ierr)
+#else
+     call VecGetArrayRead(vout,xsr,ierr)
+     V_right(1:Nsize/2, i_eig+1) = cmplx(xsr,kind=SP)
+     call VecRestoreArrayRead(vout,xsr,ierr)
+     call VecGetArrayRead(vout2,xsr,ierr)
+     V_right(Nsize/2+1:Nsize, i_eig+1) = cmplx(xsr,kind=SP)
+     call VecRestoreArrayRead(vout2,xsr,ierr)
+#endif
      !
      call VecRestoreSubVector(xr,is(1),xup,ierr)
      call VecRestoreSubVector(xr,is(2),xdown,ierr)
@@ -396,12 +412,21 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
        call VecScatterEnd(vs,xup,vout,INSERT_VALUES,SCATTER_FORWARD,ierr)
        call VecScatterEnd(vs2,xdown,vout2,INSERT_VALUES,SCATTER_FORWARD,ierr)
        !
+#if PETSC_VERSION_LT(3,23,0)
        call VecGetArrayReadF90(vout,xsr_left,ierr)
        V_left(1:Nsize/2, i_eig+1) = cmplx(xsr_left,kind=SP)
        call VecRestoreArrayReadF90(vout,xsr_left,ierr)
        call VecGetArrayReadF90(vout2,xsr_left,ierr)
        V_left(Nsize/2+1:Nsize, i_eig+1) = cmplx(xsr_left,kind=SP)
        call VecRestoreArrayReadF90(vout2,xsr_left,ierr)
+#else
+       call VecGetArrayRead(vout,xsr_left,ierr)
+       V_left(1:Nsize/2, i_eig+1) = cmplx(xsr_left,kind=SP)
+       call VecRestoreArrayRead(vout,xsr_left,ierr)
+       call VecGetArrayRead(vout2,xsr_left,ierr)
+       V_left(Nsize/2+1:Nsize, i_eig+1) = cmplx(xsr_left,kind=SP)
+       call VecRestoreArrayRead(vout2,xsr_left,ierr)
+#endif
        !
        call VecRestoreSubVector(xr_left,is(1),xup,ierr)
        call VecRestoreSubVector(xr_left,is(2),xdown,ierr)
@@ -412,10 +437,17 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
      call VecScatterBegin(vs,xr,vout,INSERT_VALUES,SCATTER_FORWARD,ierr)
      call VecScatterEnd(vs,xr,vout,INSERT_VALUES,SCATTER_FORWARD,ierr)
      !
+#if PETSC_VERSION_LT(3,23,0)
      call VecGetArrayReadF90(vout,xsr,ierr)
      V_right(:, i_eig+1) = cmplx(xsr,kind=SP)
      !if (BSS_slepc_double_grp) V_right(BS_K_dim(1)+1:,i_eig+1)=cI*V_right(BS_K_dim(1)+1:,i_eig+1)
      call VecRestoreArrayReadF90(vout,xsr,ierr)
+#else
+     call VecGetArrayRead(vout,xsr,ierr)
+     V_right(:, i_eig+1) = cmplx(xsr,kind=SP)
+     !if (BSS_slepc_double_grp) V_right(BS_K_dim(1)+1:,i_eig+1)=cI*V_right(BS_K_dim(1)+1:,i_eig+1)
+     call VecRestoreArrayRead(vout,xsr,ierr)
+#endif
      !
      if (present(V_left)) then
        !save the left eigenvectors
@@ -423,10 +455,17 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
        call VecScatterBegin(vs,xr_left,vout,INSERT_VALUES,SCATTER_FORWARD,ierr)
        call VecScatterEnd(vs,xr_left,vout,INSERT_VALUES,SCATTER_FORWARD,ierr)
        !
+#if PETSC_VERSION_LT(3,23,0)
        call VecGetArrayReadF90(vout,xsr_left,ierr)
        V_left(:, i_eig+1) = cmplx(xsr_left,kind=SP)
        !if (BSS_slepc_double_grp) V_left(BS_K_dim(1)+1:,i_eig+1) =cI*V_left(BS_K_dim(1)+1:,i_eig+1)
        call VecRestoreArrayReadF90(vout,xsr_left,ierr)
+#else
+       call VecGetArrayRead(vout,xsr_left,ierr)
+       V_left(:, i_eig+1) = cmplx(xsr_left,kind=SP)
+       !if (BSS_slepc_double_grp) V_left(BS_K_dim(1)+1:,i_eig+1) =cI*V_left(BS_K_dim(1)+1:,i_eig+1)
+       call VecRestoreArrayRead(vout,xsr_left,ierr)
+#endif
      endif
    endif
    !
diff --git a/src/linear_algebra/PARALLEL_HERMITIAN_diagonalization.F b/src/linear_algebra/PARALLEL_HERMITIAN_diagonalization.F
index 1e9ec9f0c7..766d58240a 100644
--- a/src/linear_algebra/PARALLEL_HERMITIAN_diagonalization.F
+++ b/src/linear_algebra/PARALLEL_HERMITIAN_diagonalization.F
@@ -6,6 +6,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_HERMITIAN_diagonalization(M, V, N, E)
  !
  ! perform the diagonalization by using scalapack
@@ -16,7 +20,9 @@ subroutine PARALLEL_HERMITIAN_diagonalization(M, V, N, E)
  use linear_algebra, ONLY:LINEAR_ALGEBRA_error,LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
  use matrix,         ONLY:PAR_matrix
  use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer           :: N
  type(PAR_matrix)  :: M,V
diff --git a/src/linear_algebra/PARALLEL_inversion.F b/src/linear_algebra/PARALLEL_inversion.F
index 3ff718df6d..a658157747 100644
--- a/src/linear_algebra/PARALLEL_inversion.F
+++ b/src/linear_algebra/PARALLEL_inversion.F
@@ -5,16 +5,9 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
-! Copyright (C) 2004 WanT Group
+! headers
 !
-! This file is distributed under the terms of the
-! GNU General Public License. See the file `License'
-! in the root directory of the present distribution,
-! License-Identifier: GPL
-!
-! Copyright (C) 2016 The Yambo Team
-!
-! Authors (see AUTHORS file for details): AM AF
+#include<y_memory.h>
 !
 subroutine PARALLEL_inversion(M,N)
  !
@@ -25,7 +18,9 @@ subroutine PARALLEL_inversion(M,N)
 &                         LINEAR_ALGEBRA_WS_reset
  use matrix,         ONLY:PAR_matrix
  use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer           :: N
  type(PAR_matrix)  :: M
diff --git a/src/linear_algebra/PARALLEL_lin_system.F b/src/linear_algebra/PARALLEL_lin_system.F
index 6ec0edd7bb..550eabd329 100644
--- a/src/linear_algebra/PARALLEL_lin_system.F
+++ b/src/linear_algebra/PARALLEL_lin_system.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_lin_system(A,B,n,nrhs)
  !
  !  Solve the linear system
@@ -18,7 +22,9 @@ subroutine PARALLEL_lin_system(A,B,n,nrhs)
 &                         LINEAR_ALGEBRA_WS_reset
  use matrix,         ONLY:PAR_matrix
  use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer           :: n,nrhs
  type(PAR_matrix)  :: A, B
diff --git a/src/linear_algebra/SC_project.dep b/src/linear_algebra/SC_project.dep
new file mode 100644
index 0000000000..5310098e74
--- /dev/null
+++ b/src/linear_algebra/SC_project.dep
@@ -0,0 +1,2 @@
+  LINEAR_ALGEBRA_driver.o
+
diff --git a/src/linear_algebra/SERIAL_HERMITIAN_diagonalization.F b/src/linear_algebra/SERIAL_HERMITIAN_diagonalization.F
index 44f78ad485..799d6b60be 100644
--- a/src/linear_algebra/SERIAL_HERMITIAN_diagonalization.F
+++ b/src/linear_algebra/SERIAL_HERMITIAN_diagonalization.F
@@ -5,38 +5,83 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SERIAL_HERMITIAN_diagonalization(n,M,E)
  !
- use pars,           ONLY:SP
- use drivers,        ONLY:l_nl_optics,l_real_time
- use linear_algebra, ONLY:LINEAR_ALGEBRA_error,&
-&                         LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
- use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use pars,           ONLY: SP
+ use drivers,        ONLY: l_nl_optics,l_real_time
+ use linear_algebra, ONLY: LINEAR_ALGEBRA_error,LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS,&
+&                          magma_init_done,magma_setup,la_xheev
+#ifdef _MAGMA
+ use magma,          ONLY: magmaf_cheevd_m, magmaf_zheevd_m
+#endif
+ use timing_m,       ONLY: timing
+ use com,            ONLY: msg
+#ifdef _OPENMP
+ use omp_lib
+#endif
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer     :: n
  complex(SP) :: M(n,n)
  real(SP)    :: E(n)
  !
  character(64)     :: subname="SERIAL_HERMITIAN_diagonalization"
- integer           :: lwork
+ integer           :: lwork, lrwork, liwork
  type(LALGEBRA_WS) :: WS
  !
+ integer     :: nthreads_save
+
+#if defined _DOUBLE
+#  define magmaf_xheevd_m magmaf_zheevd_m
+#else
+#  define magmaf_xheevd_m magmaf_cheevd_m
+#endif
+
+ if(.not.(l_nl_optics.or.l_real_time)) call timing('SER_HERM_diago',OPR='start')
+ !
+ ! Thread safety
+#if defined _OPENMP
+ nthreads_save=omp_get_max_threads()
+ call omp_set_num_threads(1)
+#endif
+
+ !
+ ! Workspace
+ !
  lwork=-1
  allocate(WS%v_cmplx(1))
  !
- if(.not.(l_nl_optics.or.l_real_time)) then
-   call timing('SER_HERM_diago',OPR='start')
-   YAMBO_ALLOC(WS%v_real,(max(1,3*n-2)))
- else
-   allocate(WS%v_real(max(1,3*n-2)))
- endif
+ lrwork=max(1,3*n-2)
+#if defined _MAGMA
+ lrwork=1
+#endif
  !
-#if defined _DOUBLE
- call ZHEEV('V','U',n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,WS%i_fail)
+ allocate(WS%v_real(lrwork))
+ !
+#if defined _MAGMA
+ !
+ lrwork=-1
+ liwork=-1
+ allocate(WS%v_int(1))
+ !
+ if (.not.magma_init_done) then
+   call msg('sr','Initializing magma library in geev')
+   call magma_setup()
+ endif
+ !   
+ call magmaf_xheevd_m(1,'v','u', n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,&
+ &                       lrwork,WS%v_int,liwork,WS%i_fail)
 #else
- call CHEEV('V','U',n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,WS%i_fail)
+ call la_xheev('V','U',n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,WS%i_fail)
 #endif
+ !
+ if(WS%i_fail /=0 ) call LINEAR_ALGEBRA_error(' _XHEEV_ ','WS%v_cmplx size query failed')
  !
  lwork=nint(real(WS%v_cmplx(1)))
  deallocate(WS%v_cmplx)
@@ -47,18 +92,43 @@ subroutine SERIAL_HERMITIAN_diagonalization(n,M,E)
    allocate(WS%v_cmplx(lwork))
  endif
  !
-#if defined _DOUBLE
- call ZHEEV('V','U',n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,WS%i_fail)
- if(WS%i_fail.ne.0) call LINEAR_ALGEBRA_error(subname,'performing ZHEEV')
+#if defined _MAGMA
+ !
+ lrwork=nint(WS%v_real(1))
+ deallocate(WS%v_real)
+ liwork=WS%v_int(1)
+ deallocate(WS%v_int)
+ !
+ if(.not.(l_nl_optics.or.l_real_time)) then
+   YAMBO_ALLOC(WS%v_real,(lrwork))
+   YAMBO_ALLOC(WS%v_int,(liwork))
+ else
+   allocate(WS%v_real(lrwork))
+   allocate(WS%v_int(liwork))
+ endif
+ !   
+ ! use magma (if available)
+ !   
+ ! 
+ call magmaf_xheevd_m(1,'v','u',n, M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,&
+&                     lrwork,WS%v_int,liwork,WS%i_fail)
+ !
 #else
- call CHEEV('V','U',n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,WS%i_fail)
- if(WS%i_fail.ne.0) call LINEAR_ALGEBRA_error(subname,'performing CHEEV')
+ !
+ call la_xheev('V','U',n,M,size(M,1),E,WS%v_cmplx,lwork,WS%v_real,WS%i_fail)
+ !
 #endif
+ !
+ if(WS%i_fail /= 0) call LINEAR_ALGEBRA_error(' XHEEV ','failed')
  !
  call LINEAR_ALGEBRA_WS_reset(WS)
  !
- if(.not.(l_nl_optics.or.l_real_time)) then
-   call timing('SER_HERM_diago',OPR='stop')
- endif
+ ! Thread safety
+#if defined _OPENMP
+ call omp_set_num_threads(nthreads_save)
+#endif
+ !
+ if(.not.(l_nl_optics.or.l_real_time)) call timing('SER_HERM_diago',OPR='stop')
  !
 end subroutine
+
diff --git a/src/linear_algebra/SERIAL_SVD.F b/src/linear_algebra/SERIAL_SVD.F
new file mode 100644
index 0000000000..6a5953c609
--- /dev/null
+++ b/src/linear_algebra/SERIAL_SVD.F
@@ -0,0 +1,84 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2016 The Yambo Team
+!
+! Authors (see AUTHORS file for details): AM
+!
+! headers
+!
+#include<y_memory.h>
+!
+subroutine SERIAL_SVD(n,M,mode,SVD_digits)
+ !
+ use pars,           ONLY:SP,cZERO,cONE
+ use wrapper,        ONLY:M_by_M
+ use linear_algebra, ONLY:LINEAR_ALGEBRA_error,la_xgesvd,&
+&                         LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
+ use timing_m,       ONLY:timing
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,     intent(in)    :: n
+ complex(SP), intent(inout) :: M(n,n)
+ character(3),intent(in)    :: mode
+ integer,     intent(in)    :: SVD_digits
+ !
+ !ws
+ integer           :: i_loop 
+ real(SP)          :: SV_min
+ type(LALGEBRA_WS) :: WS
+ !
+ call timing('SERIAL_SVD',OPR='start')
+ !
+ YAMBO_ALLOC(WS%v_real,(5*n))
+ YAMBO_ALLOC(WS%vp_real,(n))
+ YAMBO_ALLOC(WS%m2_cmplx,(n,n))
+ YAMBO_ALLOC(WS%m1_cmplx,(n,n))
+ allocate(WS%v_cmplx(1))
+ !
+ call la_xgesvd('S','A',n,n,M,n,WS%vp_real,WS%m2_cmplx,n,WS%m1_cmplx,n,&
+&               WS%v_cmplx,-1,WS%v_real,WS%i_fail)
+ !
+ if(WS%i_fail/=0) call LINEAR_ALGEBRA_error('GESVD (SVD)','WS%v_cmplxspace failed')
+ !
+ WS%dim=int(real(WS%v_cmplx(1)))
+ deallocate(WS%v_cmplx)
+ YAMBO_ALLOC(WS%v_cmplx,(WS%dim))
+ !
+ call la_xgesvd('S','A',n,n,M,n,WS%vp_real,WS%m2_cmplx,n,WS%m1_cmplx,n,&
+&               WS%v_cmplx,WS%dim,WS%v_real,WS%i_fail)
+ !
+ if(WS%i_fail/=0) call LINEAR_ALGEBRA_error('GESVD (SVD)','failed')
+ !
+ if(index(mode,'inv')/=0) then   ! Matrix inversion
+   !      
+   SV_min=10._SP**(-SVD_digits)
+   !
+   do i_loop=1,n
+     if (abs(WS%vp_real(i_loop))<SV_min) then
+       WS%m1_cmplx(i_loop,:)=cZERO
+     else
+       WS%m1_cmplx(i_loop,:)=WS%m1_cmplx(i_loop,:)/WS%vp_real(i_loop)
+     endif
+   enddo
+   !
+   call M_by_M('c','c',n,WS%m1_cmplx,WS%m2_cmplx,M)
+   !
+ elseif(index(mode,'uni')/=0)  then  
+   !      
+   ! Transform in a unitary matrix
+   ! U= M1*I*M2
+   !
+   call M_by_M('n','n',n,WS%m2_cmplx,WS%m1_cmplx,M)
+   !
+ else
+   call error('Error SVD mode ')
+ endif
+ !
+ call LINEAR_ALGEBRA_WS_reset(WS)
+ !
+ call timing('SERIAL_SVD',OPR='stop')
+ !
+end subroutine
diff --git a/src/linear_algebra/SERIAL_SVD_inversion.F b/src/linear_algebra/SERIAL_SVD_inversion.F
deleted file mode 100644
index 1e0ab201fe..0000000000
--- a/src/linear_algebra/SERIAL_SVD_inversion.F
+++ /dev/null
@@ -1,61 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2016 The Yambo Team
-!
-! Authors (see AUTHORS file for details): AM
-!
-subroutine SERIAL_SVD_inversion(n,M,SVD_digits)
- !
- use pars,           ONLY:SP,cZERO
- use wrapper,        ONLY:M_by_M
- use linear_algebra, ONLY:LINEAR_ALGEBRA_error,SV_decomposition,&
-&                         LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
- use timing_m,       ONLY:timing
-#include<y_memory.h>
- !
- integer     :: n
- complex(SP) :: M(n,n)
- integer     :: SVD_digits 
- !
- !ws
- integer           :: i_loop 
- real(SP)          :: SV_min
- type(LALGEBRA_WS) :: WS
- !
- call timing('SERIAL_SVD_inversion',OPR='start')
- !
- YAMBO_ALLOC(WS%v_real,(5*n))
- YAMBO_ALLOC(WS%vp_real,(n))
- YAMBO_ALLOC(WS%m2_cmplx,(n,n))
- YAMBO_ALLOC(WS%m1_cmplx,(n,n))
- allocate(WS%v_cmplx(1))
- !
- call SV_decomposition(n,M,WS%vp_real,WS%m2_cmplx,WS%m1_cmplx,WS%v_cmplx,-1,WS%v_real,WS%i_fail)
- !
- if(WS%i_fail/=0) call LINEAR_ALGEBRA_error('GESVD (SVD)','WS%v_cmplxspace failed')
- !
- WS%dim=int(real(WS%v_cmplx(1)))
- deallocate(WS%v_cmplx)
- YAMBO_ALLOC(WS%v_cmplx,(WS%dim))
- call SV_decomposition(n,M,WS%vp_real,WS%m2_cmplx,WS%m1_cmplx,WS%v_cmplx,WS%dim,WS%v_real,WS%i_fail)
- !
- if(WS%i_fail/=0) call LINEAR_ALGEBRA_error('GESVD (SVD)','failed')
- !
- SV_min=10._SP**(-SVD_digits)
- !
- do i_loop=1,n
-   if (abs(WS%vp_real(i_loop))<SV_min) then
-     WS%m1_cmplx(i_loop,:)=cZERO
-   else
-     WS%m1_cmplx(i_loop,:)=WS%m1_cmplx(i_loop,:)/WS%vp_real(i_loop)
-   endif
- enddo
- !
- call M_by_M('c','c',n,WS%m1_cmplx,WS%m2_cmplx,M)
- !
- call LINEAR_ALGEBRA_WS_reset(WS)
- !
- call timing('SERIAL_SVD_inversion',OPR='stop')
- !
-end subroutine
diff --git a/src/linear_algebra/SERIAL_diagonalization.F b/src/linear_algebra/SERIAL_diagonalization.F
index e03eada92f..dee7ae9342 100644
--- a/src/linear_algebra/SERIAL_diagonalization.F
+++ b/src/linear_algebra/SERIAL_diagonalization.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SERIAL_diagonalization(n,M,E,V_left,V_right)
  !
  !  The right eigenvector V_right(j) of M satisfies
@@ -15,32 +19,96 @@ subroutine SERIAL_diagonalization(n,M,E,V_left,V_right)
  !                V_left(j)**H * M = E(j) * V_left(j)**H
  !  where V_left(j)**H denotes the conjugate transpose of V_left(j).
  !
- use pars,          ONLY:SP
- use linear_algebra,ONLY:LINEAR_ALGEBRA_error,M_eigenvalues,LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
- use timing_m,      ONLY:timing
-#include<y_memory.h>
+ use pars,            ONLY: SP
+ use linear_algebra,  ONLY: LINEAR_ALGEBRA_error,LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS,&
+&                           magma_init_done,magma_setup,la_xgeev
+#ifdef _MAGMA
+ use magma,           ONLY: magmaf_zgeev_m,magmaf_cgeev_m
+#endif
+ use timing_m,        ONLY: timing
+ use com,             ONLY: msg
+#ifdef _OPENMP
+ use omp_lib
+#endif
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer           :: n
  complex(SP)       :: M(n,n),E(n),V_left(n,n),V_right(n,n)
  type(LALGEBRA_WS) :: WS
  !
+ integer :: nthreads_save
+
+#if defined _DOUBLE
+#  define magmaf_xgeev_m magmaf_zgeev_m
+#else
+#  define magmaf_xgeev_m magmaf_cgeev_m
+#endif
+
+ call timing('SERIAL_diagonalization',OPR='start')
+ !
+ ! Thread safety
+#if defined _OPENMP
+ nthreads_save=omp_get_max_threads()
+ call omp_set_num_threads(1)
+#endif
+
+ !
+ ! Workspace
+ !
  allocate(WS%v_cmplx(1))
  YAMBO_ALLOC(WS%v_real,(2*n))
- call M_eigenvalues(n,M,E,V_left,V_right,WS%v_cmplx,-1,WS%v_real,WS%i_fail)
  !
- call timing('SERIAL_diagonalization',OPR='start')
+#if defined _MAGMA
+   !
+   if (.not.magma_init_done) then
+     call msg('sr','Initializing magma library in geev')
+     call magma_setup()
+   endif
+   !   
+   call magmaf_xgeev_m('V','V', n,M,n,E,V_left,n,V_right,n,&
+&                      WS%v_cmplx,-1,WS%v_real,WS%i_fail)
+#else
+   call la_xgeev('V','V',n,M,n,E,V_left,n,V_right,n,WS%v_cmplx,-1,WS%v_real,WS%i_fail)
+#endif
  !
- if(WS%i_fail.ne.0) call LINEAR_ALGEBRA_error('M_eigenvalues (SD)','WS%v_cmplxspace failed')
+ if(WS%i_fail /=0 ) call LINEAR_ALGEBRA_error(' _XGEEV_ ','WS%v_cmplx size query failed')
  !
  WS%dim=int(real(WS%v_cmplx(1)))
  deallocate(WS%v_cmplx)
  YAMBO_ALLOC(WS%v_cmplx,(WS%dim))
- call M_eigenvalues(n,M,E,V_left,V_right,WS%v_cmplx,WS%dim,WS%v_real,WS%i_fail)
+
+ !   
+ ! use magma (if available)
+ !   
+#if defined _MAGMA
+ !   
+ call msg('sr','SERIAL diagonalization with magma')
  !
- if(WS%i_fail.ne.0) call LINEAR_ALGEBRA_error('M_eigenvalues (SD)','failed')
+ call magmaf_xgeev_m('V','V', n,M,n,E,V_left,n,V_right,n,&
+&                    WS%v_cmplx,WS%dim,WS%v_real,WS%i_fail)
+ !   
+#else
+ !   
+ ! use lapack (otherwise)
+ !   
+ call msg('sr','SERIAL diagonalization with lapack')
+ !
+ call la_xgeev('V','V',n,M,n,E,V_left,n,V_right,n,WS%v_cmplx,WS%dim,WS%v_real,WS%i_fail)
+ !   
+#endif
+ !
+ if(WS%i_fail /= 0) call LINEAR_ALGEBRA_error(' _XGEEV_ ','failed')
  !
  call LINEAR_ALGEBRA_WS_reset(WS)
  !
+ ! Thread safety
+#if defined _OPENMP
+ call omp_set_num_threads(nthreads_save)
+#endif
+ !   
+ !
  call timing('SERIAL_diagonalization',OPR='stop')
  !
 end subroutine
diff --git a/src/linear_algebra/SERIAL_inversion.F b/src/linear_algebra/SERIAL_inversion.F
index 79c489e57b..44fa4598c3 100644
--- a/src/linear_algebra/SERIAL_inversion.F
+++ b/src/linear_algebra/SERIAL_inversion.F
@@ -5,14 +5,20 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SERIAL_inversion(n,M,det,eval_det)
  !
  use pars,           ONLY: SP,cONE
  use drivers,        ONLY: l_nl_optics,l_real_time
- use linear_algebra, ONLY: LINEAR_ALGEBRA_error,LU_factorization,LU_inversion,&
+ use linear_algebra, ONLY: LINEAR_ALGEBRA_error,la_xgetrf,la_xgetri,&
 &                          LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
  use timing_m,       ONLY: timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer           :: n
  complex(SP)       :: M(n,n),det
@@ -29,7 +35,7 @@ subroutine SERIAL_inversion(n,M,det,eval_det)
    allocate(WS%v_int(n))
  endif
  !
- call LU_factorization(n,M,WS%v_int,WS%i_fail)
+ call la_xgetrf(n,n,M,n,WS%v_int,WS%i_fail)
  !
  if(WS%i_fail/=0) call LINEAR_ALGEBRA_error('GETRF (SI)','WS%v_cmplxspace failed')
  !
@@ -49,7 +55,9 @@ subroutine SERIAL_inversion(n,M,det,eval_det)
  else
    allocate(WS%v_cmplx(1))
  endif
- call LU_inversion(n,M,WS%v_int,WS%v_cmplx,-1,WS%i_fail)
+ !
+ call la_xgetri(n,M,n,WS%v_int,WS%v_cmplx,-1,WS%i_fail)
+ !
  WS%dim=int(real(WS%v_cmplx(1)))
  if(.not.(l_nl_optics.or.l_real_time)) then
    YAMBO_FREE(WS%v_cmplx)
@@ -58,7 +66,8 @@ subroutine SERIAL_inversion(n,M,det,eval_det)
    deallocate(WS%v_cmplx)
    allocate(WS%v_cmplx(WS%dim))
  endif
- call LU_inversion(n,M,WS%v_int,WS%v_cmplx,WS%dim,WS%i_fail)
+ !
+ call la_xgetri(n,M,n,WS%v_int,WS%v_cmplx,WS%dim,WS%i_fail)
  !
  if(WS%i_fail/=0) call LINEAR_ALGEBRA_error('GETRI (SI)','failed')
  !
diff --git a/src/linear_algebra/SERIAL_lin_system.F b/src/linear_algebra/SERIAL_lin_system.F
index 02fa8d938e..6dc54da9ca 100644
--- a/src/linear_algebra/SERIAL_lin_system.F
+++ b/src/linear_algebra/SERIAL_lin_system.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SERIAL_lin_system(n,nrhs,A,B)
  !
  !  Solve the linear system
@@ -15,8 +19,9 @@ subroutine SERIAL_lin_system(n,nrhs,A,B)
  use drivers,        ONLY:l_nl_optics,l_real_time
  use linear_algebra, ONLY:LINEAR_ALGEBRA_error,LINEAR_ALGEBRA_WS_reset,LALGEBRA_WS
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer     :: n,nrhs
  complex(SP) :: A(n,n),B(n,nrhs)
diff --git a/src/linear_algebra/SERIAL_lin_system_gpu.F b/src/linear_algebra/SERIAL_lin_system_gpu.F
index 057e76bcfc..f2b7f83cae 100644
--- a/src/linear_algebra/SERIAL_lin_system_gpu.F
+++ b/src/linear_algebra/SERIAL_lin_system_gpu.F
@@ -4,6 +4,11 @@
 ! Copyright (C) 2018 the Yambo Team
 !
 ! Authors (see AUTHORS file for details): AF
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
 ! 
 subroutine SERIAL_lin_system_gpu(n,nrhs,A_d,B_d)
  !
@@ -19,9 +24,9 @@ subroutine SERIAL_lin_system_gpu(n,nrhs,A_d,B_d)
 #if defined _CUDA
  use cublas,         ONLY:CUBLAS_OP_N
 #endif
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  integer     :: n,nrhs
  complex(SP) DEV_ATTR :: A_d(n,n),B_d(n,nrhs)
diff --git a/src/linear_algebra/SLK_test.F b/src/linear_algebra/SLK_test.F
index 4a94c556f3..49bf0b0236 100644
--- a/src/linear_algebra/SLK_test.F
+++ b/src/linear_algebra/SLK_test.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! Copyright (C) 2004 WanT Group
 !
 ! This file is distributed under the terms of the
@@ -16,6 +20,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SLK_test(E,k,q) 
  ! 
  ! simple program to check parallel linear algebra
@@ -28,8 +36,9 @@ subroutine SLK_test(E,k,q)
  use parallel_int,   ONLY:PARALLEL_global_indexes
  use linear_algebra, ONLY:INV,DIAGO,LIN_SYS,MAT_MUL,USE_LK,USE_SLK
  use matrix_operate, ONLY:hermitian
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)   ::E
  type(bz_samp)  ::k,q
@@ -191,7 +200,9 @@ subroutine inverse_check( ndim, Amat, Ainv )
  use com,        ONLY:msg
  use parallel_m, ONLY:mpi_comm_world
  use wrapper,    ONLY:M_by_M
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer            :: ndim
  complex(SP) :: Amat(ndim,ndim)
diff --git a/src/matrices/MATRIX_duplicate.F b/src/matrices/MATRIX_duplicate.F
index 43e708abcc..7ff9cddb80 100644
--- a/src/matrices/MATRIX_duplicate.F
+++ b/src/matrices/MATRIX_duplicate.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine MATRIX_duplicate( operation, M_in, INDEX_in, M_out )
  !
  ! operation can be "none/free/+1/-1/-C/+T/+TC"
@@ -12,7 +16,9 @@ subroutine MATRIX_duplicate( operation, M_in, INDEX_in, M_out )
  use parallel_m,     ONLY:COMM_copy
  use pars,           ONLY:cZERO,SP
  use matrix,         ONLY:PAR_matrix,MATRIX_reset
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  character(*)               :: operation
  type(PAR_matrix), optional :: M_in
diff --git a/src/matrices/MATRIX_init.F b/src/matrices/MATRIX_init.F
index 7847633e56..c240856f89 100644
--- a/src/matrices/MATRIX_init.F
+++ b/src/matrices/MATRIX_init.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include <dev_defs.h>
+#include <y_memory.h>
+!
 subroutine MATRIX_init( what, M_par, N, Nb)
  !
  ! I initialize a general parallel matrix here. Note the meaning of the two COMMS: INTRA and INTER.
@@ -21,8 +26,9 @@ subroutine MATRIX_init( what, M_par, N, Nb)
  use matrix,         ONLY:PAR_matrix,MATRIX_reset
  use parallel_m,     ONLY:COMM_copy,PAR_COM_SLK,PAR_COM_SLK_INDEX_local,&
 &                         PAR_COM_RL_INDEX,PAR_COM_RL_A2A,ncpu
-#include <dev_defs.h>
-#include <y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  character(*)           :: what
  type(PAR_matrix)       :: M_par
@@ -31,9 +37,7 @@ subroutine MATRIX_init( what, M_par, N, Nb)
  ! Work Space
  !
  integer              :: M,i_c,N_rows
- integer, allocatable :: rows(:,:),cols(:,:)
  !
- call MATRIX_reset(M_par)
  !DEV_ACC enter data copyin(M_par)
  !
  ! COMMs
@@ -41,7 +45,7 @@ subroutine MATRIX_init( what, M_par, N, Nb)
  if (what=="SLK") then
    call COMM_copy(PAR_COM_SLK_INDEX_local, M_par%INTRA_comm)
    call COMM_copy(PAR_COM_SLK,             M_par%INTER_comm)
- else if (what=="XUP".or.what=="XDN".or.what=="X".or.what=="MPA") then
+ else if (what=="X".or.what=="MPA") then
    call COMM_copy(PAR_COM_RL_A2A,   M_par%INTRA_comm)
    call COMM_copy(PAR_COM_RL_INDEX, M_par%INTER_comm)
  endif
@@ -68,34 +72,7 @@ subroutine MATRIX_init( what, M_par, N, Nb)
  endif
 #endif
  !
- ! Response function. Only upper triangle is distributed.
- !
- if (what=="XUP") then
-   !
-   allocate(rows(M_par%INTER_comm%n_CPU,2))
-   allocate(cols(M_par%INTER_comm%n_CPU,2))
-   M_par%kind="XUP"
-   M= ( (N**2-N)/2 + N )/M_par%INTER_comm%n_CPU
-   cols(1,:)=(/1,N/)
-   rows(1,:)=(/1,UPPER_solver(N)/)
-   do i_c=2,M_par%INTER_comm%n_CPU
-     cols(i_c,:)=(/rows(i_c-1,2)+1,N/)
-     N_rows=UPPER_solver( cols(i_c,2)- cols(i_c,1)+1 )
-     rows(i_c,:)=rows(i_c-1,2)+(/1,N_rows/)
-     if (N_rows==0) rows(i_c,2)=N
-   enddo
-   M_par%I     = 1
-   M_par%Nb    = Nb
-   M_par%N     = N
-   M_par%rows  = rows(M_par%INTER_comm%CPU_id+1,:)
-   M_par%cols  = cols(M_par%INTER_comm%CPU_id+1,:)
-   call rows_and_cols_setup()
-   deallocate(rows,cols)
-   return
-   !
- endif
- !
- ! Entire Matrix
+ ! Response function. Entire Matrix
  !
  if (what=="X".or.what=="PAR".or.what=="MPA") then
    !
@@ -123,10 +100,4 @@ subroutine rows_and_cols_setup()
      M_par%BLCcols  = M_par%cols
    end subroutine
    !
-   integer function UPPER_solver(N)
-     integer :: N
-     UPPER_solver=nint( (real(N,SP)+.5_SP)-sqrt( (real(N)+0.5_SP)**2 - 2._SP*real(M,SP) ) )+1
-     if (  (real(N,SP)+.5_SP)**2 - 2._SP*real(M,SP) < 0 ) UPPER_solver=0
-   end function
-   !
 end subroutine
diff --git a/src/matrices/MATRIX_transfer.F b/src/matrices/MATRIX_transfer.F
index f1f8c6290d..b1007ee71f 100644
--- a/src/matrices/MATRIX_transfer.F
+++ b/src/matrices/MATRIX_transfer.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine MATRIX_transfer( M_in, M_bse, M, M_out, INDEX_in, INDEX_out, SND, RCV, COMM, COMM_name)
  !
  ! General, multi-purpose M <-> M transfer
@@ -17,7 +21,9 @@ subroutine MATRIX_transfer( M_in, M_bse, M, M_out, INDEX_in, INDEX_out, SND, RCV
  use D_lattice,      ONLY:i_space_inv
  use matrix,         ONLY:PAR_matrix,I_do_SND,I_do_RCV,TRANSFER_world,PAR_to_PAR_comm_NAME
  use timing_m,       ONLY:timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(PAR_matrix), optional :: M_in
  type(PAR_matrix), optional :: M_out
@@ -116,16 +122,6 @@ subroutine MATRIX_transfer( M_in, M_bse, M, M_out, INDEX_in, INDEX_out, SND, RCV
    !
  endif
  !
- if (M_in_kind=="XUP".and.X_FILL_UP_matrix_only) then
-   if (i_space_inv==0) call MATRIX_duplicate("+TC",M_in=M_in,INDEX_in=M_in%I,M_out=M_Xdn)
-   if (i_space_inv==1) call MATRIX_duplicate("+T" ,M_in=M_in,INDEX_in=M_in%I,M_out=M_Xdn)
-   call MATRIX_PAR_to_from_PAR(M_Xdn%blc(M_Xdn%rows(1):M_Xdn%rows(2),M_Xdn%cols(1):M_Xdn%cols(2),M_Xdn%I)  ,&
-&                              M_Xdn%rows, M_Xdn%cols,&
-&                              M_out%blc(M_out%rows(1):M_out%rows(2),M_out%cols(1):M_out%cols(2),M_out%I),&
-&                              M_out%rows,M_out%cols) 
-   call MATRIX_duplicate("free",M_out=M_Xdn)
- endif
- !
  call COMM_reset(TRANSFER_world)
  !
  call timing('MATRIX transfer ('//trim(PAR_to_PAR_comm_NAME)//')',OPR='stop')
diff --git a/src/matrices/MATRIX_transpose.F b/src/matrices/MATRIX_transpose.F
index d50c7e58a3..a9a250fd55 100644
--- a/src/matrices/MATRIX_transpose.F
+++ b/src/matrices/MATRIX_transpose.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine MATRIX_transpose( operation, M_in, M_out )
  !
  ! performs the transpose (or transpose conjugate) of a distributed
@@ -16,7 +20,9 @@ subroutine MATRIX_transpose( operation, M_in, M_out )
  !
  use interfaces,     ONLY:MATRIX_duplicate
  use matrix,         ONLY:PAR_matrix,MATRIX_reset
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  character(*)               :: operation
  type(PAR_matrix), optional :: M_in
diff --git a/src/matrices/SC_project.dep b/src/matrices/SC_project.dep
new file mode 100644
index 0000000000..cb0ee78d1d
--- /dev/null
+++ b/src/matrices/SC_project.dep
@@ -0,0 +1,2 @@
+  MATRIX_init.o
+
diff --git a/src/matrices/SLK_matrix_init.F b/src/matrices/SLK_matrix_init.F
index 59718bbd98..ba0777f8f6 100644
--- a/src/matrices/SLK_matrix_init.F
+++ b/src/matrices/SLK_matrix_init.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! Copyright (C) 2004 WanT Group
 !
 ! This file is distributed under the terms of the
@@ -16,11 +20,17 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SLK_matrix_init(M_slk, N, N_blocks)
  !
  use SLK_m,          ONLY:SLK_ORTHO
  use matrix,         ONLY:PAR_matrix
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer           :: N,N_blocks
  type(PAR_matrix)  :: M_slk
diff --git a/src/memory/MEM_global_mesg.F b/src/memory/MEM_global_mesg.F
index 82c421d303..cadef0d283 100644
--- a/src/memory/MEM_global_mesg.F
+++ b/src/memory/MEM_global_mesg.F
@@ -12,6 +12,7 @@ subroutine MEM_global_mesg(what,xkind,HOST_SIZE)
  use stderr,       ONLY:real2ch
  use units,        ONLY:Kilobyte
  use LIVE_t,       ONLY:LIVE_message
+ use y_memory,     ONLY:mem_string
  !
  implicit none
  !
@@ -20,23 +21,18 @@ subroutine MEM_global_mesg(what,xkind,HOST_SIZE)
  integer(IPL), intent(in) :: HOST_SIZE(:)
  !
  integer   :: i1
- real(DP)  :: MEM_size
+ real(SP)  :: MEM_size
  !
- MEM_size=0
- do i1=1,size(HOST_SIZE)
-   MEM_size=MEM_size+real(HOST_SIZE(i1),DP)
+ MEM_size=real(HOST_SIZE(1),SP)
+ do i1=2,size(HOST_SIZE)
+   MEM_size=MEM_size*real(HOST_SIZE(i1),SP)
  enddo
- MEM_size=MEM_size*real(xkind,DP)/real(Kilobyte,DP)/real(1000,DP)
+ MEM_size=MEM_size*real(xkind,SP)/Kilobyte
  !
  ! Error Message
  !===============
- call LIVE_message("[MEMORY] Parallel distribution of "//what//" on HOST "//trim(host_name)//&
-&                  " with size "//trim(real2ch(real(MEM_size,SP)))//" [Mb] ")
- !
- ! 2020/05/20: Here I do not use mem_string since it wants an integer in input
- !             It is not a good solution since integers are limited to their maximum value
- !             A real would be much better for the size in Kb ...
-!&                  " with size "//trim(mem_string(real(MEM_size,SP))))
+ call LIVE_message("[PAR-MEMORY] Parallel distribution of "//what//" on HOST "//trim(host_name)//&
+&                  " with total size "//trim(mem_string(MEM_size)))
  !
  call MEM_error("GLOBAL_"//what)
  !
diff --git a/src/memory/MEM_manager_messages.F b/src/memory/MEM_manager_messages.F
index c566b2a4e8..f28156b31c 100644
--- a/src/memory/MEM_manager_messages.F
+++ b/src/memory/MEM_manager_messages.F
@@ -7,7 +7,7 @@
 !
 subroutine MEM_manager_messages(what,adding,MEM_now_Kb,where)
  !
- use pars,         ONLY:lchlen
+ use pars,         ONLY:SP,lchlen
  use com,          ONLY:msg,fat_log
  use y_memory,       ONLY:TOT_MEM_Kb,TOT_MEM_Kb_MEMSTAT,TOT_MEM_Kb_SAVE,MEM_treshold,&
 &                       MEM_jump_treshold,mem_string,HOST_,DEV_
@@ -42,10 +42,10 @@ subroutine MEM_manager_messages(what,adding,MEM_now_Kb,where)
    msg_="[MEMORY]"
    if (     adding) msg_=trim(msg_)//" Alloc "//what
    if (.not.adding) msg_=trim(msg_)//"  Free "//what
-   msg_=trim(msg_)//"("//trim(mem_string(MEM_now_Kb))//")"
-   msg_=trim(msg_)//trim(label)//" TOTAL: "//trim(mem_string(TOT_MEM_Kb(where)))//" (traced)"
+   msg_=trim(msg_)//"("//trim(mem_string(real(MEM_now_Kb,SP)))//")"
+   msg_=trim(msg_)//trim(label)//" TOTAL: "//trim(mem_string(real(TOT_MEM_Kb(where),SP)))//" (traced)"
    if (TOT_MEM_Kb_MEMSTAT(where)>0) then
-     msg_=trim(msg_)//" "//trim(mem_string(TOT_MEM_Kb_MEMSTAT(where)))//" (memstat)"
+     msg_=trim(msg_)//" "//trim(mem_string(real(TOT_MEM_Kb_MEMSTAT(where),SP)))//" (memstat)"
    endif
    call msg("s",trim(msg_))
  endif
@@ -54,9 +54,9 @@ subroutine MEM_manager_messages(what,adding,MEM_now_Kb,where)
  if (deliver_a_msg) then
    call MEM_STAT_update( )
    msg_="[MEMORY] In use: "
-   msg_=trim(msg_)//trim(label)//" TOTAL: "//trim(mem_string(TOT_MEM_Kb(where)))//" (traced)"
+   msg_=trim(msg_)//trim(label)//" TOTAL: "//trim(mem_string(real(TOT_MEM_Kb(where),SP)))//" (traced)"
    if (TOT_MEM_Kb_MEMSTAT(where)>0) then
-     msg_=trim(msg_)//" "//trim(mem_string(TOT_MEM_Kb_MEMSTAT(where)))//" (memstat)"
+     msg_=trim(msg_)//" "//trim(mem_string(real(TOT_MEM_Kb_MEMSTAT(where),SP)))//" (memstat)"
    endif
    call msg("s",trim(msg_))
    TOT_MEM_Kb_SAVE(where)=TOT_MEM_Kb(where)
diff --git a/src/memory/MEM_report.F b/src/memory/MEM_report.F
index 634db2febc..51322a8b97 100644
--- a/src/memory/MEM_report.F
+++ b/src/memory/MEM_report.F
@@ -7,7 +7,7 @@
 !
 subroutine MEM_report( REPORT )
  !
- use pars,         ONLY:lchlen
+ use pars,         ONLY:SP,lchlen
  use y_memory,       ONLY:MEMs,N_MEM_elements,mem_string,MEM_element,MAX_MEM_Kb,N_MEM_SAVE_elements,&
 &                       LARGE_MEMs,MEM_treshold,MEM_SAVE_treshold,HOST_,DEV_
  use units,        ONLY:Kilobyte
@@ -28,8 +28,8 @@ subroutine MEM_report( REPORT )
    PREFIX=" "
    loc="r"
    call msg("r" ,"Memory Usage: global (Only MASTER cpu here). [O] stands for group 'O'")
-   call msg("rn","Memory treshold are:"//trim(mem_string(MEM_treshold))//" (basic treshold) "//&
-&                                       trim(mem_string(MEM_SAVE_treshold))//" (SAVEs treshold) ")
+   call msg("rn","Memory treshold are:"//trim(mem_string(real(MEM_treshold,SP)))//" (basic treshold) "//&
+&                                       trim(mem_string(real(MEM_SAVE_treshold,SP)))//" (SAVEs treshold) ")
  endif
  !
  ORPHAN_TOT=0
@@ -48,7 +48,7 @@ subroutine MEM_report( REPORT )
  !
  if (ORPHAN_TOT>0.and.fat_log) then
    if (loc=="r") call msg(loc,"")
-   WRITE(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),"TOTAL not deallocated",trim(mem_string(ORPHAN_TOT))
+   WRITE(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),"TOTAL not deallocated",trim(mem_string(real(ORPHAN_TOT,SP)))
    call msg(loc,msg_(1:len_trim(msg_)))
  endif
  !
@@ -62,12 +62,12 @@ subroutine MEM_report( REPORT )
  endif
  !
 #ifdef _GPU
- write(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),  "Max memory used (HOST) ",trim(mem_string(MAX_MEM_Kb(HOST_)))
+ write(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),  "Max memory used (HOST) ",trim(mem_string(real(MAX_MEM_Kb(HOST_),SP)))
  call msg("nr",msg_(1:len_trim(msg_)))
- write(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),  "Max memory used (DEV)  ",trim(mem_string(MAX_MEM_Kb(DEV_)))
+ write(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),  "Max memory used (DEV)  ",trim(mem_string(real(MAX_MEM_Kb(DEV_),SP)))
  call msg("r",msg_(1:len_trim(msg_)))
 #else
- write(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),  "Max memory used  ",trim(mem_string(MAX_MEM_Kb(HOST_)))
+ write(msg_,'(a8,a," : ",a,3x)') trim(PREFIX),  "Max memory used  ",trim(mem_string(real(MAX_MEM_Kb(HOST_),SP)))
  call msg("nr",msg_(1:len_trim(msg_)))
 #endif
  !
@@ -90,7 +90,7 @@ subroutine plot_the_element(M,what)
        i_title=1
        !
        if (REPORT=="check") then
-         WRITE(msg_,'(a,1x,a," : ",a)') trim(PREFIX),trim(mem_string(MEM_now_kb,where))
+         WRITE(msg_,'(a,1x,a," : ",a)') trim(PREFIX),trim(mem_string(real(MEM_now_kb,SP),where))
          if (M%N>1.or.(M%N==1.and.trim(M%shelf)/=trim(M%name(1)))) then
            WRITE(msg_,'(2a)') trim(msg_),trim(M%name(i_el))//" "//trim(M%shelf)
          else
@@ -105,7 +105,7 @@ subroutine plot_the_element(M,what)
          else
            WRITE(msg_,'(a8,a27)') trim(PREFIX),trim(M%name(i_el))
          endif
-         WRITE(msg_,'(a," : ",a)') trim(msg_),trim(mem_string(MEM_now_kb,where))
+         WRITE(msg_,'(a," : ",a)') trim(msg_),trim(mem_string(real(MEM_now_kb,SP),where))
          if (len_trim(M%desc(i_el))>0) then
            WRITE(msg_,'(a,a)')  trim(msg_)," ("//trim(M%desc(i_el))//")"
          endif
diff --git a/src/modules/.objects b/src/modules/.objects
index 6857541807..da69ec83b8 100644
--- a/src/modules/.objects
+++ b/src/modules/.objects
@@ -17,7 +17,7 @@ RT_objects_iterative  = mod_RT_iterative.o
 NL_objects       = mod_fields.o mod_electric.o mod_nl_optics.o mod_NL_interfaces.o
 #endif
 #if defined _ELPH
-ELPH_objects = mod_ELPH.o
+ELPH_objects = mod_ELPH.o mod_EXCPH.o
 #endif
 #if defined _OPENACC
 DEV_objects_BASE = mod_cudafor.o mod_cufft.o
@@ -44,6 +44,7 @@ objs = mod_pars.o mod_units.o mod_lexical_sort.o mod_stderr.o mod_openmp.o mod_m
        mod_QP.o mod_MPA.o mod_collision_el.o \
        mod_BS.o mod_BS_solvers.o mod_QP_CTL.o mod_TDDFT.o mod_ACFDT.o mod_MAGNONS.o mod_DICHROISM.o mod_PHOTOLUM.o \
        mod_IO.o mod_IO_interfaces.o mod_COLL_interfaces.o $(ELPH_objects) mod_POL_FIT.o  $(RT_objects_pre) \
-       mod_hamiltonian.o $(COMMON_objects) $(SC_objects) $(RT_objects_post) $(RT_objects_iterative) $(MAGNETIC_objects) $(NL_objects) $(ELECTRIC_objects) \
+       mod_hamiltonian.o $(COMMON_objects) $(SC_objects) $(RT_objects_post) $(RT_objects_iterative) $(MAGNETIC_objects) \
+          $(NL_objects) $(ELECTRIC_objects) \
        mod_debug.o mod_interfaces.o mod_interpolate_tools.o mod_interpolate.o SET_logicals.o SET_defaults.o $(DEV_objects)
 #endif
diff --git a/src/modules/DOUBLE_project.dep b/src/modules/DOUBLE_project.dep
index 7041bb894b..a8f5d4fd1a 100644
--- a/src/modules/DOUBLE_project.dep
+++ b/src/modules/DOUBLE_project.dep
@@ -9,6 +9,7 @@
  mod_DIPOLES.o
  mod_D_lattice.o
  mod_ELPH.o
+ mod_EXCPH.o
  mod_FFT.o
  mod_H_interfaces.o
  mod_IO.o
@@ -18,7 +19,6 @@
  mod_MPA.o
  mod_NL_interfaces.o
  mod_OUTPUT.o
- mod_Overlaps.o
  mod_PHOTOLUM.o
  mod_POL_FIT.o
  mod_QP.o
diff --git a/src/modules/ELPH_project.dep b/src/modules/ELPH_project.dep
index 4ef7c99d88..cb6330659f 100644
--- a/src/modules/ELPH_project.dep
+++ b/src/modules/ELPH_project.dep
@@ -1,4 +1,5 @@
  SET_defaults.o
  mod_ALLOC.o
  mod_ELPH.o
+ mod_EXCPH.o
 
diff --git a/src/modules/NL_project.dep b/src/modules/NL_project.dep
index 3b9be1f900..57d5568804 100644
--- a/src/modules/NL_project.dep
+++ b/src/modules/NL_project.dep
@@ -1,4 +1,5 @@
  SET_defaults.o
+ SET_logicals.o
  mod_BS_solvers.o
  mod_H_interfaces.o
  mod_NL_interfaces.o
diff --git a/src/modules/SET_defaults.F b/src/modules/SET_defaults.F
index 97c94566e0..c943a557d3 100644
--- a/src/modules/SET_defaults.F
+++ b/src/modules/SET_defaults.F
@@ -8,7 +8,7 @@
 subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  !
  use debug,      ONLY:dbg
- use pars,       ONLY:SP,cZERO
+ use pars,       ONLY:SP,cZERO,rZERO
  use C_driver,   ONLY:code_branch
  use units,      ONLY:HA2EV,FS2AUT,kWCMm22AU
  use LOGO,       ONLY:ID_logo,ID_logo_stderr
@@ -29,26 +29,27 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  use functions,  ONLY:bose_E_cut
  use D_lattice,  ONLY:i_space_inv,inv_index,n_atoms_species_max,n_atomic_species,mag_syms,&
 &                     input_Tel_is_negative,non_periodic_directions,lattice,Bose_Temp,    &
-&                     molecule_position,l_0D,l_1D,l_2D,l_3D
+&                     molecule_position,l_0D,l_1D,l_2D,l_3D,Boltz_Temp
  use pseudo,     ONLY:pp_n_l_times_proj_max,pp_kbv_dim
  use R_lattice,  ONLY:n_g_shells,ng_closed,bse_scattering,coll_scattering,&
 &                     Xk_grid_is_uniform,RIM_id_epsm1_reference,RIM_epsm1,&
-&                     RIM_anisotropy,RIM_ng,RIM_W_ng,RIM_n_rand_pts,nqibz,q0_def_norm,&
-&                     cutoff_presets,k_map,qindx_S_max_Go,RIMW_type
- use electrons,  ONLY:n_spin,n_sp_pol,n_spinor,filled_tresh,l_spin_orbit,n_spin_den,eval_magn
+&                     RIM_anisotropy,RIM_ng,RIM_W_ng,RIM_n_rand_pts,RIM_sphe_n_rand_pts,nqibz,q0_def_norm,&
+&                     cutoff_presets,k_map,qindx_S_max_Go,RIMW_type,eps_env
+ use electrons,  ONLY:n_spin,n_sp_pol,n_spinor,filled_tresh,l_spin_orbit,n_spin_den,&
+&                     i_spin_majority,i_spin_minority,eval_magn,nel_cond,deg_threshold
  use parallel_m, ONLY:ncpu,CPU_str_reset,CREATE_hosts_COMM
  use com,        ONLY:isec,depth,secnm,previous_secmode,of_name,n_ofs_defined,of_opened,of_unit,max_open_ofs,&
 &                     more_io_path,core_io_path,com_path,repfile,grid_paths,fat_log,exp_user
  use stderr,     ONLY:win_size,tty_size,logfile,set_real_printed_length,log_as_a_file
  use LIVE_t,     ONLY:log_line_to_dump,log_line,nhash,ct
- use wave_func,  ONLY:wf_ng,wf_norm_test,wf_nb_io,wf_nb_io_groups,WF,WF_buffer,WF_buffered_IO
+ use wave_func,  ONLY:wf_ng,wf_norm_test,wf_nb_io,wf_nb_io_groups,WF,WF_buffer,WF_buffered_IO,rho_map_thresh
  use FFT_m,      ONLY:fft_dim_loaded,fft_size,fft_dim,fft_multiplier
  use IO_m,       ONLY:io_reset,max_io_units,serial_number,frag_WF
  use BS_solvers, ONLY:BSS_mode,BSS_n_freqs,BSS_er,BSS_dr,BSS_P_dir,BSS_E_dir,BSS_Q_dir,BSS_first_eig,&
 &                     Haydock_threshold,Haydock_iterIO,Haydock_iterMAX,BSS_uses_DbGd,BSS_Wd,BSS_n_eig,BSS_n_eig_Input,&
 &                     BSS_damp_reference,BSS_Vnl_included,BSS_uses_GreenF,BSS_inversion_mode,BSS_target_E,&
-&                     BSS_perturbative_width,K_INV_EPS,K_INV_PL,K_INV_PI_PH,BSS_desc,BSS_ydiago_solver,BSS_trange_E,&
-&                     BSS_kernel_IO_on_the_fly,BSS_kernel_stored_size,BSS_kernel_last_stored_block
+&                     BSS_perturbative_width,K_INV_EPS,K_INV_PL,K_INV_PI_PH,BSS_desc,BSS_ldiago_solver,BSS_trange_E,&
+&                     BSS_kernel_IO_on_the_fly,BSS_kernel_stored_size,BSS_kernel_last_stored_block,deg_exc_thrshld
  use descriptors,ONLY:IO_desc_reset
 #if defined _SLEPC && !defined _NL
  use BS_solvers, ONLY:BSS_slepc_extraction,BSS_slepc_ncv,BSS_slepc_tol,BSS_slepc_maxit,&
@@ -64,7 +65,10 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  use ACFDT,      ONLY:ACFDT_n_lambda,ACFDT_n_freqs,ACFDT_E_range
 #if defined _ELPH 
  use ELPH,       ONLY:ph_modes,elph_nb,gsqF_energy_steps,eval_G_using_KK,use_PH_DbGd,EkplusQ_mode,gkkp_db,&
-&                     elph_Ham_bands,elph_Ham_ik,elph_branches,RES_tresh,FAN_deltaE_treshold,DW_deltaE_treshold
+&                     elph_Ham_bands,elph_Ham_ik,elph_branches,RES_tresh,FAN_deltaE_treshold,DW_deltaE_treshold,&
+&                     elph_bands
+ use EXCPH,      ONLY:Lin_path,Lout_path,EXCPH_states,EXCPH_sum,L_damping,EXC_q0,alphaQ, &
+&                     EXCPH_deltaE_treshold,EXCPH_kthresh
 #endif
  use zeros,      ONLY:zero_norm,k_iku_zero,k_rlu_zero,G_iku_zero,G_mod_zero,zero_dfl
  use y_memory,   ONLY:MEMs,LARGE_MEMs,N_MEM_max,N_MEM_SAVE_max,MEM_element_init
@@ -79,18 +83,18 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
 #endif
 #if defined _SC || defined _RT || defined _QED || defined _NL
  use collision_ext,  ONLY:COLLISIONS_naming,COLLISIONS_cutoff,COLLISIONS_CV_only, &
-&                         COLLISIONS_load_SP,COLLISIONS_compr
+&                         COLLISIONS_load_SP,COLLISIONS_compr,COLL_bands_frozen_ch
  use hamiltonian,    ONLY:H_potential
 #endif
 #if defined _RT || defined _YPP_RT
  use RT_output_m,ONLY:n_RT_headings,N_RT_o_files,RT_o_file_long,RT_o_file_short,RT_o_file_is_open,&
 &                     RT_headings,YPP_append,Gr_desc,TIME_STEP_desc,RT_desc
  use RT_control, ONLY:CARR_RT_IO_t,OBS_RT_IO_t,OUTPUT_RT_IO_t,Gless_RESTART_RT_IO_t,SAVE_G_history,RT_ctl_db,&
-&                     CACHE_OBS_last_point,CACHE_OBS_INTERVAL_time,CACHE_OBS_INTERVAL_time_INPUT,   &
-&                     CACHE_OBS_steps,CACHE_OBS_steps_now,RT_NAN_found
+&                     Vbands_RT_IO_t,CACHE_OBS_last_point,CACHE_OBS_INTERVAL_time,CACHE_OBS_INTERVAL_time_INPUT,   &
+&                     CACHE_OBS_steps,CACHE_OBS_steps_now,RT_NAN_found,SAVE_Vb_history, SAVE_Vb_floquet, Floquet_order
  use RT_lifetimes,ONLY:RT_life_extrap_times
  use real_time,  ONLY:RT_step,NE_steps,NE_time,Integrator_name,RT_deph_range,&
-&                     Gr_kind,two_alpha,RAD_LifeTime,Phase_LifeTime,&
+&                     Gr_kind,two_alpha,RAD_LifeTime,Phase_LifeTime,RT_bands_frozen_ch,&
 &                     NE_tot_time,NE_MEM_treshold,RT_eh_en,l_update_SOC,&
 &                     DbGd_EE_percent,RT_scatt_tresh,integrator_step,RT_nk,&
 &                     i_MEM_old,i_MEM_prev,i_MEM_now,G_MEM_steps,RT_step_manual_prefactor,&
@@ -98,18 +102,19 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
 &                     NE_time_step_update_last_point,NE_time_step_update_jump,NE_time_step_update_jump_INPUT, &
 &                     NE_initial_time_step_update,NE_step_update_treshold
  use fields,     ONLY:Efield,Efield_reset,n_ext_fields_max,n_ext_fields,A_vecpot_reset,   &
-&                     A_tot,A_ind,A_ext
+&                     A_tot,A_ind,A_ext,field_from_file_fname,field_from_file_steps,      &
+&                     field_from_file_dt
  use plasma,     ONLY:PLASMA_redux_percent
 #endif
 #if defined _NL
  use nl_optics,  ONLY:NL_correlation,NL_er,n_frequencies,NL_damping,NL_LRC_alpha,NL_initial_versor,     &
-&                     NL_verb_name,NL_bands,NL_desc,n_angles,NL_rot_axis,max_angle
+&                     NL_verb_name,NL_bands,NL_desc,n_angles,NL_rot_axis,NL_bands_frozen_ch,max_angle
  use fields,     ONLY:Efield,Efield_reset
 #endif
 #if defined _SC
  use magnetic,    ONLY:MAG_B,MAG_radius,MAG_hamiltonian_type,MAG_landau,MAG_pauli,&
 &                      MAG_gauge,MAG_psi,MAG_theta,phase_trick
- use electric,    ONLY:l_P_periodic,l_force_SndOrd
+ use electric,    ONLY:l_force_SndOrd
 #endif
 #if defined _YAML_OUTPUT
  use com,         ONLY:depth_yaml
@@ -255,10 +260,12 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  molecule_position=0._SP
  lattice='Unknown'
  Bose_Temp=-1./HA2EV
+ Boltz_Temp=0._SP
  l_3D=.TRUE.
  l_2D=.FALSE.
  l_1D=.FALSE.
  l_0D=.FALSE.
+ Boltz_Temp=0._SP
  !
  ! R_lattice
  !
@@ -273,6 +280,7 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  k_map%q_step  =-1
  k_map%max_kdir= 0
  qindx_S_max_Go=-1
+ eps_env=1._SP
  !
  ! RIM
  !
@@ -281,6 +289,7 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  RIM_anisotropy=0._SP
  RIM_ng=0
  RIM_n_rand_pts=0
+ RIM_sphe_n_rand_pts=0
  !
  ! RIM-W
  !
@@ -310,8 +319,12 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  n_sp_pol=1
  n_spinor=1
  n_spin_den=1
+ i_spin_majority=1
+ i_spin_minority=n_sp_pol
  l_spin_orbit       = .FALSE.
- filled_tresh       =0.00001_SP
+ filled_tresh       = 0.00001_SP
+ nel_cond=rZERO
+ deg_threshold	    =0.0001_SP/HA2EV
  !
  ! Magnetization and density
  !
@@ -330,6 +343,11 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  wf_norm_test=.TRUE.
  wf_nb_io=0
  wf_nb_io_groups=1
+#if defined _GAMMA_ONLY
+ rho_map_thresh=1.E-7_SP
+#else
+ rho_map_thresh=0._SP
+#endif
  !
  !FFT
  !
@@ -438,9 +456,9 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  BS_K_cutoff=0.0000_SP
  BSS_mode=' '
  BSS_inversion_mode='pf'
- BSS_ydiago_solver='s'
+ BSS_ldiago_solver='s'
 #if defined _ELPA
- BSS_ydiago_solver='e'
+ BSS_ldiago_solver='e'
 #endif
  BSS_er=(/0._SP,10._SP/)/HA2EV
  BSS_dr=0.1_SP/HA2EV
@@ -474,6 +492,7 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  BSS_target_E=0._SP
  BSS_trange_E=-1._SP/HA2EV
  MAX_BSK_LIN_size=45000*45000 ! 46341 is the sqare root of the maximum integer in IP: 2147483647
+ deg_exc_thrshld=0.001_SP/HA2EV
  !
  call IO_desc_reset(BSS_desc)
  !
@@ -514,7 +533,7 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  GS_exx_SCREENING=0.0            ! no screening
  !
  l_BS_anomalous_Hall=.false.
- PL_weights=(/1._SP,1._SP,1._SP/)
+ PL_weights=(/1._SP,0._SP,0._SP/)
  !
 #if defined _ELPH 
  !
@@ -522,18 +541,31 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  !
  ph_modes=0
  elph_nb=0
+ elph_bands=-1
  elph_branches=0
  gsqF_energy_steps=2
  eval_G_using_KK=.FALSE.
  elph_Ham_bands=0
  elph_Ham_ik=1
  RES_tresh=0.01_SP
- DW_deltaE_treshold=1.E-6_SP/HA2EV   ! This is a cutoff on electronic energy differences
- FAN_deltaE_treshold=1.E-6_SP/HA2EV  ! This is a cutoff on phonon energies
+ DW_deltaE_treshold=1.E-6_SP/HA2EV    ! This is a cutoff on electronic energy differences
+ FAN_deltaE_treshold=1.E-6_SP/HA2EV   ! This is a cutoff on phonon energies
  use_PH_DbGd=.FALSE.
  EkplusQ_mode='interp' ! E(k+q) energies (interp | dftp )
  gkkp_db='gkkp'        ! gkkp_db ( gkkp | gkkp_expanded | genFroh )
  !
+ ! EXC-PH
+ !
+ Lin_path ='none'
+ Lout_path='none'
+ EXCPH_states=(/1,1/)
+ EXCPH_sum=(/1,1/)
+ L_damping=0.0005/HA2EV ! set to 0.5 meV
+ EXCPH_deltaE_treshold=1.E-6_SP/HA2EV ! This is a cutoff for denominator in the exc-ph spectra
+ EXC_q0=(/0.0,0.0,0.0/)
+ alphaQ=rZERO
+ EXCPH_kthresh=-1. ! If negative, use all kpoints (turn positive only for debug)
+ !
 #endif
  !
 #if defined _SC || defined _RT || defined _QED || defined _NL
@@ -548,14 +580,16 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  COLLISIONS_cutoff=0.0005_SP
 # endif
  !
-#if defined _NL
+# if defined _NL
  COLLISIONS_cutoff=0.0005_SP
-#endif
+# endif
  !
  COLLISIONS_CV_only=.FALSE.
  COLLISIONS_load_SP=.FALSE.
  COLLISIONS_compr  =.FALSE.
  !
+ COLL_bands_frozen_ch=""
+ !
  H_potential="NONE"
  !
 #endif
@@ -584,7 +618,6 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  !
  call IO_desc_reset(SC_desc)
  !
- l_P_periodic   =.FALSE.
  l_force_SndOrd =.FALSE.
  !
 #endif
@@ -593,6 +626,9 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  !
  ! Real Time
  !
+ field_from_file_fname=""
+ field_from_file_steps=0
+ field_from_file_dt=0._SP
  n_ext_fields=0
  do i1=1,n_ext_fields_max
    call Efield_reset(Efield(i1))
@@ -615,6 +651,8 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  NE_time=0._SP
  NE_tot_time= 1000._SP*FS2AUT
  !
+ RT_bands_frozen_ch=""
+ !
  NE_time_step_update_last_point = 1
  NE_time_step_update_jump       = 0._SP
  NE_time_step_update_jump_INPUT  = 0._SP
@@ -664,11 +702,15 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  !
  ! RT_ctl
  !
- OBS_RT_IO_t%INTERVAL_time_INPUT              =2._SP*FS2AUT
- CARR_RT_IO_t%INTERVAL_time_INPUT             =2._SP*FS2AUT
- OUTPUT_RT_IO_t%INTERVAL_time_INPUT           =5._SP*FS2AUT
- Gless_RESTART_RT_IO_t%INTERVAL_time_INPUT    =1._SP*FS2AUT
+ OBS_RT_IO_t%INTERVAL_time_INPUT              =2.0_SP*FS2AUT
+ CARR_RT_IO_t%INTERVAL_time_INPUT             =2.0_SP*FS2AUT
+ OUTPUT_RT_IO_t%INTERVAL_time_INPUT           =0.1_SP*FS2AUT
+ Gless_RESTART_RT_IO_t%INTERVAL_time_INPUT    =5.0_SP*FS2AUT
+ Vbands_RT_IO_t%INTERVAL_time_INPUT           =5.0_SP*FS2AUT
  SAVE_G_history=.FALSE.
+ SAVE_Vb_history=.FALSE.
+ SAVE_Vb_floquet=.FALSE. ! this default setting may not be needed as I defined Floquet order
+ Floquet_order = -1
  RT_NAN_found  =.FALSE.
  !
  ! RT_output
@@ -692,6 +734,7 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  NE_tot_time   =  -1._SP*FS2AUT 
  Integrator_name='INVINT'
  NL_bands      =(/0,0/)
+ NL_bands_frozen_ch=""
  NL_correlation="IPA"
  NL_er         =(/-1._SP,-1._SP/)/HA2EV
  n_frequencies =0
@@ -702,6 +745,9 @@ subroutine SET_defaults(INSTR,IND,OD,COM_DIR)
  NL_damping    =0.2_SP/HA2EV
  NL_LRC_alpha  =0._SP
  !
+ field_from_file_fname=""
+ field_from_file_steps=0
+ field_from_file_dt=0._SP
  do i1=1,n_ext_fields_max
    call Efield_reset(Efield(i1))
    Efield(i1)%t_initial=RT_step
diff --git a/src/modules/SET_logicals.F b/src/modules/SET_logicals.F
index 9ed8cae0fd..9a70a42900 100644
--- a/src/modules/SET_logicals.F
+++ b/src/modules/SET_logicals.F
@@ -13,7 +13,7 @@ subroutine SET_logicals()
  use com,        ONLY:write_to_of,write_to_report,fat_log
  use IO_m,       ONLY:IO_write,IO_read,DBs_IO_control_string,DBs_FRAG_control_string, &
 &                     io_DIP,io_RESPONSE,io_HF,io_COLLs,io_GF,io_OBSERVABLES,io_CARRIERs,io_SC,&
-&                     io_BS_K,io_SCREEN,io_MULTIPOLE, &
+&                     io_BS_K,io_SCREEN,io_MULTIPOLE,io_V_bands, &
 &                     frag_DIP,frag_RESPONSE,frag_MULTIPOLE,frag_HF,frag_SC,frag_DG,   &
 &                     frag_BS_K,frag_SCREEN,frag_QINDX,frag_ELPH,frag_RT
  use stderr,     ONLY:write_to_log,write_fragments_IO_log,STRING_split,write_to_log_default
@@ -58,11 +58,19 @@ subroutine SET_logicals()
  io_HF          =.TRUE.
  io_COLLs       =.TRUE.
  io_GF          =.TRUE.
+ io_V_bands     =.FALSE.
  io_OBSERVABLES =.TRUE.
  io_CARRIERs    =.TRUE.
  io_SC          =.TRUE.
  io_BS_K        =.TRUE.
  !
+#if defined _NL
+ io_CARRIERs    =.FALSE.
+ io_GF          =.FALSE.
+ io_V_bands     =.TRUE.
+ io_OBSERVABLES =.TRUE.
+#endif
+ !
  ch_piece=' '
  call STRING_split(DBs_IO_control_string,ch_piece)
  !
@@ -74,6 +82,7 @@ subroutine SET_logicals()
    if (str_check(5,(/"COLLS","COLLs","colls","ALL  ","all  "/),&
 &                                               ch_piece(i_ch)) ) io_COLLs      =.FALSE.
    if (str_check(4,(/"GF ","gf ","ALL","all"/),ch_piece(i_ch)) ) io_GF          =.FALSE.
+   if (str_check(4,(/"Vb ","vb ","ALL","all"/),ch_piece(i_ch)) ) io_V_bands     =.FALSE.
    if (str_check(4,(/"J  ","j  ","ALL","all"/),ch_piece(i_ch)) ) io_OBSERVABLES =.FALSE. ! Back Compatibility
    if (str_check(4,(/"OBS","obs","ALL","all"/),ch_piece(i_ch)) ) io_OBSERVABLES =.FALSE. 
    if (str_check(5,(/"CARRIERs","carriers","CARRIERS","ALL     ","all     "/),&
diff --git a/src/modules/YPP_ELPH_project.dep b/src/modules/YPP_ELPH_project.dep
index 51510bad65..125d179d1a 100644
--- a/src/modules/YPP_ELPH_project.dep
+++ b/src/modules/YPP_ELPH_project.dep
@@ -1,2 +1,2 @@
  mod_ELPH.o
- mod_PHEL.o
+ mod_ELPH_intfcs.o
diff --git a/src/modules/mod_BS.F b/src/modules/mod_BS.F
index bd0b1c3219..2f3af8f513 100644
--- a/src/modules/mod_BS.F
+++ b/src/modules/mod_BS.F
@@ -5,19 +5,25 @@
 !
 ! Authors (see AUTHORS file for details): AM MG
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 module BS
  !
- use pars,          ONLY: IP,IPL,SP,schlen,lchlen
+ use pars,          ONLY: IP,IPL,LP,SP,schlen,lchlen
  use collision_el,  ONLY: elemental_collision
  use gpu_m,         ONLY: have_gpu
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  ! Modes and logicals
  !====================
  logical :: l_BSE_minimize_memory
  logical :: l_BSE_kernel_complete
+ logical :: l_BSE_kernel_full=.FALSE.
  logical :: l_BSE_restart
  logical :: BS_W_is_diagonal=.FALSE.
  logical :: BS_K_coupling=.FALSE.
@@ -37,6 +43,7 @@ module BS
  character(schlen) :: BSE_dipole_geometry="none"
  character(schlen) :: BSK_mode
  character(schlen) :: BSK_IO_mode
+ character(schlen) :: BS_bands_frozen_ch=""
  !
  ! Dipoles tracing 
  !=================
@@ -58,6 +65,8 @@ module BS
  logical :: l_BS_optics
  logical :: l_BS_esort
  logical :: l_BS_esort_indx
+ logical :: l_BS_mespin
+ logical :: l_BS_meorb
  !
  ! Dimensions
  !=============
@@ -77,21 +86,33 @@ module BS
  real(SP):: BS_K_cutoff
  real(SP),allocatable :: BS_K_cutoff_done(:)
  real(SP),allocatable :: BSqpts(:,:)
+ real(SP),allocatable :: BS_kpt_bz(:,:)
+ real(SP),allocatable :: BS_kpt_ibz(:,:)
+ integer,allocatable :: BS_K_io_map(:)
+ integer(LP),allocatable :: BS_bands_frozen(:)
  !
  ! Resonant/Antiresonant handling
  !================================
  !
- ! The BS kernel, in general, is composed of two resonant (K_r,R and K_c,R) and two anti-resonant (K_r,A and K_c,A) 
- ! blocks.
+ ! The the "standard" convention for the BSE kernel is
+ !
+ !      |   K_R(q)       K_C(q)  |     |   K_R(q)          K_C(q)       |  
+ !  K = |                        |  =  |                                |  (1a)
+ !      |   K_Q(q)       K_A(q)  |     | -K_C^\dag(-q)   -K_R^\dag(-q)  |
+ ! 
+ ! Here we use the one with the square root of the occupation factors
+ ! The BS kernel, in general, is composed of resonant K_R, couplnig K_C, anti-resonant K_A and anti-coupling K_Q) blocks.
+ !
+ !       |   K_R(q)         cI*Kt_C(q)  |      |   K_R(q)           cI*Kt_C(q)       |
+ !  Kt = |                              |  =   |                                     |  (1b)
+ !       |  cI*Kt_Q(q)         K_A(q)   |      | -[cI*Kt_C(-q)]^\dag   -K_R^\dag(-q) |
  !
- !      |  K_r,R(q)          cI*K_c,R(q)  |  
- !  K = |                                 | (1)
- !      | cI*K_c,A(q)        K_r,A^*(q)   |
  !
  ! In general we have
  !
- !  K_c,A(q) = - (K_c,R(-q))^*   (2)
- !  K_r,A(q) = - (K_r,R(-q))^*   (3)
+ !  K_Q(q)  = -K_C(-q)^\dag   (2a)
+ !  Kt_Q(q) =  Kt_C(-q)^\dag  (2b)  [ This means cI*Kt_Q(q) = -[cI*K_C(-q)]^* ]
+ !  K_A(q)  = -K_R(-q)^\dag   (3)
  !
  ! But in some cases (2) and (3) do not hold
  !
@@ -99,7 +120,7 @@ module BS
  !  - n_spinor==2 and q/=1
  !  - magnons
  !
- ! ONLY in these cases and when we need the entire matrix (l_BS_anti_resonant=.TRUE.) we have
+ ! Only in these cases and when we need the entire matrix (l_BS_anti_resonant=.TRUE.) we have
  !
  !  l_BS_ares_from_res=.FALSE.
  !
@@ -109,8 +130,8 @@ module BS
  !
  ! In addition
  !
- ! --> no coupling = 2 BSE matrices (K_r,R(q) and K_r,A^*(q)) --> BS_res_ares_n_mat=2
- ! -->    coupling = 1 BSE matrix with all 4 blocks  in (1)   --> BS_res_ares_n_mat=1
+ ! --> no coupling = 2 BSE matrices (K_R(q) and K_A^*(q))  --> BS_res_ares_n_mat=2
+ ! -->    coupling = 1 BSE matrix with all 4 blocks in (1) --> BS_res_ares_n_mat=1
  !
  ! Summary
  !=========
@@ -127,8 +148,10 @@ module BS
  !     |_l_BS_ares_from_res=.FALSE. 2                  2
  !
  logical :: l_BS_ares_from_res=.TRUE. 
- integer :: BS_res_ares_n_mat=1
- integer :: BS_n_eh_spaces=1 
+ logical :: l_BS_res_from_E
+ integer :: BS_res_ares_n_mat
+ integer :: BS_res_ares_n_grps
+ integer :: BS_n_eh_spaces
  !
  ! Live Timing
  !=============
@@ -186,8 +209,9 @@ module BS
    complex(SP), allocatable      :: Z(:)
    ! For dipoles the anti-resonant component is always stored
    complex(SP), allocatable      :: dipoles_opt(:,:,:)         ! (/x,y,z ; i_Tr ;   2)
-   complex(SP), allocatable      :: dipoles_dic(:,:,:)         ! (/x,y,z ; i_Tr ;   2) 
+   complex(SP), allocatable      :: dipoles_orb(:,:,:)         ! (/x,y,z ; i_Tr ;   2) 
    complex(SP), allocatable      :: dipoles_mag(:,:,:)         ! (/S+,S-,Sz ; i_Tr; 2)
+   complex(SP), allocatable      :: dipoles_spin(:,:,:)        ! (/x,y,z ; i_Tr ;   2) 
    !
    ! Exchange Oscillators
    !
@@ -229,10 +253,16 @@ module BS
    ! Matrix Block
    !--------------
    complex(SP), allocatable      :: mat(:,:)
-   character,   allocatable      :: done(:,:)
-   integer,     allocatable      :: table(:,:)
+   character, allocatable        :: done(:,:)
+   integer, allocatable          :: table(:,:)
    real(SP),    allocatable      :: E(:)
    !
+   ! For collisions only
+   !--------------------
+   integer                       :: zise(2)       ! dual of %size
+   integer                       :: poordinate(2) ! dual of %coordinate
+   complex(SP), allocatable      :: tam(:,:)      ! dual of %mat
+   !
    ! Oscillators...
    !
    ! ... correlation
@@ -285,15 +315,19 @@ subroutine BS_Blocks_and_Transitions_alloc(E,iT,dom_k_T_group)
      integer :: nT,nDIP
      !
      nT=BS_T_grp(iT)%size
-     nDIP=2/BS_n_eh_spaces
+     nDIP=1
+     if(l_BS_ares_from_res) nDIP=2
      !
      YAMBO_ALLOC(BS_T_grp(iT)%dipoles_opt,(BS_dip_size,nT,nDIP))
      BS_T_grp(iT)%dipoles_opt=cZERO
      if(l_BS_magnons) then
        YAMBO_ALLOC(BS_T_grp(iT)%dipoles_mag,(2,nT,nDIP))
      endif
-     if(l_BS_dichroism) then
-       YAMBO_ALLOC(BS_T_grp(iT)%dipoles_dic,(BS_dip_size,nT,nDIP))
+     if(l_BS_mespin) then
+       YAMBO_ALLOC(BS_T_grp(iT)%dipoles_spin,(BS_dip_size,nT,nDIP))
+     endif
+     if(l_BS_dichroism.or.l_BS_meorb) then
+       YAMBO_ALLOC(BS_T_grp(iT)%dipoles_orb,(BS_dip_size,nT,nDIP))
      endif
      YAMBO_ALLOC(BS_T_grp(iT)%table,(nT,5))
      YAMBO_ALLOC(BS_T_grp(iT)%E,(nT,dom_k_T_group))
@@ -330,8 +364,11 @@ subroutine BS_Blocks_and_Transitions_free()
        if (l_BS_magnons) then
          YAMBO_FREE(BS_T_grp(iT)%dipoles_mag)
        endif
-       if (l_BS_dichroism) then
-         YAMBO_FREE(BS_T_grp(iT)%dipoles_dic)
+       if (l_BS_dichroism.or.l_BS_meorb) then
+         YAMBO_FREE(BS_T_grp(iT)%dipoles_orb)
+       endif
+       if (l_BS_mespin) then
+         YAMBO_FREE(BS_T_grp(iT)%dipoles_spin)
        endif
        YAMBO_FREE(BS_T_grp(iT)%table)
        YAMBO_FREE(BS_T_grp(iT)%E)
diff --git a/src/modules/mod_BS_solvers.F b/src/modules/mod_BS_solvers.F
index f29b734a10..9bcfbaacf1 100644
--- a/src/modules/mod_BS_solvers.F
+++ b/src/modules/mod_BS_solvers.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): AM DS NM
 !
+! headers
+!
+#include <y_memory.h>
+!
 module BS_solvers
  !
  use descriptors,    ONLY:IO_desc
  use pars,           ONLY:SP,schlen,lchlen
  use BS,             ONLY:BS_T_grp,BS_nT_grps,BS_K_dim,BS_H_dim,BS_block,n_BS_blks,BS_blk
  use matrix,         ONLY:PAR_matrix
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  ! Epsilon factor
  !==================
@@ -54,11 +59,11 @@ module BS_solvers
  character(schlen):: BSS_mode
  complex(SP)      :: BSS_Wd
  integer  :: BSS_n_freqs
- integer  :: BSS_n_eig            ! number of eigenvalues to find. Ydiago/Slepc
- integer  :: BSS_n_eig_Input      ! number of eigenvalues requested in input. Ydiago/Slepc
- integer  :: BSS_first_eig        ! index of the first eigenvalue to find. Ydiago
+ integer  :: BSS_n_eig            ! number of eigenvalues to find. Ldiago/Slepc
+ integer  :: BSS_n_eig_Input      ! number of eigenvalues requested in input. Ldiago/Slepc
+ integer  :: BSS_first_eig        ! index of the first eigenvalue to find. Ldiago
  real(SP) :: BSS_target_E         ! find eigenvalues close to this energy. Slepc
- real(SP) :: BSS_trange_E(2)      ! find eigenvalues in this energy range. Ydiago
+ real(SP) :: BSS_trange_E(2)      ! find eigenvalues in this energy range. Ldiago
  real(SP) :: BSS_er(2)
  real(SP) :: BSS_dr(2)
  real(SP) :: BSS_damp_reference
@@ -80,12 +85,13 @@ module BS_solvers
  logical  :: run_Haydock=.FALSE.
  logical  :: run_inversion=.FALSE.
  logical  :: run_Diago=.FALSE.
+ logical  :: load_Diago=.FALSE.
  logical  :: run_Slepc=.FALSE.
  !
  ! Solvers (Diago)
  !=========
  logical  :: BSS_write_eig_2_db
- character(1) :: BSS_ydiago_solver     ! "s" for scalapack / "e" for elpa
+ character(1) :: BSS_ldiago_solver     ! "s" for scalapack / "e" for elpa
  !
  ! Solvers (Haydock)
  !=========
@@ -137,6 +143,7 @@ module BS_solvers
  ! Solvers (Temporary (LARGE) arrays to be used in solvers ONLY)
  !=========
  integer    , allocatable :: BSS_eh_table(:,:)
+ integer    , allocatable :: BSS_eh_table_m1(:,:,:,:,:)
  real(SP)   , allocatable :: BSS_eh_E(:)
  real(SP)   , allocatable :: BSS_eh_f(:)
  real(SP)   , allocatable :: BSS_eh_W(:)
@@ -149,6 +156,9 @@ module BS_solvers
  complex(SP), allocatable :: BS_diagonal(:)
  complex(SP), allocatable :: BS_mat(:,:)
  !
+ ! Eigenvalues degeneracy treshold
+ real(SP) :: deg_exc_thrshld  
+ !
  interface 
    !
    integer function variables_BS(ID,iq,local_desc,CLOSE_the_menu,X)
@@ -163,8 +173,8 @@ integer function variables_BS(ID,iq,local_desc,CLOSE_the_menu,X)
    end function
    !
    integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,BsRr,BsE_corr,&
-   &                        bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,bsR_pl,&
-   &                        BS_mat,write_ev)
+   &                        bsL_magn,bsR_magn,bsL_kerr,bsR_kerr,bsR_dich,&
+   &                        bsR_mespin,bsR_meorb,bsR_pl,BS_mat,write_ev)
      use pars,          ONLY:SP
      use X_m,           ONLY:X_t
      implicit none
@@ -172,11 +182,13 @@ integer function io_BSS_diago(iq,i_BS_mat,ID,X_static,bsE,bsRl,BsRr,BsE_corr,&
      integer                      :: iq,ID,i_BS_mat
      complex(SP)        ,optional :: bsE(:)
      real(SP),   pointer,optional :: BsE_corr(:,:),bsR_pl(:,:)
-     complex(SP),pointer,optional :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),bsL_magn(:,:),bsR_magn(:,:),bsRl(:),bsRr(:)
+     complex(SP),pointer,optional :: bsL_kerr(:),bsR_kerr(:),bsR_dich(:,:),&
+&                                    bsR_mespin(:,:),bsR_meorb(:,:),bsL_magn(:,:),&
+&                                    bsR_magn(:,:),bsRl(:),bsRr(:)
      complex(SP)        ,optional :: BS_mat(:,:)
      logical, optional            :: write_ev
    end function
-   !
+
    subroutine write_bs_ev_par(iq, i_BS_mat, neigs_this_cpu, neig_shift, BS_VR, BS_VL)
      use pars,           ONLY:SP
      implicit none
@@ -196,18 +208,26 @@ integer function io_BSS_Haydock(ID,iq,it,reached_treshold,mode,Af,Bf,Cf,Vnm1,Vn,
    !
  end interface
  !
- interface BSS_resize
+ interface BSS_resize_and_save
    module procedure resizeR1
    module procedure resizeR1p, resizeR2p
    module procedure resizeC1,  resizeC2
    module procedure resizeC1p, resizeC2p
    module procedure resizeC22p
  end interface
+ !
+ interface BSS_resize_only
+   module procedure resize_onlyR1
+   module procedure resize_onlyR1p, resize_onlyR2p
+   module procedure resize_onlyC1,  resize_onlyC2
+   module procedure resize_onlyC1p, resize_onlyC2p
+   module procedure resize_onlyC22p
+ end interface
 
  !
  contains
    !
-   ! Start BSS_resize interface
+   ! Start BSS_resize_and_save interface
    !
    subroutine resizeR1(VAR_R1)
      !
@@ -227,7 +247,7 @@ subroutine resizeR1p(VAR_R1)
      real(SP)                         :: TMP_R1(BSS_n_eig)
      !
      TMP_R1=VAR_R1(:BSS_n_eig)
-     deallocate(VAR_R1)
+     deallocate(VAR_R1); nullify(VAR_R1)
      allocate(VAR_R1(BSS_n_eig))
      VAR_R1=TMP_R1
      !
@@ -296,7 +316,7 @@ subroutine resizeC2p(VAR_C2)
      dim1=size(VAR_C2,2)
      allocate(TMP_C2(BSS_n_eig,dim1))
      TMP_C2=VAR_C2(:BSS_n_eig,:)
-     deallocate(VAR_C2)
+     deallocate(VAR_C2); nullify(VAR_C2)
      allocate(VAR_C2(BSS_n_eig,dim1))
      VAR_C2=TMP_C2
      !
@@ -315,7 +335,67 @@ subroutine resizeC22p(VAR_C22,both)
      !
    end subroutine resizeC22p
    !
-   ! End BSS_resize interface
+   ! End BSS_resize_and_save interface
+   !
+   !
+   ! Start BSS_resize_only interface
+   !
+   subroutine resize_onlyR1(VAR_R1)
+     real(SP), allocatable, intent(inout) :: VAR_R1(:)
+     deallocate(VAR_R1)
+     allocate(VAR_R1(BSS_n_eig))
+   end subroutine resize_onlyR1
+   !
+   subroutine resize_onlyR1p(VAR_R1)
+     real(SP), pointer, intent(inout) :: VAR_R1(:)
+     deallocate(VAR_R1); nullify(VAR_R1)
+     allocate(VAR_R1(BSS_n_eig))
+   end subroutine resize_onlyR1p
+   !
+   subroutine resize_onlyR2p(VAR_R2)
+     real(SP), pointer, intent(inout) :: VAR_R2(:,:)
+     integer :: dim1
+     dim1=size(VAR_R2(:,1))
+     deallocate(VAR_r2); nullify(VAR_R2)
+     allocate(VAR_R2(dim1,BSS_n_eig))
+   end subroutine resize_onlyR2p
+   !
+   subroutine resize_onlyC1(VAR_C1)
+     complex(SP), allocatable, intent(inout) :: VAR_C1(:)
+     deallocate(VAR_C1)
+     allocate(VAR_C1(BSS_n_eig))
+   end subroutine resize_onlyC1
+   !
+   subroutine resize_onlyC1p(VAR_C1)
+     complex(SP), pointer, intent(inout) :: VAR_C1(:)
+     deallocate(VAR_C1); nullify(VAR_C1)
+     allocate(VAR_C1(BSS_n_eig))
+   end subroutine resize_onlyC1p
+   !
+   subroutine resize_onlyC2(VAR_C2)
+     complex(SP), allocatable, intent(inout) :: VAR_C2(:,:)
+     integer :: dim1
+     dim1=size(VAR_C2,2)
+     deallocate(VAR_C2)
+     allocate(VAR_C2(BSS_n_eig,dim1))
+   end subroutine resize_onlyC2
+   !
+   subroutine resize_onlyC2p(VAR_C2)
+     complex(SP), pointer, intent(inout) :: VAR_C2(:,:)
+     integer :: dim1
+     dim1=size(VAR_C2,2)
+     deallocate(VAR_C2); nullify(VAR_C2)
+     allocate(VAR_C2(BSS_n_eig,dim1))
+   end subroutine resize_onlyC2p
+   !
+   subroutine resize_onlyC22p(VAR_C22,both)
+     logical, intent(in)                 :: both
+     complex(SP), pointer, intent(inout) :: VAR_C22(:,:)
+     deallocate(VAR_C22); nullify(VAR_C22)
+     allocate(VAR_C22(BSS_n_eig,BSS_n_eig))
+   end subroutine resize_onlyC22p
+   !
+   ! End BSS_resize_only interface
    !
    subroutine BSS_alloc( )
      use pars,      ONLY:IP,cZERO,rZERO
diff --git a/src/modules/mod_DICHROISM.F b/src/modules/mod_DICHROISM.F
index d3208473e2..fa86538e88 100644
--- a/src/modules/mod_DICHROISM.F
+++ b/src/modules/mod_DICHROISM.F
@@ -5,14 +5,21 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 module DICHROISM
  !
  use pars,    ONLY:SP
- use BS,      ONLY:l_BS_dichroism
+ use BS,      ONLY:l_BS_dichroism,l_BS_mespin,l_BS_meorb
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  complex(SP), allocatable :: BSS_dipoles_dich(:,:)
+ complex(SP), allocatable :: BSS_dipoles_mespin(:,:)
+ complex(SP), allocatable :: BSS_dipoles_meorb(:,:)
  !
  contains
    !
@@ -28,5 +35,31 @@ subroutine BSS_DICH_free( )
      if (.not.l_BS_dichroism) return
      YAMBO_FREE(BSS_dipoles_dich)
    end subroutine
+
+   subroutine BSS_MEspin_alloc( )
+     use pars,       ONLY:cZERO
+     use BS,         ONLY:BS_dip_size,BS_H_dim
+     if (.not.l_BS_mespin) return
+     YAMBO_ALLOC(BSS_dipoles_mespin,(BS_dip_size,BS_H_dim))
+     BSS_dipoles_mespin = cZERO
+   end subroutine
+   !
+   subroutine BSS_MEspin_free( )
+     if (.not.l_BS_mespin) return
+     YAMBO_FREE(BSS_dipoles_mespin)
+   end subroutine
+   !
+   subroutine BSS_MEorb_alloc( )
+     use pars,       ONLY:cZERO
+     use BS,         ONLY:BS_dip_size,BS_H_dim
+     if (.not.l_BS_meorb) return
+     YAMBO_ALLOC(BSS_dipoles_meorb,(BS_dip_size,BS_H_dim))
+     BSS_dipoles_meorb = cZERO
+   end subroutine
+   !
+   subroutine BSS_MEorb_free( )
+     if (.not.l_BS_meorb) return
+     YAMBO_FREE(BSS_dipoles_meorb)
+   end subroutine
    !
 end module
diff --git a/src/modules/mod_DIPOLES.F b/src/modules/mod_DIPOLES.F
index fa94f55380..ca393c4079 100644
--- a/src/modules/mod_DIPOLES.F
+++ b/src/modules/mod_DIPOLES.F
@@ -5,15 +5,20 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include <dev_defs.h>
+#include <y_memory.h>
+!
 module DIPOLES
  !
  use pars,          ONLY:SP,lchlen,schlen
  use units,         ONLY:HA2EV
  use descriptors,   ONLY:IO_desc
  use gpu_m,         ONLY:have_gpu
+ use y_memory_alloc
  !
-#include <dev_defs.h>
-#include <y_memory.h>
+ implicit none
  !
  ! Logicals
  !
@@ -23,6 +28,7 @@ module DIPOLES
  !
  character(schlen) :: shifted_grids_obs=""
  character(schlen) :: covariant_obs    =""
+ character(schlen) :: der_k_obs        =""
  character(schlen) :: g_space_obs      =""
  character(schlen) :: x_space_obs      =""
  character(schlen) :: specific_obs     =""
@@ -31,6 +37,7 @@ module DIPOLES
  !
  integer                  :: num_shifted_grids
  logical                  :: eval_OVERLAPS
+ logical                  :: l_force_SndOrd
  ! 
  ! Descriptor
  !
@@ -63,8 +70,8 @@ module DIPOLES
    integer    :: ng
    real(SP)   :: ehe(2)
    real(SP)   :: q0(3)
-   real(SP)   :: Energy_treshold
-   real(SP)   :: Energy_treshold_vv
+   real(SP)   :: Energy_threshold
+   real(SP)   :: Energy_threshold_vv
    logical    :: v_eff
    logical    :: Vnl_included
    logical    :: bands_ordered
@@ -91,8 +98,8 @@ subroutine DIPOLES_duplicate(Dip_in,Dip_out)
      Dip_out%ng        = Dip_in%ng
      Dip_out%ehe       = Dip_in%ehe
      Dip_out%q0        = Dip_in%q0
-     Dip_out%Energy_treshold    = Dip_in%Energy_treshold
-     Dip_out%Energy_treshold_vv = Dip_in%Energy_treshold_vv
+     Dip_out%Energy_threshold    = Dip_in%Energy_threshold
+     Dip_out%Energy_threshold_vv = Dip_in%Energy_threshold_vv
      Dip_out%v_eff              = Dip_in%v_eff
      Dip_out%Vnl_included       = Dip_in%Vnl_included
      Dip_out%bands_ordered      = Dip_in%bands_ordered
@@ -111,8 +118,8 @@ subroutine DIPOLES_reset(Dip)
      Dip%ng=0
      Dip%ehe=(/-1._SP,-1._SP/)/HA2EV
      Dip%q0=(/1._SP,0._SP,0._SP/)
-     Dip%Energy_treshold=1.E-5_SP/HA2EV
-     Dip%Energy_treshold_vv=1.E+5_SP/HA2EV
+     Dip%Energy_threshold=1.E-5_SP/HA2EV
+     Dip%Energy_threshold_vv=1.E+5_SP/HA2EV
      Dip%v_eff=.false.
      Dip%Vnl_included=.false.
      Dip%bands_ordered=.true.
diff --git a/src/modules/mod_D_lattice.F b/src/modules/mod_D_lattice.F
index b64ec25fa9..1aaa37cef4 100644
--- a/src/modules/mod_D_lattice.F
+++ b/src/modules/mod_D_lattice.F
@@ -5,13 +5,18 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 module D_lattice
  !
  use pars,        ONLY:LP,SP,lchlen,cONE,cZERO,cI,schlen
  use devxlib
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  ! Non periodic directions
  !
@@ -25,6 +30,7 @@ module D_lattice
  real(SP) :: T_elecs
  real(SP) :: T_holes
  real(SP) :: Bose_Temp
+ real(SP) :: Boltz_Temp
  real(SP) :: input_GS_Tel
  logical  :: input_Tel_is_negative
  !
@@ -121,15 +127,15 @@ subroutine symmetry_group_table(msg_where)
          m=matmul(dl_sop(:,:,i1),dl_sop(:,:,i2))
          do i3=1,nsym
            if (all(abs(m-dl_sop(:,:,i3))<=1.E-5)) then
-             if (sop_tab(i1,i2)/=0) call error('[SYMs] check the input symmetries!')        
+             if (sop_tab(i1,i2)/=0) call error('[SYMs] Two sym pairs lead to the same symmetry') 
              sop_tab(i1,i2)=i3
              if (sop_tab(i1,i2)==1) sop_inv(i1)=i2                
            endif
          enddo
-         if (sop_tab(i1,i2)==0) call error('[SYMs] check the input symmetries!')    
+         if (sop_tab(i1,i2)==0) call error('[SYMs] Sop tab is not complete')
        enddo
      enddo
-     if (any(sop_inv==0)) call error('[SYMs] check the input symmetries!')
+     if (any(sop_inv==0)) call error('[SYMs] Sop inv missing for some symmetries')
      call msg(msg_where,'Group table correct   ',.true.)
      !
 #ifdef _GPU
diff --git a/src/modules/mod_ELPH.F b/src/modules/mod_ELPH.F
index 1f7a81845f..694adf285a 100644
--- a/src/modules/mod_ELPH.F
+++ b/src/modules/mod_ELPH.F
@@ -5,21 +5,27 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module ELPH
  !
  use pars,           ONLY:SP,rZERO,schlen
  use electrons,      ONLY:levels,E_reset
  use descriptors,    ONLY:IO_desc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Dumensions
- integer   :: ph_modes=0
- integer   :: elph_nb=0
+ integer   :: ph_modes
+ integer   :: elph_bands(2)
+ integer   :: elph_nb
  integer   :: elph_nk_bz=0
  integer   :: elph_nQ=0
  integer   :: elph_nQ_used=0
- integer   :: elph_branches(2)=0
+ integer   :: elph_branches(2)
  !
  logical   :: elph_use_q_grid=.FALSE.
  logical   :: elph_grids_are_expanded=.FALSE.
@@ -113,8 +119,10 @@ module ELPH
  !
  ! Databases
  logical :: l_GKKP_hosts_bare_dV=.FALSE.
- logical :: l_GKKP_DB_exists=.FALSE.
+ logical :: l_GKKP_hosts_DW     =.TRUE.
+ logical :: l_GKKP_DB_exists    =.FALSE.
  logical :: l_GKKP_expanded_DB_exists=.FALSE.
+ logical :: l_GKKP_debug=.FALSE.
  character(schlen) :: gkkp_db ! Database type for el-ph calculation (gkkp | expanded | genFroh )
  !
  contains
@@ -152,8 +160,8 @@ subroutine FineGd_E_kpq_alloc(E)
      !
      type(E_kpq_fine_grid):: E
      !
-     YAMBO_ALLOC(E%E_kpq,(elph_nb,E%nk,n_sp_pol,E%nq_around))
-     YAMBO_ALLOC(E%f_kpq,(elph_nb,E%nk,n_sp_pol,E%nq_around))
+     YAMBO_ALLOC(E%E_kpq,(elph_bands(1):elph_bands(2),E%nk,n_sp_pol,E%nq_around))
+     YAMBO_ALLOC(E%f_kpq,(elph_bands(1):elph_bands(2),E%nk,n_sp_pol,E%nq_around))
      !
    end subroutine
    !
diff --git a/src/modules/mod_EXCPH.F b/src/modules/mod_EXCPH.F
new file mode 100644
index 0000000000..62590a51e6
--- /dev/null
+++ b/src/modules/mod_EXCPH.F
@@ -0,0 +1,353 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+! headers
+!
+#include<y_memory.h>
+!
+module EXCPH
+ !
+ use pars,           ONLY:SP,schlen,lchlen,cZERO,rZERO
+ use BS_solvers,     ONLY:BSS_n_eig
+ use BS,             ONLY:BS_H_dim
+ use ELPH,           ONLY:ph_modes
+ use R_lattice,      ONLY:bz_samp
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ ! Exciton-Phonon 
+ !================
+ !
+ ! ... GKKP & EXC-PH Interaction
+ !
+ logical  :: l_const_elph,l_abs_elph,l_abs_exc,l_elec_only,l_hole_only 
+ complex(SP), allocatable :: EXCPH_Gkkp(:,:,:)
+ real(SP),    allocatable :: EXCPH_Gkkp_sq(:,:,:)
+ integer                  :: n_exc_in
+ integer                  :: n_exc_out
+ integer                  :: EXCPH_states(2)    ! External states
+ integer                  :: EXCPH_sum(2)       ! Internal states ("virtual")
+ integer                  :: EXCPH_interband(2) ! Interband states
+ real(SP), allocatable    :: EXCPH_q(:,:)
+ real(SP)                 :: EXC_q0(3)
+ !
+ ! L_in and L_out paths
+ !
+ character(lchlen)       :: Lin_path 
+ character(lchlen)       :: Lout_path 
+ !
+ ! L types
+ !
+ character(schlen)       :: L_kind_in
+ character(schlen)       :: L_kind_out 
+ !
+ ! ExcPH gkkp
+ !
+ complex(SP), allocatable :: A_rot(:,:),Xi(:,:)
+ integer,     allocatable :: BSS_eh_table_m1_in(:,:,:,:,:)
+ integer,     allocatable :: k_plus_q_table(:,:)
+ complex(SP), allocatable :: BS_E_in(:),BS_E(:)
+ real(SP),    allocatable :: BS_all_E(:,:)
+ complex(SP), allocatable :: BS_mat_in(:,:)
+ complex(SP), allocatable :: BS_mat_out(:,:)
+ integer,     allocatable :: BSS_eh_table_in(:,:)
+ complex(SP), pointer :: BS_R_left(:),BS_R_right(:)
+ real(SP),    allocatable :: BS_R(:)
+ !
+ ! Debug
+ !
+ real(SP) :: EXCPH_kthresh 
+ !
+ ! ... Sigma
+ !
+ complex(SP), allocatable :: BS_Sat_E(:,:,:)
+ complex(SP), allocatable :: BS_Sat_E_PH_abs(:,:,:)
+ real(SP),    target, allocatable :: BS_Sat_WEIGHT(:,:,:)
+ real(SP),    target, allocatable :: BS_Sat_WEIGHT_PH_abs(:,:,:)
+ real(SP),    allocatable :: EXCPH_Renorm(:)
+ real(SP),    allocatable :: EXCPH_Renorm_PH_abs(:)
+ real(SP),    allocatable :: EXCPH_Renorm_PL(:)
+ real(SP),    allocatable :: EXCPH_Renorm_PL_PH_abs(:) 
+ complex(SP), allocatable :: BS_EXCPH_E(:)
+ real(SP),    allocatable :: ph_EXCPH_E(:)
+ complex(SP), allocatable :: EXCPH_Sigma_c_diag(:,:)
+ complex(SP), allocatable :: EXCPH_Sigma_c(:,:,:)
+ real(SP) :: L_damping
+ real(SP) :: EXCPH_deltaE_treshold
+ !
+ ! Minimum excitonic energy
+ !
+ real(SP) :: min_EXC_E
+ !
+ ! Double-grid
+ !
+ integer  :: ID_EXC,ID_EXC_obj
+ real(SP) :: min_EXC_E_DbGd
+ integer  :: min_pos_E_DbGd(1)
+ real(SP), allocatable :: EXC_E_DbGd(:,:)
+ type(bz_samp) :: q_DbGd 
+ !
+ ! Life Times and Line Widths
+ !
+ real(SP), allocatable :: EXC_Lifetime(:)
+ real(SP), allocatable :: EXC_LineWidth(:)
+ !
+ ! DB Q-mapping in case symmetries are removed
+ !
+ integer, allocatable :: DB_Q_map(:)
+ !
+ ! Control flags
+ !
+ logical       :: l_DbGd_WEIGHTs ! Double-Grid for Satellite weights
+ logical       :: l_DbGd_PH_only ! Double-Grid only for phonon energies
+ logical       :: l_no_matrix_elements  ! Set EXC-PH matrix elements to 1 (in lifetime and satellites)
+ logical       :: l_EXCPH_offdiago ! Turn on off-diagonal part of the EXC-PH self-energy
+ !
+ ! Excitonic occupations
+ !
+ real(SP), allocatable :: EXC_occ(:,:)
+ integer  :: ID_occ,ID_occ_obj
+ real(SP), allocatable :: EXC_occ_DbGd(:,:)
+ real(SP) :: alphaQ
+ !
+ contains
+   !
+   subroutine BSE_in_alloc(k)
+     !      
+     ! pre-allocs...
+     !
+     use R_lattice,      ONLY:bz_samp
+     use electrons,      ONLY:n_sp_pol 
+     use BS,             ONLY:BS_bands
+     !
+#include<y_memory.h>
+     !
+     type(bz_samp) ::k
+     ! 
+     YAMBO_ALLOC(BS_E_in,(BSS_n_eig))
+     YAMBO_ALLOC(BS_mat_in,(BS_H_dim,EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(BSS_eh_table_in,(BS_H_dim,5))
+     YAMBO_ALLOC(BSS_eh_table_m1_in,(k%nbz,BS_bands(1):BS_bands(2),BS_bands(1):BS_bands(2),n_sp_pol,n_sp_pol))
+     !
+     BS_mat_in=cZERO
+     BS_E_in  =cZERO
+     BSS_eh_table_in     = 0
+     BSS_eh_table_m1_in  = 0
+     !
+   end subroutine BSE_in_alloc
+   !
+   subroutine BSE_out_alloc()
+     implicit none
+     !
+     YAMBO_ALLOC(BS_mat_out,(BS_H_dim,EXCPH_sum(1):EXCPH_sum(2)))
+     BS_mat_out=cZERO
+     !
+   end subroutine BSE_out_alloc
+   ! 
+   subroutine Sat_alloc()
+     implicit none
+     YAMBO_ALLOC(BS_Sat_E,(ph_modes,EXCPH_sum(1):EXCPH_sum(2),EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(BS_Sat_E_PH_abs,(ph_modes,EXCPH_sum(1):EXCPH_sum(2),EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(BS_Sat_WEIGHT,(ph_modes,EXCPH_sum(1):EXCPH_sum(2),EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(BS_Sat_WEIGHT_PH_abs,(ph_modes,EXCPH_sum(1):EXCPH_sum(2),EXCPH_states(1):EXCPH_states(2)))
+     !
+     BS_Sat_E            =rZERO
+     BS_Sat_E_PH_abs     =rZERO
+     BS_Sat_WEIGHT       =rZERO
+     BS_Sat_WEIGHT_PH_abs=rZERO
+     !
+   end subroutine Sat_alloc
+   !
+   subroutine Sat_free()
+     implicit none
+     YAMBO_FREE(BS_Sat_E)
+     YAMBO_FREE(BS_Sat_E_PH_abs)
+     YAMBO_FREE(BS_Sat_WEIGHT)
+     YAMBO_FREE(BS_Sat_WEIGHT_PH_abs)
+   end subroutine Sat_free
+   !
+   subroutine Renorm_alloc()
+     implicit none
+     YAMBO_ALLOC(EXCPH_Renorm,(EXCPH_states(2)))
+     YAMBO_ALLOC(EXCPH_Renorm_PH_abs,(EXCPH_states(2)))
+     YAMBO_ALLOC(EXCPH_Renorm_PL,(EXCPH_states(2)))
+     YAMBO_ALLOC(EXCPH_Renorm_PL_PH_abs,(EXCPH_states(2)))
+     !
+     EXCPH_Renorm       =rZERO
+     EXCPH_Renorm_PH_abs=rZERO
+     EXCPH_Renorm_PL       =rZERO
+     EXCPH_Renorm_PL_PH_abs=rZERO
+     !
+   end subroutine Renorm_alloc
+   !
+   subroutine Renorm_free()
+     implicit none
+     YAMBO_FREE(EXCPH_Renorm)
+     YAMBO_FREE(EXCPH_Renorm_PH_abs)
+     YAMBO_FREE(EXCPH_Renorm_PL)
+     YAMBO_FREE(EXCPH_Renorm_PL_PH_abs)
+   end subroutine Renorm_free
+   !
+   subroutine EXCPH_gkkp_alloc()
+     implicit none
+     YAMBO_ALLOC(EXCPH_Gkkp,(ph_modes,EXCPH_sum(1):EXCPH_sum(2),EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(EXCPH_Gkkp_sq,(ph_modes,EXCPH_sum(1):EXCPH_sum(2),EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(Xi,(BS_H_dim,2))    
+     EXCPH_gkkp          =cZERO
+     EXCPH_gkkp_sq       =rZERO
+   end subroutine EXCPH_gkkp_alloc   
+   !
+   subroutine EXCPH_gkkp_free
+     implicit none
+     YAMBO_FREE(EXCPH_Gkkp)
+     YAMBO_FREE(EXCPH_Gkkp_sq)
+     YAMBO_FREE(Xi)    
+   end subroutine EXCPH_gkkp_free
+   !
+   subroutine RESIDUALS_and_EIGENVALUES_free
+     implicit none
+     !
+     YAMBO_FREE(BS_all_E)
+     YAMBO_FREE_P(BS_R_left)
+     YAMBO_FREE_P(BS_R_right)
+     YAMBO_FREE(BS_R)
+     if(allocated(BS_E)) then
+        YAMBO_FREE(BS_E)
+     endif
+     !
+   end subroutine RESIDUALS_and_EIGENVALUES_free
+   !
+   subroutine RESIDUALS_and_EIGENVALUES_alloc(q)
+     use R_lattice,      ONLY:bz_samp
+     implicit none
+     type(bz_samp) ::q
+     if(.not.allocated(BS_E)) then
+       YAMBO_ALLOC(BS_E,(BSS_n_eig))
+     endif
+     YAMBO_ALLOC_P(BS_R_left,(BSS_n_eig))
+     YAMBO_ALLOC_P(BS_R_right,(BSS_n_eig))
+     YAMBO_ALLOC(BS_R,(BSS_n_eig))
+     YAMBO_ALLOC(BS_E_in,(EXCPH_states(1):EXCPH_states(2)))
+     YAMBO_ALLOC(BS_all_E,(BSS_n_eig,q%nibz))
+   end subroutine RESIDUALS_and_EIGENVALUES_alloc
+   !
+   subroutine BSE_alloc(k,q)
+     !      
+     use electrons,      ONLY:n_sp_pol      
+     use BS_solvers,     ONLY:BS_mat,BSS_eh_table,BSS_eh_table_m1,BS_H_dim,BSS_n_eig
+     use BS,             ONLY:BS_H_dim,BS_bands,BSqpts
+     use R_lattice,      ONLY:bz_samp
+     !
+#include<y_memory.h>
+     !
+     type(bz_samp) ::k,q
+     !      
+     ! pre-allocs...
+     !
+     YAMBO_ALLOC(BS_mat,(BS_H_dim,BSS_n_eig))
+     YAMBO_ALLOC(BSS_eh_table_m1,(k%nbz,BS_bands(1):BS_bands(2),BS_bands(1):BS_bands(2),n_sp_pol,n_sp_pol))
+     !
+     if(.not.allocated(BS_E)) then
+        YAMBO_ALLOC(BS_E,(BSS_n_eig))
+     endif
+     YAMBO_ALLOC(k_plus_q_table,(k%nbz,q%nbz))
+     YAMBO_ALLOC(BSqpts,(3,q%nibz))
+     !
+     BS_mat=cZERO
+     BS_E=cZERO
+     !
+     k_plus_q_table  = 0
+     BSS_eh_table_m1 = 0
+     !
+   end subroutine BSE_alloc
+   !
+   subroutine BSE_free
+     use BS_solvers,     ONLY:BS_mat,BSS_eh_table,BSS_eh_table_m1,BS_H_dim,BSS_n_eig
+     use BS,             ONLY:BSqpts
+     !
+#include<y_memory.h>
+     !
+     YAMBO_FREE(BS_mat)
+     YAMBO_FREE(BSS_eh_table)
+     YAMBO_FREE(BSS_eh_table_m1)
+     if(allocated(BS_E)) then
+       YAMBO_FREE(BS_E)
+     endif
+     YAMBO_FREE(k_plus_q_table)
+     YAMBO_FREE(BSqpts)
+   end subroutine BSE_free
+   !
+   subroutine BSE_in_free
+     implicit none
+     YAMBO_FREE(BS_E_in)
+     YAMBO_FREE(BS_mat_in)
+     YAMBO_FREE(BSS_eh_table_in)
+     YAMBO_FREE(BSS_eh_table_m1_in)
+   end subroutine BSE_in_free
+   !
+   subroutine build_Q_map(q)
+    use R_lattice,      ONLY:bz_samp
+    use zeros,          ONLY:k_iku_zero
+    use vec_operate,    ONLY:v_is_zero,c2a,rlu_v_is_zero
+    use ELPH,           ONLY:PH_qpt
+    use com,            ONLY:msg
+    !
+    implicit none
+    type(bz_samp) ::q
+    !
+    integer :: iq,idb
+    real(SP) :: v_rlu(3)
+    !
+    YAMBO_ALLOC(DB_Q_map,(q%nbz))
+    !
+    ! In this case nibz=nbz
+    !
+    DB_Q_map=0
+    do iq=1,q%nbz
+      do idb=1,q%nbz
+        v_rlu=q%ptbz(iq,:)-PH_qpt(idb,:)
+        !if(.not.v_is_zero(v_rlu,zero_=k_iku_zero)) cycle
+        call c2a(v_in=v_rlu,mode='ki2a')
+        if(.not.rlu_v_is_zero(v_rlu)) cycle
+        DB_Q_map(iq)=idb
+        exit
+      enddo
+    enddo
+    if(any(DB_Q_map==0)) then
+      do iq=1,q%nbz
+        if(DB_Q_map(iq)==0) call msg('s',' Q point not found in gkkp_expanded DBs ',q%ptbz(iq,:))
+      enddo 
+      call error("Q-point not found in gkkp_expanded DBs")
+    endif
+    !
+    do iq=1,q%nbz
+      if(DB_Q_map(iq)/=iq) then
+        call warning('Q-points in Yambo and gkkp_expanded DBs have a different order !')
+        exit
+      endif
+    enddo
+    ! 
+   end subroutine build_Q_map
+   !
+end module EXCPH
diff --git a/src/modules/mod_FFT.F b/src/modules/mod_FFT.F
index aaae357551..9d220b65b2 100644
--- a/src/modules/mod_FFT.F
+++ b/src/modules/mod_FFT.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+!
 !#ifdef _MKLGPU
 !include "mkl_dfti_omp_offload.f90"
 !#endif
@@ -16,7 +20,6 @@ module FFT_m
  use mklfft_gpu
 #endif
  !
-#include<dev_defs.h>
  !
  implicit none
  !
diff --git a/src/modules/mod_IO.F b/src/modules/mod_IO.F
index a711fd0675..38cad3c48c 100644
--- a/src/modules/mod_IO.F
+++ b/src/modules/mod_IO.F
@@ -38,6 +38,7 @@ module IO_m
  logical           :: io_OBSERVABLES
  logical           :: io_CARRIERs
  logical           :: io_GF
+ logical           :: io_V_bands
  logical           :: io_SC
  logical           :: io_BS_K
  logical           :: io_SCREEN
@@ -452,12 +453,12 @@ subroutine netcdf_call(status,ID,VAR,ID_VAR)
    integer function netcdf_dim_size(ID,DIMNAME)
      ! Returns the value of the specified dim of a variable
      integer,      intent(in)    :: ID
-     character(*), intent(inout) :: DIMNAME
+     character(*), intent(in)    :: DIMNAME
      integer                     :: nf90_err,NF90_DIM_ID
      netcdf_dim_size=0
      nf90_err=nf90_inq_dimid(io_unit(ID),DIMNAME,NF90_DIM_ID)
      if(nf90_err/=NF90_NOERR) return
-     nf90_err=nf90_inquire_dimension(io_unit(ID),NF90_DIM_ID, name = dimname, len = netcdf_dim_size)
+     nf90_err=nf90_inquire_dimension(io_unit(ID),NF90_DIM_ID, len = netcdf_dim_size)
    end function
    !
    integer function netcdf_dim(ID,DIM,dim_name)
diff --git a/src/modules/mod_MAGNONS.F b/src/modules/mod_MAGNONS.F
index aea16a2895..b2331f5ee4 100644
--- a/src/modules/mod_MAGNONS.F
+++ b/src/modules/mod_MAGNONS.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 module MAGNONS
  !
  use pars,       ONLY:SP
  use BS,         ONLY:l_BS_magnons
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  complex(SP), allocatable :: BSS_dipoles_magn(:,:)
  !
diff --git a/src/modules/mod_MPA.F b/src/modules/mod_MPA.F
index 1acd838381..f3e4dce818 100644
--- a/src/modules/mod_MPA.F
+++ b/src/modules/mod_MPA.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): DALV,AF
 !
+! headers
+!
+#include <y_memory.h>
+#include <dev_defs.h>
+!
 !------------------------------------------------------------
 ! Multipole interpolation:
 !                         - analytical solution for 1-3 poles
@@ -34,9 +39,9 @@ module mpa_m
  use functions,       ONLY:NAN
  use matrix,          ONLY:PAR_matrix
  use gpu_m,           ONLY:have_gpu
+ use y_memory_alloc
  !
-#include <y_memory.h>
-#include <dev_defs.h>
+ implicit none
 
  complex(SP),      allocatable, target :: MPA_freqs(:)
  type(PAR_matrix), allocatable, target :: MPA_E_par(:)
diff --git a/src/modules/mod_Overlaps.F b/src/modules/mod_Overlaps.F
deleted file mode 100644
index 1bfec2272b..0000000000
--- a/src/modules/mod_Overlaps.F
+++ /dev/null
@@ -1,63 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2015 The Yambo Team
-!
-! Authors (see AUTHORS file for details): MG CA
-!
-module Overlaps_m
- !
- use pars,  ONLY:SP
-#include<y_memory.h>
- !
- complex(SP), allocatable :: S_det(:,:)        
- complex(SP), allocatable :: Sm1_plus(:,:,:,:)
- complex(SP), allocatable :: Sm1_minus(:,:,:,:)
- !
- interface
-   !
-   integer function io_Overlaps(X,Xen,ID,DIP_S)
-     !
-     use pars,          ONLY:SP
-     use X_m,           ONLY:X_t
-     use electrons,     ONLY:levels,n_sp_pol
-     use R_lattice,     ONLY:nXkbz
-     !
-     implicit none
-     type(X_t)   ::X
-     type(levels)::Xen
-     integer     ::ID
-     complex(SP),  optional :: DIP_S(X%ib(2),X%ib(2),6,Xen%nk,n_sp_pol)
-     !
-   end function io_Overlaps
-     !
- end interface
- !
- contains
-   !
-   subroutine S_alloc(Xen,Xk)
-     !
-     use R_lattice, ONLY:bz_samp
-     use electrons, ONLY:levels
-     use drivers,   ONLY:l_real_time
-     !
-     type(bz_samp), intent(in) :: Xk
-     type(levels),  intent(in) :: Xen
-     !
-     YAMBO_ALLOC(S_det,(3,Xk%nbz))
-     YAMBO_ALLOC(Sm1_plus,(3,maxval(Xen%nbf),maxval(Xen%nbf),Xk%nbz))
-     YAMBO_ALLOC(Sm1_minus,(3,maxval(Xen%nbf),maxval(Xen%nbf),Xk%nbz))
-     !
-   end subroutine S_alloc
-   !
-   subroutine S_dealloc()
-     !
-     use drivers,   ONLY:l_real_time
-     !
-     YAMBO_FREE(S_det)
-     YAMBO_FREE(Sm1_plus)
-     YAMBO_FREE(Sm1_minus)
-     !
-   end subroutine S_dealloc    
-   !
-end module
diff --git a/src/modules/mod_PHOTOLUM.F b/src/modules/mod_PHOTOLUM.F
index 620ee61f1f..80e740c979 100644
--- a/src/modules/mod_PHOTOLUM.F
+++ b/src/modules/mod_PHOTOLUM.F
@@ -5,11 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module PHOTOLUM
  !
- use BS,         ONLY:l_BS_photolum
+ use BS,         ONLY:l_BS_photolum,l_BS_trace
  use pars,       ONLY:SP
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  real(SP)                 :: PL_prefactor
  !
@@ -24,7 +30,7 @@ module PHOTOLUM
  ! BS dipoles
  !
  complex(SP), allocatable :: BSS_dipoles_PL(:,:)
- complex(SP), allocatable :: BSS_PL_f(:)
+ complex(SP), allocatable :: BSS_PL_f(:,:)
  !
  contains
    !
@@ -36,19 +42,17 @@ subroutine PL_init( )
      use BS_solvers, ONLY:BSS_n_freqs
      PL_prefactor =32._SP*pi**3*SPEED_OF_LIGHT*2._SP/3._SP*RL_vol/nqbz/(2._SP*pi)**3
      if (allocated(PL))  return
-     PL_weights=PL_weights/v_norm(PL_weights)
+     if (     l_BS_trace) PL_weights=PL_weights/v_norm(PL_weights)
+     if (.not.l_BS_trace) PL_weights=(/1._SP,0._SP,0._SP/)
    end subroutine
    !
    subroutine BSS_PL_alloc( )
      use pars,       ONLY:cZERO,rZERO
-     use BS,         ONLY:BS_H_dim,BS_K_dim
-     integer :: f_PL_dim
+     use BS,         ONLY:BS_H_dim,BS_K_dim,BS_dip_size
      if (.not.l_BS_photolum) return
-     YAMBO_ALLOC(BSS_dipoles_PL,(3,BS_H_dim))
+     YAMBO_ALLOC(BSS_dipoles_PL,(BS_dip_size,BS_H_dim))
      BSS_dipoles_PL = cZERO
-     f_PL_dim=BS_H_dim
-     if(BS_H_dim==BS_K_dim(1)) f_PL_dim=2*BS_K_dim(1)
-     YAMBO_ALLOC(BSS_PL_f,(BS_H_dim))
+     YAMBO_ALLOC(BSS_PL_f,(BS_H_dim,2))
      BSS_PL_f = rZERO
    end subroutine
    !
diff --git a/src/modules/mod_POL_FIT.F b/src/modules/mod_POL_FIT.F
index d177471c91..2c172d16d0 100644
--- a/src/modules/mod_POL_FIT.F
+++ b/src/modules/mod_POL_FIT.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! POL_Gilbreth_fit based on:
 !
 ! pfit.f90: Module for polynomial least-squares fitting
@@ -116,8 +120,9 @@ subroutine POL_Gilbreth_fit(x,y,sig,a)
     !                    The estimated error in a(i) is sqrt(Cov(a(i),a(i))).
     ! Notes:
     !   This routine uses a QR decomposition method, which should be more
-    !   numerically stable than solving the normal equations.
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
     real(DP), intent(in)  :: x(:), y(:), sig(:)
     real(DP), intent(out) :: a(:)
     real(DP), allocatable :: work(:), C(:,:), Q(:,:), R(:,:), b(:)
diff --git a/src/modules/mod_QP.F b/src/modules/mod_QP.F
index cb786e2d9f..efdd4bff43 100644
--- a/src/modules/mod_QP.F
+++ b/src/modules/mod_QP.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module QP_m
  !
  use descriptors,    ONLY:IO_desc,IO_desc_duplicate,IO_desc_reset
  use pars,           ONLY:SP,schlen,cZERO
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Mixing for non perturbative calculations
  integer           :: SC_bands_mixed
diff --git a/src/modules/mod_QP_CTL.F b/src/modules/mod_QP_CTL.F
index 1637bf34a9..a34cc3582c 100644
--- a/src/modules/mod_QP_CTL.F
+++ b/src/modules/mod_QP_CTL.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module QP_CTL_m
  !
  use pars,      ONLY:SP,schlen,lchlen,n_QP_actions_MAX
@@ -180,7 +184,9 @@ subroutine QP_CTL_fill(iA,i_spin,ID,i_field,CBm,VBM)
    end subroutine
    !
    subroutine QP_actions_reset
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
      integer :: i_a
      n_QP_actions=0
      do i_a=1,n_QP_actions_MAX
diff --git a/src/modules/mod_RT_control.F b/src/modules/mod_RT_control.F
index b59bc48756..7667ca5b4f 100644
--- a/src/modules/mod_RT_control.F
+++ b/src/modules/mod_RT_control.F
@@ -5,13 +5,18 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 module RT_control
  !
  use pars,       ONLY:SP,lchlen,schlen,rZERO
  use units,      ONLY:AUT2FS
  use real_time,  ONLY:l_NE_dynamics,l_NE_with_fields,l_elph_scatt
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer                  :: CACHE_OBS_last_point
  real(SP)                 :: CACHE_OBS_INTERVAL_time
@@ -19,7 +24,7 @@ module RT_control
  integer                  :: CACHE_OBS_steps
  integer                  :: CACHE_OBS_steps_now 
  !
- integer,parameter :: N_RT_databases=11 
+ integer,parameter :: N_RT_databases=13 
  integer           :: RT_DB_ID(N_RT_databases)= 0
  integer           :: RT_current_DB_identifier ! \in [1,...] pointing to the DB under I/O
  character(lchlen) :: RT_DB_file_name(N_RT_databases) = " "
@@ -29,12 +34,14 @@ module RT_control
                                                      "none                      ",&
                                                      "OBSERVABLES               ",&
                                                      "G_lesser_RESTART          ",&
-                                                     "G_lesser                  ",&
+                                                     "G_lesser_RESTART_K_section",&
                                                      "carriers                  ",&
                                                      "THETA                     ",&
                                                      "REFERENCE                 ",&
-                                                     "G_lesser_RESTART_K_section",&
-                                                     "G_lesser_K_section        "/)
+                                                     "G_lesser                  ",&
+                                                     "G_lesser_K_section        ",&
+                                                     "V_bands                   ",&
+                                                     "V_bands_K_section         "/)
  integer                  :: NE_i_start_time    = 1   ! restart point
  integer                  :: last_it_evaluated  = 0   ! When RT_dynamics_jumps are non zero this is 
  !                                                    ! the last evaluated point
@@ -59,9 +66,14 @@ module RT_control
  type(RT_IO_t),save  :: OUTPUT_RT_IO_t
  type(RT_IO_t),save  :: Gless_RESTART_RT_IO_t
  type(RT_IO_t),save  :: Gless_RT_IO_t
+ type(RT_IO_t),save  :: Vbands_RT_IO_t
  !
  ! G_lesser I/O
  logical                  :: SAVE_G_history
+ logical                  :: SAVE_Vb_history
+ logical                  :: SAVE_Vb_floquet
+ integer                  :: Floquet_order
+ integer                  :: Vbands_IO_time(2)
  !
  ! J, P and M (spin) cache 
  !
@@ -77,6 +89,7 @@ module RT_control
  logical                 :: NEQ_Residuals=.FALSE.
  logical                 :: EQ_Transitions=.TRUE.
  logical                 :: EQ_NoOcc=.FALSE.
+ logical                 :: ALL_NoOcc=.FALSE.
  integer,      parameter :: N_RT_ctl_controls=3
  character(schlen)       :: RT_BSE_Occ_Mode="K"
  character(schlen)       :: RT_ctl_db(N_RT_ctl_controls)
@@ -132,12 +145,14 @@ integer function RT_return_db_ID(what)
      if (what=="M") RT_return_db_ID=4
      if (what=="OBSERVABLES") RT_return_db_ID=4
      if (what=="G_lesser_RESTART") RT_return_db_ID=5
-     if (what=="G_lesser") RT_return_db_ID=6
+     if (what=="G_lesser_RESTART_K_section") RT_return_db_ID=6
      if (what=="carriers") RT_return_db_ID=7
      if (what=="THETA") RT_return_db_ID=8
      if (what=="REFERENCE") RT_return_db_ID=9
-     if (what=="G_lesser_RESTART_K_section") RT_return_db_ID=10
+     if (what=="G_lesser") RT_return_db_ID=10
      if (what=="G_lesser_K_section") RT_return_db_ID=11
+     if (what=="V_bands") RT_return_db_ID=12
+     if (what=="V_bands_K_section") RT_return_db_ID=13
    end function
    !
    logical function RT_do_IO(what)
@@ -156,19 +171,23 @@ logical function RT_do_IO(what)
      if (what=="REFERENCE".and.RT_current_DB_identifier==9) RT_do_IO=.TRUE.
      !
      if (what=="DYN_VARS".and.(RT_current_DB_identifier>=4.and.RT_current_DB_identifier<=8)) RT_do_IO=.TRUE.
-     if (what=="DYN_VARS".and.(RT_current_DB_identifier==10.or.RT_current_DB_identifier==11)) RT_do_IO=.TRUE.
+     if (what=="DYN_VARS".and.(RT_current_DB_identifier>=10)                               ) RT_do_IO=.TRUE.
      if (what=="K_SECTION".and.(RT_current_DB_identifier==5.or.RT_current_DB_identifier==6&
-&                               .or.RT_current_DB_identifier==8.or.RT_current_DB_identifier==10&
-&                               .or.RT_current_DB_identifier==11)) RT_do_IO=.TRUE.
+&                               .or.RT_current_DB_identifier==8.or.RT_current_DB_identifier>=10)) RT_do_IO=.TRUE.
      !
      if (what=="ANY_G".and.(RT_current_DB_identifier==5.or.RT_current_DB_identifier==6)) RT_do_IO=.TRUE.
      if (what=="ANY_G".and.(RT_current_DB_identifier==10.or.RT_current_DB_identifier==11)) RT_do_IO=.TRUE.
-     if (what=="ANY_K_section".and.(RT_current_DB_identifier==10.or.RT_current_DB_identifier==11)) RT_do_IO=.TRUE.
+     if (what=="ANY_K_section".and.(RT_current_DB_identifier==6.or.RT_current_DB_identifier==11)) RT_do_IO=.TRUE.
      if (what=="G_lesser_RESTART".and.RT_current_DB_identifier==5) RT_do_IO=.TRUE.
-     if (what=="G_lesser".and.        RT_current_DB_identifier==6) RT_do_IO=.TRUE.
-     if (what=="G_lesser_RESTART_K_section".and.RT_current_DB_identifier==10) RT_do_IO=.TRUE.
+     if (what=="G_lesser_RESTART_K_section".and.RT_current_DB_identifier==6) RT_do_IO=.TRUE.
+     if (what=="G_lesser".and.        RT_current_DB_identifier==10) RT_do_IO=.TRUE.
      if (what=="G_lesser_K_section".and.        RT_current_DB_identifier==11) RT_do_IO=.TRUE.
      !
+     if (what=="ANY_Vb".and.(RT_current_DB_identifier==12.or.RT_current_DB_identifier==13)) RT_do_IO=.TRUE.
+     if (what=="ANY_K_section".and.      RT_current_DB_identifier==13) RT_do_IO=.TRUE.
+     if (what=="V_bands".and.            RT_current_DB_identifier==12) RT_do_IO=.TRUE.
+     if (what=="V_bands_K_section".and.  RT_current_DB_identifier==13) RT_do_IO=.TRUE.
+     !
    end function
    !
    subroutine RT_define_the_IO_DB_identifier(what)
@@ -178,12 +197,14 @@ subroutine RT_define_the_IO_DB_identifier(what)
           (what=="M" .or.what=="OBSERVABLES") .or. &
           (what=="D" .or.what=="OBSERVABLES") ) RT_current_DB_identifier=4
      if (what=="G_lesser_RESTART")              RT_current_DB_identifier=5
-     if (what=="G_lesser")  RT_current_DB_identifier=6
+     if (what=="G_lesser_RESTART_K_section")    RT_current_DB_identifier=6
      if (what=="carriers")  RT_current_DB_identifier=7
      if (what=="THETA")     RT_current_DB_identifier=8
      if (what=="REFERENCE") RT_current_DB_identifier=9
-     if (what=="G_lesser_RESTART_K_section")     RT_current_DB_identifier=10
+     if (what=="G_lesser")  RT_current_DB_identifier=10
      if (what=="G_lesser_K_section")             RT_current_DB_identifier=11
+     if (what=="V_bands")                        RT_current_DB_identifier=12
+     if (what=="V_bands_K_section")             RT_current_DB_identifier=13
    end subroutine
    !
    subroutine RT_control_alloc(what)
@@ -231,6 +252,9 @@ subroutine RT_control_alloc(what)
          Gless_RT_IO_t%Time=rZERO
        endif
        !
+       YAMBO_ALLOC(Vbands_RT_IO_t%Time,(NE_steps))
+       Vbands_RT_IO_t%Time=rZERO
+       !
      endif
      !
    end subroutine RT_control_alloc
@@ -250,6 +274,7 @@ subroutine RT_control_free(what)
        YAMBO_FREE(OBS_RT_IO_t%Time)
        YAMBO_FREE(CARR_RT_IO_t%Time)
        YAMBO_FREE(Gless_RESTART_RT_IO_t%Time)
+       YAMBO_FREE(Vbands_RT_IO_t%Time)
      endif
      !
    end subroutine RT_control_free
diff --git a/src/modules/mod_RT_lifetimes.F b/src/modules/mod_RT_lifetimes.F
index 972a45668f..49be3fe6f9 100644
--- a/src/modules/mod_RT_lifetimes.F
+++ b/src/modules/mod_RT_lifetimes.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM 
 !
+! headers
+!
+#include<y_memory.h>
+!
 module RT_lifetimes
  !
  use pars,           ONLY:SP,rZERO,schlen
  use drivers,        ONLY:l_elph_scatt,l_elel_scatt,l_elphoton_scatt,l_phel_scatt
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type RT_lifetime
    logical                  :: active=.FALSE.
diff --git a/src/modules/mod_RT_occupations.F b/src/modules/mod_RT_occupations.F
index 42b08c54fd..4f2b5b3bb4 100644
--- a/src/modules/mod_RT_occupations.F
+++ b/src/modules/mod_RT_occupations.F
@@ -5,22 +5,28 @@
 !
 ! Authors (see AUTHORS file for details): AM 
 !
+! headers
+!
+#include<y_memory.h>
+!
 module RT_occupations
  !
  use pars,           ONLY:SP,rZERO,schlen
+ use electrons,      ONLY:n_sp_pol
  use drivers,        ONLY:l_elph_scatt,l_elel_scatt,l_elphoton_scatt,&
 &                         l_phel_scatt,l_phel_corr
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type RT_occupation
    character(schlen)        :: KIND
    logical                  :: active
    integer                  :: D1(2)
    integer                  :: D2
-   real(SP),    allocatable :: N(:,:)
-   real(SP),    allocatable :: N_ref(:,:)
-   real(SP),    allocatable :: dN(:,:)
+   real(SP),    allocatable :: N(:,:,:)
+   real(SP),    allocatable :: N_ref(:,:,:)
+   real(SP),    allocatable :: dN(:,:,:)
  end type RT_occupation
  !
  type(RT_occupation) :: RT_el_occ,RT_ho_occ,RT_ph_occ,RT_life_occ
@@ -84,12 +90,12 @@ subroutine RT_occupation_alloc(KIND,OCC,D1,D2)
      OCC%D1=D1
      OCC%D2=D2
      if (KIND=="electrons".or.KIND=="life") then
-       YAMBO_ALLOC(OCC%dN,(D1(1):D1(2),D2))
+       YAMBO_ALLOC(OCC%dN,(D1(1):D1(2),D2,n_sp_pol))
        OCC%dN=rZERO
      endif
      if (KIND=="electrons".or.KIND=="holes".or.KIND=="phonons") then
-       YAMBO_ALLOC(OCC%N,(D1(1):D1(2),D2))
-       YAMBO_ALLOC(OCC%N_ref,(D1(1):D1(2),D2))
+       YAMBO_ALLOC(OCC%N,(D1(1):D1(2),D2,n_sp_pol))
+       YAMBO_ALLOC(OCC%N_ref,(D1(1):D1(2),D2,n_sp_pol))
        OCC%N=rZERO
        OCC%N_ref=rZERO
      endif
diff --git a/src/modules/mod_RT_output.F b/src/modules/mod_RT_output.F
index a45fca3f20..fb26102f01 100644
--- a/src/modules/mod_RT_output.F
+++ b/src/modules/mod_RT_output.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module RT_output_m
  !
  use drivers,    ONLY:l_elel_scatt,l_elph_scatt,l_elphoton_scatt
@@ -12,8 +16,9 @@ module RT_output_m
  use units,      ONLY:FS2AUT
  use global_XC,  ONLY:QP_SE_NONE
  use descriptors,ONLY:IO_desc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, parameter       :: N_MAX_RT_o_files=150
  integer, parameter       :: N_MAX_RT_X_order=9
diff --git a/src/modules/mod_R_lattice.F b/src/modules/mod_R_lattice.F
index 0dafcb238d..50fe17ebab 100644
--- a/src/modules/mod_R_lattice.F
+++ b/src/modules/mod_R_lattice.F
@@ -5,13 +5,18 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 module R_lattice
  !
  use pars,        ONLY:SP,DP,schlen, IP
  use descriptors, ONLY:IO_desc
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  integer, parameter:: MAX_kpts_to_report=50
  !
@@ -23,7 +28,7 @@ module R_lattice
  real(SP)          :: k_grid_b(3,3)
  logical           :: Xk_grid_is_uniform
  logical           :: Xk_grid_new_mapping
- character(schlen) :: k_GRIDS_string="X S B C"
+ character(schlen) :: k_GRIDS_string="X S B C minus_q"
  integer           :: k_GRIDS_vector(4)=(/1,1,1,1/)
  !
  ! Q/K-sampling
@@ -63,10 +68,16 @@ module R_lattice
    integer           :: nibz
    integer           :: nbz
    integer,  allocatable :: nstar(:)      ! n° of points in the star
-   integer,  allocatable :: star(:,:)     ! ik,ikstar --> is     is sends ik in ikstar
-   integer,  allocatable :: sstar(:,:)    ! ik_bz     --> ik,is  is sends ik in ikbz
-   integer,  allocatable :: s_table(:,:)  ! ik,is     --> sym@k_bz  revers of sstar(2)
-   integer,  allocatable :: k_table(:,:)  ! ik,is     --> ik_bz     revers of sstar(1)
+     ! Below "is" can be any symmetry operation, while is_used
+   integer,  allocatable :: star(:,:)     ! ik,ikstar --> is_used      is_used sends ik in ikstar
+   integer,  allocatable :: sstar(:,:)    ! ik_bz     --> ik,is_used   is_used sends ik in ikbz
+                                          !               istar        added dimension 3, index in the star
+   integer,  allocatable :: s_table(:,:)  ! ik,is     --> is_used      revers of sstar(2)
+   integer,  allocatable :: k_table(:,:)  ! ik,is     --> ik_bz        revers of sstar(1)
+   integer,  allocatable :: g_table(:,:)  ! ik,is     --> ig_index
+   integer,  allocatable :: grp_table(:,:)! ik,is_grp --> is      table of syms from small group to full list
+   integer,  allocatable :: grp_table_m1(:,:)! ik,is  --> is_grp  table of syms from full list to small group
+   integer,  allocatable :: grp_nsym(:)   ! ik        --> is number of syms in the small group
    real(SP), allocatable :: pt(:,:)      
    real(SP), allocatable :: ptbz(:,:)    
    real(SP), allocatable :: weights(:)   
@@ -95,6 +106,7 @@ module R_lattice
  ! Q/K sampling shadow tables
  !
  real(SP),allocatable :: k_pt(:,:)
+ integer ,allocatable :: k_sstar(:,:)
  real(SP),allocatable :: q_pt(:,:)
  integer ,allocatable :: q_sstar(:,:)
  !
@@ -124,13 +136,16 @@ module R_lattice
  integer              :: G_m_G_maxval   ! Maximum value of G_m_G
  integer ,allocatable :: ng_in_shell(:) ! Number of G in each shell
  integer ,allocatable :: g_rot(:,:)
+ real(SP),allocatable :: g_phs(:,:)
  integer ,allocatable :: G_m_G(:,:)
  integer ,allocatable :: minus_G(:)
+ integer ,allocatable :: G_mod_indx(:)
  real(SP),allocatable :: g_vec(:,:)
  real(SP),allocatable :: E_of_shell(:)  ! Energy associated at each shell
  ! GPUs
  real(SP),allocatable, target DEV_ATTR :: g_vec_d(:,:)
  integer ,allocatable DEV_ATTR :: g_rot_d(:,:)
+ real(SP),allocatable DEV_ATTR :: g_phs_d(:,:)
  integer ,allocatable DEV_ATTR :: G_m_G_d(:,:)
  integer ,allocatable DEV_ATTR :: minus_G_d(:)
  !
@@ -139,6 +154,7 @@ module R_lattice
  integer :: RIM_ng
  integer :: RIM_id_epsm1_reference
  integer :: RIM_n_rand_pts
+ integer :: RIM_sphe_n_rand_pts
  logical :: RIM_is_diagonal
  real(SP):: RIM_RL_vol
  real(SP):: RIM_epsm1(3)
@@ -160,7 +176,6 @@ module R_lattice
  ! Coulomb (including Cutoff)
  !
  real(SP)                :: cyl_ph_radius
- real(SP)                :: cyl_length
  real(SP)                :: box_length(3)
  real(SP)                :: cyl_cut
  real(SP)                :: ws_cutoff 
@@ -177,6 +192,10 @@ module R_lattice
  logical                 :: cut_is_slab
  integer                 :: idir(3)
  !
+ ! Environment effectice screening
+ !
+ real(SP)                :: eps_env
+ !
  ! BZ map by direction
  !
  type(bz_map) :: k_map
@@ -385,7 +404,39 @@ subroutine bz_samp_duplicate(BZi, BZo)
      endif
      !
    end subroutine bz_samp_duplicate
+   !
+   subroutine bz_samp_duplicate_nosymm(BZi, BZo)
+     type(bz_samp), intent(in)    :: BZi
+     type(bz_samp), intent(inout) :: BZo
+     integer :: ik
      !
+     BZo%nibz  = BZi%nbz
+     BZo%nbz   = BZi%nbz
+     BZo%units = BZi%units
+     BZo%description = BZi%description
+     !
+     YAMBO_ALLOC(BZo%pt,(BZo%nibz,3))
+     BZo%pt(:,:) = BZi%ptbz(:,:)
+     !
+     YAMBO_ALLOC(BZo%nstar,(BZo%nibz))
+     BZo%nstar=1
+     !
+     YAMBO_ALLOC(BZo%weights,(BZo%nibz))
+     BZo%weights=1._SP
+     !
+     YAMBO_ALLOC(BZo%star,(BZo%nibz,1))
+     BZo%star=1
+     !
+     YAMBO_ALLOC(BZo%sstar,(BZo%nbz,2))
+     do ik=1,BZo%nbz
+       BZo%sstar(ik,1)=ik
+       BZo%sstar(ik,2)=1
+     enddo
+     !
+     YAMBO_ALLOC(BZo%ptbz,(BZo%nbz,3))
+     BZo%ptbz=BZi%ptbz
+   end subroutine bz_samp_duplicate_nosymm
+   !
    subroutine bz_samp_duplicate_Fg(FGi,FGo,N_coarse)
      type(bz_fine_grid), intent(in)  ::FGi
      type(bz_fine_grid), intent(out) ::FGo
@@ -432,7 +483,6 @@ subroutine cutoff_presets()
      Box_length=0.
      cyl_ph_radius=0.
      box_length=0.
-     cyl_length=0.
      ws_cutoff=0.7
      cut_geometry='none'
      cut_description='none'
diff --git a/src/modules/mod_SC.F b/src/modules/mod_SC.F
index d9de937736..2cf8f146d2 100644
--- a/src/modules/mod_SC.F
+++ b/src/modules/mod_SC.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 module SC
  !
  use descriptors,     ONLY:IO_desc
- use pars,            ONLY:SP,schlen,lchlen
+ use pars,            ONLY:SP,LP,schlen,lchlen
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer           :: SC_bands(2)
  integer           :: SC_nbands
@@ -28,6 +33,9 @@ module SC
  real(SP)          :: SC_rho_threshold
  real(SP)          :: SC_cycle_mixing
  !
+ character(schlen)       :: SC_bands_frozen_ch
+ integer(LP), allocatable:: SC_bands_frozen(:)
+ !
  character(schlen) :: SC_preconditioning="simple"
  character(schlen) :: SC_potential
  logical           :: compatible_SC_DB
diff --git a/src/modules/mod_SLK.F b/src/modules/mod_SLK.F
index 0751309195..f10067180c 100644
--- a/src/modules/mod_SLK.F
+++ b/src/modules/mod_SLK.F
@@ -5,11 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
+! headers
+!
+#include<y_memory.h>
+!
 module SLK_m
  !
  use pars,       ONLY: SP
  use parallel_m, ONLY: yMPI_comm
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer              :: n_pools = 1 ! number of pools
  !
diff --git a/src/modules/mod_X.F b/src/modules/mod_X.F
index 2fd85b9bc8..07e227bad7 100644
--- a/src/modules/mod_X.F
+++ b/src/modules/mod_X.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include <dev_defs.h>
+#include <y_memory.h>
+!
 module X_m
  !
  use descriptors,    ONLY:IO_desc
@@ -13,9 +18,9 @@ module X_m
  use matrix,         ONLY:PAR_matrix
  use OUTPUT,         ONLY:N_MAX_columns
  use gpu_m,          ONLY:have_gpu
+ use y_memory_alloc
  !
-#include <dev_defs.h>
-#include <y_memory.h>
+ implicit none
  !
  ! Observables
  !
@@ -75,6 +80,8 @@ module X_m
  complex(SP), allocatable :: X_fxc(:)
  complex(SP), allocatable :: X_magnons(:,:,:)
  complex(SP), allocatable :: X_dichroism(:,:)
+ complex(SP), allocatable :: X_mespin(:,:)
+ complex(SP), allocatable :: X_meorb(:,:)
  !
  ! Response function
  !
@@ -157,7 +164,7 @@ module X_m
    integer               :: N_messages
    character(schlen)     :: messages(N_MAX_columns)
  end type
- integer, parameter     :: N_X_obs=16
+ integer, parameter     :: N_X_obs=18
  type(X_obs_t)          :: X_obs(N_X_obs) 
  !
  interface
diff --git a/src/modules/mod_collision_el.F b/src/modules/mod_collision_el.F
index 0c36964db9..a0c0864fff 100644
--- a/src/modules/mod_collision_el.F
+++ b/src/modules/mod_collision_el.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 module collision_el
  !
  !=====================
@@ -15,9 +20,9 @@ module collision_el
  use parallel_m,  ONLY:PP_indexes
  use qe_pseudo_m, ONLY:bec_type,deallocate_bec_type
  use gpu_m,       ONLY:have_gpu
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type elemental_collision
    !
@@ -160,6 +165,7 @@ subroutine elemental_collision_alloc(ggw,NG,GAMP_NG,GAMP_ROWS,GAMP_COLS,&
          !
        endif
        ggw%ngrho=NG
+       ggw%iqref=0
      endif
      !
      ! GAMP
diff --git a/src/modules/mod_collision_ext.F b/src/modules/mod_collision_ext.F
index 4acae16c12..b26e02ff8c 100644
--- a/src/modules/mod_collision_ext.F
+++ b/src/modules/mod_collision_ext.F
@@ -18,9 +18,11 @@ module collision_ext
  !
  integer :: COLL_bands(2)
  integer :: COLL_ID
- integer :: LAST_COLL_sync = 0
  integer :: PAR_COLL_min   = 0
  !
+ character(schlen)       :: COLL_bands_frozen_ch
+ integer(LP), allocatable:: COLL_bands_frozen(:)
+ !
  ! Cut off on exchange matrix elements
  ! Sx_cutoff=0 all, (Sx_cutoff>1 or Sx_cutoff<0)=none
  !
@@ -33,6 +35,7 @@ module collision_ext
  logical :: COLLISIONS_HXC_MB=.FALSE.
  logical :: COLLISIONS_HXC_use_TDDFT=.FALSE.
  logical :: COLLISIONS_CV_only=.FALSE.
+ logical :: COLLISIONS_from_BSE=.FALSE.
  logical :: COLLISIONS_load_SP=.FALSE.  ! Force loading of collision in single precision
  logical :: COLLISIONS_compr  =.FALSE. ! If parallel-io is used collisions in memory are compressed
  !
@@ -51,16 +54,16 @@ module collision_ext
  type COLLISIONS_element
    integer               :: I=0         ! State Index 
    integer               :: N=0         ! Linear Dimension
-   character,      allocatable  :: table(:,:,:)
+   character,      allocatable  :: table(:,:,:,:)
    complex(SP),  allocatable  :: v_c(:)      
    complex(SP6), allocatable  :: v_c_SP(:)    ! SEX and HF only in single precision
-   complex(SP), allocatable  :: v3_c(:,:,:)
+   complex(SP), allocatable  :: v3_c(:,:,:,:)
    real(SP)   , allocatable  :: v_r(:)       ! GW NEQ collisions
-   real(SP)   , allocatable  :: v3_r(:,:,:)
+   real(SP)   , allocatable  :: v3_r(:,:,:,:)
  end type
  !
  type COLLISIONS_group
-   integer               :: D(3,2)=0    ! Dimension (v3 of corresponding COLLISIONS_element)
+   integer               :: D(3,4)=0    ! Dimension (v3 of corresponding COLLISIONS_element)
    character(schlen)     :: name="none"
    integer               :: kind=0
    integer               :: N=0         ! # of collisions (dimension of linked COLLISIONS_element)
@@ -96,8 +99,8 @@ module collision_ext
  ! ====================================
  integer :: ng_oscll                  ! number of G-vectors in the Oscillators
  complex(SP), allocatable :: W(:,:,:) ! screened interaction
- complex(SP), allocatable :: OSCLL(:,:,:,:,:) ! Oscillators
- complex(SP), allocatable :: OSCLL_k(:,:,:,:) ! Oscillators in k, just for the evaluation
+ complex(SP), allocatable :: OSCLL(:,:,:,:,:,:) ! Oscillators
+ complex(SP), allocatable :: OSCLL_k(:,:,:,:)   ! Oscillators in (i_sp_pol,ik), just for the evaluation
  !
  contains
    !
diff --git a/src/modules/mod_drivers.F b/src/modules/mod_drivers.F
index 1939be3916..bff467c8ef 100644
--- a/src/modules/mod_drivers.F
+++ b/src/modules/mod_drivers.F
@@ -51,6 +51,14 @@ module drivers
  !
  logical :: l_elph_Hamiltonian   =.FALSE.
  !
+ ! Exciton-phonon specific flags
+ !
+ logical :: l_EXCPH_Sigma        =.FALSE.
+ logical :: l_EXCPH_gkkp         =.FALSE.
+ logical :: l_EXCPH_optics       =.FALSE.
+ logical :: l_EXCPH_lifetime     =.FALSE.
+ logical :: l_EXCPH_offdiago     =.FALSE.
+ !
  ! Real Axis non SC Self-energies
  !
  logical :: l_HF_and_locXC       =.FALSE.
diff --git a/src/modules/mod_electric.F b/src/modules/mod_electric.F
index 8f53a446cb..e7760acf31 100644
--- a/src/modules/mod_electric.F
+++ b/src/modules/mod_electric.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 module electric
  !
  use pars,       ONLY:SP,schlen,lchlen
  use electrons,  ONLY:levels
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  complex(SP)            :: NL_initial_P(3) ! in principle it can be different from zero
  !
@@ -28,9 +33,7 @@ module electric
  !
  ! Polarization
  !
- logical :: l_P_periodic ! Enforce periodicity in the polarization
- !
- logical  :: l_res_symms=.FALSE.  ! Do not use resisual symmetries (default False)
+ logical  :: l_no_res_symms=.FALSE.  ! Do not use resisual symmetries (default False)
  !
  contains
    !
diff --git a/src/modules/mod_electrons.F b/src/modules/mod_electrons.F
index 54a57dfdc7..6d4d472d81 100644
--- a/src/modules/mod_electrons.F
+++ b/src/modules/mod_electrons.F
@@ -5,19 +5,26 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module electrons
  !
  use pars,  ONLY:SP,rZERO
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  real(SP) :: nel
+ real(SP) :: nel_cond ! Number of electrons promoted in conduction (quasi-fermi distribution)
  real(SP) :: default_nel
  real(SP) :: spin_occ
  real(SP) :: filled_tresh
  integer  :: n_bands
  integer  :: n_met_bands(2)
  integer  :: n_full_bands(2)
+ real(SP) :: deg_threshold ! Threshold for degenerate levels
  !
  ! Spin support
  !
@@ -36,6 +43,11 @@ module electrons
  integer  :: n_spin_den
  logical  :: l_spin_orbit
  !
+ integer  :: i_spin_majority
+ integer  :: i_spin_minority
+ !
+ integer, parameter  :: n_max_deg=4
+ !
  ! Average magnetization density
  !
  logical                  :: eval_magn
@@ -260,6 +272,23 @@ subroutine FineGd_E_components_reset(E,FULL_RESET)
      !
    end subroutine
    !
+   subroutine get_spin_majority(E)
+     !
+     type(levels),intent(in) ::E
+     integer :: nb_ref
+     !
+     nb_ref=maxval(E%nbm)
+     !
+     i_spin_majority=1
+     i_spin_minority=n_spin
+     if(n_sp_pol==2) then
+       if(E%E(nb_ref,1,1)<=E%E(nb_ref,1,2)) i_spin_majority=1
+       if(E%E(nb_ref,1,1)> E%E(nb_ref,1,2)) i_spin_majority=2
+       i_spin_minority=mod(i_spin_majority,n_sp_pol)+1
+     endif
+     !
+   end subroutine
+   !
    integer function spin(v)
      integer :: v(:)
      !
diff --git a/src/modules/mod_fields.F b/src/modules/mod_fields.F
index c55d4837e0..11f4f99ab3 100644
--- a/src/modules/mod_fields.F
+++ b/src/modules/mod_fields.F
@@ -3,7 +3,7 @@
 !
 ! Copyright (C) 2006 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM, CA
+! Authors (see AUTHORS file for details): AM CA DS
 !
 ! External fields:
 !
@@ -22,8 +22,8 @@
 !      
 module fields
  !
- use pars,  ONLY:SP,DP,schlen,lchlen,pi,n_ext_fields_max
- use stderr,ONLY:STRING_split
+ use pars,  ONLY:SP,DP,schlen,lchlen,pi, &
+&                n_ext_fields_max,n_fields_defs_max
  use units, ONLY:FS2AUT
  !
  implicit none
@@ -38,15 +38,14 @@ module fields
    integer  :: t_initial_indx    ! time index of extern field switch on  (input variable)
    real(SP) :: t_final
    integer  :: t_final_indx      ! time index of extern field switch off (computed in RT_Fluence)
-   integer  :: n_frequencies
-   real(SP) :: frequency(2)
+   real(SP) :: frequency
    real(SP) :: width
+   real(SP) :: chirp
    real(SP) :: FWHM
    real(SP) :: fluence
    real(SP) :: pi_kind
    real(SP) :: intensity
    real(SP) :: amplitude
-   real(SP) :: W_step
    real(SP) :: versor(3)
    real(SP) :: versor_circ(3)
  end type ext_field
@@ -54,6 +53,11 @@ module fields
  integer               :: n_ext_fields    =0
  type(ext_field), save :: Efield(n_ext_fields_max)
  !
+ real(SP), allocatable :: field_from_file(:,:,:)
+ character(schlen)     :: field_from_file_fname(n_ext_fields_max)
+ real(SP)              :: field_from_file_dt
+ integer               :: field_from_file_steps
+ !
  ! Vector potential
  ! 
  type gauge_field
@@ -72,17 +76,23 @@ module fields
  contains
    !
    logical function field_is_ok(E_field)
-     use stderr,        ONLY:STRING_same
-     type(ext_field)   ::E_field
-     field_is_ok= any((/ STRING_same(E_field%ef_name,'SIN') ,&
-&                        STRING_same(E_field%ef_name,'SOFTSIN') ,& 
-&                        STRING_same(E_field%ef_name,'DELTA') ,& 
-&                        STRING_same(E_field%ef_name,'GAUSS') ,& 
-&                        STRING_same(E_field%ef_name,'THETA') ,& 
-&                        STRING_same(E_field%ef_name,'PULSE') ,& 
-&                        STRING_same(E_field%ef_name,'QSSIN') ,&
-&                        STRING_same(E_field%ef_name,'SPULSE'),&
-&                        STRING_same(E_field%ef_name,'QSFIELD') /)) .and. &
+     use stderr,        ONLY:STRING_same,STRING_split
+     type(ext_field), intent(in) :: E_field
+     character(schlen)   :: ef_name(n_fields_defs_max)
+     call STRING_split(E_field%ef_name,ef_name)
+     field_is_ok= any((/ STRING_same(ef_name(1),'SIN') ,&
+&                        STRING_same(ef_name(1),'SOFTSIN') ,&
+&                        STRING_same(ef_name(1),'DELTA') ,&
+&                        STRING_same(ef_name(1),'RECT')  ,&
+&                        STRING_same(ef_name(1),'RECTSIN'),&
+&                        STRING_same(ef_name(1),'GAUSS') ,&
+&                        STRING_same(ef_name(1),'THETA') ,&
+&                        STRING_same(ef_name(1),'PULSE') ,&
+&                        STRING_same(ef_name(1),'QSSIN') ,&
+&                        STRING_same(ef_name(1),'SPULSE'),&
+&                        STRING_same(ef_name(1),'PHHG')  ,&
+&                        STRING_same(ef_name(1),'QSFIELD'),&
+&                        STRING_same(ef_name(1),'FROM_FILE') /)) .and. &
 &                 any((/ STRING_same(E_field%ef_pol,'linear') ,&
 &                        STRING_same(E_field%ef_pol,'circular') /))
    end function
@@ -95,273 +105,16 @@ real(SP) function EtoT(E,T)
      if (present(E)) EtoT=2._SP*pi*Time_of(E,"fs")*FS2AUT
    end function
    !
-   real(SP) function field_frequency(E_field,i_f)
-     integer           ::i_f
-     type(ext_field)   ::E_field
-     field_frequency=E_field%frequency(1)
-     if (i_f>1.and.E_field%n_frequencies>1) field_frequency=E_field%frequency(1)+&
-&       (E_field%frequency(2)-E_field%frequency(1))/(E_field%n_frequencies-1)*(i_f-1)
-   end function
-   !
-   function small_a(T,E_field,order,envelop_only)
-     !
-     ! The vector potential is generally written as
-     !
-     !  order=0  A (t)=-cEo  a (t) theta(t)
-     !  order=1  A'(t)=-cEo (a'(t) theta(t)+a (t) delta(t))
-     !  order=2  A"(t)=-cEo (a"(t) theta(t)+a'(t) delta(t)-a(t) sign(t))
-     !
-     ! the functions theta,delta and sign can be the standard distributions
-     ! or more fancy functions that can mimic the distributions.
-     !
-     ! Note that A is evolved using A''(t) starting from A(0) and A'(0). 
-     !
-     use X_m,       ONLY:global_gauge
-     use pars,      ONLY:cI,cONE,cZERO,pi,schlen
-     use wrapper,   ONLY:FADEVA
-     !
-     type(ext_field), intent(inout)   ::E_field
-     !
-     real(SP), intent(in)  ::T
-     integer,  intent(in)  ::order
-     logical,  intent(in), optional :: envelop_only
-     !
-     ! Workspace
-     !
-     real(SP)          ::damp_func,a,b,c,sarg,WT,Tl
-     complex(SP), dimension(2) :: small_a
-     !
-     integer           ::i_fr,i_field,n_fields
-     real(SP)          ::T_0,W_0,sigma,WtimesT,EXPf,fr_shift(2),W_field,W_field_m1,cos_wt,sin_wt
-     complex(SP)       ::f_now,f_t(2),f0t,f1t,cZ,cS,exp_iwt,erfi_z
-     character(schlen) ::field_defs(3)
-     logical           ::envelop_only_
-     !
-     ! Zeroing
-     !
-     small_a=cZERO
-     f_t=cZERO
-     damp_func=1._SP
-     !
-     envelop_only_=.false.
-     if(present(envelop_only)) envelop_only_=envelop_only
-     !
-     if(trim(E_field%ef_pol)=="linear"  ) n_fields=1
-     if(trim(E_field%ef_pol)=="circular") n_fields=2
-     !
-     ! Field parameters
-     !
-     sigma=E_field%width
-     fr_shift(1)=0._SP
-     fr_shift(2)=pi/2._SP
-     !
-     field_defs=""
-     call STRING_split(trim(E_field%ef_name),field_defs)
-     !
-     select case( trim(field_defs(1)) )
-       case('STATIC','SIN','ANTIRES','RES','DELTA')
-         ! Fields which do not need T_0
-         W_0=0._SP
-         T_0=0._SP
-         damp_func=1._SP
-         ! DEBUG <
-         ! damping for sin 
-         !if (sigma>0._SP) then
-         !  T_0=5*sigma
-         !  damp_func=1._SP
-         !  if (T<T_0)             damp_func=exp( (T-T_0)/sigma)
-         !  if (T>NE_tot_time-T_0) damp_func=exp(-(T-NE_tot_time+T_0)/sigma)
-         !endif
-         ! DEBUG >
-       case('SOFTSIN','THETA')
-         ! Fields which do not need T_0 and with damp_func
-         W_0=0._SP
-         T_0=0._SP
-         a =  3._SP/sigma**4 
-         b = -8._SP/sigma**3
-         c =  6._SP/sigma**2
-         damp_func=1._SP
-         if(real(T)<sigma.and.sigma>0._SP) damp_func=(a*T**4 + b*T**3 + c*T**2) 
-       case('GAUSS','QSSIN','QSFIELD','PULSE','SPULSE')
-         ! Fields which need T_0
-         W_0=field_frequency(E_field,1)
-         T_0=pi/W_0*(real(nint(W_0/pi*3._SP*sigma),SP))
-         if(trim(field_defs(2))=="2SIGMA" .or. trim(field_defs(3))=="2SIGMA") T_0=2._SP*sigma
-         if(trim(field_defs(2))=="3SIGMA" .or. trim(field_defs(3))=="3SIGMA") T_0=3._SP*sigma
-         if(trim(field_defs(2))=="4SIGMA" .or. trim(field_defs(3))=="4SIGMA") T_0=4._SP*sigma
-         if(trim(field_defs(2))=="5SIGMA" .or. trim(field_defs(3))=="5SIGMA") T_0=5._SP*sigma
-     end select
-     !
-     E_field%To=T_0
-     !
-     do i_field=1,n_fields
-       do i_fr=1,max(1,E_field%n_frequencies)
-         !
-         W_field=field_frequency(E_field,i_fr)
-         W_field_m1=1._SP/W_field
-         WtimesT=W_field*(T-T_0)+fr_shift(i_field)
-         !
-         if (envelop_only_) then
-           f0t=cONE ; f1t=cONE
-         else
-           cos_wt=cos(WtimesT) ; sin_wt=sin(WtimesT); exp_iwt=cos_wt+cI*sin_wt
-           f0t=cmplx(cos_wt,0._SP) ; f1t=cmplx(sin_wt,0._SP)
-           if(trim(field_defs(2))=="ANTIRES")     then ; f0t= 0.5_SP*      exp_iwt  ; f1t=-cI*0.5_SP*      exp_iwt   ; endif
-           if(trim(field_defs(2))=="RES")         then ; f0t= 0.5_SP*conjg(exp_iwt) ; f1t= cI*0.5_SP*conjg(exp_iwt)  ; endif
-         endif
-         !
-         EXPf=exp(-(T-T_0)**2/(2._SP*sigma**2) )
-         !
-         select case( trim(field_defs(1)) )
-         case('STATIC')
-           if (order==0 ) f_now=T
-           if (order==1 ) f_now=1._SP
-           if (order==2 ) f_now=0._SP
-           !
-         case('SIN')
-           if (order==0 ) f_now=-damp_func*(f0t-1._SP)*W_field_m1
-           if (order==1 ) f_now=+damp_func* f1t
-           if (order==2 ) f_now=+damp_func* f0t       *W_field
-           !
-         case('SOFTSIN')
-           if (order==-1) f_now=-2
-           if (order== 0) f_now=-damp_func*(f0t-1._SP)*W_field_m1
-           if (order== 1) f_now=+damp_func* f1t
-           if (order== 2) f_now=+damp_func* f0t       *W_field
-           !
-         case('THETA')
-           if (order==0 ) f_now=damp_func*T
-           if (order==1 ) f_now=damp_func
-           if (order==2 ) f_now=0._SP
-           !
-         case('DELTA')
-           if (order==0 ) f_now=1._SP
-           if (order> 0 ) f_now=0._SP
-           if (order==-1) f_now=1._SP
-           !
-         case('PHHG')
-           sarg=pi*(T-T_0)/sigma     
-           WT  =W_field*T
-           if(T-T_0<=0.0.or.T-T_0>=sigma.and.order>0) then
-             f_now=0.0
-           elseif(T-T_0>=sigma.and.order==0) then
-             Tl=sigma+T_0
-             WT  =W_field*Tl
-             f_now=-(sigma*sin(((sigma*W_field+2*pi)*Tl-2*pi*T_0)/sigma))/(4*(sigma*W_field+2*pi))  & 
-&              -(sigma*sin(((sigma*W_field-2*pi)*Tl+2*pi*T_0)/sigma))/(4*(sigma*W_field-2*pi))+sin(WT)/(2*W_field)
-           else
-             if (order==0 ) f_now=-(sigma*sin(((sigma*W_field+2*pi)*T-2*pi*T_0)/sigma))/(4*(sigma*W_field+2*pi)) &
-&              -(sigma*sin(((sigma*W_field-2*pi)*T+2*pi*T_0)/sigma))/(4*(sigma*W_field-2*pi))+sin(WT)/(2*W_field)
-             if (order==1 ) f_now=sin(sarg)**2*cos(WT)
-             if (order==2 ) f_now=(2*pi*cos(WT)*cos(sarg)*sin(sarg))/sigma-W_field*sin(WT)*sin(sarg)**2
-           endif
-           !
-         case('GAUSS')
-           if (order==0 ) f_now= sigma*sqrt(pi/2._SP)* ( erf( (T-T_0)/(sigma*sqrt(2._SP)) )+1._SP )
-           if (order==1 ) f_now= Expf
-           if (order==2 ) f_now=-Expf*(T-T_0)/sigma**2
-           !
-         case('QSSIN')
-           !
-           ! W    =FADEVA function
-           ! W(-z)=exp(-z^2)*(1-  erf (i*z))
-           !      =exp(-z^2)*(1-i*erfi( z ))
-           !
-           ! ERFI(z)=i*(exp(z^2)*W(-z)-1)
-           !
-           cZ=(sigma**2*W_field-cI*(T-T_0))/(sigma*sqrt(2._SP))
-           cS=cmplx(W_field**2*sigma**2/2._SP,fr_shift(i_field),SP)
-           !           
-           if (order==0 .and. trim(global_gauge)=="velocity" ) erfi_z=-sqrt(pi/2._SP)*sigma/2._SP*         &
-&                                      cI*( (exp(      cZ **2-      cS )*FADEVA(-      cZ )-exp(-      cS )) + &
-&                                           (exp(conjg(cZ)**2-conjg(cS))*FADEVA(-conjg(cZ))-exp(-conjg(cS))) )
-           if (order==0 .and. trim(global_gauge)=="length" )   erfi_z=cZERO
-           !
-           if (order==0 ) f_now= erfi_z
-           if (order==1 ) f_now=                     f1t          *EXPf
-           if (order==2 ) f_now=(W_field*f0t-(T-T_0)*f1t/sigma**2)*EXPf
-           !
-         case('QSFIELD')
-           !
-           if (order==0 ) f_now=                      f1t          *EXPf
-           if (order==1 ) f_now=( W_field*f0t-(T-T_0)*f1t/sigma**2)*EXPf
-           if (order==2 ) f_now=(-W_field*f1t-f1t/sigma**2                           &
-                          &             -W_field*(T-T_0)*f1t/sigma**2                    &
-                          &             -(T-T_0)*(W_field*f0t-(T-T_0)*f1t/sigma**2)/sigma**2)*EXPf
-	   f_now=f_now/W_field
-           !
-         case('PULSE')
-           if(abs((T-T_0))< sigma) then
-             if (order==0 ) f_now= 0._SP
-             if (order==1 ) f_now=       (T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4 *f0t
-             if (order==2 ) f_now=(4._SP*(T-T_0 + sigma)    *(T-T_0 -sigma)**2/sigma**4)*f0t &
-&                                       -(1._SP*(T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4)*W_field*f1t
-           endif
-           !
-         case('SPULSE')
-           T_0=sigma
-           W_0=W_field
-           f_now=cZERO
-           if(abs((T-T_0))< sigma) then
-             if (order==0 ) f_now=(4.0*W_0*(T-T_0)*f0t*(-sigma**2*W_0**2+W_0**2*(T-T_0)**2-6.0)   &
-&              +f1t*(sigma**4*W_0**4-2.0*sigma**2*W_0**2*(W_0**2*(T-T_0)**2-2.0)+W_0**4*(T-T_0)**4-12.0* &
-&               W_0**2*(T -T_0)**2+24))/W_0**5/sigma**4+                                                          &
-&               ((4.0*W_0*sigma*f0t*(-sigma**2*W_0**2+W_0**2*sigma**2-6.0)                             &
-&              +f1t*(sigma**4*W_0**4-2.0*sigma**2*W_0**2*(W_0**2*sigma**2-2.0)+W_0**4*sigma**4-12.0*   &
-&               W_0**2*sigma**2+24))/W_0**5/sigma**4)                                                             
-             if (order==1 ) f_now=((T-T_0)**2 - sigma**2)**2/sigma**4*f0t
-             if (order==2 ) f_now=(2._SP*(T-T_0 + sigma)    *(T-T_0 -sigma)**2/sigma**4                 &
-&                                        +2._SP*(T-T_0 + sigma)**2 *(T-T_0 -sigma)   /sigma**4 )    *f0t &
-&                                        -      (T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4  *W_0*f1t
-           endif
-         end select
-         !
-         f_t(i_field)=f_t(i_field)+f_now
-         !
-       enddo
+   integer function get_field_file_index(filename)
+     character(*)      ::filename
+     integer           ::i1
+     get_field_file_index=-1
+     do i1=1,n_ext_fields_max
+       if ( trim(filename)/=trim(field_from_file_fname(i1)) ) cycle
+       get_field_file_index=i1
+       return
      enddo
-     !
-     small_a=f_t
-     !
-   end function small_a
-   !
-   complex(SP) function small_a_frequency(W_i,E_field,iErr)
-     !
-     use pars,      ONLY:cZERO,cI,pi,schlen
-     type(ext_field)   ::E_field
-     complex(SP)       ::W_i
-     integer           ::iErr
-     !
-     integer           ::i_f
-     real(SP)          ::W_0
-     complex(SP)       ::local_a(2)
-     character(schlen) ::field_defs(3)
-     !
-     iErr=-1
-     local_a=cZERO
-     !
-     field_defs=""
-     call STRING_split(trim(E_field%ef_name),field_defs)
-     !
-     do i_f=1,E_field%n_frequencies     
-       W_0=field_frequency(E_field,i_f)
-       select case( trim(field_defs(1)) )
-       case('SIN')
-         iErr=0
-         local_a(1)=local_a(1)+(1._SP/(W_i-W_0)                 -1._SP/W_0)/2._SP  ! RES
-         local_a(2)=local_a(2)+(               -1._SP/(W_i+W_0) -1._SP/W_0)/2._SP  ! ARES
-       case('DELTA')
-         iErr=0
-         local_a=1._SP/2._SP
-       end select
-     enddo
-     !
-     if(trim(field_defs(2))==    'RES') local_a(2)=0._SP
-     if(trim(field_defs(2))=='ANTIRES') local_a(1)=0._SP
-     !
-     small_a_frequency=local_a(1)+local_a(2)
-     !
-   end function small_a_frequency
+   end function
    !
    subroutine Efield_duplicate(E1,E2)
      type(ext_field) :: E1,E2
@@ -370,9 +123,8 @@ subroutine Efield_duplicate(E1,E2)
      E2%t_final=E1%t_final  
      E2%t_final_indx=E1%t_final_indx  
      E2%frequency=E1%frequency  
-     E2%n_frequencies=E1%n_frequencies  
-     E2%W_step=E1%W_step  
      E2%width=E1%width  
+     E2%chirp=E1%chirp
      E2%FWHM=E1%FWHM
      E2%amplitude=E1%amplitude  
      E2%intensity=E1%intensity  
@@ -391,13 +143,12 @@ subroutine Efield_reset(E)
      E%t_final      =0._SP
      E%t_final_indx =0
      E%frequency    =0._SP
-     E%n_frequencies=1
-     E%W_step       =0._SP
      E%amplitude    =0._SP
      E%intensity    =0._SP
      E%fluence      =0._SP
      E%pi_kind      =0._SP
      E%width        =0._SP
+     E%chirp        =0._SP
      E%ef_name      ='none'
      E%ef_pol       ='none'
      E%versor       =(/1._SP,0._SP,0._SP/)
@@ -430,13 +181,14 @@ end subroutine compute_envelop
    !
    complex(SP) function Divide_by_Field(Field,order)
      !
-     use pars, ONLY: cI,cONE,pi,schlen
+     use pars, ONLY: SP,cI,cONE,pi,schlen
+     use stderr,ONLY:STRING_split
      type(ext_field):: Field
      integer        :: order
      ! work space
      complex(SP) :: E_w
      real(SP)    :: T_0, sigma, W_0, T
-     character(schlen) ::field_defs(3)
+     character(schlen) ::field_defs(n_fields_defs_max)
      !
      call STRING_split(trim(Field%ef_name),field_defs)
      !
@@ -446,8 +198,8 @@ complex(SP) function Divide_by_Field(Field,order)
          if (order==0) Divide_by_Field =  4._SP*cONE/Field%amplitude**2
        case('QSSIN') !Approximate relations/does not work yet
          sigma=Field%width
-         T_0=10._SP*sigma
-         W_0=Field%frequency(1)
+         W_0=Field%frequency
+         T_0=pi/W_0*(real(nint(W_0/pi*3._SP*sigma),SP))
          T = 2._SP*pi/W_0
          E_w= sqrt(pi/2._SP)*Field%amplitude*sigma*exp(-cI*W_0*T_0)*&
               &(erf((T-T_0)/sqrt(2._SP)/sigma)+erf(T_0/sqrt(2._SP)/sigma))
@@ -532,4 +284,41 @@ real(SP) function Efield_strength(Intensity,unit_system)
      !
    end function
    !
-end module
+end module fields
+!
+!
+module fields_int
+ !
+ interface
+   !
+   function small_a(T,dt,E_field,order,envelop_only)
+     !
+     use pars,     ONLY:SP
+     use fields,   ONLY:ext_field
+     !
+     type(ext_field), intent(inout)   ::E_field
+     !
+     real(SP), intent(in)  ::T,dt
+     integer,  intent(in)  ::order
+     logical,  intent(in), optional :: envelop_only
+     !
+     complex(SP), dimension(2) :: small_a
+     !
+   end function small_a
+   !
+   function small_a_frequency(W_i,E_field,iErr)
+     !
+     use pars,     ONLY:SP
+     use fields,   ONLY:ext_field
+     !
+     type(ext_field)   ::E_field
+     complex(SP)       ::W_i
+     integer           ::iErr
+     !
+     complex(SP) :: small_a_frequency
+     !
+   end function small_a_frequency
+   !
+ end interface
+ !
+end module fields_int
diff --git a/src/modules/mod_frequency.F b/src/modules/mod_frequency.F
index ffae67f692..1bdc0aee4c 100644
--- a/src/modules/mod_frequency.F
+++ b/src/modules/mod_frequency.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module frequency
  !
  use pars,  ONLY:SP
  use units, ONLY:HA2EV
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Bare 2 Coarse grid
  !
diff --git a/src/modules/mod_functions.F b/src/modules/mod_functions.F
index bced9cf1bb..f1f080d3d8 100644
--- a/src/modules/mod_functions.F
+++ b/src/modules/mod_functions.F
@@ -106,13 +106,21 @@ pure function Lorentzian_func(x, damp)
      ! From Mathworld
      ! https://mathworld.wolfram.com/LorentzianFunction.html
      !
-     use pars, ONLY:pi
+     use pars, ONLY:DP,pi
      real(SP), intent(in) :: x,damp
      real(SP) :: Lorentzian_func
      !
-     Lorentzian_func=1.0/pi*(0.5*damp)/(x**2+(0.5*damp)**2)
+     ! Double-precision temporaries
+     real(DP) :: xd, dd, val
      !
-   end
+     xd = real(x, DP)
+     dd = real(damp, DP)
+     !
+     val = (0.5_DP * dd) / (xd*xd + (0.5_DP*dd)**2) / real(pi, DP)
+     !
+     Lorentzian_func = real(val, SP)
+     !  
+   end function Lorentzian_func
    !
    ! Fermi functions
    !-----------------
@@ -196,13 +204,23 @@ pure function bose_decay(E)
    ! Boltzman function
    !----------------
    !
-   pure function boltzman_f(Eb)
+   function boltzman_f(Eb)
      !
-     use D_lattice, ONLY:Bose_Temp
+     use D_lattice, ONLY:Boltz_Temp
+     use units,     ONLY:HA2EV,HA2KEL
      real(SP), intent(in):: Eb
      real(SP)            :: boltzman_f
      !
-     boltzman_f=exp(-Eb/Bose_Temp)
+     real(SP) :: E_small=0.005/HA2EV  ! Threashold for degenerate excitons
+     real(SP) :: T_small=1._SP/HA2KEL
+     !
+     if(abs(Eb)>E_small.and.abs(Boltz_Temp)>T_small) then
+       boltzman_f=exp(-Eb/Boltz_Temp)  ! Normal case
+     elseif(abs(Eb)>E_small.and.abs(Boltz_Temp)<=T_small) then
+       boltzman_f=0._SP  ! Case  E>0 but T=0
+     elseif(abs(Eb)<E_small) then
+       boltzman_f=1._SP  ! Limit E=0 
+     endif
      !
    end function
 
diff --git a/src/modules/mod_global_XC.F b/src/modules/mod_global_XC.F
index f6f65f7241..78520da335 100644
--- a/src/modules/mod_global_XC.F
+++ b/src/modules/mod_global_XC.F
@@ -186,7 +186,7 @@ subroutine  correct_global_XC(what,STRING)
     !
     if (what=='E'.or.what=='G')         G_E_xc_string=      trim(G_E_xc_string)//"+"//STRING
     if (what=='X') then
-      forall(i_X=1:size(X_E_xc_string)) X_E_xc_string(i_X)= trim(X_E_xc_string(i_X))//"+"//STRING
+      forall (i_X=1:size(X_E_xc_string)) X_E_xc_string(i_X)= trim(X_E_xc_string(i_X))//"+"//STRING
     endif
     if (what=='K')                      K_E_xc_string=      trim(K_E_xc_string)//"+"//STRING
     if (what=='WF')               loaded_WF_xc_string=trim(loaded_WF_xc_string)//"+"//STRING
diff --git a/src/modules/mod_gpu.F b/src/modules/mod_gpu.F
index 51bea8d72a..76b997270a 100644
--- a/src/modules/mod_gpu.F
+++ b/src/modules/mod_gpu.F
@@ -15,7 +15,11 @@ module gpu_m
   use devxlib_linalg
   use devxlib_async,     ONLY:devxlib_async2stream
 #if defined _GPU && defined _CUDA
+# if defined __GFORTRAN__
+  use cusolverdn_y
+# else
   use cusolverdn
+# endif
 #endif
 #if defined _CUDAF
   use cufft
diff --git a/src/modules/mod_hamiltonian.F b/src/modules/mod_hamiltonian.F
index 03b5a8b436..de0fda624f 100644
--- a/src/modules/mod_hamiltonian.F
+++ b/src/modules/mod_hamiltonian.F
@@ -5,16 +5,24 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module hamiltonian
  !
- use pars,       ONLY:SP,schlen
+ use pars,       ONLY:LP,SP,schlen
  use electrons,  ONLY:levels
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer           :: H_ref_bands(2)
  integer           :: H_ref_nbands
  !
+ character(schlen)       :: H_ref_bands_frozen_ch
+ integer(LP), allocatable:: H_ref_bands_frozen(:)
+ !
  character(schlen) :: H_potential
  !
  ! Possible Kinds, functionals and external perturbations
@@ -57,9 +65,19 @@ module hamiltonian
    integer function B_mat_index(ib1,ib2,nb)
      integer :: ib1,ib2,nb(2)
      !
+     ! This is identical to 
+     ! B_mat_index = matrix_index(ib1-nb(1)+1,ib2-nb(1)+1,nb(2)-nb(1)+1)
      B_mat_index=(ib1-nb(1))*(nb(2)-nb(1)+1)+ib2-nb(1)+1
      !
-   end function
+   end function B_mat_index
+   !
+   integer function B_mat_index_cv(ib1,ib2,nb1,nb2)
+     integer :: ib1,ib2,nb1(2),nb2(2)
+     !
+     B_mat_index_cv=(ib1-nb1(1))*(nb2(2)-nb2(1)+1)+ib2-nb2(1)+1
+     !
+   end function B_mat_index_cv
+   !
    !
    subroutine H_QP_table_setup(E)
      use drivers,        ONLY:l_use_collisions
diff --git a/src/modules/mod_interfaces.F b/src/modules/mod_interfaces.F
index 396a22f0ba..8bd539dee4 100644
--- a/src/modules/mod_interfaces.F
+++ b/src/modules/mod_interfaces.F
@@ -7,6 +7,8 @@
 !
 ! headers
 !
+#include<dev_defs.h>
+!
 #if defined _SLEPC && !defined _NL
 #include <petsc/finclude/petscsys.h>
 #include <petsc/finclude/petscvec.h>
@@ -17,8 +19,6 @@
 !
 module interfaces
  !
-#include<dev_defs.h>
- !
  implicit none
  !
  interface
@@ -111,7 +111,7 @@ subroutine LINEAR_ALGEBRA_driver(idriver,lib_in,M,M_slk,M_bse,B,B_slk,C,C_slk,V_
      !
    end subroutine
    !
-  subroutine YDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
+  subroutine LDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
     & neigs_this_cpu, neig_shift, neigs_range, eigvals_range, &
     & BS_VL, BS_overlap, solver_type, elpasolver)
     use pars,           ONLY:SP
@@ -127,7 +127,7 @@ subroutine YDIAGO_driver(i_BS_mat, BS_energies, BS_VR, &
     character, optional                :: solver_type
     integer,  optional                 :: elpasolver
     complex(SP),pointer,optional       :: BS_overlap(:,:)
-  end subroutine YDIAGO_driver
+  end subroutine LDIAGO_driver
 
    subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
      use pars,           ONLY:SP
@@ -175,9 +175,7 @@ end subroutine el_density_of_states
    subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
      !
      use pars,           ONLY:SP,schlen
-     use petscmatdef,    ONLY:tmat
-#include <petsc/finclude/petscsys.h>
-#include <petsc/finclude/petscmat.h>
+     use petscmatdef,    ONLY:tMat
      Mat                         :: M_slepc      ! matrix to diagonalize of stype from slepc
      integer                     :: n_eig        ! number of eigenvalues to compute
      complex(SP)                 :: V_right(:,:) ! right eigenvalues
@@ -188,10 +186,10 @@ subroutine MATRIX_slepc(M_slepc,n_eig,V_right,V_left,E_real,E_cmpl)
    end subroutine
 #endif
    !
-   integer function eval_G_minus_G(iG,iGo,force_recompute,COMM)
+   integer function eval_G_minus_G(iG,iGo,opr_in,COMM)
      use parallel_m,      ONLY:yMPI_comm
      integer :: iG,iGo
-     logical,         optional :: force_recompute
+     character(1),    optional :: opr_in
      type(yMPI_comm), optional :: COMM
    end function
    !
@@ -232,7 +230,8 @@ subroutine OCCUPATIONS_Gaps(E,E_g_dir,E_g_ind,N_f,N_m,I_dir,E_k_dir,E_k_ind)
    end subroutine 
    !
    subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_right_abs,BS_E_SOC_corr,&
-     &                                  BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,BS_R_PL)
+     &                                   BS_R_left_magn,BS_R_right_magn,BS_R_left_kerr,BS_R_right_kerr,BS_R_right_dich,&
+     &                                   BS_R_right_mespin,BS_R_right_meorb,BS_R_PL)
      use BS_solvers,    ONLY:BSS_n_eig
      use pars,          ONLY:SP
      use frequency,     ONLY:w_samp
@@ -242,7 +241,8 @@ subroutine K_diago_response_functions(iq,i_BS_mat,W,BS_E,BS_R_left_abs,BS_R_righ
      complex(SP), pointer, intent(in) :: BS_R_left_abs(:),BS_R_right_abs(:)
      real(SP),    pointer, intent(in) :: BS_E_SOC_corr(:,:)
      complex(SP), pointer, intent(in) :: BS_R_left_magn(:,:),BS_R_right_magn(:,:)
-     complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:)
+     complex(SP), pointer, intent(in) :: BS_R_left_kerr(:),BS_R_right_kerr(:),BS_R_right_dich(:,:), &
+                                       & BS_R_right_mespin(:,:),BS_R_right_meorb(:,:)
      real(SP),    pointer, intent(in) :: BS_R_PL(:,:)
    end subroutine
    !
@@ -253,7 +253,7 @@ subroutine K_diago_R_res(mode,i_BS_mat,BS_E, &
      character(*),intent(in)  :: mode
      integer,     intent(in)  :: i_BS_mat, neigs_this_cpu, neig_shift
      complex(SP), allocatable,         intent(in)  :: BS_E(:)
-     complex(SP), pointer,             intent(out) :: BS_R_right(:)
+     complex(SP), pointer,             intent(inout) :: BS_R_right(:)
      complex(SP), target, allocatable, intent(in)  :: BS_V_right(:,:)
    end subroutine K_diago_R_res
    !
@@ -264,7 +264,7 @@ subroutine K_diago_L_res(mode,i_BS_mat,BS_E, &
      character(*),intent(in)  :: mode
      integer,     intent(in)  :: i_BS_mat, neigs_this_cpu, neig_shift
      complex(SP), allocatable,         intent(in)  :: BS_E(:)
-     complex(SP), pointer,             intent(out) :: BS_R_left(:)
+     complex(SP), pointer,             intent(inout) :: BS_R_left(:)
      complex(SP), target, allocatable, intent(in)  :: BS_V_left(:,:)
      complex(SP), pointer, optional,   intent(in)  :: BS_Overlap(:,:) 
    end subroutine K_diago_L_res
@@ -278,12 +278,7 @@ end subroutine K_stored_in_a_big_matrix
    !
    subroutine K_diago_perturbative(pert_dim, what, i_BS_mat, &
     &           BS_corrections, BS_VR, neigs_this_cpu, neig_shift, BS_VL, BS_Overlap) 
-     use pars,           ONLY:SP,cZERO
-     use wrapper_omp,    ONLY:V_dot_V_omp
-     use parallel_int,   ONLY:PP_redux_wait,PARALLEL_index 
-     use LIVE_t,         ONLY:live_timing
-     use BS_solvers,     ONLY:BSS_eh_E_SOC_corr,BSS_eh_W,BSS_eh_E,BSS_n_eig
- 	  use BS,             ONLY:BS_K_coupling,BS_K_dim,BS_H_dim
+     use pars,           ONLY:SP
      implicit none
      integer,     intent(in)                   :: pert_dim
      character(3),intent(in)                   :: what
@@ -341,6 +336,16 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
      type(tddft_wf_t), target, intent(inout) :: tddft_wf
    end function TDDFT_ALDA_eh_space_R_kernel
    !
+   subroutine WF_symm_kpoint_g(b_to_load,ik,i_sp_pol,isymm,ig0,WF,ssop,isref,igref)
+     use pars,          ONLY:SP
+     use wave_func,     ONLY:WAVEs
+     integer,              intent(in)    :: b_to_load(2)
+     integer,              intent(in)    :: ik,i_sp_pol,isymm,ig0
+     type (WAVEs), target, intent(inout) :: WF
+     complex(SP) DEV_ATTR, optional, intent(in) :: ssop(2,2)
+     integer,              optional, intent(in) :: isref,igref
+   end subroutine WF_symm_kpoint_g
+   !
    subroutine WF_shift_kpoint(b_to_shift,nb_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_out)
      use pars,          ONLY:SP
      use electrons,     ONLY:n_spinor
diff --git a/src/modules/mod_interpolate_tools.F b/src/modules/mod_interpolate_tools.F
index 1452521bc2..e7e3791b21 100644
--- a/src/modules/mod_interpolate_tools.F
+++ b/src/modules/mod_interpolate_tools.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module interpolate_tools
  !
  use pars,         ONLY:SP,schlen,rZERO
@@ -12,8 +16,9 @@ module interpolate_tools
 &                       l_INTERP_W,NN_nk_exact,NN_nk_border,NN_MAX_real_nearest,NN_n_of_nearest,l_INTERP_Z,&
 &                       INTERPOLATE_msg_fmt,OUT_k_nk,INTERP_FineGd
  use electrons,    ONLY:n_sp_pol
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !  
  contains
    !
diff --git a/src/modules/mod_linear_algebra.F b/src/modules/mod_linear_algebra.F
index 9e7ca9ecaf..c313f75fe6 100644
--- a/src/modules/mod_linear_algebra.F
+++ b/src/modules/mod_linear_algebra.F
@@ -5,12 +5,24 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 module linear_algebra
  !
- use pars,       ONLY:SP,schlen
+ use iso_c_binding
+ use iso_fortran_env,  ONLY: int64
+ use pars,             ONLY: SP,schlen
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+#ifdef _MAGMA
+ use magma,            ONLY: magmaf_init,magmaf_queue_create,&
+&                            magmaf_cgeev_m,magmaf_zgeev_m,MagmaVec
+#endif
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer, parameter :: USE_LK=1
  integer, parameter :: USE_SLK=2
@@ -22,6 +34,16 @@ module linear_algebra
  integer, parameter :: LIN_SYS=8
  integer, parameter :: MAT_MUL=9
  integer, parameter :: min_cpu_block_size=50
+ !
+ ! magma vars
+ !
+ integer(int64)     :: magma_queue    !! magma_queue_t
+ logical            :: magma_init_done = .false.
+#if defined _MAGMA
+ logical, parameter :: have_magma=.true.
+#else
+ logical, parameter :: have_magma=.false.
+#endif
  !
  ! Common Work Space
  !
@@ -48,17 +70,156 @@ module linear_algebra
    integer    , allocatable DEV_ATTR :: vp_int_d(:)
    !
  end type LALGEBRA_WS
+
+ !
+ !===============
+ ! INTERFACES
+ !===============
+ !
+ ! Eigensolvers
+ !
+ interface la_xgeev
+   subroutine zgeev(jobvl,jobvr,n,A,lda,w,vl,ldvl,vr,ldvr,work,lwork,rwork,info)
+     use iso_fortran_env, ONLY : WP => real64
+     character(1) :: jobvl,jobvr 
+     integer      :: n,lda,ldvl,ldvr,lwork,info
+     complex(WP)  :: w(*), A(lda,*), vl(ldvl,*), vr(ldvr,*), work(*)
+     real(WP)     :: rwork(*)
+   end subroutine
+   subroutine cgeev(jobvl,jobvr,n,A,lda,w,vl,ldvl,vr,ldvr,work,lwork,rwork,info)
+     use iso_fortran_env, ONLY : WP => real32
+     character(1) :: jobvl,jobvr 
+     integer      :: n,lda,ldvl,ldvr,lwork,info
+     complex(WP)  :: w(*), A(lda,*), vl(ldvl,*), vr(ldvr,*), work(*)
+     real(WP)     :: rwork(*)
+   end subroutine
+ end interface
+ !
+ interface la_xheev
+   subroutine zheev(jobz,uplo,n,A,lda,w,work,lwork,rwork,info)
+     use iso_fortran_env, ONLY : WP => real64
+     character(1) :: jobz,uplo
+     integer      :: n,lda,lwork,info
+     complex(WP)  :: A(lda,*), work(*)
+     real(WP)     :: w(*), rwork(*)
+   end subroutine
+   subroutine cheev(jobz,uplo,n,A,lda,w,work,lwork,rwork,info)
+     use iso_fortran_env, ONLY : WP => real32
+     character(1) :: jobz,uplo
+     integer      :: n,lda,lwork,info
+     complex(WP)  :: A(lda,*), work(*)
+     real(WP)     :: w(*), rwork(*)
+   end subroutine
+ end interface
+ !
+!#ifdef _MAGMA
+! !
+! interface magmaf_xgeev_m
+!   procedure :: magmaf_cgeev_m,magmaf_zgeev_m
+! end interface
+! !
+! interface magmaf_xheevd_m
+!   procedure :: magmaf_cheevd_m,magmaf_zheevd_m
+! end interface
+! !
+!#endif
+ ! 
+ ! Singular Value Decomposition
+ !
+ interface la_xgesvd
+   subroutine zgesvd(jobu,jobvt,m,n,A,lda,s,U,ldu,VT,ldvt,work,lwork,rwork,info)
+     use iso_fortran_env, ONLY : WP => real64
+     character(1) :: jobu,jobvt
+     integer      :: m,n,lda,ldu,ldvt,lwork,info
+     complex(WP)  :: A(lda,*), U(ldu,*), VT(ldvt,*), work(*)
+     real(WP)     :: s(*), rwork(*)
+   end subroutine
+   subroutine cgesvd(jobu,jobvt,m,n,A,lda,s,U,ldu,VT,ldvt,work,lwork,rwork,info)
+     use iso_fortran_env, ONLY : WP => real32
+     character(1) :: jobu,jobvt
+     integer      :: m,n,lda,ldu,ldvt,lwork,info
+     complex(WP)  :: A(lda,*), U(ldu,*), VT(ldvt,*), work(*)
+     real(WP)     :: s(*), rwork(*)
+   end subroutine
+ end interface
  !
- public :: LU_factorization
- public :: LU_inversion
- public :: SV_decomposition
- public :: M_eigenvalues
+ ! Lower/Upper Factorization and Inverison
  !
- interface M_eigenvalues
-   module procedure heev,geev
+ interface la_xgetrf
+   subroutine zgetrf(m,n,A,lda,ipiv,info)
+     use iso_fortran_env, ONLY : WP => real64
+     integer      :: m,n,lda,info
+     integer      :: ipiv(*)
+     complex(WP)  :: A(lda,*) 
+   end subroutine
+   subroutine cgetrf(m,n,A,lda,ipiv,info)
+     use iso_fortran_env, ONLY : WP => real32
+     integer      :: m,n,lda,info
+     integer      :: ipiv(*)
+     complex(WP)  :: A(lda,*) 
+   end subroutine
  end interface
  !
+ interface la_xgetri
+   subroutine zgetri(n,A,lda,ipiv,work,lwork,info)
+     use iso_fortran_env, ONLY : WP => real64
+     integer      :: n,lda,info,lwork
+     integer      :: ipiv(*)
+     complex(WP)  :: A(lda,*), work(*)
+   end subroutine
+   subroutine cgetri(n,A,lda,ipiv,work,lwork,info)
+     use iso_fortran_env, ONLY : WP => real32
+     integer      :: n,lda,info,lwork
+     integer      :: ipiv(*)
+     complex(WP)  :: A(lda,*), work(*)
+   end subroutine
+ end interface
+ !
+ interface la_xgetrs
+   subroutine zgetrs(trans,n,nrhs,A,lda,ipiv,B,ldb,info)
+     use iso_fortran_env, ONLY : WP => real64
+     character(1) :: trans
+     integer      :: n,nrhs,lda,ldb,info
+     integer      :: ipiv(*)
+     complex(WP)  :: A(lda,*), B(ldb,*)
+   end subroutine
+   subroutine cgetrs(trans,n,nrhs,A,lda,ipiv,B,ldb,info)
+     use iso_fortran_env, ONLY : WP => real32
+     character(1) :: trans
+     integer      :: n,nrhs,lda,ldb,info
+     integer      :: ipiv(*)
+     complex(WP)  :: A(lda,*), B(ldb,*)
+   end subroutine
+ end interface
+
+
+ !
+ !====================
+ ! PUBLIC STATEMENTS
+ !====================
+ !
+!#ifdef _MAGMA
+! public :: magmaf_xgeev_m,magmaf_xheevd_m
+!#endif
+ public :: la_xgeev, la_xheev
+ public :: la_xgesvd
+ public :: la_xgetrf, la_xgetri, la_xgetrs
+ !
+ !
  contains
+   !
+   !============================
+   ! AUXILIARY FUNCTIONS
+   !============================
+   !
+   subroutine magma_setup()
+#ifdef _MAGMA
+     call magmaf_init()
+     call magmaf_queue_create( 0, magma_queue )
+     magma_init_done=.true.
+#endif
+     return
+   end subroutine
    !
    subroutine LINEAR_ALGEBRA_WS_reset(WS)
      use drivers,   ONLY:l_nl_optics,l_real_time
@@ -83,15 +244,6 @@ subroutine LINEAR_ALGEBRA_WS_reset(WS)
        YAMBO_FREE(WS%vp_int)
        YAMBO_FREE(WS%v_int)
      else
-       if(allocated(WS%v_real))   deallocate(WS%v_real)
-       if(allocated(WS%vp_real))  deallocate(WS%vp_real)
-       if(allocated(WS%v_cmplx))  deallocate(WS%v_cmplx)
-       if(allocated(WS%vp_cmplx)) deallocate(WS%vp_cmplx)
-       if(allocated(WS%m1_cmplx)) deallocate(WS%m1_cmplx)
-       if(allocated(WS%m2_cmplx)) deallocate(WS%m2_cmplx)
-       if(allocated(WS%vp_int))   deallocate(WS%vp_int)
-       if(allocated(WS%v_int))    deallocate(WS%v_int)
-       !
 #ifdef _CUDAF
        if(allocated(WS%v_real_d))   deallocate(WS%v_real_d)
        if(allocated(WS%vp_real_d))  deallocate(WS%vp_real_d)
@@ -108,6 +260,16 @@ subroutine LINEAR_ALGEBRA_WS_reset(WS)
        !
        !call error("[GPU] openacc FREE not implemented in LinAlg_WS_reset")
 #endif
+       !
+       if(allocated(WS%v_real))   deallocate(WS%v_real)
+       if(allocated(WS%vp_real))  deallocate(WS%vp_real)
+       if(allocated(WS%v_cmplx))  deallocate(WS%v_cmplx)
+       if(allocated(WS%vp_cmplx)) deallocate(WS%vp_cmplx)
+       if(allocated(WS%m1_cmplx)) deallocate(WS%m1_cmplx)
+       if(allocated(WS%m2_cmplx)) deallocate(WS%m2_cmplx)
+       if(allocated(WS%vp_int))   deallocate(WS%vp_int)
+       if(allocated(WS%v_int))    deallocate(WS%v_int)
+       !
      endif
      !
    end subroutine
@@ -118,94 +280,5 @@ subroutine LINEAR_ALGEBRA_error(calling_subr,message_)
      call error( trim( STRING_pack('LINEAR ALGEBRA driver [',trim(calling_subr),'] ',trim(message_)) ))
    end subroutine
    !
-   !============================
-   ! SINGLE VALUE DECOMPOSITION
-   !============================
-   !
-   subroutine SV_decomposition(msize, M, SV, U, VH, work, lwork, r_WK, ifail)
-     !
-     integer,    intent(in)   :: msize, lwork
-     integer,    intent(out)  :: ifail
-     !     
-     real(SP),   intent(out)  :: r_WK(*), SV(*)
-     complex(SP),intent(inout):: M(msize,*)
-     complex(SP),intent(out)  :: U(msize,*), VH(msize,*), work(*)
-     !
-#if defined _DOUBLE
-   call ZGESVD('S','A',msize,msize,M,msize,SV,U,msize,VH,msize,work,lwork,r_WK,ifail)
-#else
-   call CGESVD('S','A',msize,msize,M,msize,SV,U,msize,VH,msize,work,lwork,r_WK,ifail)
-#endif
-     !
-   end subroutine SV_decomposition
-   !
-   !=========================================
-   ! LOWER/UPPER FACTORIZATION and INVERISON
-   !=========================================
-   !
-   subroutine LU_factorization(msize,M,ipvt,ifail)
-     !
-     integer,     intent(in)   :: msize
-     integer,     intent(out)  :: ifail, ipvt(*)
-     complex(SP), intent(inout):: M(msize,*)
-     !
-#if defined _DOUBLE
-     call zgetrf(msize,msize,M,msize,ipvt,ifail)
-#else
-     call cgetrf(msize,msize,M,msize,ipvt,ifail)
-#endif
-     !
-   end subroutine LU_factorization
-   !
-   subroutine LU_inversion(msize,M,ipvt,work,lwork,ifail)
-     !
-     integer,     intent(in)   :: msize, lwork, ipvt(*)
-     integer,     intent(out)  :: ifail
-     complex(SP), intent(inout):: M(msize,*)
-     complex(SP), intent(out)  :: work(*)
-     !
-#if defined _DOUBLE
-     call zgetri(msize,M,msize,ipvt,work,lwork,ifail)
-#else
-     call cgetri(msize,M,msize,ipvt,work,lwork,ifail)
-#endif
-     !
-   end subroutine LU_inversion
-   !
-   !==============================
-   ! EIGENVALUES AND EIGENVECTORS 
-   !==============================
-   !
-   subroutine heev(msize,M,E_real,work,lwork,rwk,ifail)
-     !
-     integer,    intent(in)    :: msize,lwork
-     integer,    intent(out)   :: ifail
-     real(SP),   intent(out)   :: E_real(*), rwk(*)
-     complex(SP),intent(out)   :: work(*)
-     complex(SP),intent(inout) :: M(msize,*)
-     !
-#if defined _DOUBLE
-   call ZHEEV('V','U',msize,M,msize,E_real,work,lwork,rwk,ifail)
-#else
-   call CHEEV('V','U',msize,M,msize,E_real,work,lwork,rwk,ifail)
-#endif
-     !
-   end subroutine heev 
-   !
-   subroutine geev(msize,M,E_cmpl,V_left,V_right,work,lwork,rwk,ifail)
-     !
-     integer,    intent(in)    :: msize,lwork
-     integer,    intent(out)   :: ifail
-     real(SP),   intent(out)   :: rwk(*)
-     complex(SP),intent(out)   :: E_cmpl(*),V_left(msize,*),V_right(msize,*),work(*)
-     complex(SP),intent(inout) :: M(msize,*)
-     !
-#if defined _DOUBLE
-   call ZGEEV('V','V',msize,M,msize,E_cmpl,V_left,msize,V_right,msize,work,lwork,rwk,ifail)
-#else
-   call CGEEV('V','V',msize,M,msize,E_cmpl,V_left,msize,V_right,msize,work,lwork,rwk,ifail)
-#endif
-     !
-   end subroutine geev
-   !
 end module linear_algebra
+
diff --git a/src/modules/mod_logo.F b/src/modules/mod_logo.F
index 123f58bc55..099057e4ec 100644
--- a/src/modules/mod_logo.F
+++ b/src/modules/mod_logo.F
@@ -2,8 +2,9 @@
 ! License-Identifier: GPL
 !
 ! Copyright (C) 2008 The Yambo Team
+! Copyright (C) 2025 The Lumen Team
 !
-! Authors (see AUTHORS file for details): AM
+! Authors (see AUTHORS file for details): AM,AC
 !
 module LOGO
  !
@@ -38,7 +39,78 @@ integer function pickup_a_random(rand_range)
      pickup_a_random=int(dlaran(iseed(4:))*rand_range)
    end function
    !
-   subroutine pickup_a_logo(unit_)
+   subroutine pickup_a_lumen_logo(unit_)
+     !
+     use C_driver, ONLY:code_version,code_revision,code_hash,code_libraries,code_branch,&
+     &                  lumen_version
+     use pars,     ONLY:SP
+     use stderr,   ONLY:intc,slash
+     !
+     integer      :: unit_
+     ! 
+     ! Work Space
+     !
+     character(schlen) :: comment(4),ch
+     integer           :: max_length,i1,left_space,iA,ic
+     !
+     comment(1)="Fork "//trim(lumen_version)
+     !
+     comment(2)="Branch is "//trim(code_branch)
+     !
+     comment(3)=trim(code_libraries)//" Build"
+     !
+     comment(4)='http://www.lumen-code.org'
+     !
+     ID_logo_stderr=1
+     ID_logo       =1
+     !
+     n_logo_lines=9
+     !
+     logo_line( 1)='                                                     '
+     logo_line( 2)='$$\                                                  '
+     logo_line( 3)='$$ |                                                 '
+     logo_line( 4)='$$ |     $$\   $$\ $$$$$$\$$$$\   $$$$$$\  $$$$$$$\  '
+     logo_line( 5)='$$ |     $$ |  $$ |$$  _$$  _$$\ $$  __$$\ $$  __$$\ '
+     logo_line( 6)='$$ |     $$ |  $$ |$$ / $$ / $$ |$$$$$$$$ |$$ |  $$ |'
+     logo_line( 7)='$$ |     $$ |  $$ |$$ | $$ | $$ |$$   ____|$$ |  $$ |'
+     logo_line( 8)='$$$$$$$$\\$$$$$$  |$$ | $$ | $$ |\$$$$$$$\ $$ |  $$ |'
+     logo_line( 9)='\________|\______/ \__| \__| \__| \_______|\__|  \__|' 
+     !
+     ! Replace "S" with "\" and find the max length of 
+     ! the lines to center the two comments
+     !
+     do i1=1,n_logo_lines
+       iA=index(logo_line(i1),'S')
+       if (iA==0) cycle
+       ch=logo_line(i1)
+       do while (index(ch,'S')/=0) 
+         iA=index(ch,'S')
+         ch(iA:iA)=slash
+       enddo
+       logo_line(i1)=ch
+     enddo
+     !
+     max_length=-1
+     do i1=1,n_logo_lines
+       max_length=max( len_trim(logo_line(i1)) , max_length)
+     enddo
+     !
+     n_logo_lines=n_logo_lines+1
+     logo_line(n_logo_lines)=' '
+     !
+     do ic=1,4
+       n_logo_lines=n_logo_lines+1
+       logo_line(n_logo_lines)=trim(comment(ic))
+       left_space=(max_length-len_trim(comment(ic)))/2+1
+       if (left_space>0) then
+         write (logo_line(n_logo_lines),'(t'//trim(intc(left_space))//',a)') trim(comment(ic))
+       endif
+     enddo
+     !
+   end subroutine
+   !
+   !
+   subroutine pickup_a_yambo_logo(unit_)
      !
      use C_driver, ONLY:code_version,code_revision,code_hash,code_libraries,code_branch
      use pars,     ONLY:SP
diff --git a/src/modules/mod_matrix.F b/src/modules/mod_matrix.F
index 024f76abdd..086e6884a4 100644
--- a/src/modules/mod_matrix.F
+++ b/src/modules/mod_matrix.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 module matrix
  !
  use pars,       ONLY:schlen,SP
  use parallel_m, ONLY:yMPI_comm
  use devxlib,    ONLY:devxlib_mapped,devxlib_memset_d
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  ! Distributed Matrix structure
  !
@@ -31,7 +36,7 @@ module matrix
  !
  type PAR_matrix
    !
-   character(3) :: kind   ! "PAR" "SLK" "XUP" "XDN" "SER"
+   character(3) :: kind   ! "PAR" "SLK" "SER"
    !
    ! Dimensions
    integer :: I           ! Block element
diff --git a/src/modules/mod_matrix_operate.F b/src/modules/mod_matrix_operate.F
index e537d440b5..b09fd974cc 100644
--- a/src/modules/mod_matrix_operate.F
+++ b/src/modules/mod_matrix_operate.F
@@ -28,15 +28,15 @@ integer function UP_matrix_index(i1,i2)
      !
      ! Note that:
      !
-     !  UP_matrix_index(1,1)=2
-     !  UP_matrix_index(2,2)=3
-     !  UP_matrix_index(1,2)=4
-     !  UP_matrix_index(3,3)=5
-     !  UP_matrix_index(2,3)=6
-     !  UP_matrix_index(1,3)=7
+     !  UP_matrix_index(1,1)=1
+     !  UP_matrix_index(2,2)=2
+     !  UP_matrix_index(1,2)=3
+     !  UP_matrix_index(3,3)=4
+     !  UP_matrix_index(2,3)=5
+     !  UP_matrix_index(1,3)=6
      !
      integer :: i1,i2
-     UP_matrix_index=(i2**2+i2)/2-i1+2
+     UP_matrix_index=i2*(i2+1)/2-i1+1
      !
    end function
    !
diff --git a/src/modules/mod_memory.F b/src/modules/mod_memory.F
index 8852053b70..a8053957fe 100644
--- a/src/modules/mod_memory.F
+++ b/src/modules/mod_memory.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+!
 module y_memory
  !
  use pars,          ONLY:SP,schlen,IP,IPL,DP,LP,SP6,N_MEM_max,N_MEM_SAVE_max,&
@@ -12,7 +16,6 @@ module y_memory
  use openmp,        ONLY:master_thread
  use iso_c_binding, ONLY:c_int
  !
-#include<dev_defs.h>
  !
  implicit none
  !
@@ -156,7 +159,7 @@ subroutine MEM_element_init(MEM_element_to_init,size_)
    !
    character(schlen) function mem_string(MEM,where)
      use stderr,       ONLY:intc,real2ch
-     integer :: MEM !in Kb
+     real(SP)          :: MEM !in Kb
      integer, optional :: where
      !
      character(64) :: where_str
@@ -167,12 +170,12 @@ character(schlen) function mem_string(MEM,where)
        if (where==DEV_)  where_str="  Dev"
      endif
      !
-     if (MEM<1000) then
-       mem_string=trim(real2ch(real(MEM,SP)))//" [Kb]"
-     else if (MEM<1000000) then
-       mem_string=trim(real2ch(real(MEM,SP)/1000))//" [Mb]"
+     if (MEM<1000._SP) then
+       mem_string=trim(real2ch(MEM))//" [Kb]"
+     else if (MEM<1000000._SP) then
+       mem_string=trim(real2ch(MEM/1000._SP))//" [Mb]"
      else
-       mem_string=trim(real2ch(real(MEM,SP)/1000000))//" [Gb]"
+       mem_string=trim(real2ch(MEM/1000000._SP))//" [Gb]"
      endif
      mem_string=trim(mem_string)//trim(where_str)
    end function
@@ -556,3 +559,15 @@ subroutine MEM_l6(name,l)
      call MEM_manager_alloc(name,size(l,KIND=IPL),kind(l(1,1,1,1,1,1)),HOST_)
    end subroutine
 end module y_memory
+
+
+module y_memory_alloc
+#if defined _OPENACC || defined _OPENMP_GPU
+ use pars,         ONLY:IPL
+ use y_memory,     ONLY:MEM_err,MEM_msg,MEM_count,MEM_count_d,MEM_global_mesg,IPL
+ use devxlib,      ONLY:devxlib_map,devxlib_unmap,devxlib_mapped,devxlib_memcpy_h2d
+#else
+ use pars,         ONLY:IPL
+ use y_memory,     ONLY:MEM_err,MEM_msg,MEM_count,MEM_global_mesg
+#endif
+end module y_memory_alloc
diff --git a/src/modules/mod_mklfft_gpu.F b/src/modules/mod_mklfft_gpu.F
index 56feeecc3c..887fc25e97 100644
--- a/src/modules/mod_mklfft_gpu.F
+++ b/src/modules/mod_mklfft_gpu.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AF
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 #if defined _OPENMP_GPU && defined _MKLGPU
 !=====================================================================
 ! Driver to 3D FFT: FFTW, Goedecker
@@ -28,7 +33,6 @@
 !
 !=====================================================================
 !
-#include<dev_defs.h>
 include "mkl_dfti_omp_offload.f90"
 !
 module mklfft_gpu
@@ -42,8 +46,9 @@ module mklfft_gpu
 
 contains
 subroutine fft_3d_mklgpu(c_d,n,fft_sign,mklgpu_desc)
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer                :: fft_sign,n(3)
  type(dfti_descriptor), pointer :: mklgpu_desc
diff --git a/src/modules/mod_nl_optics.F b/src/modules/mod_nl_optics.F
index adb3e99839..446b6a3d49 100644
--- a/src/modules/mod_nl_optics.F
+++ b/src/modules/mod_nl_optics.F
@@ -5,15 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): MG AC
 !
+! headers
+!
+#include<y_memory.h>
+!
 module nl_optics
  !
  use descriptors,     ONLY:IO_desc
- use pars,            ONLY:SP,schlen,lchlen
+ use pars,            ONLY:SP,LP,schlen,lchlen
  use electrons,       ONLY:levels
- use real_time,       ONLY:NE_steps
- use R_lattice,       ONLY:bz_samp
+ use real_time,       ONLY:NE_steps,dG_lesser
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Long range correction field
  !
@@ -41,7 +45,7 @@ module nl_optics
  !
  ! Initial Energies and Damping
  !------------
- complex(SP), allocatable :: I_relax(:,:,:,:,:)
+ complex(SP), allocatable :: I_relax(:,:,:,:)
  real(SP),    allocatable :: E_full(:,:,:)
  !
  ! For TDDFT, V_xc_0 stores the V_xc at equilibrium
@@ -55,16 +59,13 @@ module nl_optics
  ! WFs and Density
  !
  ! Bands in terms of Kohn-Sham orbitals
- complex(SP), allocatable :: V_bands(:,:,:,:)  ! time-dependent valence bands
+ complex(SP), allocatable, target :: V_bands(:,:,:,:)  ! time-dependent valence bands
  ! 
  ! Time-dependent Valence bands in real-space
  complex(SP), allocatable :: VAL_BANDS(:,:,:,:)
  !
  real(SP),    allocatable :: full_rho(:)       ! density of all bands from 1:SC_bands(2)
  !
- ! Density matrix
- complex(SP), allocatable :: dG(:,:,:) 
- !
  ! Sigma sex at equilibrium (for oscillators)
  complex(SP), allocatable :: Sigma_SEX_EQ(:,:,:,:)
  !
@@ -128,6 +129,9 @@ module nl_optics
  logical  :: Loop_on_frequencies
  logical  :: Loop_on_angles
  !
+ character(schlen)       :: NL_bands_frozen_ch
+ integer(LP), allocatable:: NL_bands_frozen(:)
+ !
  ! Velocity gauge in indepdent particle
  !     
  logical :: l_velocity_IPA=.FALSE.
@@ -151,10 +155,6 @@ module nl_optics
  logical  :: l_eval_CURRENT  ! Evaluate current using the commutator v=[H,r] and the IP formulation
  logical  :: l_test_OSCLL    ! Test collisions built using OSCLLs vs standard Collisions
  !
- ! Wk operator at equilibrium 
- !
- complex(SP), allocatable :: Wk_EQ(:,:,:,:,:)
- !     
  ! IO variables
  !
  real(SP),     allocatable :: NL_P_t(:,:)
@@ -193,14 +193,14 @@ subroutine NL_alloc(en)
      !
      if(Correlation==TDDFT.or.Correlation==JGM) then
        YAMBO_ALLOC(V_xc_0,(fft_size,n_spin))
-     endif          
+     endif
      !
      if(l_use_Hxc_collisions) then
        YAMBO_ALLOC(RT_Vnl_xc,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol))
      endif          
      !
      if(eval_dG) then
-       YAMBO_ALLOC(dG,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk))
+       YAMBO_ALLOC(dG_lesser,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol,1))
      endif
      !
      ! Reference energies
@@ -238,15 +238,13 @@ subroutine NL_alloc_k_distributed(en)
      if(fft_size>0) then
        YAMBO_ALLOC(VAL_BANDS,(fft_size,maxval(en%nbf),PAR_Xk_nibz,n_sp_pol))
      endif
-     YAMBO_ALLOC(I_relax,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),maxval(en%nbf),PAR_Xk_nibz,n_sp_pol))
+     YAMBO_ALLOC(I_relax,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),PAR_Xk_nibz,n_sp_pol))
      YAMBO_ALLOC(Ho_plus_Sigma,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),PAR_Xk_nibz,n_sp_pol))
      !
      if(Correlation==LSEX.or.Correlation==LHF) then
        YAMBO_ALLOC(Sigma_SEX_EQ,(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),n_sp_pol,PAR_Xk_nibz))
      endif          
      !  
-     YAMBO_ALLOC(Wk_EQ,(3,NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),n_sp_pol,PAR_Xk_nibz))
-     !
    end subroutine NL_alloc_k_distributed
    !
    subroutine NL_free
@@ -270,10 +268,7 @@ subroutine NL_free
      YAMBO_FREE(V_xc_0)
      YAMBO_FREE(VAL_BANDS)
      !
-     if(.not.l_use_DIPOLES) then
-       call ELECTRIC_free()
-       YAMBO_FREE(Wk_EQ)
-     endif
+     if(.not.l_use_DIPOLES) call ELECTRIC_free()
      !
      YAMBO_FREE(NL_P_t)
      YAMBO_FREE(NL_J_t)
@@ -281,15 +276,11 @@ subroutine NL_free
      YAMBO_FREE(E_ext_t)
      YAMBO_FREE(E_ks_t)
      !	
-     if(eval_dG) then
-       YAMBO_FREE(dG)
-     endif
-     !
      if(l_use_Hxc_collisions) then
        YAMBO_FREE(RT_Vnl_xc)
      endif
      if(eval_dG) then
-       YAMBO_FREE(dG)
+       YAMBO_FREE(dG_lesser)
      endif
      !
      if(Correlation==LSEX.or.Correlation==LHF) then
diff --git a/src/modules/mod_parallel.F b/src/modules/mod_parallel.F
index 677f8e6c66..22332583ff 100644
--- a/src/modules/mod_parallel.F
+++ b/src/modules/mod_parallel.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module parallel_m
  !
  ! Collective Operations (from  http://linux.die.net/man/3/mpi_lor )
@@ -43,7 +47,9 @@ module parallel_m
 #endif
  !
 #if !defined _io_lib
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
 #endif
  !
 #if defined _MPI
@@ -197,6 +203,7 @@ module parallel_m
    type(PP_indexes)    :: IND_bz
    integer,allocatable :: ibz_index(:)
    integer,allocatable :: bz_index(:)
+   integer,allocatable :: bz_id(:)
    integer             :: nibz
    integer             :: nbz
    integer             :: comm_world
@@ -757,4 +764,3 @@ subroutine par_distribute_set(nsets, nsets_this_cpu, nset_shift, comm)
    end subroutine par_distribute_set
 !
 end module parallel_m
-!
diff --git a/src/modules/mod_parallel_interface.F b/src/modules/mod_parallel_interface.F
index eb9cf28b18..ad06f50767 100644
--- a/src/modules/mod_parallel_interface.F
+++ b/src/modules/mod_parallel_interface.F
@@ -97,7 +97,17 @@ subroutine PARALLEL_MATRIX_distribute(COMM,PAR_IND,nb,PAR_index,PAR_ID,PAR_n_ele
      integer, optional:: PAR_index(:,:)
      integer, optional:: PAR_ID
      integer, optional:: PAR_n_elements
-   end subroutine
+   end subroutine PARALLEL_MATRIX_distribute
+   !
+   subroutine PARALLEL_MATRIX_distribute_cv(COMM,PAR_IND,nb1,nb2,PAR_index,PAR_ID,PAR_n_elements)
+     use parallel_m,    ONLY:yMPI_comm,PP_indexes
+     type(yMPI_comm)   :: COMM
+     type(PP_indexes) :: PAR_IND
+     integer          :: nb1(2),nb2(2)
+     integer, optional:: PAR_index(:,:)
+     integer, optional:: PAR_ID
+     integer, optional:: PAR_n_elements
+   end subroutine PARALLEL_MATRIX_distribute_cv
    !
    subroutine PARALLEL_WF_index(COMM)
      use parallel_m,      ONLY:yMPI_comm
@@ -1280,3 +1290,12 @@ subroutine PP_snd_rcv_c1(mode,array,node,COMM,TAG)
    end subroutine
    !
 end module parallel_int
+
+
+module y_memory_alloc_par
+ use pars,         ONLY:IPL
+ use parallel_m,   ONLY:PAR_COM_HOST
+ use parallel_int, ONLY:PP_redux_wait,PP_wait
+ integer      :: isize_mem
+ integer(IPL) :: HOST_SIZE(1),LOCAL_SIZE(1)
+end module y_memory_alloc_par
diff --git a/src/modules/mod_pars.F b/src/modules/mod_pars.F
index 426c7f8969..442599ce0e 100644
--- a/src/modules/mod_pars.F
+++ b/src/modules/mod_pars.F
@@ -32,6 +32,7 @@ module pars
  !
  !...RT
  integer, parameter :: n_ext_fields_max=4
+ integer, parameter :: n_fields_defs_max=6
  !
  !...Input file analyze
  integer, parameter :: n_max_DB_IO_off_fields=20
@@ -42,10 +43,10 @@ module pars
  integer, parameter :: n_debugs_max=100
  ! 
  !...Output Files/Report File
- integer,parameter  :: max_n_ofs=1000
- integer,parameter  :: max_open_ofs=1000
- integer,parameter  :: repfile_index=1001
- integer,parameter  :: logfile_index=1002
+ integer,parameter  :: max_n_ofs=10000
+ integer,parameter  :: max_open_ofs=10000
+ integer,parameter  :: repfile_index=10001
+ integer,parameter  :: logfile_index=10002
  !
  !...I/O
  integer, parameter :: max_io_units=40
@@ -95,7 +96,7 @@ module pars
  ! STRINGs
  !=========
  integer, parameter  :: schlen=100
- integer, parameter  :: lchlen=300
+ integer, parameter  :: lchlen=500
  integer, parameter  :: msg_len=50
  integer, parameter  :: DB_menu_length=30+msg_len
  !
diff --git a/src/modules/mod_plasma.F b/src/modules/mod_plasma.F
index 310cc5c04e..3284bcb587 100644
--- a/src/modules/mod_plasma.F
+++ b/src/modules/mod_plasma.F
@@ -5,10 +5,16 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 module plasma
  !
  use pars, ONLY :SP
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  ! NOTE: Plasma_tab is on bg (big grid), plasma_tab_m1 points to cg (coarse_grid).
  !       Plasma_grid connects the two
diff --git a/src/modules/mod_pseudo.F b/src/modules/mod_pseudo.F
index 4ee2823c26..3e06f4843a 100644
--- a/src/modules/mod_pseudo.F
+++ b/src/modules/mod_pseudo.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS AF
 !
+! headers
+!
+#include <y_memory.h>
+!
 module pseudo
  !
  use pars,        ONLY:SP,DP,LP
@@ -13,7 +17,9 @@ module pseudo
  use D_lattice,   ONLY:n_atomic_species,n_atoms_species
  use qe_pseudo_m, ONLY:qe_pseudo_deallocate, qe_pseudo_allocate,&
 &                      bec_type, deallocate_bec_type
-#include <y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  ! *** Non-Local pseudo. The [r,Vnl] commutator ***
  !
@@ -37,12 +43,6 @@ module pseudo
 
  real(SP), allocatable:: pp_factor(:,:,:,:,:)
  !
- ! ABINIT (old) KB
- !=====================
- real(SP), allocatable:: pp_kbs_old(:,:)
- real(SP), allocatable:: pp_kb_old(:,:,:,:)
- real(SP), allocatable:: pp_kbd_old(:,:,:,:)
- !
  ! ABINIT and PWSCF KB
  !=====================
  real(SP), allocatable:: pp_kbs(:,:)
@@ -64,22 +64,7 @@ module pseudo
  integer,     allocatable:: qe_atoms_map(:,:)
  type(bec_type), allocatable:: becp(:,:)
  !
- ! PW (OLD)
- !=========
- complex(SP), allocatable :: Vnl(:,:,:,:,:)
- !
  contains
-   !
-   ! Abinit (OLD)
-   subroutine PP_alloc_abinit_old()
-     use R_lattice,  ONLY:ng_vec
-     YAMBO_ALLOC(pp_kbs_old,(n_atomic_species,pp_n_l_times_proj_max))
-     YAMBO_ALLOC(pp_kb_old,(ng_vec,n_atomic_species,pp_n_l_times_proj_max,n_sp_pol))
-     YAMBO_ALLOC(pp_kbd_old,(ng_vec,n_atomic_species,pp_n_l_times_proj_max,n_sp_pol))
-     pp_kbs_old =0._SP
-     pp_kb_old  =0._SP
-     pp_kbd_old =0._SP
-   end subroutine
    !
    ! Abinit
    subroutine PP_alloc_abinit()
@@ -103,12 +88,6 @@ subroutine PP_alloc_pwscf()
      pp_kbd =0._SP
    end subroutine
    !
-   subroutine PP_free_old()
-     YAMBO_FREE(pp_kbs_old)
-     YAMBO_FREE(pp_kb_old)
-     YAMBO_FREE(pp_kbd_old)
-   end subroutine
-   !
    subroutine PP_free()
      YAMBO_FREE(pp_kbs)
      YAMBO_FREE(pp_kb)
diff --git a/src/modules/mod_real_time.F b/src/modules/mod_real_time.F
index 48674f4e87..060b18f247 100644
--- a/src/modules/mod_real_time.F
+++ b/src/modules/mod_real_time.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM MG DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 module real_time
  !
  use drivers,        ONLY:l_elel_scatt,l_elph_scatt,l_elphoton_scatt,l_phel_scatt
@@ -15,8 +19,9 @@ module real_time
  use RT_lifetimes,   ONLY:RT_obj_lifetime,RT_obj_lifetime_alloc,RT_obj_lifetime_free
  use RT_occupations, ONLY:RT_obj_occupation,RT_obj_occupation_alloc,RT_obj_occupation_free
  use electrons,      ONLY:E_fine_grid
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! CORE LOGICALS
  !
@@ -49,6 +54,7 @@ module real_time
  logical          :: l_P_integrating_J
  logical          :: l_velocity_gauge_corr
  logical          :: l_velocity_gauge_diam
+ logical          :: l_length_grad_k
  !
  ! Specific flag for when Yambo libraries are called to perform TRabs via YPP
  !
@@ -75,6 +81,7 @@ module real_time
  !
  integer  :: NE_i_time=1 ! = NE_time/RT_step+1
  integer  :: NE_steps    ! Still relative to RT_step
+ integer  :: NE_i_last_field
  !
  ! Lifetimes Extrapolation Time status
  !
@@ -134,6 +141,8 @@ module real_time
  integer                 :: RT_nk
  integer                 :: RT_bands(2)
  integer                 :: RT_nbands
+ character(schlen)       :: RT_bands_frozen_ch
+ integer(LP), allocatable:: RT_bands_frozen(:)
  !
  ! Equilibrium V_xc and V_hartree in real space
  ! Presently REF_V_hartree_sc is not used
@@ -182,9 +191,10 @@ module real_time
  !
  ! GFs
  !-----
- complex(SP), allocatable :: G_lesser(:,:,:,:)          ! Band, Band, K, Time
- complex(SP), allocatable :: dG_lesser(:,:,:,:)
- complex(SP), allocatable :: G_lesser_reference(:,:,:)  ! Band, Band, K
+ complex(SP), allocatable :: G_lesser(:,:,:,:,:)          ! Band, Band, K, Spin, Time
+ complex(SP), allocatable :: dG_lesser(:,:,:,:,:)
+ complex(SP), allocatable :: dG_avg(:,:,:,:)
+ complex(SP), allocatable :: G_lesser_reference(:,:,:,:)  ! Band, Band, K, Spin
  complex(SP), allocatable :: I1_matrix(:,:)
  !
  ! RT_Hamiltonian
@@ -192,6 +202,7 @@ module real_time
  !
  complex(SP), allocatable :: Ho_plus_Sigma(:,:,:,:)
  complex(SP), allocatable :: H_EQ(:,:,:,:)
+ complex(SP), allocatable :: H_field(:,:,:,:)
  complex(SP), allocatable :: H_pseudo_eq(:,:,:,:)
  complex(SP), allocatable :: RT_Vnl_xc(:,:,:,:)
  !
@@ -210,6 +221,7 @@ module real_time
  logical  :: l_RT_RWA
  logical  :: l_RT_ACC
  logical  :: l_RT_PERT
+ logical  :: l_RT_FRES
  logical  :: l_RT_DIAG
  !
  logical  :: l_RT_SIMPLE
@@ -219,6 +231,7 @@ module real_time
  !
  integer  :: Integrator_step
  integer  :: Integrator_nsteps
+ integer  :: Integrator_nfrac
  integer  :: Integrator_exp_order
  !
  real(SP) :: a_tableau(4)
diff --git a/src/modules/mod_vec_operate.F b/src/modules/mod_vec_operate.F
index c27c697af2..4647b164ed 100644
--- a/src/modules/mod_vec_operate.F
+++ b/src/modules/mod_vec_operate.F
@@ -90,6 +90,30 @@ real(SP) function iku_v_norm(v,inside_bz)
      iku_v_norm=sqrt(dot_product(u,u))
      !
    end function iku_v_norm
+   !
+   real(SP) function iku_v_2D_norm(v,inside_bz)
+     !
+     ! shifted in the BZ if inside_bz is set to true, ak2bz=|k|
+     !
+     use D_lattice,      ONLY:alat
+     real(SP), intent(in)           :: v(3)
+     logical,  intent(in), optional :: inside_bz
+     !
+     real(SP) :: u(3)
+     logical  :: inside_bz_
+     !
+     inside_bz_=.false.
+     if(present(inside_bz)) inside_bz_=inside_bz
+     !
+     u(:)=v(:)
+     if(inside_bz_) call k2bz(v_in=u)
+     !
+     u(1:2)=u(1:2)*2._SP*pi/alat(1:2)
+     u(3)=0._SP
+     iku_v_2D_norm=sqrt(dot_product(u,u))
+     !
+   end function iku_v_2D_norm
+
    !
    real(SP) function rlu_v_norm(k,inside_bz) 
      !
@@ -190,7 +214,7 @@ function axis_rotation(axis, vec, angle)
      !
      U_cross=rZERO
      forall (i=1:3,j=1:3) U_cross(i,j) = axis(i)*axis(j)
-     R=R+(1.0-cos(angle))*U_cross
+     R=R+(1.0_SP-cos(angle))*U_cross
      !
      axis_rotation=matmul(R,vec)
      !
@@ -565,7 +589,9 @@ subroutine r_sort(arrin,arrout,indx,indx_m1,r_zero)
      if (present(arrout)) arrout=l_arrout
      if (.not.present(arrout)) arrin=l_arrout
      if (present(indx)) indx=l_indx
-     if (present(indx_m1)) forall( i=1:n) indx_m1(l_indx(i))=i
+     if (present(indx_m1)) then
+       forall (i=1:n) indx_m1(l_indx(i))=i
+     endif
      deallocate(l_indx,l_arrout)
    end subroutine r_sort
    !
@@ -664,12 +690,14 @@ subroutine i_sort(arrin,arrout,indx,indx_m1)
      if (     present(arrout) ) arrout=l_arrout
      if (.not.present(arrout) ) arrin =l_arrout
      if (     present(indx)   ) indx  =l_indx
-     if (     present(indx_m1)) forall( i=1:n) indx_m1(l_indx(i))=i
+     if (     present(indx_m1)) then
+       forall ( i=1:n) indx_m1(l_indx(i))=i
+     endif
      deallocate(l_indx,l_arrout)
    end subroutine i_sort
    !
-   subroutine degeneration_finder(E,n,first_deg_el,deg_elmnts,deg_grps,&
-&                                 deg_accuracy,Include_single_values)
+   subroutine degeneration_finder(n,first_deg_el,deg_elmnts,deg_grps,&
+&                                 Ei,Er,deg_accuracy,Include_single_values)
      !
      ! Assumes energy are sorted
      !
@@ -682,14 +710,15 @@ subroutine degeneration_finder(E,n,first_deg_el,deg_elmnts,deg_grps,&
      !
      integer, intent(in)  :: n
      integer, intent(out) :: first_deg_el(n),deg_elmnts(n),deg_grps
-     real(SP),intent(in)  :: E(n),deg_accuracy
+     integer, intent(in),optional  :: Ei(n)
+     real(SP),intent(in),optional  :: Er(n),deg_accuracy
      logical, optional    :: Include_single_values
      !
      ! Work Space
      !
-     integer :: iref,i1
+     integer :: iref,i1,E_diff_i(2)
      real(SP):: E_diff(2)
-     logical :: l_flag,l_singles
+     logical :: l_flag,l_singles,l_check
      !
      deg_grps=0
      first_deg_el=0
@@ -698,18 +727,37 @@ subroutine degeneration_finder(E,n,first_deg_el,deg_elmnts,deg_grps,&
      l_singles=.FALSE.
      if (present(Include_single_values)) l_singles=Include_single_values
      !
+     if( (.not.(present(deg_accuracy).and.present(Er))) .and. .not. present(Ei) ) then
+       call error(" deg finder called without Er nor Ei")
+     endif
+     !
      iref=1
      !
      do while (iref < n ) 
        !
        if (l_singles) then
          !
-         E_diff=2._SP*deg_accuracy
-         !
-         if (deg_grps>0) E_diff(1)=abs(E(iref)-E(first_deg_el(deg_grps)))
-         if (iref<n    ) E_diff(2)=abs(E(iref)-E(iref+1))
+         if(present(deg_accuracy).and.present(Er)) then
+           !
+           E_diff=2._SP*deg_accuracy
+           !
+           if (deg_grps>0) E_diff(1)=abs(Er(iref)-Er(first_deg_el(deg_grps)))
+           if (iref<n    ) E_diff(2)=abs(Er(iref)-Er(iref+1))
+           !
+           l_check=all(E_diff>=deg_accuracy)
+           !
+         else if( present(Ei) ) then
+           !
+           E_diff=-1
+           !
+           if (deg_grps>0) E_diff_i(1)=Ei(iref)-Ei(first_deg_el(deg_grps))
+           if (iref<n    ) E_diff_i(2)=Ei(iref)-Ei(iref+1)
+           !
+           l_check=all(E_diff_i==0)
+           !
+         endif
          !
-         if (all(E_diff>=deg_accuracy)) then
+         if (l_check) then
            deg_grps=deg_grps+1
            first_deg_el(deg_grps)=iref
            deg_elmnts(deg_grps)=1
@@ -721,7 +769,10 @@ subroutine degeneration_finder(E,n,first_deg_el,deg_elmnts,deg_grps,&
        !
        do i1=iref+1,n
          !
-         if (abs(E(iref)-E(i1))<deg_accuracy) then
+         if(present(deg_accuracy).and.present(Er)) l_check=abs(Er(iref)-Er(i1))<deg_accuracy
+         if(                          present(Ei)) l_check=    Ei(iref)-Ei(i1) ==0
+         !
+         if (l_check) then
            if (l_flag) then
              deg_grps=deg_grps+1
              first_deg_el(deg_grps)=iref
@@ -742,7 +793,10 @@ subroutine degeneration_finder(E,n,first_deg_el,deg_elmnts,deg_grps,&
      if(deg_grps> 0) i1=first_deg_el(deg_grps)
      if(deg_grps==0) i1=1
      !
-     if(l_singles .and.  abs(E(n)-E(i1))>deg_accuracy) then
+     if(present(deg_accuracy).and.present(Er)) l_check=abs(Er(n)-Er(i1)) > deg_accuracy
+     if(                          present(Ei)) l_check=    Ei(n)-Ei(i1) /=0
+     !
+     if(l_singles .and. l_check) then
        deg_grps=deg_grps+1
        first_deg_el(deg_grps)=n
        deg_elmnts(deg_grps)=1
diff --git a/src/modules/mod_wave_func.F b/src/modules/mod_wave_func.F
index d7eca59c88..d4b5e75a8b 100644
--- a/src/modules/mod_wave_func.F
+++ b/src/modules/mod_wave_func.F
@@ -3,16 +3,22 @@
 !
 ! Copyright (C) 2006 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM DS
+! Authors (see AUTHORS file for details): AM DS AF
+!
+! headers
+!
+#include <dev_defs.h>
+#include <y_memory.h>
+#include <yambo_wfs.h>
 !
 module wave_func
  !
- use pars,           ONLY:SP
+ use pars,           ONLY:SP,schlen
  use devxlib,        ONLY:devxlib_mapped
  use gpu_m,          ONLY:have_gpu
+ use y_memory_alloc
  !
-#include <dev_defs.h>
-#include <y_memory.h>
+ implicit none
  !
  type WAVEs 
    integer              :: b(2)      ! band range 
@@ -20,13 +26,21 @@ module wave_func
    integer              :: sp_pol(2) ! sp_pol_range
    integer              :: N         ! states 
    character(1)         :: space
-   complex(SP), allocatable :: c(:,:,:)
-   integer    , allocatable :: index(:,:,:)
-   logical    , allocatable :: state(:,:,:)
-   logical                  :: to_load = .TRUE.
-   complex(SP), allocatable DEV_ATTR :: c_d(:,:,:)
+   WF_RSPACE(SP), allocatable :: r(:,:,:)
+   complex(SP),   allocatable :: c(:,:,:)
+   integer    ,   allocatable :: index(:,:,:)
+   logical    ,   allocatable :: state(:,:,:)
+   logical                    :: to_load = .TRUE.
+   WF_RSPACE(SP), allocatable DEV_ATTR :: r_d(:,:,:)
+   complex(SP),   allocatable DEV_ATTR :: c_d(:,:,:)
  end type WAVEs
  !
+#if defined _GAMMA_ONLY
+ real(SP), parameter    :: wfZERO=0._SP
+#else
+ complex(SP), parameter :: wfZERO=cmplx(0._SP,0._SP)
+#endif
+ !
  ! Buffered WFs
  !
  type (WAVEs), save :: WF_buffer
@@ -39,6 +53,7 @@ module wave_func
  ! Real components ?
  !
  logical                :: real_wavefunctions
+ character(schlen)      :: WF_load_mode="all"   ! "all"/"on-the-fly"/"on-the-fly-kq"
  !
  ! Bands block size
  !
@@ -54,11 +69,13 @@ module wave_func
  !
  ! Max Num. of G-VECTORS for the WFs maxval(wf_igk) > wf_ncx
  !
- integer                :: wf_ng          ! For WFs in the    IBZ
+ integer                :: wf_ng          ! For WFs in the    IBZ (may be later overwritten, e.g. by io_COL_CUT)
  integer                :: wf_ng_1st_BZ   ! For WFs in the 1st BZ
  integer                :: wf_ng_overlaps ! For WFs shifted used by the overlaps
  integer                :: cutoff_ng      ! For G-vectors used in cutoff (if read, may be different from wf_ng)
  !
+ real(SP)               :: E_of_WFs
+ !
  ! Wave function derivatives
  !
  complex(SP), allocatable :: wf_x(:,:,:)
@@ -73,12 +90,24 @@ module wave_func
  !
  integer,    allocatable:: wf_nc_k(:)
  !
+ ! Tho map to reduce fft_size
+ !
+ real(SP)               :: rho_map_thresh
+ integer                :: rho_map_size
+ integer,    allocatable:: rho_map(:)
+ integer,    allocatable:: rho_map_inv(:)
+ !
  ! Table correspondance G-vec <-> Components: G_ic = wf_igk(ic,ik)
  !
  integer,    allocatable:: wf_igk(:,:)
  integer,    allocatable DEV_ATTR :: wf_igk_d(:,:)
  logical                :: wf_norm_test
  !
+ ! Wave function phases
+ !
+ integer,     allocatable :: WF_phases_b_map(:,:,:,:)
+ complex(SP), allocatable :: WF_phases(:,:,:,:,:)
+ !
  interface
    !
    integer function io_WF(ID,wf)
@@ -87,21 +116,14 @@ integer function io_WF(ID,wf)
      complex(SP), optional :: wf(:,:,:)
    end function
    !
-   function WF_symm(ifft,isc)
+   integer function io_WF_phases(nb,ik,istark,i_sp_pol,ID,nsz,WF_phases)
      use pars,        ONLY: SP
-     use electrons,   ONLY: n_spinor
-     integer             :: ifft,isc(4)
-     complex(SP),dimension(n_spinor) :: WF_symm
-   end function
-   !
-#if defined _GPU
-   function WF_symm_gpu(ifft,isc)
-     use pars,        ONLY: SP
-     use electrons,   ONLY: n_spinor
-     integer             :: ifft,isc(4)
-     complex(SP),dimension(n_spinor) DEV_ATTR :: WF_symm_gpu
-   end function
-#endif
+     use electrons,   ONLY: n_max_deg
+     use D_lattice,   ONLY: nsym
+     integer               :: nb(2),ik,istark,i_sp_pol,ID
+     integer, intent(in)   :: nsz(5)
+     complex(SP), optional :: WF_phases(:,:,:,:,:)
+   end function io_WF_phases
    !
  end interface 
  !
@@ -112,7 +134,6 @@ subroutine WF_copy(WF_in,WF_out)
      type(WAVEs):: WF_in,WF_out
      logical :: lpres
      !
-     YAMBO_ALLOC_MOLD(WF_out%c,WF_in%c)
      YAMBO_ALLOC(WF_out%index,(WF_in%b(1):WF_in%b(2),WF_in%k(1):WF_in%k(2),WF_in%sp_pol(1):WF_in%sp_pol(2)))
      !
      WF_out%b      = WF_in%b
@@ -121,14 +142,28 @@ subroutine WF_copy(WF_in,WF_out)
      WF_out%N      = WF_in%N
      WF_out%space  = WF_in%space
      WF_out%to_load= WF_in%to_load
-     WF_out%c      = WF_in%c
      WF_out%index  = WF_in%index
      !
+     if (allocated(WF_in%c)) then
+       YAMBO_ALLOC_MOLD(WF_out%c,WF_in%c)
+       WF_out%c=WF_in%c
+     endif
+     if (allocated(WF_in%r)) then
+       YAMBO_ALLOC_MOLD(WF_out%r,WF_in%r)
+       WF_out%r=WF_in%r
+     endif
+     !
      lpres=have_gpu.and.devxlib_mapped(DEV_VAR(WF_in%c))
 #ifdef _GPU
      if (lpres) then
        YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(WF_out%c),WF_in%c)
      endif
+#endif
+     lpres=have_gpu.and.devxlib_mapped(DEV_VAR(WF_in%r))
+#ifdef _GPU
+     if (lpres) then
+       YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(WF_out%r),WF_in%r)
+     endif
 #endif
    end subroutine
    !
diff --git a/src/modules/mod_wrapper.F b/src/modules/mod_wrapper.F
index 6398c6842b..c1a717d3db 100644
--- a/src/modules/mod_wrapper.F
+++ b/src/modules/mod_wrapper.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): CA AF
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 module wrapper
  !
  ! To remember:
diff --git a/src/modules/mod_zeros.F b/src/modules/mod_zeros.F
index 1a98a1755b..3149160078 100644
--- a/src/modules/mod_zeros.F
+++ b/src/modules/mod_zeros.F
@@ -55,9 +55,13 @@ subroutine define_zeros(vector_,ref_value,zero_,RLU)
        allocate(tmp_arr(size(vector_,1)))
        do ic=1,3
          if (allocated(vector_rlu)) then
-           forall(i1=1:nv) tmp_arr(i1)=abs(vector_rlu(i1,ic))
+           do i1=1,nv 
+             tmp_arr(i1)=abs(vector_rlu(i1,ic))
+           enddo
          else
-           forall(i1=1:nv) tmp_arr(i1)=abs(vector_(i1,ic))
+           do i1=1,nv 
+             tmp_arr(i1)=abs(vector_(i1,ic))
+           enddo
          endif
          call sort(tmp_arr)
          zero_(ic)=zero_dfl
diff --git a/src/nloptics/.objects b/src/nloptics/.objects
index 0f742a4561..d67e16b2e1 100644
--- a/src/nloptics/.objects
+++ b/src/nloptics/.objects
@@ -1,7 +1,8 @@
 #if defined _NL
 objs = NL_driver.o NL_initialize.o NL_start_and_restart.o el_density_vbands.o NL_Integrator.o \
-       NL_damping.o NL_test_collisions.o DIPOLE_symmetrize.o \
+       NL_damping.o NL_test_collisions.o \
        NL_Hamiltonian.o NL_output.o DIP_polarization.o NL_average_operator.o EXC_macroscopic_JGM.o \
        NL_databases_IO.o NL_build_dG_lesser.o NL_build_valence_bands.o EXP_step.o \
-       NL_average_operator.o NL_current.o NL_Berry_current.o EXP_op.o INVINT_step.o RK_basestep.o 
+       NL_average_operator.o NL_current.o NL_Berry_current.o EXP_op.o INVINT_step.o RK_basestep.o \
+       NL_carriers_number.o
 #endif
diff --git a/src/nloptics/DIPOLE_symmetrize.F b/src/nloptics/DIPOLE_symmetrize.F
deleted file mode 100644
index fb72796089..0000000000
--- a/src/nloptics/DIPOLE_symmetrize.F
+++ /dev/null
@@ -1,80 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2025 The Yambo Team
-!
-! Authors (see AUTHORS file for details): CA
-!                  
-subroutine DIPOLE_symmetrize(k)
- !
- use pars,           ONLY:SP,cI,cZERO
- use nl_optics,      ONLY:NL_bands
- use R_lattice,      ONLY:bz_samp
- use electrons,      ONLY:n_sp_pol
- use D_lattice,      ONLY:kpoints_map,old_dl_sop,old_S_contains_TR
- use DIPOLES,        ONLY:DIP_iR
- use IO_int,         ONLY:io_control
- use IO_m,           ONLY:REP,OP_RD_CL,DUMP
- use com,            ONLY:msg
- use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_Xk_ibz_index,PAR_COM_Xk_ibz_INDEX
- use parallel_int,   ONLY:PP_redux_wait
- !
-#include<y_memory.h>
- !     
- type(bz_samp), intent(in)  :: k
- !
- ! Work space
- !
- integer, external :: io_full_SYMMs
- integer           :: io_old_SYMMs,ID
- integer           :: ik,ib1,ib2,i_sp,ik_old,is,ik_mem
- complex(SP),allocatable :: DIP_tmp(:,:,:,:,:)
- !     
- ! Load old full symmetries if present
- !
- call io_control(ACTION=OP_RD_CL,COM=REP,MODE=DUMP,SEC=(/1/),ID=ID)
- io_old_SYMMs=io_full_SYMMs(k,ID)
- !
- if(io_old_SYMMs/=0) return
- !
- YAMBO_ALLOC(DIP_tmp,(3,NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),k%nibz,n_sp_pol))
- !
- ! I need dipoles in all cores
- !
- DIP_tmp=cZERO
- do ik=1,k%nibz
-   if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
-   ik_mem=PAR_Xk_ibz_index(ik)
-   DIP_tmp(:,:,:,ik,:)=DIP_iR(:,:,:,ik_mem,:) 
- enddo
- call PP_redux_wait(DIP_tmp,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
- !
- ! Loop on k-points and application of symmetries to the dipole
- !
- call msg('rs','Dipoles symmetrized with full symmetries of the system') 
- !
- do ik=1,k%nibz
-   ! 
-   if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
-   ik_mem=PAR_Xk_ibz_index(ik)
-   ik_old=kpoints_map(1,ik)
-   is=kpoints_map(2,ik)
-   ! 
-   if(is==1) continue
-   !
-   do i_sp=1,n_sp_pol
-   do ib2=NL_bands(1),NL_bands(2)
-   do ib1=NL_bands(1),NL_bands(2)
-     DIP_iR(:,ib1,ib2,ik_mem,i_sp)=matmul(old_dl_sop(:,:,is),DIP_tmp(:,ib1,ib2,ik_old,i_sp)) 
-   enddo
-   enddo
-   if(old_S_contains_TR(is)==1) DIP_iR(:,:,:,ik_mem,i_sp)=conjg(DIP_iR(:,:,:,ik_mem,i_sp))
-   enddo
-   !     
- enddo 
- !
- YAMBO_FREE(kpoints_map)
- YAMBO_FREE(old_dl_sop)
- YAMBO_FREE(DIP_tmp)
- !  
-end subroutine DIPOLE_symmetrize
diff --git a/src/nloptics/DOUBLE_project.dep b/src/nloptics/DOUBLE_project.dep
index ab54f3ef9b..68b0c91045 100644
--- a/src/nloptics/DOUBLE_project.dep
+++ b/src/nloptics/DOUBLE_project.dep
@@ -1,4 +1,3 @@
- DIPOLE_symmetrize.o
  DIP_polarization.o
  EXC_macroscopic_JGM.o
  EXP_op.o
@@ -10,6 +9,7 @@
  NL_average_operator.o
  NL_build_dG_lesser.o
  NL_build_valence_bands.o
+ NL_carriers_number.o
  NL_current.o
  NL_damping.o
  NL_databases_IO.o
diff --git a/src/nloptics/EXP_step.F b/src/nloptics/EXP_step.F
index 2bda482b36..9cb1d90527 100644
--- a/src/nloptics/EXP_step.F
+++ b/src/nloptics/EXP_step.F
@@ -13,7 +13,7 @@ subroutine EXP_step(K_out,V_in,H_in,I_in,nbf,dT)
   complex(SP), intent(in)  :: V_in(NL_bands(2),nbf)
   complex(SP), intent(out) :: K_out(NL_bands(2),nbf)
   complex(SP), intent(in)  :: H_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
-  complex(SP), intent(in)  :: I_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nbf)
+  complex(SP), intent(in)  :: I_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
   real(SP),    intent(in)  :: dT
   !
   ! Work Space
@@ -27,7 +27,7 @@ subroutine EXP_step(K_out,V_in,H_in,I_in,nbf,dT)
 !$omp parallel do default(shared), private(ib,U,H)
   do ib=1,nbf
     !
-    H=H_in+I_in(:,:,ib)
+    H=H_in+I_in
     !
     call EXP_op(U,H,dT)
     !
diff --git a/src/nloptics/INVINT_step.F b/src/nloptics/INVINT_step.F
index e9a0898438..65d002f8fd 100644
--- a/src/nloptics/INVINT_step.F
+++ b/src/nloptics/INVINT_step.F
@@ -18,7 +18,7 @@ subroutine INVINT_step(V_out,V_in,H_in,I_in,nbf,dT)
   complex(SP), intent(in)  :: V_in(NL_bands(2),nbf)
   complex(SP), intent(out) :: V_out(NL_bands(2),nbf)
   complex(SP), intent(in)  :: H_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
-  complex(SP), intent(in)  :: I_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nbf)
+  complex(SP), intent(in)  :: I_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
   real(SP),    intent(in)  :: dT
   !
   ! Workspace
@@ -38,11 +38,11 @@ subroutine INVINT_step(V_out,V_in,H_in,I_in,nbf,dT)
   do ib=1,nbf
     !
     Hp=cZERO
-    forall(i1=NL_bands(1):NL_bands(2)) Hp(i1,i1)=cONE
+    forall (i1=NL_bands(1):NL_bands(2)) Hp(i1,i1)=cONE
     Hm=Hp
     !
-    Hm=Hm-cI*dT/2._SP*(H_in+I_in(:,:,ib))
-    Hp=Hp+cI*dT/2._SP*(H_in+I_in(:,:,ib))
+    Hm=Hm-cI*dT/2._SP*(H_in+I_in)
+    Hp=Hp+cI*dT/2._SP*(H_in+I_in)
     !
     if (l_use_inv) then
       !
diff --git a/src/nloptics/NL_Berry_current.F b/src/nloptics/NL_Berry_current.F
index 407abbcd48..2312b6dbe6 100644
--- a/src/nloptics/NL_Berry_current.F
+++ b/src/nloptics/NL_Berry_current.F
@@ -28,7 +28,7 @@ subroutine NL_Berry_current(en,Xk,k_map,V_bands,NL_J)
  use parallel_int,   ONLY:PP_redux_wait
  use QP_m,           ONLY:QP_nk
  use nl_optics,      ONLY:NL_bands
- use electric,       ONLY:l_res_symms,l_force_SndOrd
+ use electric,       ONLY:l_no_res_symms,l_force_SndOrd
  !
  implicit none           
  !
@@ -84,12 +84,8 @@ subroutine NL_Berry_current(en,Xk,k_map,V_bands,NL_J)
      Nperpend=k_map%max_kdir(id2)*k_map%max_kdir(id3)       
      !
      max_step=1
-! 
-!    Using higher order for the k-derivative gives a worst result
-!    when compared with dP/dt for this reason this line is commented
-!
-    if(k_map%max_kdir(id_red)>=6.and..not.l_force_SndOrd) max_step=2
-!
+     ! 
+     ! Using higher order for the k-derivative gives a worst result
      !
      do ikbz=1,nXkbz
        !
@@ -97,20 +93,26 @@ subroutine NL_Berry_current(en,Xk,k_map,V_bands,NL_J)
        !
        do istep=1,max_step
          call Build_tilde_vbands(en,Xk,ikbz,isp,istep,id_red,V_bands(:,:en%nbf(isp),:,isp),V_tilde)
+!$OMP PARALLEL WORKSHARE  
          delta_V_tilde(:,:,istep)=(V_tilde(:,:,1)-V_tilde(:,:,2))/dble(istep)
+!$OMP END PARALLEL WORKSHARE  
        enddo
        !
        ik = Xk%sstar(ikbz,1)
        !
        if(max_step==1) then
+!$omp parallel do default(shared), private(iv), reduction(+:NL_J_red)
          do iv=1,en%nbf(isp)
            NL_J_red(id_red)=NL_J_red(id_red)+sum(VH_bands(:,iv,ik,isp)*delta_V_tilde(iv,:,1))
          enddo
+!$omp end parallel do
        elseif(max_step==2) then
+!$omp parallel do default(shared), private(iv), reduction(+:NL_J_red)
          do iv=1,en%nbf(isp)
            NL_J_red(id_red)=NL_J_red(id_red)+sum(VH_bands(:,iv,ik,isp) &
 &					    *(4._SP*delta_V_tilde(iv,:,1)-delta_V_tilde(iv,:,2)))/3._SP
          enddo
+!$omp end parallel do
        endif
        !
      enddo
@@ -119,7 +121,7 @@ subroutine NL_Berry_current(en,Xk,k_map,V_bands,NL_J)
    !
    ! Notice that the 2.0 originates from H i\der_k +i \der_k H = 2 * Re[H i\der_k]
    !
-   NL_J_red(id_red)=2.0*spin_occ*real(NL_J_red(id_red),SP)/dble(Nperpend)
+   NL_J_red(id_red)=2.0_SP*spin_occ*real(NL_J_red(id_red),SP)/dble(Nperpend)
    !
  enddo
  !
@@ -129,13 +131,13 @@ subroutine NL_Berry_current(en,Xk,k_map,V_bands,NL_J)
  ! 
 !$omp parallel do default(shared), private(id)
  do id=1,3
-   NL_J(id)=-sum(NL_J_red(:)*a(:,id))/(4.0*pi)
+   NL_J(id)=sum(NL_J_red(:)*a(:,id))/(4.0_SP*pi)
  enddo
 !$omp end parallel do
  !
  ! Apply residual symmetries
  !
- if(nsym>1.and.l_res_symms) then
+ if(nsym/=1.and..not.l_no_res_symms) then
    NL_J_symm=cZERO
    do is=1,nsym/(i_time_rev+1) ! Time-reversal is never present 
      NL_J_symm(:)=NL_J_symm(:)+matmul(dl_sop(:,:,is),NL_J)
diff --git a/src/nloptics/NL_Hamiltonian.F b/src/nloptics/NL_Hamiltonian.F
index f8c6f75202..bbe16e5a4d 100644
--- a/src/nloptics/NL_Hamiltonian.F
+++ b/src/nloptics/NL_Hamiltonian.F
@@ -5,11 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
-subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
+subroutine NL_Hamiltonian(E,k,q,X,Time,V_bands)
  !
  ! The subroutine calculate Ho + V + Sigma and the fields E_tot, E_ext, E_ks
  !
- !   H_k=Ho_k+U_k+ V_k^H-V_k^H[rho_ref]+V_xc[rho]
+ !   H_k=Ho_k+U_k+ V_k^H[rho]-V_k^H[rho_ref]+V_xc[rho]
  !                                        
  use pars,           ONLY:SP,cZERO,pi,cONE
  use units,          ONLY:SPEED_OF_LIGHT
@@ -22,12 +22,12 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
  use X_m,            ONLY:global_gauge,X_t
  use fields,         ONLY:A_ext,A_tot,Efield
  use real_time,      ONLY:eval_DFT,eval_HARTREE,l_RT_induced_Field,rho_reference,&
-&                         Ho_plus_Sigma,RT_Vnl_xc,RT_nbands         
+&                         Ho_plus_Sigma,RT_Vnl_xc,RT_nbands,dG_lesser
  use wave_func,      ONLY:WF
  use xc_functionals, ONLY:V_xc,XC_potential_driver
  use nl_optics,      ONLY:Correlation,V_xc_0,full_rho,IPA,E_full,LRC,JGM,LRCW,l_use_DIPOLES,NL_LRC_alpha,   &
-&                         NL_initial_P,E_tot,E_ext,E_ks,E_xc_0,I_relax,Alpha_ED,dG,eval_dG, & 
-&                         NL_bands,VAL_BANDS,NL_P,LSEX,LHF,Wk_EQ
+&                         NL_initial_P,E_tot,E_ext,E_ks,E_xc_0,I_relax,Alpha_ED,eval_dG, & 
+&                         NL_bands,VAL_BANDS,NL_P,LSEX,LHF
  use global_XC,      ONLY:WF_xc_functional,WF_kind
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_Xk_ibz_index
  use wrapper_omp,    ONLY:M_plus_alpha_M_omp
@@ -44,7 +44,6 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
  real(SP),      intent(in)    :: Time
  type(X_t),     intent(in)    :: X
  complex(SP),   intent(in)    :: V_bands(NL_bands(2),maxval(E%nbf),QP_nk,n_sp_pol)
- integer,       intent(in)    :: i_time
  !
  ! Working Space
  !
@@ -89,7 +88,7 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
  !
  ! Evaluate the G_lesser for current/polarization and collisions
  ! ======================================================
- if(eval_dG)              call NL_build_dG_lesser(E,V_bands,dG)
+ if(eval_dG) call NL_build_dG_lesser(E,V_bands,dG_lesser)
  !
  ! Evaluate the Polarization 
  ! ===================================================================
@@ -117,7 +116,9 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
  !
  call RT_Ext_fields_sum(A_ext,Time)
  !
- A_tot=A_ext
+ A_tot%vecpot    =A_ext%vecpot    
+ A_tot%vecpot_vel=A_ext%vecpot_vel
+ A_tot%vecpot_acc=A_ext%vecpot_acc
  !
  E_ext=-A_ext%vecpot_vel/SPEED_OF_LIGHT
  !
@@ -138,7 +139,7 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
       !This can be calculated once for each frequency, but I did not want to mess up NL_driver (MG)
       alpha_ = NL_LRC_alpha 
       if (Efield(1)%ef_name=='SOFTSIN'.or.Efield(1)%ef_name=='SIN') &
-      &     alpha_  = Alpha_ED(E,Efield(1)%frequency(1))
+      &     alpha_  = Alpha_ED(E,Efield(1)%frequency)
            E_vec_pot=E_vec_pot + alpha_*NL_P(1:3)
       !
     elseif(Correlation==LRC) then
@@ -163,17 +164,17 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
     !
  endif
  !
- ! Calculate collisions
- ! =======================================
- if(l_use_Hxc_collisions) call COLLISIONS_compose_nl(dG)
+ ! Calculate collisions if doing TD-SEX or TD-HF
+ ! =============================================
+ if(l_use_Hxc_collisions) call COLLISIONS_compose_nl(dG_lesser)
  !
  ! Build the Hamiltonian
  ! =====================
  !
-!$OMP WORKSHARE  
+!$OMP PARALLEL WORKSHARE  
  Ho_plus_Sigma=cZERO
  I_relax      =cZERO
-!$OMP END WORKSHARE 
+!$OMP END PARALLEL WORKSHARE 
  !
  do i_sp_pol=1,n_sp_pol
   do ik=1,QP_nk
@@ -215,22 +216,13 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
    else
      !
      ! Use Sm1_plus, Sm1_minus and V_bands to build the W operator
-     ! I split the W(E)*E operator as:
-     ! W(0)*E+[W(E)-W(0)]*E
-     ! becasue W(0) is at Equilibrium and can by symmetrized easily
      ! 
      call Build_W_operator(E,k,ik,i_sp_pol,V_bands(:,:E%nbf(i_sp_pol),:,i_sp_pol),Wk)
-     if(allocated(Wk_EQ)) then
-       !  This part of the operator is symmetriezed before the dynamics
-       call Apply_W_field(A_tot,Wk-Wk_EQ(:,:,:,i_sp_pol,ik_mem),H_nl_sc)
-       call RT_Apply_field(ik,i_sp_pol,H_nl_sc,A_tot)
-     else
-       call Apply_W_field(A_tot,Wk,H_nl_sc)
-     endif
+     call Apply_W_field(A_tot,Wk,H_nl_sc)
      !
    endif                                                         
    !
-!   if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_nl(dG,H_nl_sc,k,q,E,ik,i_sp_pol)
+!   if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_nl(dG_lesser,H_nl_sc,k,q,E,ik,i_sp_pol)
    if(Correlation==LSEX.or.Correlation==LHF) call OSCLL_compose_vbands(V_bands,H_nl_sc,k,q,E,ik,i_sp_pol)
    !
    if(l_use_Hxc_collisions) then
@@ -240,7 +232,7 @@ subroutine NL_Hamiltonian(E,k,q,X,Time,i_time,V_bands)
    !
    ! Damping: NL_damping fills the I_relax matrix 
    ! ================================================
-   call NL_damping(E,ik,i_sp_pol,V_bands(:,:E%nbf(i_sp_pol),ik,i_sp_pol))
+   call NL_damping(E,ik,i_sp_pol,dG_lesser,I_relax)
    !
    ! Store the Hamiltonian
    ! =====================
diff --git a/src/nloptics/NL_Integrator.F b/src/nloptics/NL_Integrator.F
index 23a69c4cc0..e12b026450 100644
--- a/src/nloptics/NL_Integrator.F
+++ b/src/nloptics/NL_Integrator.F
@@ -86,7 +86,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      do is=1,n_sp_pol
        nb=E%nbf(is)
        Ktmp=cZERO
-       call EXP_step(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb,RT_step)
+       call EXP_step(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb,RT_step)
        V_bands(:,:nb,ik,is) = V_bands(:,:nb,ik,is) + Ktmp(:,:nb)
      enddo
    enddo
@@ -96,15 +96,19 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
    !  
    do ik=1,QP_nk
      if(.not.PAR_IND_Xk_ibz%element_1D(ik)) then
+!$OMP PARALLEL WORKSHARE  
        V_bands(:,:,ik,:)=cZERO
+!$OMP END PARALLEL WORKSHARE  
        cycle
      endif
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
+!$OMP PARALLEL WORKSHARE  
        Ktmp=cZERO
        Ktmp(:,:nb)=V_bands(:,:nb,ik,is)
-       call INVINT_step(V_bands(:,:nb,ik,is),Ktmp(:,:nb),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb,RT_step)
+!$OMP END PARALLEL WORKSHARE  
+       call INVINT_step(V_bands(:,:nb,ik,is),Ktmp(:,:nb),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb,RT_step)
      enddo
    enddo
    if(.not.l_velocity_IPA) call PP_redux_wait(V_bands,COMM=PAR_COM_Xk_ibz_INDEX%COMM) 
@@ -113,13 +117,15 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
    !  
    ! Evaluate V(t+dt/2)
    !
+!$OMP PARALLEL WORKSHARE  
    K_all=cZERO
+!$OMP END PARALLEL WORKSHARE  
    do ik=1,QP_nk
      if(.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle 
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        call INVINT_step(K_all(:,:E%nbf(is),ik,is),V_bands(:,:E%nbf(is),ik,is),Ho_plus_Sigma(:,:,ik_mem,is), &
-&                                                                 I_relax(:,:,:E%nbf(is),ik_mem,is),E%nbf(is),RT_step/2._SP)
+&                                                                 I_relax(:,:,ik_mem,is),E%nbf(is),RT_step/2._SP)
      enddo
    enddo
    if(.not.l_velocity_IPA) call PP_redux_wait(K_all,COMM=PAR_COM_Xk_ibz_INDEX%COMM) 
@@ -129,7 +135,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step/2._SP,i_time,K_all)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step/2._SP,K_all)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -140,13 +146,15 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
    K_all=V_bands
    do ik=1,QP_nk
      if(.not.PAR_IND_Xk_ibz%element_1D(ik)) then
+!$OMP PARALLEL WORKSHARE  
        V_bands(:,:,ik,:)=cZERO
+!$OMP END PARALLEL WORKSHARE  
        cycle
      endif
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        call INVINT_step(V_bands(:,:E%nbf(is),ik,is),K_all(:,:E%nbf(is),ik,is),Ho_plus_Sigma(:,:,ik_mem,is), &
-&                                                                 I_relax(:,:,:E%nbf(is),ik_mem,is),E%nbf(is),RT_step)
+&                                                                 I_relax(:,:,ik_mem,is),E%nbf(is),RT_step)
      enddo
    enddo
    if(.not.l_velocity_IPA) call PP_redux_wait(V_bands,COMM=PAR_COM_Xk_ibz_INDEX%COMM) 
@@ -161,7 +169,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call EXP_step(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb,RT_step/2._SP)
+       call EXP_step(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb,RT_step/2._SP)
        K_all(:,:E%nbf(is),ik,is) = V_bands(:,:E%nbf(is),ik,is) + Ktmp(:,:E%nbf(is))
      enddo
    enddo
@@ -172,7 +180,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step/2._SP,i_time,K_all)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step/2._SP,K_all)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -187,7 +195,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:nb),K_all(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),E%nbf(is))
+       call RK_basestep(Ktmp(:,:nb),K_all(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),E%nbf(is))
        V_bands(:,:nb,ik,is) = V_bands(:,:nb,ik,is) + RK2_bpar*RT_step*Ktmp(:,:nb)
      enddo
    enddo
@@ -203,7 +211,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb)
+       call RK_basestep(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb)
        K_all(:,:nb,ik,is) = V_bands(:,:nb,ik,is) + RK2_apar*RT_step*Ktmp(:,:nb)
      enddo
    enddo
@@ -214,7 +222,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step/2._SP,i_time,K_all)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step/2._SP,K_all)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -229,7 +237,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:nb),K_all(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb)
+       call RK_basestep(Ktmp(:,:nb),K_all(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb)
        V_bands(:,:nb,ik,is) = V_bands(:,:nb,ik,is) + RK2_bpar*RT_step*Ktmp(:,:nb)
      enddo
    enddo
@@ -250,7 +258,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb)
+       call RK_basestep(Ktmp(:,:nb),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb)
        V_tmp(:,:nb,ik,is) = V_bands(:,:nb,ik,is) + Heun_apar(1)*RT_step*Ktmp(:,:nb)
        K_all(:,:nb,ik,is) = Heun_bpar(1)*Ktmp(:,:nb)
      enddo
@@ -261,7 +269,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*Heun_apar(1),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*Heun_apar(1),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -277,7 +285,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:nb),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb)
+       call RK_basestep(Ktmp(:,:nb),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb)
        V_tmp(:,:nb,ik,is) = V_bands(:,:nb,ik,is) + Heun_apar(2)*RT_step*Ktmp(:,:nb)
      enddo
    enddo
@@ -285,7 +293,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
    !
    ! Recalculate H(t) using  V(t+2dt/3) 
    !
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*Heun_apar(2),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*Heun_apar(2),V_tmp)
    !
    ! final step |V(t+dt)>
    !
@@ -298,7 +306,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:nb),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb)
+       call RK_basestep(Ktmp(:,:nb),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb)
        K_all(:,:E%nbf(is),ik,is)   = K_all(:,:E%nbf(is),ik,is) + Heun_bpar(2)*Ktmp(:,:nb)
        V_bands(:,:E%nbf(is),ik,is) = V_bands(:,:E%nbf(is),ik,is) + RT_step*K_all(:,:E%nbf(is),ik,is)
      enddo
@@ -323,9 +331,9 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        do ib=1,E%nbf(is)
-         call EXP_op(U_tmp(:,:),Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:E%nbf(is),ib,ik_mem,is),RT_step*RK4_apar(1))
+         call EXP_op(U_tmp(:,:),Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is),RT_step*RK4_apar(1))
          V_tmp(NL_bands(1):,ib,ik,is)=matmul(U_tmp,V_bands(NL_bands(1):,ib,ik,is))
-         U_tot(:,:,ib,ik,is) =RK4_bpar(1)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ib,ik_mem,is))
+         U_tot(:,:,ib,ik,is) =RK4_bpar(1)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is))
        enddo
      enddo
    enddo
@@ -337,7 +345,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(1),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(1),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -350,9 +358,9 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        do ib=1,E%nbf(is)
-         call EXP_op(U_tmp(:,:),Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ib,ik_mem,is),RT_step*RK4_apar(2))
+         call EXP_op(U_tmp(:,:),Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is),RT_step*RK4_apar(2))
          V_tmp(NL_bands(1):,ib,ik,is)=matmul(U_tmp,V_bands(NL_bands(1):,ib,ik,is))
-         U_tot(:,:,ib,ik,is) = U_tot(:,:,ib,ik,is) + RK4_bpar(2)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ib,ik_mem,is)) 
+         U_tot(:,:,ib,ik,is) = U_tot(:,:,ib,ik,is) + RK4_bpar(2)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is)) 
        enddo
      enddo
    enddo
@@ -364,7 +372,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(2),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(2),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -377,9 +385,9 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        do ib=1,E%nbf(is)
-         call EXP_op(U_tmp(:,:),Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ib,ik_mem,is),RT_step*RK4_apar(3))
+         call EXP_op(U_tmp(:,:),Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is),RT_step*RK4_apar(3))
          V_tmp(NL_bands(1):,ib,ik,is)=matmul(U_tmp,V_bands(NL_bands(1):,ib,ik,is))
-         U_tot(:,:,ib,ik,is) = U_tot(:,:,ib,ik,is) + RK4_bpar(3)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ib,ik_mem,is))
+         U_tot(:,:,ib,ik,is) = U_tot(:,:,ib,ik,is) + RK4_bpar(3)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is))
        enddo
      enddo
    enddo
@@ -391,7 +399,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(3),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(3),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -408,7 +416,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        do ib=1,E%nbf(is)
-         U_tot(:,:,ib,ik,is) = U_tot(:,:,ib,ik,is) + RK4_bpar(4)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ib,ik_mem,is))
+         U_tot(:,:,ib,ik,is) = U_tot(:,:,ib,ik,is) + RK4_bpar(4)*(Ho_plus_Sigma(:,:,ik_mem,is)+I_relax(:,:,ik_mem,is))
          call EXP_op(U_tmp(:,:),U_tot(:,:,ib,ik,is),RT_step)
          V_bands(NL_bands(1):,ib,ik,is)=matmul(U_tmp,V_tmp(NL_bands(1):,ib,ik,is))
        enddo
@@ -431,7 +439,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:E%nbf(is)),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb) 
+       call RK_basestep(Ktmp(:,:E%nbf(is)),V_bands(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb) 
        V_tmp(:,:E%nbf(is),ik,is) = V_bands(:,:E%nbf(is),ik,is) + RK4_apar(1)*RT_step*Ktmp(:,:E%nbf(is))
        K_all(:,:E%nbf(is),ik,is) = RK4_bpar(1)*Ktmp(:,:E%nbf(is))
      enddo
@@ -443,7 +451,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(1),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(1),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -458,7 +466,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:E%nbf(is)),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb) 
+       call RK_basestep(Ktmp(:,:E%nbf(is)),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb) 
        V_tmp(:,:E%nbf(is),ik,is) = V_bands(:,:E%nbf(is),ik,is) + RK4_apar(2)*RT_step*Ktmp(:,:E%nbf(is))
        K_all(:,:E%nbf(is),ik,is) = K_all(:,:E%nbf(is),ik,is)   + RK4_bpar(2)*Ktmp(:,:E%nbf(is))
      enddo
@@ -470,7 +478,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(2),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(2),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -485,7 +493,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:E%nbf(is)),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb) 
+       call RK_basestep(Ktmp(:,:E%nbf(is)),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb) 
        V_tmp(:,:E%nbf(is),ik,is) = V_bands(:,:E%nbf(is),ik,is) + RK4_apar(3)*RT_step*Ktmp(:,:E%nbf(is))
        K_all(:,:E%nbf(is),ik,is) = K_all(:,:E%nbf(is),ik,is) + RK4_bpar(3)*Ktmp(:,:E%nbf(is))
      enddo
@@ -497,7 +505,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
 #if defined _TIMING
  call timing('NL Integrator',OPR='stop')
 #endif
-   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(3),i_time,V_tmp)
+   call NL_Hamiltonian(E,k,q,X,Time+RT_step*RK4_apar(3),V_tmp)
 #if defined _TIMING
  call timing('NL Integrator',OPR='start')
 #endif
@@ -512,7 +520,7 @@ subroutine NL_Integrator(E,k,q,X,Integrator,i_time,V_bands)
      ik_mem=PAR_Xk_ibz_index(ik)
      do is=1,n_sp_pol
        nb=E%nbf(is)
-       call RK_basestep(Ktmp(:,:E%nbf(is)),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,:nb,ik_mem,is),nb)
+       call RK_basestep(Ktmp(:,:E%nbf(is)),V_tmp(:,:nb,ik,is),Ho_plus_Sigma(:,:,ik_mem,is),I_relax(:,:,ik_mem,is),nb)
        K_all(:,:E%nbf(is),ik,is) = K_all(:,:E%nbf(is),ik,is) + RK4_bpar(4)*Ktmp(:,:E%nbf(is))
        V_bands(:,:E%nbf(is),ik,is) = V_bands(:,:E%nbf(is),ik,is) + RT_step*K_all(:,:E%nbf(is),ik,is)
      enddo
diff --git a/src/nloptics/NL_build_dG_lesser.F b/src/nloptics/NL_build_dG_lesser.F
index 02d5426fb8..5ab2708162 100644
--- a/src/nloptics/NL_build_dG_lesser.F
+++ b/src/nloptics/NL_build_dG_lesser.F
@@ -34,40 +34,44 @@ subroutine NL_build_dG_lesser(en,V_bands,dG)
  !
  type(levels), intent(in)  :: en
  complex(SP),  intent(in)  :: V_bands(NL_bands(2),maxval(en%nbf),QP_nk,n_sp_pol)
- complex(SP),  intent(out) :: dG(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk)
+ complex(SP),  intent(out) :: dG(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol)
  !
  ! Work Space
  !
- integer :: i1,i2
+ integer :: i1,i2,is
  integer :: ik
  !
- if (n_sp_pol==2) call error(" NL_build_dG not coded for n_sp_pol==2")
- !
 #if defined _TIMING
  call timing('NL build_dG',OPR='start')
 #endif
  !
+!$OMP PARALLEL WORKSHARE  
  dG=cZERO
+!$OMP END PARALLEL WORKSHARE  
  !
+ do is=1,n_sp_pol
  do ik=1,QP_nk
    if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
    do i1=NL_bands(1),NL_bands(2)             
 !$omp parallel do default(shared), private(i2)
      do i2=i1,NL_bands(2)
-       dG(i1,i2,ik)=Vstar_dot_V_omp(en%nbf(1),V_bands(i2,:,ik,1),V_bands(i1,:,ik,1))
-       dG(i2,i1,ik)=conjg(dG(i1,i2,ik))
+       dG(i1,i2,ik,is)=Vstar_dot_V_omp(en%nbf(1),V_bands(i2,:,ik,is),V_bands(i1,:,ik,is))
+       dG(i2,i1,ik,is)=conjg(dG(i1,i2,ik,is))
      enddo
 !$omp end parallel do
    enddo
  enddo
+ enddo
  !
+!$OMP PARALLEL WORKSHARE  
  dG=cI*spin_occ*dG
+!$OMP END PARALLEL WORKSHARE  
  !
  call PP_redux_wait(dG,COMM=PAR_COM_Xk_ibz_INDEX%COMM )
  !
 !$omp parallel do default(shared), private(i1)
  do i1=NL_bands(1),en%nbf(1)
-   dG(i1,i1,:)=dG(i1,i1,:)-cI*spin_occ
+   dG(i1,i1,:,:)=dG(i1,i1,:,:)-cI*spin_occ
  enddo
 !$omp end parallel do
  !
diff --git a/src/nloptics/NL_build_valence_bands.F b/src/nloptics/NL_build_valence_bands.F
index adb64d792e..c304e5ea17 100644
--- a/src/nloptics/NL_build_valence_bands.F
+++ b/src/nloptics/NL_build_valence_bands.F
@@ -49,7 +49,7 @@ subroutine NL_build_valence_bands(en,V_input,VAL_BANDS,lowest_band)
 !$omp parallel do default(shared), private(i_v,i_spinor)
        do i_v=1,en%nbf(is)
          do i_spinor=1,n_spinor
-           Val_bands(:,i_v,ik_mem,is)=Val_bands(:,i_v,ik_mem,is)+V_input(ib,i_v,ik,is)*WF%c(:,i_spinor,i_wf)
+           Val_bands(:,i_v,ik_mem,is)=Val_bands(:,i_v,ik_mem,is)+V_input(ib,i_v,ik,is)*WF%r(:,i_spinor,i_wf)
          enddo
        enddo
 !$omp end parallel do
diff --git a/src/nloptics/NL_carriers_number.F b/src/nloptics/NL_carriers_number.F
new file mode 100644
index 0000000000..0d84e2165e
--- /dev/null
+++ b/src/nloptics/NL_carriers_number.F
@@ -0,0 +1,57 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): DS AM
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine NL_carriers_number(en,k,Ncarr)
+ !
+ use pars,           ONLY:SP
+ use R_lattice,      ONLY:bz_samp
+ use electrons,      ONLY:levels,n_sp_pol
+ use nl_optics,      ONLY:NL_bands
+ use real_time,      ONLY:dG_lesser
+ !
+ implicit none
+ !
+ type(levels),  intent(in) :: en
+ type(bz_samp), intent(in) :: k
+ real(SP),      intent(out):: Ncarr(3)
+ !
+ ! Workspace
+ !
+ integer     :: ib,ik,i_sp_pol
+ !
+ ! Electrons and holes number
+ !
+ Ncarr = 0._SP
+ !
+ do i_sp_pol=1,n_sp_pol
+ do ik=1,en%nk
+   do ib=NL_bands(1),NL_bands(2)
+     if ( en%E(ib,ik,i_sp_pol)<=0._SP ) Ncarr(2)=Ncarr(2)-aimag(dG_lesser(ib,ib,ik,i_sp_pol,1))*k%weights(ik)
+     if ( en%E(ib,ik,i_sp_pol)> 0._SP ) Ncarr(3)=Ncarr(3)+aimag(dG_lesser(ib,ib,ik,i_sp_pol,1))*k%weights(ik)
+   enddo
+ enddo
+ enddo
+ !
+ Ncarr(1)=Ncarr(2)-Ncarr(3)
+ !
+end subroutine NL_carriers_number
diff --git a/src/nloptics/NL_current.F b/src/nloptics/NL_current.F
index 917539d684..abf6dee2f6 100644
--- a/src/nloptics/NL_current.F
+++ b/src/nloptics/NL_current.F
@@ -41,27 +41,34 @@ subroutine NL_current(k,E,V_input,NL_J)
  !
  ! A : Current
  !
- call NL_average_operator(V_VALUE=NL_J(4:6), V_MATRIX=DIP_v(:,NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),:,:),  &
+ call NL_average_operator(V_VALUE=NL_J(4:6), V_MATRIX=-DIP_v(:,NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),:,:),  &
      &  V_bands=V_input,k=k,en=E,TRANS=.TRUE.,nK_mem=PAR_Xk_nibz)
  !
  ! DEBUG <
  ! Calculate current using density matrix
- ! call NL_average_operator(V_VALUE=NL_J(4:6), V_MATRIX=DIP_v(:,NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),:,:),  &
+ ! call NL_average_operator(V_VALUE=NL_J(4:6), V_MATRIX=-DIP_v(:,NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),:,:),  &
  !     &  dG=dG,k=k,en=E,TRANS=.TRUE.,nK_mem=PAR_Xk_nibz)
  ! DEBUG >
  !
  if(trim(global_gauge)=='velocity') then
    !
+   ! The diamagnetic term is needed to obtain a gauge invariant current
+   ! However the below definition of nel_effect would be exact
+   ! only in the limit NL_bands(2) --> \infty
    nel_effect=nel-(NL_bands(1)-1)*spin_occ
    !
    ! Velocity gauge I must use the correct definition of the J operator
    !
-   NL_J(1:3)=NL_J(4:6)+A_ext%vecpot(:)*real(nel_effect,SP)/(2._SP*SPEED_OF_LIGHT)
-   !
    ! In velocity gauge J(1:3) is the full current
    ! while J(4:6) is the current without the diamagnetic term
    !
+   NL_J(1:3)=NL_J(4:6)+A_ext%vecpot(:)*real(nel_effect,SP)/(2._SP*SPEED_OF_LIGHT)
+   !
  else
+   !
+   ! In length gauge J(1:3) is the berry current
+   ! while J(4:6) is the dipoles current
+   ! There is no diamagnetic term here, since A(t)=0 in this gauge
    !
    if(l_use_DIPOLES) then
      NL_J(1:3)=NL_J(4:6)
diff --git a/src/nloptics/NL_damping.F b/src/nloptics/NL_damping.F
index 29bf6576d4..5ac0dd60d8 100644
--- a/src/nloptics/NL_damping.F
+++ b/src/nloptics/NL_damping.F
@@ -5,27 +5,39 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
-subroutine NL_damping(E,ik,i_sp_pol,V_bands)
+!> @brief Dephasing for the real-time dynamics 
+!!
+!! @param[in]       dG_lesser                   variation of the G_lesser = G(t) - G^0
+!! @param[in]       ik				k-point index
+!! @param[in]       i_sp_pol			spin-polarization index
+!! @param[in]       E 	                        energies and life-time array
+!!
+!! @param[out]      I_relax			relaxation matrix (distributed on k-points)
+!
+!
+subroutine NL_damping(E,ik,i_sp_pol,dG_lesser,I_relax)
  !
  ! Output: I_relax
  !
  use pars,           ONLY:SP,cI
  use zeros,          ONLY:zero_dfl
- use electrons,      ONLY:levels
+ use parallel_m,     ONLY:PAR_Xk_nibz,PAR_Xk_ibz_index
+ use electrons,      ONLY:levels,n_sp_pol
  use real_time,      ONLY:Phase_LifeTime,RAD_LifeTime
- use nl_optics,      ONLY:I_relax,NL_bands
- use parallel_m,     ONLY:PAR_Xk_ibz_index
+ use nl_optics,      ONLY:NL_bands
+ use QP_m,           ONLY:QP_nk
  use real_time,      ONLY:RT_deph_deg_thresh
  !
  implicit none
  !                      
- type(levels), intent(in) :: E
- integer,      intent(in) :: ik,i_sp_pol
- complex(SP),  intent(in) :: V_bands(NL_bands(2),E%nbf(i_sp_pol))
+ type(levels), intent(in)  :: E
+ integer,      intent(in)  :: ik,i_sp_pol
+ complex(SP),  intent(in)  :: dG_lesser(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),QP_nk,n_sp_pol)
+ complex(SP),  intent(out) :: I_relax(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),PAR_Xk_nibz,n_sp_pol)
  !
  ! Working Space
  !
- integer          :: ib,i1,i2,ik_mem
+ integer          :: i1,i2,ik_mem
  real(SP)         :: Gamma_ij
  !
  ! Damping
@@ -34,13 +46,9 @@ subroutine NL_damping(E,ik,i_sp_pol,V_bands)
  ik_mem=PAR_Xk_ibz_index(ik)
  !
  ! Fictitious lifetime equivalent to the broadening
- ! in linear optics
- !
- ! In the 2013 version this term was:
+ ! in linear optics in term of density matrix
  !
- ! I = 2*cI/Phase_LifeTime ( (|V_b(t=0)><V_b(t=0)| -|V_b(t)><V_b(t)| )
- !
- !  In the 2021 version diagonal and off-diagonal contribution are separated
+ ! I = cI/Phase_LifeTime ( \rho_{ij}(t) -\rho^0_{ij} )
  !
  if(Phase_LifeTime<zero_dfl) return
  !
@@ -55,12 +63,10 @@ subroutine NL_damping(E,ik,i_sp_pol,V_bands)
      Gamma_ij=0._SP
      !
      if(allocated(E%W))          Gamma_ij=Gamma_ij+abs(E%W(i1,ik,i_sp_pol))+abs(E%W(i2,ik,i_sp_pol))
-     if(Phase_LifeTime>zero_dfl) Gamma_ij=Gamma_ij+2._SP/Phase_LifeTime 
+     if(Phase_LifeTime>zero_dfl) Gamma_ij=Gamma_ij+1._SP/Phase_LifeTime 
      !
-     do ib=1,E%nbf(i_sp_pol)
-       I_relax(i1,i2,ib,ik_mem,i_sp_pol) = -cI*Gamma_ij*(conjg(V_bands(i2,ib))*V_bands(i1,ib))
-       I_relax(i2,i1,ib,ik_mem,i_sp_pol) = -conjg(I_relax(i1,i2,ib,ik_mem,i_sp_pol))
-     enddo
+     I_relax(i1,i2,ik_mem,i_sp_pol) = -Gamma_ij*dG_lesser(i1,i2,ik,i_sp_pol)
+     I_relax(i2,i1,ik_mem,i_sp_pol) = -conjg(I_relax(i1,i2,ik_mem,i_sp_pol))
      !
    enddo
  enddo
@@ -68,32 +74,20 @@ subroutine NL_damping(E,ik,i_sp_pol,V_bands)
  !
  if(RAD_LifeTime>zero_dfl) then
    !      
-!$omp parallel do default(shared), private(i1,i2,ib)
+!$omp parallel do default(shared), private(i1,i2)
    do i1=NL_bands(1),NL_bands(2)
      !
-     do ib=1,E%nbf(i_sp_pol)
-       I_relax(i1,i1,ib,ik_mem,i_sp_pol)=-cI*2._SP/RAD_LifeTime*(conjg(V_bands(i1,ib))*V_bands(i1,ib))
-     enddo
+     I_relax(i1,i1,ik_mem,i_sp_pol)=I_relax(i1,i1,ik_mem,i_sp_pol)-1._SP/RAD_LifeTime*dG_lesser(i1,i1,ik,i_sp_pol)
      !
      ! I need this additional part for the degenerate states 
      !
      do i2=i1+1,NL_bands(2)
        if ( abs(E%E(i1,ik,1)-E%E(i2,ik,1))>=RT_deph_deg_thresh ) cycle
-         do ib=1,E%nbf(i_sp_pol)
-           I_relax(i1,i2,ib,ik_mem,i_sp_pol) = -cI*2._SP/RAD_LifeTime*(conjg(V_bands(i2,ib))*V_bands(i1,ib))
-           I_relax(i2,i1,ib,ik_mem,i_sp_pol) = -conjg(I_relax(i1,i2,ib,ik_mem,i_sp_pol))
-         enddo
+       I_relax(i1,i2,ik_mem,i_sp_pol) = I_relax(i1,i2,ik_mem,i_sp_pol)-1._SP/RAD_LifeTime*dG_lesser(i1,i2,ik,i_sp_pol)
+       I_relax(i2,i1,ik_mem,i_sp_pol) = -conjg(I_relax(i1,i2,ik_mem,i_sp_pol))
      enddo
      !
    enddo
-!$omp end parallel do
-   !
-   ! Remove the equilibrium contribution
-   !
-!$omp parallel do default(shared), private(ib)
-   do ib=NL_bands(1),E%nbf(i_sp_pol)
-     I_relax(ib,ib,ib,ik_mem,i_sp_pol)=I_relax(ib,ib,ib,ik_mem,i_sp_pol)+cI*2._SP/RAD_LifeTime
-   enddo
 !$omp end parallel do
    !
  endif
diff --git a/src/nloptics/NL_databases_IO.F b/src/nloptics/NL_databases_IO.F
index efbe120c62..a32cfbdf77 100644
--- a/src/nloptics/NL_databases_IO.F
+++ b/src/nloptics/NL_databases_IO.F
@@ -9,7 +9,7 @@ subroutine NL_databases_IO(i_f,read_or_write)
  !
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:REP,OP_WR_CL,VERIFY,OP_APP_CL,OP_RD_CL         
- use nl_optics,     ONLY:ID
+ use nl_optics,     ONLY:ID,NL_P_t
  use parallel_m,    ONLY:master_cpu
  !
  implicit none
diff --git a/src/nloptics/NL_driver.F b/src/nloptics/NL_driver.F
index 3887874f01..031ec3666d 100644
--- a/src/nloptics/NL_driver.F
+++ b/src/nloptics/NL_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine NL_driver(E,X,Xw,k,q,Dip)
  !
  use pars,           ONLY:pi,SP,cZERO,cONE,schlen
@@ -14,7 +18,7 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
  use QP_m,           ONLY: QP_ng_SH,QP_ng_Sx 
  use LIVE_t,         ONLY:live_timing 
  use electrons,      ONLY:levels,n_sp_pol
- use collision_ext,  ONLY:COLLISIONS_CV_only,HXC_collisions 
+ use collision_ext,  ONLY:COLLISIONS_CV_only,COLL_bands_frozen,HXC_collisions 
  use X_m,            ONLY:global_gauge,X_t
  use DIPOLES,        ONLY:DIPOLE_t,DIP_S
  use ALLOC,          ONLY:DIPOLE_ALLOC_elemental,DIPOLE_ALLOC_global
@@ -30,16 +34,18 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
 &                         NL_estep,NL_er,Integrator,l_use_DIPOLES,NL_P_t,JGM,TDDFT,NL_P,loop_on_angles,n_angles,&
 &                         E_ext_t,E_tot_t,E_ks_t,E_ks,E_tot,E_ext,runs_done,n_runs_todo,l_eval_CURRENT,NL_rot_axis,&
 &                         HF,SEX,NL_bands,NL_nbands,NL_alloc_k_distributed,NL_J_t,n_runs,loop_on_frequencies, &
-&                         l_pump_and_probe,eval_OSCLL,l_test_OSCLL,LSEX,LHF,Sigma_SEX_EQ,Wk_EQ
- use real_time,      ONLY:RT_step,NE_steps,NE_i_time,RT_nbands,RT_bands,eval_DFT,eval_HARTREE
+&                         l_pump_and_probe,eval_OSCLL,l_test_OSCLL,LSEX,LHF,Sigma_SEX_EQ
+ use real_time,      ONLY:RT_step,NE_steps,NE_i_time,NE_time,RT_nbands,RT_bands, &
+                          eval_DFT,eval_HARTREE,RT_P,RT_ind_J,l_velocity_gauge_diam
+ use RT_control,     ONLY:RT_NAN_found
  use IO_m,           ONLY:io_COLLs
  use parallel_int,   ONLY:PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
  use parallel_m,     ONLY:PAR_IND_freqs,PAR_IND_freqs_ID,PAR_IND_Xk_ibz,PAR_Xk_ibz_index
  use collision_ext,  ONLY:COLLISIONS_have_HARTREE
  use collision_ext,  ONLY:ng_oscll,OSCLL,W
- use electric,       ONLY:l_res_symms
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Notice that E is never changed!
  !
@@ -53,10 +59,10 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
  ! 
  ! Work Space
  !
- integer           :: i_f,i1,i_sp_pol,wf_ng_save,io_err,ik,ik_mem
+ integer           :: i_f,i1,i_sp_pol,wf_ng_save,io_err,ik,ik_mem,my_runs
  character(schlen) :: l_message
  logical           :: l_dynamics_is_over
- real(SP)          :: Time,angle
+ real(SP)          :: OBSERVABLES(25),Time,angle
  !
  ! In Non-Linear NL_bands always equal to RT_bands
  ! ==============================================
@@ -98,11 +104,11 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
  ! ========================================
  call DIPOLE_dimensions(E,Dip,NL_bands,(/0._SP,0._SP,0._SP/))
  call DIPOLE_IO(k,E,Dip,'read ',io_err,'NL')
- if(io_err/=0) call error("Error in DIPOLES! ")
- !
- ! Symmetrize dipoles if old-symmetries are present
- ! =================================================
- if(l_res_symms) call DIPOLE_symmetrize(k)
+ if(io_err/=0) call error("Error in Dipoles I/O")
+ if(.not.l_use_DIPOLES) then 
+   call OVERLAPS_IO(k,E,Dip,'read',io_err,'NL')
+   if(io_err/=0) call error("Error in Overlaps I/O")
+ endif
  !
  ! Generate bare_qpg if required
  !=============================
@@ -118,13 +124,9 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
  endif
  !
  call DIPOLE_ALLOC_elemental('P_square')
- if(l_use_DIPOLES) then
-   call DIPOLE_ALLOC_elemental('DIP_S')
- else
-   if(.not.l_eval_CURRENT) then
-     call DIPOLE_ALLOC_elemental('DIP_v')
-   endif
- endif
+ ! Deallocate what is not needed
+ if(l_use_DIPOLES) call DIPOLE_ALLOC_elemental('DIP_S')
+ if(.not.l_eval_CURRENT) call DIPOLE_ALLOC_elemental('DIP_v')
  !
  ! WFs loading
  !=============
@@ -169,6 +171,7 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
    if(l_use_Hxc_collisions.and..not.COLLISIONS_have_HARTREE) eval_HARTREE=.TRUE.
    !     
    if(COLLISIONS_CV_only) call msg('rs','Collisions contains C<->V ONLY scatterings')
+   if(any((/COLL_bands_frozen==1/))) call msg('rs','Collisions contains ONLY selected scatterings')
    !
  endif
  !
@@ -237,25 +240,20 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
    !
  endif
  !
- ! Store Equilibrium Wk
- if(.not.l_use_DIPOLES.and..not.trim(global_gauge)=='velocity'.and.l_res_symms) then
-   do ik=1,QP_nk
-     if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
-     ik_mem=PAR_Xk_ibz_index(ik)
-     do i_sp_pol=1,n_sp_pol
-       call Build_W_operator(E,k,ik,i_sp_pol,V_bands(:,:E%nbf(i_sp_pol),:,i_sp_pol), & 
-&                            Wk_EQ(:,:,:,i_sp_pol,ik_mem))
-     enddo
-   enddo
- else
-   YAMBO_FREE(Wk_EQ)
- endif
- !
- !
  ! Check if calculation was already done
  ! =====================================
  if(n_runs_todo==0) return
  !
+ if (l_pump_and_probe) then
+   !
+   ! Initialize output files
+   !==========================
+   call RT_output_INIT( )
+   !
+   call RT_output_and_IO_driver('open',E,k,OBSERVABLES)
+   !
+ endif
+ !
  ! Initialize IO
  ! ==================
  call NL_databases_IO(0,'write')
@@ -271,7 +269,9 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
  else
    call error("Wrong input: use pump-probe mode 'yambo_nl -u p' or set a number of frequencie/angles with 'yambo_nl -u n'")
  endif
- call live_timing(l_message,PAR_IND_freqs%n_of_elements(PAR_IND_freqs_ID+1)*NE_steps)
+ if(.not.l_pump_and_probe) NE_i_time=0
+ my_runs=PAR_IND_freqs%n_of_elements(PAR_IND_freqs_ID+1)
+ call live_timing(l_message,my_runs*(NE_steps-NE_i_time))
  !
  do i_f=1,n_runs
    !
@@ -282,12 +282,16 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
      cycle
    endif
    !
-   ! Re-initialization
-   ! ================
-   V_bands=cZERO
-   do i_sp_pol=1,n_sp_pol
-     forall(i1=1:E%nbf(n_sp_pol)) V_bands(i1,i1,:,i_sp_pol)=cONE
-   enddo
+   if (.not.l_pump_and_probe) then
+     !
+     ! Re-initialization
+     ! ================
+     V_bands=cZERO
+     do i_sp_pol=1,n_sp_pol
+       forall (i1=1:E%nbf(n_sp_pol)) V_bands(i1,i1,:,i_sp_pol)=cONE
+     enddo
+     !
+   endif
    !
    ! Update Efield
    ! ================
@@ -299,15 +303,14 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
    !
    l_dynamics_is_over=.false.
    !
-   NE_i_time=0
+   if(.not.l_pump_and_probe) NE_i_time=0
    !
    do while(.not.l_dynamics_is_over)
      !
      NE_i_time=NE_i_time+1
+     NE_time=real(NE_i_time-1,SP)*RT_step
      !
-     Time=real(NE_i_time-1,SP)*RT_step
-     !
-     call NL_Hamiltonian(E,k,q,X(1),Time,NE_i_time,V_bands)
+     call NL_Hamiltonian(E,k,q,X(1),NE_time,V_bands)
      !
      ! I evaluate current after the Hamiltonian because I need
      ! time-dependent overlaps Sm1_plus, and Sm1_mins
@@ -316,6 +319,22 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
        call NL_current(k,E,V_bands,NL_J)
        NL_J=NL_J-NL_initial_J
      endif
+     ! 
+     if (l_pump_and_probe) then
+       !
+       RT_P    =real(NL_P(1:3),SP)
+       if (     l_velocity_gauge_diam) RT_ind_J=real(NL_J(1:3),SP)
+       if (.not.l_velocity_gauge_diam) RT_ind_J=real(NL_J(4:6),SP)
+       !
+       ! Carriers number
+       !================
+       call NL_carriers_number(E,k,OBSERVABLES(1:3))
+       !
+       ! Write DBs and output
+       !=====================
+       call RT_output_and_IO_driver('write',E,k,OBSERVABLES)
+       !
+     endif
      !
      ! Store Variables
      !
@@ -325,16 +344,20 @@ subroutine NL_driver(E,X,Xw,k,q,Dip)
      E_tot_t(NE_i_time,:)=E_tot
      E_ks_t (NE_i_time,:)=E_ks
      !
-     call NL_output(NE_i_time,i_f) 
+     call NL_output(NE_i_time,i_f)
      !
      call NL_Integrator(E,k,q,X(1),Integrator,NE_i_time,V_bands)
      !
      call live_timing(steps=1)
      !
-     l_dynamics_is_over= (NE_i_time>=NE_steps) .or. stop_now(.false.)
+     l_dynamics_is_over= (NE_i_time>=NE_steps) .or. stop_now(.false.) .or. RT_NAN_found
      !
    enddo
    !
+   if (l_pump_and_probe) then
+     call RT_output_and_IO_driver('close',E,k,OBSERVABLES)
+   endif
+   !
    call NL_databases_IO(i_f,'write')
    !
    if(stop_now(.false.)) exit
diff --git a/src/nloptics/NL_initialize.F b/src/nloptics/NL_initialize.F
index f979668ea7..17b6bcf18a 100644
--- a/src/nloptics/NL_initialize.F
+++ b/src/nloptics/NL_initialize.F
@@ -1,16 +1,20 @@
-!
+! 
 ! License-Identifier: GPL
 !
 ! Copyright (C) 2016 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine NL_initialize(E,k)
  ! 
- use pars,           ONLY:SP,DP,pi
+ use pars,           ONLY:SP,DP,pi,n_fields_defs_max,schlen
  use drivers,        ONLY:l_sc_fock,l_sc_coh,l_sc_sex,l_sc_hartree
  use vec_operate,    ONLY:normalize_v,v_norm,cross_product
- use collision_ext,  ONLY:COLLISIONS_CV_only,COLLISIONS_load_SP,COLLISIONS_compr
+ use collision_ext,  ONLY:COLLISIONS_CV_only,COLL_bands_frozen,COLLISIONS_load_SP,COLLISIONS_compr
  use hamiltonian,    ONLY:H_potential
  use units,          ONLY:AU2VMm1,AUT2FS,HA2EV
  use electrons,      ONLY:levels,n_sp_pol
@@ -21,20 +25,26 @@ subroutine NL_initialize(E,k)
  use QP_m,           ONLY:QP_ng_Sc,QP_ng_Sx,QP_ng_Sh
  use collision_ext,  ONLY:ng_oscll,COLLISIONS_cutoff
  use R_lattice,      ONLY:bz_samp,k_map
- use fields,         ONLY:Efield,Efield_strength,n_ext_fields,EtoT
+ use IO_m,           ONLY:io_GF
+ use stderr,         ONLY:STRING_split
+ use X_m,            ONLY:global_gauge
+ use fields,         ONLY:Efield,Efield_strength,n_ext_fields_max,EtoT
  use real_time,      ONLY:RT_step,NE_steps,NE_tot_time,eval_HARTREE,eval_DFT,l_RT_induced_field,l_NE_with_fields, &
-&                         Phase_LifeTime,Integrator_name,RT_dyn_step,NE_i_time,RT_potential,RAD_LifeTime
- use RT_control,     ONLY:SETUP_RT_IO_type,OBS_RT_IO_t
+&                         Phase_LifeTime,Integrator_name,RT_dyn_step,NE_i_time,RT_potential,RAD_LifeTime,&
+&                         l_velocity_gauge_corr,l_velocity_gauge_diam,G_MEM_steps,RT_nk,i_MEM_now
+ use RT_control,     ONLY:OBS_RT_IO_t, RT_control_alloc,RT_control_free,l_NE_with_fields,Vbands_RT_IO_t,&                         
+&                         SAVE_Vb_history,SAVE_G_history, SAVE_Vb_floquet, Vbands_IO_time, Floquet_order
  use nl_optics,      ONLY:l_use_DIPOLES,EULER,EULEREXP,RK2,RK4,RK2EXP,RK4EXP,HEUN,INVINT,Integrator,CRANK_NICOLSON, &
 &                         NL_damping,NL_correlation,Correlation,NL_er,NL_estep,n_frequencies,IPA,TDDFT,HARTREE,LRC,JGM,  &
 &                         LRCW,SEX,HF,eval_dG,eval_COLLISIONS,NL_bands,NL_verb_name,VERB_HIGH,VERB_LOW,NL_verbosity,LSEX,&
 &                         l_eval_CURRENT,l_pump_and_probe,DephMult,l_velocity_IPA,n_angles,l_test_OSCLL,LHF,eval_OSCLL,  &
 &                         NL_ang_step,n_runs,NL_rot_axis,loop_on_frequencies,loop_on_angles,NL_initial_versor,max_angle
- use electric,       ONLY:l_force_SndOrd,l_P_periodic,l_res_symms
+ use electric,       ONLY:l_force_SndOrd,l_no_res_symms
  use parser_m,       ONLY:parser
  use stderr,         ONLY:intc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(in) :: E
  type(bz_samp),intent(in) :: k
@@ -42,9 +52,11 @@ subroutine NL_initialize(E,k)
  ! Working space
  !
  real(SP) :: Time,W_0,T_0,sigma,E_dot_rot_axis 
- integer  :: i1,n_active_fields
+ integer  :: i1,n_active_fields,Integr_steps
+ logical  :: l_velocity_gauge_tmp
  real(SP) :: Sampling_Time
  logical  :: l_no_compress_COLL
+ character(schlen)  :: field_defs(n_fields_defs_max)
  !
  ! Parser
  ! ============
@@ -52,17 +64,34 @@ subroutine NL_initialize(E,k)
  call parser('FrSndOrd',             l_force_SndOrd)
  call parser('InducedField' ,        l_RT_induced_field)
  call parser('EvalCurrent' ,         l_eval_CURRENT)
- call parser('FrPolPerdic' ,         l_P_periodic)
  call parser('TestOSCLL',            l_test_OSCLL)
  call parser('NoComprCOLL',          l_no_compress_COLL)
- call parser('UseSymm',              l_res_symms)
+ call parser('NoUseSymm',            l_no_res_symms)
+ call parser('SaveVbhistory',        SAVE_Vb_history)
+ !call parser('SaveVbfloquet',        SAVE_Vb_floquet)
+ call parser('VelGaugeNoDiam',       l_velocity_gauge_tmp)
+ call parser('UseDipoles',           l_use_DIPOLES)
+ call parser('SaveGhistory',         SAVE_G_history)
+ !
+ l_velocity_gauge_corr=.false.
+ l_velocity_gauge_diam=.not.l_velocity_gauge_tmp
+ !
+ if (SAVE_G_history) then
+   io_GF=.true.
+   RT_nk=k%nibz
+   G_MEM_steps=1
+   i_MEM_now=1
+ endif
  !
  if(l_use_DIPOLES) call warning(' Fixed dipoles: only linear response properties are correct!')
- if(l_P_periodic)  call msg('sr','[NL] Enforce periodicity of the polarization repect to Efield')
+ !
+ Floquet_order = min(5,Floquet_order)
+ SAVE_Vb_floquet = (Floquet_order > 0)
  !
  ! Velocity gauge mode
  ! =======================================
  l_velocity_IPA=.FALSE.
+ !
  if(trim(global_gauge)=='velocity') then
     !
     ! Turn on current
@@ -78,11 +107,10 @@ subroutine NL_initialize(E,k)
     call warning('Berry phase turned off in velocity gauge')
     !
     ! Improve parallelization in the IPA case (each k-point is independent)
+    ! This parallelization is not compatible with wavefunction-IO for 
+    ! this reason I turn it off in pump-probe setup
     !
-    if(trim(NL_Correlation)=='IPA') then
-      l_velocity_IPA=.TRUE.
-      call msg('rs','Fast parallelization: velocity gauge + IPA ')
-    endif
+    if(trim(NL_Correlation)=='IPA') l_velocity_IPA=.TRUE.
     !
  endif
  !
@@ -92,6 +120,8 @@ subroutine NL_initialize(E,k)
  if(DP/=SP)      call error(" Non-linear optics works only in double precision, configure with --enable-dp flag")
  if(n_sp_pol/=1) call error(" Spin-polarized systems not yet implemented")
  if(any(NL_bands(:)==0)) call error("Any of the NLBands limits is 0, change to nv|nc with 0<nv<fermi<nc")
+ if(any(NL_bands(1)>E%nbf)) call error("You should include at least a valence band")
+ if(any(NL_bands(2)<E%nbf+1)) call error("You should include at least a conduction band")
  !
  ! K-sampling
  ! ===========
@@ -104,23 +134,34 @@ subroutine NL_initialize(E,k)
  select case (trim(Integrator_name))
     case('EULER')
         Integrator=EULER
+        Integr_steps=1
     case('EULEREXP')
         Integrator=EULEREXP
+        Integr_steps=1
     case('RK2')  
         Integrator=RK2
+        Integr_steps=2
     case('RK4')
         Integrator=RK4
+	! RK4 uses dt/2 and dt/3. So I need a factor 6
+        Integr_steps=6
     case('RK2EXP')
         Integrator=RK2EXP
+        Integr_steps=2
     case('RK4EXP')
+	! RK4EXP uses dt/2 and dt/3. So I need a factor 6
         Integrator=RK4EXP
+        Integr_steps=6
     case('HEUN')
         Integrator=HEUN
+        Integr_steps=0
         call error("HEUN integrator not yet available")
     case('INVINT')
         Integrator=INVINT
+        Integr_steps=1
     case('CRANKNIC')
         Integrator=CRANK_NICOLSON
+        Integr_steps=2
     case default
         call error("Unknow integrator")
  end select
@@ -139,15 +180,22 @@ subroutine NL_initialize(E,k)
  ! Check Field
  ! =========================
  !
- ! This part of the code works with only one field
+ ! Initial check for fields defined from file
+ do i1=1,n_ext_fields_max
+   field_defs=""
+   Efield(i1)%ef_name=trim(Efield(i1)%ef_name)
+   call STRING_split(Efield(i1)%ef_name,field_defs)
+   if ( field_defs(1)/='FROM_FILE') cycle
+   call RT_load_field_from_file_init(field_defs(2),field_defs(3),Integr_steps)
+ enddo
  !
- n_ext_fields=3
  n_active_fields=0
  !
- do i1=1,n_ext_fields
+ do i1=1,n_ext_fields_max
    !
    Efield(i1)%ef_name=trim(Efield(i1)%ef_name)      
    if((Efield(i1)%ef_name=='none')) cycle
+   call STRING_split(Efield(i1)%ef_name,field_defs)
    !
    if(all(abs(Efield(i1)%versor(:))<epsilon(1.))) then
      call msg('rs',"External field "//intc(i1)//" versor is zero.")
@@ -170,8 +218,10 @@ subroutine NL_initialize(E,k)
    ! see definition of "step_case" in function "theta_function" in src/modules/mod_functions.F
    Efield(i1)%t_initial_indx=max(nint(Efield(i1)%t_initial/RT_step)+1,2)
    Efield(i1)%t_initial     =real(Efield(i1)%t_initial_indx-1)*RT_step
-   if(Efield(i1)%ef_pol/="linear") call error("Only 'linear' polarization has been implemented and tested ")
-   Efield(i1)%ef_pol        ="linear"
+   !
+   if ( field_defs(1)=='FROM_FILE') &
+&       call RT_load_field_from_file(field_defs(2),field_defs(3),i1)
+
    !
    call Check_symmetries(Efield(i1)%versor,"electric")
    !
@@ -199,10 +249,26 @@ subroutine NL_initialize(E,k)
  if(.not.l_pump_and_probe.and.Efield(1)%ef_name/='SOFTSIN'.and.Efield(1)%ef_name/='SIN') &
     call warning(' Non-linear optics properties can be obtained with SOFTSIN/SIN fields only')
  !
+ call STRING_split(Efield(1)%ef_name,field_defs)
+ if( field_defs(1)=='FROM_FILE'.and.l_pump_and_probe) &
+ & call error("Field from file only works in pump and probe mode")
+
+ if(l_pump_and_probe.and.l_velocity_IPA) then
+   !
+   ! Fast parallelization not compatible with RT-IO
+   !
+   l_velocity_IPA=.false.
+   !
+ elseif(.not.l_pump_and_probe.and.l_velocity_IPA) then
+   !
+   call msg('s','Fast parallelization for velociy-gauge + IPA')
+   !
+ endif
+ !
  ! For short pulses field (PULSE, QSSIN, QSFIELD, SPULSE)
  ! calcualte the T_0
  !
- W_0=Efield(1)%frequency(1)
+ W_0=Efield(1)%frequency
  sigma=Efield(1)%width
  T_0=pi/W_0*(real(nint(W_0/pi*3._SP*sigma),SP))
  if(Efield(1)%ef_name=='SPULSE') T_0=sigma
@@ -227,6 +293,29 @@ subroutine NL_initialize(E,k)
  endif
  !
  if(all(NL_er>0._SP).and.n_frequencies>=1) NL_estep=(NL_er(2)-NL_er(1))/real(n_frequencies,SP)
+ !
+ if (SAVE_Vb_floquet) then
+   if (.not.(Efield(1)%ef_name=='SOFTSIN'.or.Efield(1)%ef_name=='SIN')) then
+      call warning(' Floquet analysis implemented only for pure periodic signal. Switching it off ')
+      SAVE_Vb_floquet = .FALSE.
+   endif
+   if (NE_tot_time > 0._SP) then
+      call warning(' Recalculating time for Floquet analysis ')
+      NE_tot_time = -1._SP
+   endif
+   if ((Efield(2)%ef_name/='none').or.(Efield(3)%ef_name/='none')) then
+     call warning(' Floquet analysis only with one external field. Switching it off ')
+     SAVE_Vb_floquet = .FALSE.
+   endif
+   if (SAVE_Vb_history) then
+     call warning(' Floquet Analysis not compatible with saving WF history. Switching it off ')
+     SAVE_Vb_floquet = .FALSE.
+   endif
+   if (n_active_fields>1) then
+     call warning(' Floquet Analysis not compatible with multiple fieldis. Switching it off ')
+     SAVE_Vb_floquet = .FALSE.
+   endif
+ endif
  ! 
  ! Determine the total time from the damping or from T_0
  !
@@ -247,10 +336,17 @@ subroutine NL_initialize(E,k)
      Sampling_Time=EtoT(E=NL_er(1))+5._SP*RT_step
      !
      if(n_active_fields>1) then
-       if((Efield(2)%ef_name/='none').and.Efield(2)%frequency(1)>0._SP) &
-&         Sampling_Time=maxval((/Sampling_Time,EtoT(E=Efield(2)%frequency(1))+5._SP*RT_step/))
-       if((Efield(3)%ef_name/='none').and.Efield(3)%frequency(1)>0._SP) &
-&         Sampling_Time=maxval((/Sampling_Time,EtoT(E=Efield(3)%frequency(1))+5._SP*RT_step/))
+       if((Efield(2)%ef_name/='none').and.Efield(2)%frequency>0._SP) &
+&         Sampling_Time=maxval((/Sampling_Time,EtoT(E=Efield(2)%frequency)+5._SP*RT_step/))
+       if((Efield(3)%ef_name/='none').and.Efield(3)%frequency>0._SP) &
+&         Sampling_Time=maxval((/Sampling_Time,EtoT(E=Efield(3)%frequency)+5._SP*RT_step/))
+     endif
+     !
+     if (SAVE_Vb_floquet) then
+       Sampling_Time = 4._SP*EtoT(E=W_0)+5._SP*RT_step 
+       Vbands_RT_IO_t%INTERVAL_time_INPUT = (2._SP*pi/W_0)/(2._SP*Floquet_order + 1._SP)
+       Vbands_IO_time(1) = int(NE_tot_time/RT_step)
+       Vbands_IO_time(2) = int((NE_tot_time+2._SP*pi/W_0)/RT_step) 
      endif
      !
      NE_tot_time=NE_tot_time+Sampling_Time
@@ -270,7 +366,7 @@ subroutine NL_initialize(E,k)
    endif
    ! 
  else
-   call msg('s','Total simulation time read from input') 
+   call msg('s','Total simulation time read from input')
  endif
  !
  call msg('sr','Total simulation time       ',NE_tot_time*AUT2FS,"[fs]")
@@ -378,6 +474,11 @@ subroutine NL_initialize(E,k)
    !
  endif
  !
+ if (l_pump_and_probe) eval_dG=.true.
+ !
+ ! The new dephasing is based on density matrix
+ if (Phase_LifeTime>0._SP) eval_dG=.true.
+ !
  if(eval_COLLISIONS) then
    COLLISIONS_load_SP =.not.l_no_compress_COLL
    COLLISIONS_compr   =.not.l_no_compress_COLL
@@ -403,6 +504,7 @@ subroutine NL_initialize(E,k)
  if(eval_COLLISIONS) then
    !      
    if(COLLISIONS_CV_only) call warning(' ONLY cv scattering in COLLISIONS! ')
+   if( any((/COLL_bands_frozen==1/)) ) call warning(' ONLY selected scattering in COLLISIONS! ')
    !
    if(l_no_compress_COLL) then
      call msg('sr','Full collisions in double-precision ')
@@ -464,14 +566,20 @@ subroutine NL_initialize(E,k)
  ! frequencies is supported
  !
  RT_dyn_step=RT_step
- OBS_RT_IO_t%INTERVAL_time     =RT_step
- OBS_RT_IO_t%INTERVAL_time_INPUT=RT_step
- call SETUP_RT_IO_type(OBS_RT_IO_t,who="OBS",what="INIT")
- YAMBO_ALLOC(OBS_RT_IO_t%Time,(NE_steps))
+ !
+ ! I/O steps
+ !===========
+ !
+ call RT_IO_type_time_steps("INIT")
+ !
+ call RT_control_alloc("all")
+ !
+ !if(.not.l_pump_and_probe) then
  do NE_i_time=1,NE_steps
    Time=real(NE_i_time-1,SP)*RT_step
    OBS_RT_IO_t%Time(NE_i_time)=Time
  enddo
+ !endif
  !
  ! LOG
  ! ===
@@ -487,22 +595,30 @@ subroutine NL_initialize(E,k)
  call msg( 'r', 'Damping                    ',NL_damping*HA2EV,"[eV]")
  call msg( 'r', 'Dephasing                  ',Phase_LifeTime*AUT2FS,"[fs]")
  call msg( 'r', 'Radiative LifeTime         ',RAD_LifeTime*AUT2FS,"[fs]")
- do i1=1,n_ext_fields
+ do i1=1,n_active_fields
    call msg( 'r', 'Efield                     '//trim(Efield(i1)%ef_name))
    call msg( 'r', 'Efield width               ',Efield(i1)%width*AUT2FS,"[fs]")
+   call msg( 'r', 'Efield frequency           ',Efield(i1)%frequency*HA2EV,"[eV]")
    call msg( 'r', 'Efield Amplitude      [V/m]',Efield(i1)%amplitude*AU2VMm1)
+   call msg( 'r', 'Pond. Energy        [eV/mu]',Efield(i1)%amplitude**2/4._SP/Efield(i1)%frequency**2*HA2EV)
+   call msg( 'r', 'Adiabatic param      [1/mu]',Efield(i1)%amplitude**2/4._SP/Efield(i1)%frequency**3)
  enddo
- call msg( 'r', 'Efield T_0                 ',T_0,"[fs]")
- call msg( 'r', 'Frequencies range          ',NL_er*HA2EV)
- call msg( 'sr','Frequencies steps          ',n_frequencies)
- call msg( 'r', 'Frequency   step           ',NL_estep*HA2EV)
- call msg( 'sr','Number of angles           ',n_angles)
- call msg( 'sr','Max angle (degree)         ',max_angle)
- call msg( 'r', 'Rotation axis              ',NL_rot_axis)
- call msg( 'r', 'Use Dipoles                ',l_use_DIPOLES)
- call msg( 'r', 'Induced Field              ',l_RT_induced_field)
- call msg( 'r', 'Force Snd Order Dipoles    ',l_force_SndOrd)
- call msg( 'r', 'Evaluate Current           ',l_eval_CURRENT)
- call msg( 'r', 'Enforce periodic pol.      ',l_P_periodic)
+ call msg( 'r', 'Efield T_0                 ',T_0*AUT2FS,"[fs]")
+ if (.not.l_pump_and_probe) then
+   call msg( 'r', 'Frequencies range          ',NL_er*HA2EV)
+   call msg( 'sr','Frequencies steps          ',n_frequencies)
+   call msg( 'r', 'Frequency   step           ',NL_estep*HA2EV)
+   call msg( 'sr','Number of angles           ',n_angles)
+   if(n_angles>0) then
+     call msg( 'sr','Max angle (degree)         ',max_angle)
+     call msg( 'r', 'Rotation axis              ',NL_rot_axis)
+   endif
+ endif
+ call msg( 'r', 'Use Dipoles                   ',l_use_DIPOLES)
+ call msg( 'r', 'Induced Field                 ',l_RT_induced_field)
+ call msg( 'r', 'Force Snd Order Dipoles       ',l_force_SndOrd)
+ call msg( 'r', 'Evaluate Current              ',l_eval_CURRENT)
+ call msg( 'r', 'Symmetrize current and pol    ',.not.l_no_res_symms)
+ call msg( 'r', 'Fast parallelization (vel-IPA)',l_velocity_IPA)
  !
 end subroutine NL_initialize
diff --git a/src/nloptics/NL_output.F b/src/nloptics/NL_output.F
index 67212970d4..4782bfc597 100644
--- a/src/nloptics/NL_output.F
+++ b/src/nloptics/NL_output.F
@@ -30,26 +30,26 @@ subroutine NL_output(i_time,i_run)
  !
  if(NL_verbosity==VERB_LOW) return
  !
- file_name(1)='external_potential_F'//intc(i_run)
- file_name(2)='polarization_F'//intc(i_run)
- file_name(3)='e_total_F'//intc(i_run)
- file_name(4)='current_F'//intc(i_run)
+ file_name(1)='NL_ext_pot_F'//intc(i_run)
+ file_name(2)='NL_pol_F'//intc(i_run)
+ file_name(3)='NL_Etot_F'//intc(i_run)
+ file_name(4)='NL_curr_F'//intc(i_run)
  !
  n_files=1
- short_file_name(n_files)='external_potential'
+ short_file_name(n_files)='NL_ext_pot'
  n_headings(n_files)=10
  !
  n_files=n_files+1
- short_file_name(n_files)='polarization'
+ short_file_name(n_files)='NL_pol'
  n_headings(n_files)=7
  !
  n_files=n_files+1
- short_file_name(n_files)='e_total'
+ short_file_name(n_files)='NL_Etot'
  n_headings(n_files)=4
  !
  if(l_eval_CURRENT) then
    n_files=n_files+1
-   short_file_name(n_files)='current'
+   short_file_name(n_files)='NL_curr'
    n_headings(n_files)=7
  endif
  !
diff --git a/src/nloptics/NL_project.dep b/src/nloptics/NL_project.dep
index ab54f3ef9b..68b0c91045 100644
--- a/src/nloptics/NL_project.dep
+++ b/src/nloptics/NL_project.dep
@@ -1,4 +1,3 @@
- DIPOLE_symmetrize.o
  DIP_polarization.o
  EXC_macroscopic_JGM.o
  EXP_op.o
@@ -10,6 +9,7 @@
  NL_average_operator.o
  NL_build_dG_lesser.o
  NL_build_valence_bands.o
+ NL_carriers_number.o
  NL_current.o
  NL_damping.o
  NL_databases_IO.o
diff --git a/src/nloptics/NL_start_and_restart.F b/src/nloptics/NL_start_and_restart.F
index 572632844a..45b46a69d4 100644
--- a/src/nloptics/NL_start_and_restart.F
+++ b/src/nloptics/NL_start_and_restart.F
@@ -6,6 +6,10 @@
 !
 ! Authors (see AUTHORS file for details): AMCA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine NL_start_and_restart(E,k,Dip)
  !
  ! This subroutine initialize some variables and 
@@ -17,10 +21,15 @@ subroutine NL_start_and_restart(E,k,Dip)
  ! [4] Initialize the bare Hartree and XC
  ! [5] Initialize the fields
  !
- use pars,           ONLY:cZERO,rZERO,cONE
+ use pars,           ONLY:cZERO,rZERO,cONE,SP
  use com,            ONLY:msg
+ use units,          ONLY:AUT2FS
  use electrons,      ONLY:levels,n_sp_pol
- use real_time,      ONLY:rho_reference,eval_HARTREE,eval_DFT
+ use real_time,      ONLY:rho_reference,RT_time_status,NE_i_time,RT_step,RT_dyn_step,NE_steps,&
+&                         eval_HARTREE,eval_DFT
+ use RT_control,     ONLY:Vbands_RT_IO_t,OBS_RT_IO_t,OUTPUT_RT_IO_t,CACHE_OBS_INTERVAL_time,&
+&                         N_RT_databases,NE_i_start_time,RT_return_db_ID,RT_DB_ID,RT_DB_name,&
+&                         RT_control_alloc,RT_control_free
  use R_lattice,      ONLY:bz_samp
  use X_m,            ONLY:X_t
  use DIPOLES,        ONLY:DIPOLE_t
@@ -33,14 +42,15 @@ subroutine NL_start_and_restart(E,k,Dip)
  use parallel_m,     ONLY:master_cpu
  use nl_optics,      ONLY:Correlation,IPA,E_full,full_rho,V_bands,TDDFT,n_runs_todo,NL_LRC_alpha,NL_bands, &
 &                         V_xc_0,NL_initial_P,l_use_DIPOLES,n_runs,runs_done,JGM,E_xc_0,I_relax,l_use_DIPOLES,&
-&                         VAL_BANDS,l_eval_CURRENT,NL_initial_J,NL_P
+&                         VAL_BANDS,l_eval_CURRENT,NL_initial_J,NL_P,l_pump_and_probe
  use IO_int,         ONLY:io_control
- use IO_m,           ONLY:OP_RD_CL,NONE,file_is_present,VERIFY
- use parallel_int,   ONLY:PP_redux_wait
+ use IO_m,           ONLY:OP_RD_CL,NONE,file_is_present,VERIFY,DUMP,REP
+ use parallel_int,   ONLY:PP_wait,PP_redux_wait
  use stderr,         ONLY:intc
  use collision_ext,  ONLY:COLLISIONS_have_HARTREE
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)      :: E
  type(bz_samp)     :: k
@@ -49,9 +59,10 @@ subroutine NL_start_and_restart(E,k,Dip)
  !
  ! Working space
  !
- integer :: i1,ib,i_f,i_sp_pol
- integer :: ID,io_NonLinear
- integer, external::io_NL
+ logical :: RESTART
+ integer :: i1,ib,i_f,i_sp_pol,i_db
+ integer :: ID,io_NonLinear,COM_MODE,io_err(N_RT_databases)
+ integer, external :: io_NL,io_RT_components
  !
  !
  ! Initialize the Gf and potentials
@@ -68,7 +79,7 @@ subroutine NL_start_and_restart(E,k,Dip)
      & call error(" Wrong bands range!")
  !
  do i_sp_pol=1,n_sp_pol
-   forall(i1=1:E%nbf(n_sp_pol)) V_bands(i1,i1,:,i_sp_pol)=cONE
+   forall (i1=1:E%nbf(n_sp_pol)) V_bands(i1,i1,:,i_sp_pol)=cONE
  enddo
  !
  if(Correlation/=IPA.and..not.COLLISIONS_have_HARTREE.and.(eval_HARTREE.or.eval_DFT)) then
@@ -149,26 +160,105 @@ subroutine NL_start_and_restart(E,k,Dip)
  ! Restart
  ! ====================================
  YAMBO_ALLOC(runs_done,(n_runs))
- !
  runs_done=.false.
  !
- if(master_cpu) then
-   !      
-   call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1/),MODE=VERIFY,ID=ID)
-   io_NonLinear=io_NL("Nonlinear",ID)
-   !
-   if(io_NonLinear==0) then
-     do i_f=1,n_runs
-        if(file_is_present('Nonlinear_fragment_'//intc(i_f))) runs_done(i_f)=.true.
-     enddo                                                                    
-    endif
-    !
- endif
+ RESTART=.FALSE.
  !
- call PP_redux_wait(runs_done,imode=3)
+ if (.not.l_pump_and_probe) then
+   !
+   if(master_cpu) then
+     !
+     call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1/),MODE=VERIFY,ID=ID)
+     io_NonLinear=io_NL("Nonlinear",ID)
+     !
+     if(io_NonLinear==0) then
+       do i_f=1,n_runs
+          if(file_is_present('Nonlinear_fragment_'//intc(i_f))) runs_done(i_f)=.true.
+       enddo
+     endif
+     !
+   endif
+   !
+   call PP_redux_wait(runs_done,imode=3)
+   !
+ else
+   !
+   NE_i_time=0
+   !
+   call msg('s','Checking for RESTART in pump and probe mode ')
+   !
+   ! Perform a first I/O to check which DBs are present.
+   !=====================================================
+   do i_db=1,N_RT_databases
+     COM_MODE=NONE
+     if (i_db==RT_return_db_ID("ANY_Vb")) COM_MODE=REP
+     call io_control(ACTION=OP_RD_CL,COM=COM_MODE,SEC=(/1/),MODE=VERIFY,ID=RT_DB_ID(i_db))
+     io_err(i_db)=io_RT_components(trim(RT_DB_name(i_db)),RT_DB_ID(i_db))
+   enddo
+   !
+   ! To restart I need:
+   !
+   !  1. The Vbands -> wave-functions
+   !  2. The OBSERVABLES -> RT_IO_t
+   !
+   RESTART = io_err(RT_return_db_ID("V_bands"))==0.and.io_err(RT_return_db_ID("OBSERVABLES"))==0
+   !
+ endif
  !
  n_runs_todo=n_runs-count(runs_done)
  if(n_runs_todo/=n_runs) call msg('s','RESTART, remaining frequencies/angles ',n_runs_todo)
  !
+ if (l_pump_and_probe.and.RESTART) then
+   !
+   call msg('s','RESTART in pump and probe mode ')
+   !
+   do i_db=1,N_RT_databases
+     call RT_RESTART_database_copy( trim(RT_DB_name(i_db)) )
+   enddo
+   !
+   call PP_wait()
+   !
+   do i_db=1,N_RT_databases
+     if (i_db==RT_return_db_ID("V_bands_K_section")) cycle
+     call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/2/),MODE=DUMP,ID=RT_DB_ID(i_db))
+     io_err(i_db)=io_RT_components(trim(RT_DB_name(i_db)),RT_DB_ID(i_db))
+   enddo
+   !
+   NE_i_start_time=NE_i_time
+   !
+   call RT_control_free("all")
+   call RT_control_alloc("all")
+   !
+   YAMBO_FREE(RT_TIME_status)
+   !
+   ! I use G_lesser(:,:,:) to reconstruct everything
+   ! Note that the databases must be loaded at the RESTART time (NE_time)
+   !
+   i_db=RT_return_db_ID("V_bands_K_section")
+   !
+   call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/3/),MODE=DUMP,ID=RT_DB_ID(i_db))
+   io_err(i_db)=io_RT_components('V_bands_K_section',RT_DB_ID(i_db))
+   !
+   call RT_IO_type_time_steps("RESTART")
+   !
+   call msg( 'nr','---RESTART report---')
+   !
+   call msg('s','[RESTART] Number of steps done         ',real(NE_i_time)/real(NE_steps)*100._SP,"[o/o]")
+   call msg('r','Number of steps done              ',real(NE_i_time)/real(NE_steps)*100._SP,"[o/o]")
+   call msg('r','Time reached                      ',real(NE_i_time-1)*RT_step*AUT2FS,"[fs]")
+   call msg('r','Time step                         ',RT_dyn_step*AUT2FS,"[fs]")
+   !
+   !call msg('r',  'dT Update Interval                ',NE_time_step_update_jump*AUT2FS,"[fs]")
+   !call msg('r',  'Lifetimes inter/intr times        ',RT_life_extrap_times*AUT2FS,"[fs]")
+   call msg('r',  'T cache step  of J,P,M            ',CACHE_OBS_INTERVAL_time*AUT2FS,"[fs]")
+   call msg('r',  'T between I/O of J,P,M            ',OBS_RT_IO_t%INTERVAL_time*AUT2FS,"[fs]")
+   !call msg('r',  '          I/O of carriers         ',CARR_RT_IO_t%INTERVAL_time*AUT2FS,"[fs]")
+   call msg('r',  '          I/O of   WFs            ',Vbands_RT_IO_t%INTERVAL_time*AUT2FS,"[fs]")
+   call msg('r',  '               outputs            ',OUTPUT_RT_IO_t%INTERVAL_time*AUT2FS,"[fs]")
+   !
+   NE_i_time=NE_i_time-1
+   !
+ endif
+ !
 end subroutine NL_start_and_restart
 
diff --git a/src/nloptics/NL_test_collisions.F b/src/nloptics/NL_test_collisions.F
index 56f0be55f7..e860fdb94a 100644
--- a/src/nloptics/NL_test_collisions.F
+++ b/src/nloptics/NL_test_collisions.F
@@ -15,6 +15,7 @@ subroutine NL_test_collisions(k,q)
  use R_lattice,      ONLY:nXkibz,bz_samp,qindx_S
  use collision_ext,  ONLY:HXC_COLL_element,HXC_collisions
  use nl_optics,      ONLY:NL_bands
+ use electrons,      ONLY:n_sp_pol
  use com,            ONLY:msg
  !
  implicit none
@@ -22,10 +23,10 @@ subroutine NL_test_collisions(k,q)
  type(bz_samp), intent(in)  :: k,q
  !
  integer :: i_kmq,i_qp,ib,ibp,i_coll,i_coll_mem
- integer :: i_k,i_n,i_m,i_spin
+ integer :: i_k,i_n,i_m,i_spin,i_sp
  integer :: ic1
- complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz)
- complex(SP) :: COLL_OSCLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz)
+ complex(SP) :: COLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz,n_sp_pol)
+ complex(SP) :: COLL_OSCLL(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nXkibz,n_sp_pol)
  ! 
  real(SP) :: max_diff,diff
  !
@@ -50,23 +51,25 @@ subroutine NL_test_collisions(k,q)
    !
    COLL=cZERO
    !
+   do i_sp=1,n_sp_pol
    do i_kmq=1,nXkibz
      !
      do ibp=NL_bands(1),NL_bands(2)
        do ib=NL_bands(1),NL_bands(2)
-         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq)=="t") then
-           COLL(ib,ibp,i_kmq)=HXC_COLL_element(i_coll_mem)%v_c(ic1)
+         if (HXC_COLL_element(i_coll_mem)%table(ib,ibp,i_kmq,i_sp)=="t") then
+           COLL(ib,ibp,i_kmq,i_sp)=HXC_COLL_element(i_coll_mem)%v_c(ic1)
            ic1=ic1+1
          endif
          !
-         diff=abs(COLL(ib,ibp,i_kmq)-COLL_OSCLL(ib,ibp,i_kmq))
-         write(*,'(6i5,2e14.6)') i_k,i_m,i_n,i_kmq,ib,ibp,abs(COLL(ib,ibp,i_kmq)),abs(COLL_OSCLL(ib,ibp,i_kmq))
+         diff=abs(COLL(ib,ibp,i_kmq,i_sp)-COLL_OSCLL(ib,ibp,i_kmq,i_sp))
+         write(*,'(7i5,2e14.6)') i_sp,i_k,i_m,i_n,i_kmq,ib,ibp,abs(COLL(ib,ibp,i_kmq,i_sp)),abs(COLL_OSCLL(ib,ibp,i_kmq,i_sp))
          if(max_diff<=diff) max_diff=diff
          !
        enddo
      enddo
      !
    enddo
+   enddo
    !
  enddo 
  !
diff --git a/src/nloptics/RK_basestep.F b/src/nloptics/RK_basestep.F
index 33f799bb77..2e2c153c71 100644
--- a/src/nloptics/RK_basestep.F
+++ b/src/nloptics/RK_basestep.F
@@ -13,7 +13,7 @@ subroutine RK_basestep(K_out,V_in,H_in,I_in,nbf)
   complex(SP), intent(in)  :: V_in(NL_bands(2),nbf)
   complex(SP), intent(out) :: K_out(NL_bands(2),nbf)
   complex(SP), intent(in)  :: H_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
-  complex(SP), intent(in)  :: I_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2),nbf)
+  complex(SP), intent(in)  :: I_in(NL_bands(1):NL_bands(2),NL_bands(1):NL_bands(2))
   !
   ! Workspace
   !
@@ -24,7 +24,7 @@ subroutine RK_basestep(K_out,V_in,H_in,I_in,nbf)
   !
 !$omp parallel do default(shared), private(ib,H)
   do ib=1,nbf
-    H = H_in + I_in(:,:,ib)
+    H = H_in + I_in
     K_out(NL_bands(1):,ib)=-cI*matmul(H,V_in(NL_bands(1):,ib))
   enddo
 !$omp end parallel do
diff --git a/src/nloptics/el_density_vbands.F b/src/nloptics/el_density_vbands.F
index c76d295d16..daf1046431 100644
--- a/src/nloptics/el_density_vbands.F
+++ b/src/nloptics/el_density_vbands.F
@@ -47,7 +47,9 @@ subroutine el_density_vbands(en,Xk,rho,VAL_BANDS)
  integer     :: i1,ik,is,rho_syms,ir,ik_mem
  real(SP)    :: rho_no_sym(fft_size)
  !
+!$OMP PARALLEL WORKSHARE  
  rho_no_sym=rZERO
+!$OMP END PARALLEL WORKSHARE  
  !
  do ik=1,Xk%nibz
    !  
@@ -70,7 +72,9 @@ subroutine el_density_vbands(en,Xk,rho,VAL_BANDS)
  ! Simmetrization
  !
  rho_syms=nsym/(i_time_rev+1)
+!$OMP PARALLEL WORKSHARE  
  rho(:)=rZERO
+!$OMP END PARALLEL WORKSHARE  
  ! 
 !$omp parallel do default(shared), private(ir), reduction(+:rho)
  do ir=1,fft_size
diff --git a/src/output/K_OUTPUT.F b/src/output/K_OUTPUT.F
index 25dad42431..154aa4b8a4 100644
--- a/src/output/K_OUTPUT.F
+++ b/src/output/K_OUTPUT.F
@@ -19,7 +19,8 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
  use X_m,            ONLY:Epsilon_ii,eps_2_alpha,X_OUTPUT_driver,Epsilon_ij,&
 &                         X_do_obs,X_OUTPUT_messages,X_dichroism,Joint_DOS,BS_E_sorted,N_BS_E_sorted,&
 &                         BS_E_sorted_indx,X_magnons,i_G_shift,Q_plus_G_pt,Q_plus_G_sq_modulus,&
-&                         Resp_ii,Resp_ij,l_drude,skip_cond_sum_rule,X_drude_term,global_gauge
+&                         Resp_ii,Resp_ij,l_drude,skip_cond_sum_rule,X_drude_term,global_gauge,&
+&                         X_mespin,X_meorb
  use com,            ONLY:msg,com_compose_msg
  use R_lattice,      ONLY:bare_qpg,FineGd_desc,nkbz
  use electrons,      ONLY:spin_occ
@@ -59,7 +60,7 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
      if (.not.STRING_match(WHAT,"IP").and..not.BS_K_is_ALDA) out_string=trim(solver)//' bse'
      if (                                      BS_K_is_ALDA) out_string=trim(solver)//' alda bse'
    endif
-   call X_OUTPUT_driver(WHAT="keys"//OBS,IQ=iq,IG=i_G_shift,keys=trim(out_string))
+   call X_OUTPUT_driver(WHAT="keys "//OBS,IQ=iq,IG=i_G_shift,keys=trim(out_string))
    !
    call X_OUTPUT_driver("open "//OBS)
    !
@@ -111,11 +112,13 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
      endif
    enddo
    !
-   ! Eq.(1.12) PRB 48, 11705 (1993)
+   ! Eqs.(23a) and (48a) in PRB 95, 155203 (2017),
+   ! from Eq.(1.12) PRB 48, 11705 (1993)
    !
    A_drude=para_term_w0-diam_term_exact
    !
    if (trim(global_gauge)=='velocity') then
+     ! DS. Note. The sum rule imposition does not work if neq occupations are loaded. Not sure why.
      if (     skip_cond_sum_rule) diam_term_used=diam_term_exact
      if (.not.skip_cond_sum_rule) diam_term_used=para_term_w0
      spectra(:,1)=spectra(:,1)-diam_term_used(2)
@@ -303,10 +306,16 @@ subroutine K_OUTPUT(iq,W,WHAT,OBS)
  ! PHOTOLUMINESCENCE
  !--------------------
  if (X_do_obs("PL")) then
-   tmp_var=PL
-   call ELEMENTAL_dump("PL",tmp_var)
+   call ELEMENTAL_dump_real("PL",PL(:,2:3))
  endif
  !
+
+ ! MAGNETOELECTRIC
+ !------------
+if (X_do_obs("MEspin")) call ELEMENTAL_dump("MEspin", X_mespin(:,2:))
+if (X_do_obs("MEorb"))  call ELEMENTAL_dump("MEorb",  X_meorb(:,2:))
+
+
  if (STRING_match(WHAT,"close")) call X_OUTPUT_driver("close "//OBS)
  if (STRING_match(WHAT,"reset")) call X_OUTPUT_driver("reset "//OBS)
  !
@@ -446,4 +455,23 @@ subroutine add_drude_term(spec_io)
      !
    end subroutine add_drude_term
    !
+   subroutine ELEMENTAL_dump_real(OBS_local,F)
+     character(*) :: OBS_local
+     integer      :: iw,I
+     real(SP)     :: F(BSS_n_freqs,3)
+     real(SP)     :: data(4)
+     do iw=1,BSS_n_freqs
+       data(1 )=real(W%p(iw))
+       do I=1,3
+         data(I+1)=F(iw,I)
+       enddo
+       if (STRING_match(WHAT,"IP")) then
+         call X_OUTPUT_driver("WRITE "//OBS_local,data=(/data(1),data(3)/))
+       else
+         if (.not.l_last_col) call X_OUTPUT_driver("WRITE "//OBS_local,data=data(:3))
+         if (     l_last_col) call X_OUTPUT_driver("WRITE "//OBS_local,data=data(:4))
+       endif
+     enddo
+   end subroutine
+   !
 end subroutine
diff --git a/src/output/X_OUTPUT_driver.F b/src/output/X_OUTPUT_driver.F
index de20ff34f0..c8e75e0e39 100644
--- a/src/output/X_OUTPUT_driver.F
+++ b/src/output/X_OUTPUT_driver.F
@@ -14,7 +14,8 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
  use electrons,     ONLY:n_spinor
  use PHOTOLUM,      ONLY:PL
  use BS,            ONLY:l_BS_kerr,l_BS_magnons,l_BS_dichroism,BS_K_coupling,&
-&                        l_BS_photolum,l_BS_abs,l_BS_jdos,l_BS_esort_indx,l_BS_esort
+&                        l_BS_photolum,l_BS_abs,l_BS_jdos,l_BS_esort_indx,l_BS_esort,&
+&                        l_BS_mespin,l_BS_meorb
  use BS_solvers,    ONLY:l_abs_prop_chi_bse,l_eels_can_be_computed
  use stderr,         ONLY:STRING_match,STRING_split,intc,STRING_remove
  use OUTPUT,         ONLY:OUTPUT_driver
@@ -55,6 +56,8 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
    call ELEMENTAL_init(14,"CD","Dichroism","dichroism")
    call ELEMENTAL_init(15,"Spm","Magnons (S+-)","magnons")
    call ELEMENTAL_init(16,"Smp","Magnons (S-p)","magnons")
+   call ELEMENTAL_init(17,"MEspin","Magnetoelectric (spin)",  "mespin")
+   call ELEMENTAL_init(18,"MEorb" ,"Magnetoelectric (orbital)","meorb")
  endif
  !
  if (STRING_match(WHAT,"INIT")) then
@@ -88,6 +91,8 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
    if (l_BS_magnons) call X_obs_onoff("+ Spm")
    if (l_BS_magnons.and.n_spinor==2) call X_obs_onoff("+ Smp")
    if (l_BS_dichroism) call X_obs_onoff("+ CD")
+   if (l_BS_mespin) call X_obs_onoff("+ MEspin")
+   if (l_BS_meorb)  call X_obs_onoff("+ MEorb")
    !
    !...solvers specific
    if (present(solver)) then
@@ -102,7 +107,8 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
        endif
      endif
      if (STRING_match(solver,"Inversion")) call X_obs_onoff("- Esort E_IP E_INDX_IP kerr magnons jdos")
-     if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort jdos E_IP E_INDX_IP")
+     !if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort E_IP E_INDX_IP")
+     if (STRING_match(solver,"Haydock")) call X_obs_onoff("- Esort E_IP E_INDX_IP jdos")
      if (STRING_match(solver,"Diago").or.STRING_match(solver,"Slepc")) call X_obs_onoff("- E_IP E_INDX_IP")
    endif
    !
@@ -143,7 +149,7 @@ subroutine X_OUTPUT_driver(WHAT,keys,solver,IQ,IG,DESC,data,indexes)
    endif
    if (X_do_obs("PL")) then
      X_obs(5)%N_columns=3
-     X_obs(5)%COL_header(1)   = 'E'
+     X_obs(5)%COL_header(1)   = 'E [eV]'
      X_obs(5)%COL_header(2:3) = (/'PL  ','PL_o'/)
    endif
    !
diff --git a/src/parallel/PARALLEL_FREQS_setup.F b/src/parallel/PARALLEL_FREQS_setup.F
index cd7bf1ad74..3dcd789b7b 100644
--- a/src/parallel/PARALLEL_FREQS_setup.F
+++ b/src/parallel/PARALLEL_FREQS_setup.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_FREQS_setup(W,IND,COMM)
  !
  use frequency,   ONLY:w_samp
  use parallel_m,  ONLY:PP_indexes,yMPI_comm,PAR_IND_FREQ_ID,PAR_n_freqs,PP_indexes_reset,&
 &                      PAR_build_index,PAR_FREQS_index
  use parallel_int,ONLY:PARALLEL_index,PARALLEL_live_message
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(w_samp)    :: W
  type(PP_indexes):: IND
diff --git a/src/parallel/PARALLEL_Haydock_VEC_COMMs.F b/src/parallel/PARALLEL_Haydock_VEC_COMMs.F
index 7b9c9da865..410e9bee73 100644
--- a/src/parallel/PARALLEL_Haydock_VEC_COMMs.F
+++ b/src/parallel/PARALLEL_Haydock_VEC_COMMs.F
@@ -4,6 +4,10 @@
 !
 ! Authors (see AUTHORS file for details): MG DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_Haydock_VEC_COMMs(what)
  !
  ! Set up the mask and communicators for the Haydock part.  
@@ -11,7 +15,9 @@ subroutine PARALLEL_Haydock_VEC_COMMs(what)
  use BS,          ONLY:BS_nT_grps
  use parallel_m,  ONLY:PAR_IND_T_groups,PAR_IND_T_Haydock,&
 &                      PAR_COM_T_Haydock,PAR_COM_WORLD,CREATE_the_COMM,ncpu
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  character(*),       intent(in) :: what     
  !
diff --git a/src/parallel/PARALLEL_MATRIX_distribute.F b/src/parallel/PARALLEL_MATRIX_distribute.F
index cb4c6ccba0..cc93dd625c 100644
--- a/src/parallel/PARALLEL_MATRIX_distribute.F
+++ b/src/parallel/PARALLEL_MATRIX_distribute.F
@@ -49,4 +49,47 @@ subroutine PARALLEL_MATRIX_distribute(COMM,PAR_IND,nb,PAR_index,PAR_ID,PAR_n_ele
    PAR_n_elements=PAR_IND%n_of_elements(COMM%CPU_id+1)
  endif
  !
-end subroutine
+end subroutine PARALLEL_MATRIX_distribute
+!
+!
+subroutine PARALLEL_MATRIX_distribute_cv(COMM,PAR_IND,nb1,nb2,PAR_index,PAR_ID,PAR_n_elements)
+ !
+ use parallel_m,    ONLY:yMPI_comm,PP_indexes
+ use parallel_int,  ONLY:PARALLEL_index
+ use hamiltonian,   ONLY:B_mat_index_cv
+ !
+ implicit none
+ !
+ type(yMPI_comm)   :: COMM
+ type(PP_indexes) :: PAR_IND
+ integer          :: nb1(2),nb2(2)
+ !
+ integer, optional:: PAR_index(:,:)
+ integer, optional:: PAR_ID
+ integer, optional:: PAR_n_elements
+ !
+ ! Work space
+ !
+ integer        :: ib1,ib2,Nel
+ !
+ call PARALLEL_index(PAR_IND,(/ (nb1(2)-nb1(1)+1)*(nb2(2)-nb2(1)+1) /),COMM=COMM,CONSECUTIVE=.TRUE.,NO_EMPTIES=.TRUE.)
+ !
+ if (present(PAR_ID)) PAR_ID=COMM%CPU_id
+ !
+ if (present(PAR_index)) then
+   Nel=PAR_IND%n_of_elements(COMM%CPU_id+1)
+   PAR_index=0
+   Nel=0
+   do ib1=nb1(1),nb1(2)
+     do ib2=nb2(1),nb2(2)
+       if (PAR_IND%element_1D( B_mat_index_cv(ib1,ib2,nb1,nb2) ) ) then
+         Nel=Nel+1
+         PAR_index(ib1,ib2)=Nel
+       endif
+     enddo
+   enddo
+ endif
+ !
+ if (present(PAR_n_elements)) PAR_n_elements=PAR_IND%n_of_elements(COMM%CPU_id+1)
+ !
+end subroutine PARALLEL_MATRIX_distribute_cv
diff --git a/src/parallel/PARALLEL_SETUP_K_scheme.F b/src/parallel/PARALLEL_SETUP_K_scheme.F
index 6a5086c823..93beb30dc5 100644
--- a/src/parallel/PARALLEL_SETUP_K_scheme.F
+++ b/src/parallel/PARALLEL_SETUP_K_scheme.F
@@ -8,7 +8,8 @@
 subroutine PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
  !
  use R_lattice,    ONLY:nXkibz,nXkbz
- use parallel_m,   ONLY:COMM_copy,PAR_INDEX_copy,COMM_reset,PP_indexes_reset,PAR_K_scheme
+ use parallel_int, ONLY:PP_redux_wait
+ use parallel_m,   ONLY:COMM_copy,PAR_INDEX_copy,COMM_reset,PP_indexes_reset,PAR_K_scheme,myid
  ! DIP
  use parallel_m,  ONLY:PAR_COM_DIPk_ibz_INDEX,PAR_COM_DIPk_ibz_A2A,PAR_IND_DIPk_ibz,PAR_IND_DIPk_bz,&
  &                     PAR_DIPk_ibz_index,PAR_DIPk_bz_index,PAR_DIPk_nibz,PAR_DIPk_nbz,             &
@@ -24,7 +25,8 @@ subroutine PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
  !
  character(*),  intent(in)    :: PAR_K_scheme_kind
  !
- integer :: i1
+ integer :: i1,N
+ integer, allocatable :: tmp_vec(:)
  !
  call COMM_reset(PAR_K_scheme%COM_ibz_INDEX)
  call COMM_reset(PAR_K_scheme%COM_ibz_A2A)
@@ -32,6 +34,7 @@ subroutine PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
  call PP_indexes_reset(PAR_K_scheme%IND_bz)
  if (allocated(PAR_K_scheme%ibz_index)) deallocate(PAR_K_scheme%ibz_index)
  if (allocated(PAR_K_scheme%bz_index))  deallocate(PAR_K_scheme%bz_index)
+ if (allocated(PAR_K_scheme%bz_id))     deallocate(PAR_K_scheme%bz_id)
  !
  select case(trim(PAR_K_scheme_kind))
  case ("BZINDX")
@@ -94,8 +97,24 @@ subroutine PARALLEL_SETUP_K_scheme(PAR_K_scheme_kind)
      PAR_K_scheme%ibz_index=PAR_Xk_ibz_index
    endif
    if (allocated(PAR_Xk_bz_index)) then
-     allocate(PAR_K_scheme%bz_index(size(PAR_Xk_bz_index)))
+     N=size(PAR_Xk_bz_index)
+     allocate(PAR_K_scheme%bz_index(N))
+     allocate(PAR_K_scheme%bz_id(N))
      PAR_K_scheme%bz_index=PAR_Xk_bz_index
+     PAR_K_scheme%bz_id=0
+     do i1=1,N
+       if(PAR_Xk_bz_index(i1)==0) cycle
+       PAR_K_scheme%bz_id(i1)=myid
+     enddo
+     call PP_redux_wait(PAR_K_scheme%bz_id)
+     allocate(tmp_vec(N))
+     tmp_vec=-PAR_Xk_bz_index
+     call PP_redux_wait(tmp_vec)
+     do i1=1,N
+       if(PAR_Xk_bz_index(i1)/=0) cycle
+       PAR_K_scheme%bz_index(i1)=tmp_vec(i1)
+     enddo
+     deallocate(tmp_vec)
    endif
    PAR_K_scheme%comm_world=mpi_comm_world
  case ("Kdef")
diff --git a/src/parallel/PARALLEL_Transitions_grouping.F b/src/parallel/PARALLEL_Transitions_grouping.F
index 1f1e3e9d56..4680425938 100644
--- a/src/parallel/PARALLEL_Transitions_grouping.F
+++ b/src/parallel/PARALLEL_Transitions_grouping.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_Transitions_grouping()
  !
  use pars,          ONLY:SP
@@ -16,8 +20,9 @@ subroutine PARALLEL_Transitions_grouping()
 &                        PAR_COM_Xk_ibz_INDEX,PAR_IND_T_Haydock,PP_indexes_reset
  use R_lattice,     ONLY:nXkibz
  use vec_operate,   ONLY:sort
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
diff --git a/src/parallel/PARALLEL_WF_distribute.F b/src/parallel/PARALLEL_WF_distribute.F
index 60aa309a06..5ad7b5c9af 100644
--- a/src/parallel/PARALLEL_WF_distribute.F
+++ b/src/parallel/PARALLEL_WF_distribute.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_WF_distribute(B_index,Bp_index,K_index,B_and_K_index,&
 &                                 QP_index,PLASMA_index,Bm_index,CLEAN_UP)
  !
@@ -22,7 +26,9 @@ subroutine PARALLEL_WF_distribute(B_index,Bp_index,K_index,B_and_K_index,&
  use R_lattice,       ONLY:nkbz
  use collision_ext,   ONLY:GW_NEQ_collisions
 #endif
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(PP_indexes), optional    :: K_index
  type(PP_indexes), optional    :: B_index
diff --git a/src/parallel/PARALLEL_WF_index.F b/src/parallel/PARALLEL_WF_index.F
index 15088de036..2de44020e7 100644
--- a/src/parallel/PARALLEL_WF_index.F
+++ b/src/parallel/PARALLEL_WF_index.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_WF_index(COMM)
  !
  ! Here I used states_to_load to define the WF_linear index.
@@ -16,7 +20,9 @@ subroutine PARALLEL_WF_index(COMM)
 &                          COMM_copy,PAR_COM_density
  use wave_func,       ONLY:states_to_load
  use parallel_int,    ONLY:PP_redux_wait
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(yMPI_comm), optional :: COMM
  !
diff --git a/src/parallel/PARALLEL_add_Q_to_K_list.F b/src/parallel/PARALLEL_add_Q_to_K_list.F
index c557524e3d..212133bf8f 100644
--- a/src/parallel/PARALLEL_add_Q_to_K_list.F
+++ b/src/parallel/PARALLEL_add_Q_to_K_list.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_add_Q_to_K_list(ZONE,IND_in,IND_in_ID,IND_k_out,IND_k_out_ID,&
 &                                   IND_q,COMM,q_range,k,q)
  !
@@ -13,8 +17,9 @@ subroutine PARALLEL_add_Q_to_K_list(ZONE,IND_in,IND_in_ID,IND_k_out,IND_k_out_ID
  use QP_m,          ONLY:QP_n_states,QP_table
  use R_lattice,     ONLY:k_map
  use vec_operate,   ONLY:k_periodic_idx
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)           ::ZONE
  type(PP_indexes)       ::IND_in,IND_k_out,IND_q
diff --git a/src/parallel/PARALLEL_collisions.F b/src/parallel/PARALLEL_collisions.F
index 2a9a62573d..39501d620d 100644
--- a/src/parallel/PARALLEL_collisions.F
+++ b/src/parallel/PARALLEL_collisions.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine PARALLEL_collisions( k, COLL )
+! headers
+!
+#include<y_memory.h>
+!
+subroutine PARALLEL_collisions( k, COLL_grp )
  !
  ! SC
  !====
@@ -128,45 +132,62 @@ subroutine PARALLEL_collisions( k, COLL )
  !
  use drivers,       ONLY:l_eval_collisions
  use parallel_int,  ONLY:PARALLEL_live_message,PARALLEL_check_phase_space
- use collision_ext, ONLY:COLLISIONS_group,COLL_bands
+ use collision_ext, ONLY:COLLISIONS_group,COLL_bands,COLLISIONS_CV_only
  use parallel_m,    ONLY:PAR_IND_Xk_ibz,PAR_IND_B_mat,PAR_IND_G_k,PAR_IND_Q_bz,PAR_IND_QP,&
 &                        l_par_RT,l_par_SE,l_par_SX,l_par_SC,l_par_NL
- use hamiltonian,   ONLY:B_mat_index
+ use hamiltonian,   ONLY:B_mat_index,B_mat_index_cv
+ use electrons,     ONLY:n_met_bands,n_full_bands
  use R_lattice,     ONLY:bz_samp,qindx_S,nqbz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)          :: k
- type(COLLISIONS_group) :: COLL
+ type(COLLISIONS_group) :: COLL_grp
  !
  ! Work Space
  !
- integer :: i_coll,i_k,ib1,ib2,NC_tot,i_q,i_p,i_p_bz
+ integer :: i_coll,i_k,ib1,ib2,NC_tot,i_q,i_p,i_p_bz, &
+ &          blim1(2),blim2(2),nb_cv,i1
  !
- if ( COLL%N == 0 ) return
+ if ( COLL_grp%N == 0 ) return
  !
- YAMBO_ALLOC(COLL%PAR_map,(COLL%N))
- YAMBO_ALLOC(COLL%PAR_IND%element_1D,(COLL%N))
+ YAMBO_ALLOC(COLL_grp%PAR_map,(COLL_grp%N))
+ YAMBO_ALLOC(COLL_grp%PAR_IND%element_1D,(COLL_grp%N))
  !
  if (.not.allocated(PAR_IND_G_k%element_1D).and.l_eval_collisions ) then
    YAMBO_ALLOC(PAR_IND_G_k%element_1D,(k%nibz))
    PAR_IND_G_k%element_1D=.FALSE.
  endif
- COLL%PAR_IND%element_1D=.FALSE.
- COLL%PAR_N=0
- COLL%PAR_map=0
+ COLL_grp%PAR_IND%element_1D=.FALSE.
+ COLL_grp%PAR_N=0
+ COLL_grp%PAR_map=0
  NC_tot=0
  !
- do i_coll=1,COLL%N
+ if (COLLISIONS_CV_only) then
+   blim1=(/COLL_bands(1),maxval(n_met_bands)/)
+   blim2=(/minval(n_full_bands),COLL_bands(2)/)
+   nb_cv=(blim1(2)-blim1(1)+1)*(blim2(2)-blim2(1)+1)
+ endif
+ !
+ do i_coll=1,COLL_grp%N
    !
-   ib1 = COLL%state(i_coll,1)
-   ib2 = COLL%state(i_coll,2)
-   i_k = COLL%state(i_coll,3)
+   ib1 = COLL_grp%state(i_coll,1)
+   ib2 = COLL_grp%state(i_coll,2)
+   i_k = COLL_grp%state(i_coll,3)
    NC_tot=NC_tot+1
    !
    if (l_par_RT) then
      if (.not.PAR_IND_Xk_ibz%element_1D(i_k)) cycle
-     if (.not.PAR_IND_B_mat%element_1D( B_mat_index(ib1,ib2,COLL_bands) ) ) cycle
+     if (COLLISIONS_CV_only) then
+       if (ib1> blim1(2) .and. ib2> blim1(2) ) cycle
+       if (ib1< blim1(1) .and. ib2< blim2(1) ) cycle
+       if (ib1<ib2) i1=B_mat_index_cv(ib1,ib2,blim1,blim2)
+       if (ib1>ib2) i1=B_mat_index_cv(ib2,ib1,blim2,blim1)!+nb_cv
+     else
+       i1=B_mat_index(ib1,ib2,COLL_bands)
+     endif
+     if (.not.PAR_IND_B_mat%element_1D( i1 ) ) cycle
    endif
    !
    if (l_par_NL) then
@@ -178,9 +199,9 @@ subroutine PARALLEL_collisions( k, COLL )
      if (.not.PAR_IND_QP%element_1D(i_coll)) cycle
    endif
    !
-   COLL%PAR_N=COLL%PAR_N+1
-   COLL%PAR_map(i_coll)=COLL%PAR_N
-   COLL%PAR_IND%element_1D(i_coll)=.TRUE.
+   COLL_grp%PAR_N=COLL_grp%PAR_N+1
+   COLL_grp%PAR_map(i_coll)=COLL_grp%PAR_N
+   COLL_grp%PAR_IND%element_1D(i_coll)=.TRUE.
    !
    if (l_eval_collisions) then
      !
@@ -203,22 +224,23 @@ subroutine PARALLEL_collisions( k, COLL )
    !
  enddo
  !
- call PARALLEL_live_message(trim(COLL%name)//" Number of Collisions",LOADED=COLL%PAR_N,TOTAL=NC_tot)
+ call PARALLEL_live_message(trim(COLL_grp%name)//" Number of Collisions",LOADED=COLL_grp%PAR_N,TOTAL=NC_tot)
  !
- call PARALLEL_check_phase_space(COLL%PAR_N,MSG=trim(COLL%name)//" COLLISIONS")
+ call PARALLEL_check_phase_space(COLL_grp%PAR_N,MSG=trim(COLL_grp%name)//" COLLISIONS")
  !
 end subroutine PARALLEL_collisions
 !
 !
-subroutine PARALLEL_collisions_reset(COLL)
+subroutine PARALLEL_collisions_reset(COLL_grp)
  !
  use collision_ext, ONLY:COLLISIONS_group
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
- type(COLLISIONS_group), intent(inout) :: COLL
+ type(COLLISIONS_group), intent(inout) :: COLL_grp
  !
- YAMBO_FREE(COLL%PAR_map)
- YAMBO_FREE(COLL%PAR_IND%element_1D)
+ YAMBO_FREE(COLL_grp%PAR_map)
+ YAMBO_FREE(COLL_grp%PAR_IND%element_1D)
  !
 end subroutine PARALLEL_collisions_reset
diff --git a/src/parallel/PARALLEL_distribute_BZk_using_IBZk.F b/src/parallel/PARALLEL_distribute_BZk_using_IBZk.F
index a6339eece6..33ab88c721 100644
--- a/src/parallel/PARALLEL_distribute_BZk_using_IBZk.F
+++ b/src/parallel/PARALLEL_distribute_BZk_using_IBZk.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine PARALLEL_distribute_BZk_using_IBZk(COMM,K,IND_ibz,IBZ_id,IND_out,BZ_id,BZ_index,PAR_n_bz)
  !
  use R_lattice,     ONLY:bz_samp
  use parallel_m,    ONLY:PP_indexes,yMPI_comm
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  type(yMPI_comm)   :: COMM
  type(PP_indexes) :: IND_ibz,IND_out
diff --git a/src/parallel/PARALLEL_global_Non_Linear.F b/src/parallel/PARALLEL_global_Non_Linear.F
index e18fd65e70..fbf6d1c135 100644
--- a/src/parallel/PARALLEL_global_Non_Linear.F
+++ b/src/parallel/PARALLEL_global_Non_Linear.F
@@ -11,6 +11,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q)
  use R_lattice,     ONLY:bz_samp
  use collision_ext, ONLY:HXC_collisions
  use nl_optics,     ONLY:NL_bands
+ use RT_control,    ONLY:SAVE_G_history
  use IO_int,        ONLY:IO_and_Messaging_switch
  use parallel_int,  ONLY:PARALLEL_index,PARALLEL_assign_chains_and_COMMs,PARALLEL_live_message
  use parallel_m,    ONLY:ncpu,COMM_copy,PAR_build_index,PAR_INDEX_copy
@@ -23,7 +24,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q)
  ! INDEX
  use parallel_m,    ONLY:PAR_freqs_index,PAR_Xk_ibz_index,PAR_Xk_bz_index,PAR_QP_index
  ! DIMENSIONS
- use parallel_m,    ONLY:PAR_n_freqs,PAR_nQP,PAR_Xk_nbz,PAR_Xk_nibz
+ use parallel_m,    ONLY:PAR_n_freqs,PAR_nQP,PAR_Xk_nbz,PAR_Xk_nibz,PAR_G_k_range
  ! ID's
  use parallel_m,    ONLY:PAR_IND_freqs_ID,PAR_IND_Q_ibz_ID,PAR_IND_WF_k_ID,&
 &                        PAR_IND_Xk_bz_ID,PAR_IND_Xk_ibz_ID
@@ -38,7 +39,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q)
  !
  ! Work space
  !
- integer      ::  i_qp,ib1,ib2,i_k
+ integer      ::  i_qp,ib1,ib2,i_k,nk_G_IO
  !
  CALL PARALLEL_structure(2,(/"w ","k "/))
  !
@@ -67,7 +68,7 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q)
  !
  ! K-points
  !
- call PARALLEL_index(PAR_IND_Xk_ibz,(/Xk%nibz/),COMM=PAR_COM_Xk_ibz_INDEX)
+ call PARALLEL_index(PAR_IND_Xk_ibz,(/Xk%nibz/),CONSECUTIVE=SAVE_G_history,COMM=PAR_COM_Xk_ibz_INDEX)
  PAR_IND_Xk_ibz_ID=PAR_COM_Xk_ibz_INDEX%CPU_id
  PAR_Xk_nibz=PAR_IND_Xk_ibz%n_of_elements(PAR_IND_Xk_ibz_ID+1)
  !
@@ -82,7 +83,15 @@ subroutine PARALLEL_global_Non_Linear(E,Xk,q)
  call PARALLEL_distribute_BZk_using_IBZk(PAR_COM_Xk_ibz_INDEX,Xk,PAR_IND_Xk_ibz,PAR_IND_Xk_ibz_ID,&
 &                                                       PAR_IND_Xk_bz, PAR_IND_Xk_bz_ID,&
 &                                                       PAR_Xk_bz_index,PAR_Xk_nbz)
-
+ !
+ nk_G_IO=0
+ PAR_G_k_range=0
+ do i_k = 1, Xk%nibz
+   if (.not.PAR_IND_Xk_ibz%element_1D(i_k) ) cycle
+   if (nk_G_IO==0) PAR_G_k_range=i_k
+   if (nk_G_IO >0) PAR_G_k_range(2)=i_k
+   nk_G_IO=nk_G_IO+1
+ enddo
  !
  ! Overlap indeces for WF distribution
  !
diff --git a/src/parallel/PARALLEL_global_Oscillators.F b/src/parallel/PARALLEL_global_Oscillators.F
index 99c0ab7ff1..8daba10839 100644
--- a/src/parallel/PARALLEL_global_Oscillators.F
+++ b/src/parallel/PARALLEL_global_Oscillators.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA MG DS
 !
+! headers
+!
+#include<y_memory.h>
+! 
 subroutine PARALLEL_global_Oscillators(E,Xk,q)
  !
  use electrons,     ONLY:levels
@@ -27,8 +31,9 @@ subroutine PARALLEL_global_Oscillators(E,Xk,q)
 &                        PAR_IND_Xk_bz_ID,PAR_IND_Xk_ibz_ID
  use openmp,        ONLY:n_threads_OSCLL,OPENMP_set_threads
  use QP_m,          ONLY:QP_n_states,QP_table
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)         :: E
  type(bz_samp)        :: Xk,q
diff --git a/src/parallel/PARALLEL_global_Real_Time.F b/src/parallel/PARALLEL_global_Real_Time.F
index ee4a31cd95..2321a58dea 100644
--- a/src/parallel/PARALLEL_global_Real_Time.F
+++ b/src/parallel/PARALLEL_global_Real_Time.F
@@ -5,18 +5,23 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
  !
  use drivers,       ONLY:l_eval_collisions
- use electrons,     ONLY:levels
+ use electrons,     ONLY:levels,n_met_bands,n_full_bands
  use R_lattice,     ONLY:bz_samp
  use IO_int,        ONLY:IO_and_Messaging_switch
  use openmp,        ONLY:n_threads_RT,OPENMP_set_threads
  use parallel_int,  ONLY:PARALLEL_index,PARALLEL_assign_chains_and_COMMs,&
-&                        PARALLEL_live_message,PARALLEL_MATRIX_distribute
- use collision_ext, ONLY:COH_collisions,HXC_collisions,GW_NEQ_collisions
+&                        PARALLEL_live_message,PARALLEL_MATRIX_distribute,&
+&                        PARALLEL_MATRIX_distribute_cv
+ use collision_ext, ONLY:COH_collisions,HXC_collisions,GW_NEQ_collisions,COLLISIONS_CV_only
  use matrix_operate,ONLY:UP_matrix_index
- use hamiltonian,   ONLY:B_mat_index
+ use hamiltonian,   ONLY:B_mat_index,B_mat_index_cv
  use real_time,     ONLY:RT_bands,l_RT_uses_E_FineGd
  use parallel_m,    ONLY:ncpu,HEAD_QP_cpu,HEAD_k_cpu,HEAD_q_cpu,COMM_copy,PAR_INDEX_copy,&
 &                        PAR_build_index,PP_indexes_reset
@@ -33,8 +38,9 @@ subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
  ! ID's
  use parallel_m,    ONLY:PAR_IND_Xk_ibz_ID,PAR_IND_Xk_ibz_ID,PAR_IND_Xk_ibz_ID,&
 &                        PAR_IND_DIPk_ibz_ID,PAR_IND_Q_bz_ID,PAR_IND_WF_b_ID
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)         :: E
  type(bz_samp)        :: Xk,q
@@ -42,7 +48,8 @@ subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
  !
  ! Work space
  !
- integer              :: ib1,ib2,ik,nk_G_IO,nk_SERIAL,i_shift
+ integer              :: ib1,ib2,ik,nk_G_IO,nk_SERIAL,i_shift,&
+ &                       blim1(2),blim2(2)
  logical              :: CONSECUTIVE
  !
  CALL PARALLEL_structure(4,(/"k ","b ","q ","qp"/))
@@ -89,12 +96,12 @@ subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
  call PAR_INDEX_copy(PAR_IND_Xk_ibz,PAR_IND_WF_k)
  PAR_IND_WF_b_ID=PAR_COM_WF_b_INDEX%CPU_id
  !
- call PARALLEL_index(PAR_IND_B_mat_ordered,(/ UP_matrix_index(1,RT_bands(2)-RT_bands(1)+1)-1 /),&
+ call PARALLEL_index(PAR_IND_B_mat_ordered,(/ UP_matrix_index(1,RT_bands(2)-RT_bands(1)+1) /),&
 &                    COMM=PAR_COM_WF_b_INDEX,CONSECUTIVE=.TRUE.,NO_EMPTIES=.TRUE.)
  !
  call PARALLEL_live_message("Bands Matrix (ordered)",ENVIRONMENT="Real_Time",&
 &                           LOADED=PAR_IND_B_mat_ordered%n_of_elements(PAR_IND_WF_b_ID+1),&
-&                           TOTAL=UP_matrix_index(1,RT_bands(2)-RT_bands(1)+1)-1,&
+&                           TOTAL=UP_matrix_index(1,RT_bands(2)-RT_bands(1)+1),&
 &                           NCPU=PAR_COM_WF_b_INDEX%n_CPU)
  !
  YAMBO_ALLOC(PAR_IND_WF_b%n_of_elements,(PAR_COM_WF_b_INDEX%n_CPU))
@@ -104,7 +111,7 @@ subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
  !
  do ib1=RT_bands(1),RT_bands(2)
    do ib2=ib1,RT_bands(2)
-     if (PAR_IND_B_mat_ordered%element_1D(  UP_matrix_index(ib1-RT_bands(1)+1,ib2-RT_bands(1)+1)-1 )) then
+     if (PAR_IND_B_mat_ordered%element_1D(  UP_matrix_index(ib1-RT_bands(1)+1,ib2-RT_bands(1)+1) )) then
        if (.not.PAR_IND_WF_b%element_1D(ib1)) then
          PAR_IND_WF_b%element_1D(ib1)=.TRUE.
          PAR_IND_WF_b%n_of_elements(PAR_IND_WF_b_ID+1)=PAR_IND_WF_b%n_of_elements(PAR_IND_WF_b_ID+1)+1
@@ -148,7 +155,13 @@ subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
    !
  endif
  !
- call PARALLEL_MATRIX_distribute(PAR_COM_G_b_INDEX,PAR_IND_B_mat,RT_bands)
+ if (COLLISIONS_CV_only) then
+   blim1=(/RT_bands(1),maxval(n_met_bands)/)
+   blim2=(/minval(n_full_bands),RT_bands(2)/)
+   call PARALLEL_MATRIX_distribute_cv(PAR_COM_G_b_INDEX,PAR_IND_B_mat,blim1,blim2)
+ else
+   call PARALLEL_MATRIX_distribute(PAR_COM_G_b_INDEX,PAR_IND_B_mat,RT_bands)
+ endif
  !
  call PAR_INDEX_copy(PAR_IND_Xk_ibz,PAR_IND_DIPk_ibz)
  PAR_IND_DIPk_ibz_ID=PAR_IND_Xk_ibz_ID
@@ -156,14 +169,24 @@ subroutine PARALLEL_global_Real_Time(E,Xk,q,X_type)
  call PAR_build_index(PAR_IND_DIPk_ibz,Xk%nibz,PAR_DIPk_ibz_index,PAR_DIPk_nibz)
  YAMBO_ALLOC(PAR_Xk_ibz_index,(Xk%nibz))
  call PAR_build_index(PAR_IND_Xk_ibz,Xk%nibz,PAR_Xk_ibz_index,PAR_Xk_nibz)
- do ib1=RT_bands(1),RT_bands(2)
-   do ib2=RT_bands(1),RT_bands(2)
-     if (PAR_IND_B_mat%element_1D( B_mat_index(ib1,ib2,RT_bands) ) ) then
-        PAR_IND_WF_b%element_1D(ib1)=.TRUE.
-        PAR_IND_WF_b%element_1D(ib2)=.TRUE.
-     endif
+ !
+ if (COLLISIONS_CV_only) then
+   do ib1=blim1(1),blim1(2)
+     do ib2=blim2(1),blim2(2)
+       if (.not.PAR_IND_B_mat%element_1D( B_mat_index_cv(ib1,ib2,blim1,blim2) ) ) cycle
+       PAR_IND_WF_b%element_1D(ib1)=.TRUE.
+       PAR_IND_WF_b%element_1D(ib2)=.TRUE.
+     enddo
    enddo
- enddo
+ else
+   do ib1=RT_bands(1),RT_bands(2)
+     do ib2=RT_bands(1),RT_bands(2)
+       if (.not.PAR_IND_B_mat%element_1D( B_mat_index(ib1,ib2,RT_bands) ) ) cycle
+       PAR_IND_WF_b%element_1D(ib1)=.TRUE.
+       PAR_IND_WF_b%element_1D(ib2)=.TRUE.
+     enddo
+   enddo
+ endif
  !
  nk_G_IO=0
  PAR_G_k_range=0
diff --git a/src/parallel/PARALLEL_global_Response_G.F b/src/parallel/PARALLEL_global_Response_G.F
index 3e13f6a888..5e61afd875 100644
--- a/src/parallel/PARALLEL_global_Response_G.F
+++ b/src/parallel/PARALLEL_global_Response_G.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_Response_G(E,Xk,q,ENVIRONMENT,X_type)
  !
  use drivers,       ONLY:l_life
@@ -30,8 +34,9 @@ subroutine PARALLEL_global_Response_G(E,Xk,q,ENVIRONMENT,X_type)
  use parallel_m,    ONLY:PAR_nQ_ibz,PAR_Xk_nbz,PAR_Xk_nibz
  ! ID's
  use parallel_m,    ONLY:PAR_IND_Xk_ibz_ID,PAR_IND_Xk_bz_ID,PAR_IND_Q_ibz_ID
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)         :: E
  type(bz_samp)        :: Xk,q
diff --git a/src/parallel/PARALLEL_global_Response_T.F b/src/parallel/PARALLEL_global_Response_T.F
index 576920ac36..7a2e6cf17a 100644
--- a/src/parallel/PARALLEL_global_Response_T.F
+++ b/src/parallel/PARALLEL_global_Response_T.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_Response_T_base(Xk)
  !
  use R_lattice,     ONLY:bz_samp,nXkbz,nXkibz
@@ -24,8 +28,9 @@ subroutine PARALLEL_global_Response_T_base(Xk)
  use parallel_m,    ONLY:PAR_Kk_nibz,PAR_Xk_nbz,PAR_Xk_nibz
  ! ID's
  use parallel_m,    ONLY:PAR_IND_Kk_ibz_ID,PAR_IND_Xk_ibz_ID,PAR_IND_Xk_bz_ID
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)        :: Xk
  !
diff --git a/src/parallel/PARALLEL_global_Response_T_transitions.F b/src/parallel/PARALLEL_global_Response_T_transitions.F
index 329565d15d..a683f482ab 100644
--- a/src/parallel/PARALLEL_global_Response_T_transitions.F
+++ b/src/parallel/PARALLEL_global_Response_T_transitions.F
@@ -5,10 +5,14 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_Response_T_transitions(Xk)
  !
  use R_lattice,     ONLY:bz_samp,nXkibz
- use BS,            ONLY:BS_nT_at_k,BS_nT_grps,BS_n_eh_spaces
+ use BS,            ONLY:BS_nT_at_k,BS_nT_grps,BS_n_eh_spaces,l_BSE_kernel_full
  use IO_int,        ONLY:IO_and_Messaging_switch
  use parallel_int,  ONLY:PARALLEL_index,PARALLEL_live_message
  use parallel_m,    ONLY:PAR_n_c_bands,PAR_n_v_bands,PP_indexes_reset,master_cpu
@@ -22,8 +26,9 @@ subroutine PARALLEL_global_Response_T_transitions(Xk)
  use parallel_m,    ONLY:PAR_BS_nT_col_grps
  ! ID's
  use parallel_m,    ONLY:PAR_IND_CON_BANDS_DIP_ID,PAR_IND_VAL_BANDS_DIP_ID
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)        :: Xk
  !
@@ -70,7 +75,7 @@ subroutine PARALLEL_global_Response_T_transitions(Xk)
  call PP_indexes_reset(PAR_IND_T_ordered)
  !
  call PARALLEL_index(PAR_IND_T_ordered,(/BS_nT_grps,BS_nT_grps/),COMM=PAR_COM_T_INDEX,&
-&                    MASK=PAR_IND_T_groups%element_1D,ORDERED=.TRUE.,NO_EMPTIES=.FALSE.)
+&                    MASK=PAR_IND_T_groups%element_1D,ORDERED=.not.l_BSE_kernel_full,NO_EMPTIES=.FALSE.)
  !
  call PARALLEL_live_message("(e/h)->(e/h)' Transitions (ordered)",ENVIRONMENT="Response_T_space",&
 &                           LOADED=PAR_IND_T_ordered%n_of_elements(PAR_COM_T_INDEX%CPU_id+1),&
diff --git a/src/parallel/PARALLEL_global_Self_Energy.F b/src/parallel/PARALLEL_global_Self_Energy.F
index 78e4b76ef7..f565cf2f79 100644
--- a/src/parallel/PARALLEL_global_Self_Energy.F
+++ b/src/parallel/PARALLEL_global_Self_Energy.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_Self_Energy(E,Xk,q,ENVIRONMENT)
  !
  use electrons,     ONLY:levels
@@ -51,8 +55,9 @@ subroutine PARALLEL_global_Self_Energy(E,Xk,q,ENVIRONMENT)
  use drivers,       ONLY:l_elph_corr
  use ELPH,          ONLY:elph_use_q_grid
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)         :: E
  type(bz_samp)        :: Xk,q
diff --git a/src/parallel/PARALLEL_global_defaults.F b/src/parallel/PARALLEL_global_defaults.F
index e8d6d49b08..3ec97a5de9 100644
--- a/src/parallel/PARALLEL_global_defaults.F
+++ b/src/parallel/PARALLEL_global_defaults.F
@@ -35,7 +35,7 @@ subroutine PARALLEL_global_defaults(ENVIRONMENT)
  !
  if (ncpu>=8 ) n_par_la=4
  if (ncpu>=32) n_par_la=16
- if (ncpu>=64) n_par_la=36
+!if (ncpu>=64) n_par_la=36
  !
 #endif
  !
diff --git a/src/parallel/PARALLEL_global_dimensions.F b/src/parallel/PARALLEL_global_dimensions.F
index a8eb0777bc..4aecdd625b 100644
--- a/src/parallel/PARALLEL_global_dimensions.F
+++ b/src/parallel/PARALLEL_global_dimensions.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_dimensions(E,Xk,q,ENVIRONMENT)
  !
  use com,           ONLY:secnm
@@ -35,7 +39,9 @@ subroutine PARALLEL_global_dimensions(E,Xk,q,ENVIRONMENT)
  use electrons,     ONLY:levels
  use R_lattice,     ONLY:bz_samp,nqibz,nqbz
  use QP_m,          ONLY:QP_n_states,QP_nb,QP_n_G_bands
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(levels)         :: E
  type(bz_samp)        :: Xk,q
diff --git a/src/parallel/PARALLEL_global_indexes.F b/src/parallel/PARALLEL_global_indexes.F
index b8bdbd78e9..5c5c0053b7 100644
--- a/src/parallel/PARALLEL_global_indexes.F
+++ b/src/parallel/PARALLEL_global_indexes.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre_defined)
  !
  ! LOGICALS
@@ -26,8 +30,9 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre
  use electrons,     ONLY:levels
  use R_lattice,     ONLY:bz_samp
  use pars,          ONLY:SP
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)               ::E
  type(bz_samp)              ::Xk,q
@@ -109,7 +114,7 @@ subroutine PARALLEL_global_indexes(E,Xk,q,ENVIRONMENT,X,Dip,RESET,Dip_limits_pre
      Do_dip_limits=.not.Dip_limits_pre_defined
    endif
    if (Do_dip_limits) then
-     if (Dip%bands_ordered.or.Dip%Energy_treshold<0._SP) then
+     if (Dip%bands_ordered.or.Dip%Energy_threshold<0._SP) then
        Dip%ib_lim(1)=maxval(E%nbm)
        Dip%ib_lim(2)=minval(E%nbf)+1
        if (l_X_terminator) Dip%ib_lim(2)=Dip%ib(1)
diff --git a/src/parallel/PARALLEL_global_reset.F b/src/parallel/PARALLEL_global_reset.F
index f2be43d976..8bdfe661e1 100644
--- a/src/parallel/PARALLEL_global_reset.F
+++ b/src/parallel/PARALLEL_global_reset.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine PARALLEL_global_reset(ENVIRONMENT,X_type)
  !
  use R_lattice,     ONLY:bz_samp
@@ -54,8 +58,9 @@ subroutine PARALLEL_global_reset(ENVIRONMENT,X_type)
 #if defined _SCALAPACK
  use SLK_m,         ONLY:SLK_COM_INDEX,SLK_COM_A2A
 #endif
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  type(bz_samp)        :: Xk
  character(*)         :: ENVIRONMENT
diff --git a/src/parallel/PARALLEL_index.F b/src/parallel/PARALLEL_index.F
index b0718f9c79..f51cfabdb9 100644
--- a/src/parallel/PARALLEL_index.F
+++ b/src/parallel/PARALLEL_index.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_index(px,uplim,low_range,COMM,CONSECUTIVE,ORDERED,NO_EMPTIES,MASK)
  !
  use pars,        ONLY:SP
@@ -13,8 +17,9 @@ subroutine PARALLEL_index(px,uplim,low_range,COMM,CONSECUTIVE,ORDERED,NO_EMPTIES
  use parallel_int,ONLY:PP_redux_wait
  use stderr,      ONLY:intc
  use vec_operate, ONLY:sort
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(PP_indexes)        ::px
  integer                 ::uplim(:)
diff --git a/src/parallel/PARALLEL_scheme_initialize.F b/src/parallel/PARALLEL_scheme_initialize.F
index 8b6100a192..d2d4bf1eed 100644
--- a/src/parallel/PARALLEL_scheme_initialize.F
+++ b/src/parallel/PARALLEL_scheme_initialize.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PARALLEL_scheme_initialize(WHAT,ENVIRONMENT,PARALL_PARs,N,TABLE,MATRIX)
  !
  use parallel_m,   ONLY:PAR_scheme
  use parallel_int, ONLY:PARALLEL_live_message,PARALLEL_index,PARALLEL_MATRIX_distribute
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)               :: WHAT
  character(*)               :: ENVIRONMENT
diff --git a/src/parser/PARSER.c b/src/parser/PARSER.c
index 7e00c0eb6a..776e4d0378 100644
--- a/src/parser/PARSER.c
+++ b/src/parser/PARSER.c
@@ -17,303 +17,391 @@
  02111-1307, USA.
 */
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <ctype.h>
 #include <math.h>
 #include <parser.h>
-#include <symbols.h>
+#include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
+#include <symbols.h>
 
 static FILE *fout;
 
 static char *str_trim(char *in)
 {
-	char *c, *s = in;
-
-	for(c=s; isspace(*c); c++);
-	for(; *c != '\0'; *s++=*c++);
-	if (s != in)
-	{
-		for(s--; isspace(*s); s--);
-		*(s+1) = '\0';
-	}
-	else *s = '\0';
-	return in;
+    char *c, *s = in;
+
+    for (c = s; isspace(*c); c++);
+    for (; *c != '\0'; *s++ = *c++);
+    if (s != in)
+    {
+        for (s--; isspace(*s); s--);
+        *(s + 1) = '\0';
+    }
+    else
+    {
+        *s = '\0';
+    }
+    return in;
 }
 
 static int parse_get_line(FILE *f, char **s, int *length)
 {
-	int i, c;
-
-	i = 0;
-	do{
-		c = getc(f);
-		if(c == '#') /* skip comments */
-			while(c!=EOF && c!='\n') c = getc(f);
-		else if(c != EOF){
-			if (i == *length - 1){
-				*length *= 2;
-				*s = (char *)realloc(*s, *length + 1);
-			}
-			(*s)[i++] = c;
-		}
-	}while(c != EOF && c != '\n');
-	(*s)[i] = '\0';
-	
-	str_trim(*s);
-	return c;
+    int i, c;
+
+    i = 0;
+    do
+    {
+        c = getc(f);
+        if (c == '#') /* skip comments */
+        {
+            while (c != EOF && c != '\n')
+            {
+                c = getc(f);
+            }
+        }
+        else if (c != EOF)
+        {
+            if (i == *length - 1)
+            {
+                *length *= 2;
+                *s = (char *)realloc(*s, *length + 1);
+            }
+            (*s)[i++] = c;
+        }
+    } while (c != EOF && c != '\n');
+    (*s)[i] = '\0';
+
+    str_trim(*s);
+    return c;
 }
 
 int parse_init(char *file_in, char *file_out)
 {
-	FILE *f;
-	char *s;
-	int c, length = 0;
-
-	sym_init_table();
-
-	if(strcmp(file_in, "-") == 0)
-		f = stdin;
-	else
-		f = fopen(file_in, "r");
-
-	if(strcmp(file_out, "-") == 0)
-		fout = stdout;
-	else{
-		fout = fopen(file_out, "w");
-		setvbuf(fout, NULL, _IONBF, 0);
-	}
-	/* fprintf(fout, "\n => Parser started\n\n");*/
-
-	if(!f)
-		return -1; /* error opening file */
-
-	/* we now read in the file and parse */
-	length = 40;
-	s = (char *)malloc(length + 1);
-	do{
-		c = parse_get_line(f, &s, &length);
-		if(*s){
-			if(*s == '%'){ /* we have a block */
-				*s = ' ';
-				str_trim(s);
-				if(getsym(s) != NULL){ /* error */
-					/*fprintf(stderr, "%s \"%s\" %s", "Block", s, "already defined");*/
-					do{ /* skip block */
-						c = parse_get_line(f, &s, &length);
-					}while(c != EOF && *s != '%');
-				}else{ /* parse block */
-					symrec *rec;
-					rec = putsym(s, S_BLOCK);
-					rec->value.block = (sym_block *)malloc(sizeof(sym_block));
-					rec->value.block->n = 0;
-					rec->value.block->lines = NULL;
-					do{
-						c = parse_get_line(f, &s, &length);
-						if(*s && *s != '%'){
-							char *s1, *tok;
-							int l, col;
-
-							l = rec->value.block->n;
-							rec->value.block->n++;
-							rec->value.block->lines = (sym_block_line *)
-								realloc((void *)rec->value.block->lines, sizeof(sym_block_line)*(l+1));
-							rec->value.block->lines[l].n = 0;
-							rec->value.block->lines[l].fields = NULL;
-
-							/* parse columns */
-							for(s1 = s; (tok = strtok(s1, "|")) != NULL; s1 = NULL){
-								char *tok2 = strdup(tok);
-								str_trim(tok2);
-
-								col = rec->value.block->lines[l].n;
-								rec->value.block->lines[l].n++;
-								rec->value.block->lines[l].fields = (char **)
-									realloc((void *)rec->value.block->lines[l].fields, sizeof(char *)*(col+1));
-								rec->value.block->lines[l].fields[col] = tok2;
-							}
-						}
-					}while(c != EOF && *s != '%');
-				}
-			}else{ /* we can parse it np */
-				parse_result c;
-				parse_exp(s, &c);
-			}
-		}
-	}while(c != EOF);
-
-	free(s);
-	if(f != stdin)
-		fclose(f);
-
-	sym_clear_reserved();
-
-	return 0;
+    FILE *f;
+    char *s;
+    int c, length = 0;
+
+    sym_init_table();
+
+    if (strcmp(file_in, "-") == 0)
+    {
+        f = stdin;
+    }
+    else
+    {
+        f = fopen(file_in, "r");
+    }
+
+    if (strcmp(file_out, "-") == 0)
+    {
+        fout = stdout;
+    }
+    else
+    {
+        fout = fopen(file_out, "w");
+        setvbuf(fout, NULL, _IONBF, 0);
+    }
+    /* fprintf(fout, "\n => Parser started\n\n");*/
+
+    if (!f)
+    {
+        return -1; /* error opening file */
+    }
+
+    /* we now read in the file and parse */
+    length = 40;
+    s = (char *)malloc(length + 1);
+    do
+    {
+        c = parse_get_line(f, &s, &length);
+        if (*s)
+        {
+            if (*s == '%')
+            { /* we have a block */
+                *s = ' ';
+                str_trim(s);
+                if (getsym(s) != NULL)
+                { /* error */
+                    /*fprintf(stderr, "%s \"%s\" %s", "Block", s, "already
+                     * defined");*/
+                    do
+                    { /* skip block */
+                        c = parse_get_line(f, &s, &length);
+                    } while (c != EOF && *s != '%');
+                }
+                else
+                { /* parse block */
+                    symrec *rec;
+                    rec = putsym(s, S_BLOCK);
+                    rec->value.block = (sym_block *)malloc(sizeof(sym_block));
+                    rec->value.block->n = 0;
+                    rec->value.block->lines = NULL;
+                    do
+                    {
+                        c = parse_get_line(f, &s, &length);
+                        if (*s && *s != '%')
+                        {
+                            char *s1, *tok;
+                            int l, col;
+
+                            l = rec->value.block->n;
+                            rec->value.block->n++;
+                            rec->value.block->lines = (sym_block_line *)realloc(
+                                (void *)rec->value.block->lines,
+                                sizeof(sym_block_line) * (l + 1));
+                            rec->value.block->lines[l].n = 0;
+                            rec->value.block->lines[l].fields = NULL;
+
+                            /* parse columns */
+                            for (s1 = s; (tok = strtok(s1, "|")) != NULL;
+                                 s1 = NULL)
+                            {
+                                char *tok2 = strdup(tok);
+                                str_trim(tok2);
+
+                                col = rec->value.block->lines[l].n;
+                                rec->value.block->lines[l].n++;
+                                rec->value.block->lines[l]
+                                    .fields = (char **)realloc(
+                                    (void *)rec->value.block->lines[l].fields,
+                                    sizeof(char *) * (col + 1));
+                                rec->value.block->lines[l].fields[col] = tok2;
+                            }
+                        }
+                    } while (c != EOF && *s != '%');
+                }
+            }
+            else
+            { /* we can parse it np */
+                parse_result c;
+                parse_exp(s, &c);
+            }
+        }
+    } while (c != EOF);
+
+    free(s);
+    if (f != stdin)
+    {
+        fclose(f);
+    }
+
+    sym_clear_reserved();
+
+    return 0;
 }
 
 void parse_end()
 {
-	sym_end_table();
-	/* fprintf(fout, "\n => Parser ended"); */
-	if(fout != stdout)
-		fclose(fout);
+    sym_end_table();
+    /* fprintf(fout, "\n => Parser ended"); */
+    if (fout != stdout)
+    {
+        fclose(fout);
+    }
 }
 
 int parse_isdef(char *name)
 {
-	if(getsym(name) == NULL)
-		return 0;
-	return 1;
+    if (getsym(name) == NULL)
+    {
+        return 0;
+    }
+    return 1;
 }
 
 int parse_int(char *name, int def)
 {
-	symrec *ptr;
-	int ret;
-
-	ptr = getsym(name);	
-	if(ptr && ptr->type == S_CMPLX){
-		ret = (int)rint(GSL_REAL(ptr->value.c));
-		/*fprintf(fout,"    [parser] %s = %d\n", name, ret);*/
-	}else{
-		ret = def;
-		/*fprintf(fout, "    [parser] %s = %d\t[default value]\n", name, ret);*/
-	}
-	return ret;
+    symrec *ptr;
+    int ret;
+
+    ptr = getsym(name);
+    if (ptr && ptr->type == S_CMPLX)
+    {
+        ret = (int)rint(GSL_REAL(ptr->value.c));
+        /*fprintf(fout,"    [parser] %s = %d\n", name, ret);*/
+    }
+    else
+    {
+        ret = def;
+        /*fprintf(fout, "    [parser] %s = %d\t[default value]\n", name, ret);*/
+    }
+    return ret;
 }
 
 double parse_double(char *name, double def)
 {
-	symrec *ptr;
-	double ret;
-
-	ptr = getsym(name);	
-	if(ptr && ptr->type == S_CMPLX){
-		ret = GSL_REAL(ptr->value.c);
-		/*fprintf(fout, "    [parser] %s = %g\n", name, ret);*/
-	}else{
-		ret = def;
-		/*fprintf(fout, "    [parser] %s = %g\t[default value]\n", name, ret);*/
-	}
-	return ret;
+    symrec *ptr;
+    double ret;
+
+    ptr = getsym(name);
+    if (ptr && ptr->type == S_CMPLX)
+    {
+        ret = GSL_REAL(ptr->value.c);
+        /*fprintf(fout, "    [parser] %s = %g\n", name, ret);*/
+    }
+    else
+    {
+        ret = def;
+        /*fprintf(fout, "    [parser] %s = %g\t[default value]\n", name, ret);*/
+    }
+    return ret;
 }
 
 gsl_complex parse_complex(char *name, gsl_complex def)
 {
-	symrec *ptr;
-	gsl_complex ret;
-
-	ptr = getsym(name);	
-	if(ptr && ptr->type == S_CMPLX){
-		ret = ptr->value.c;
-		/*fprintf(fout, "    [parser] %s = (%g, %g)\n", name, GSL_REAL(ret), GSL_IMAG(ret));*/
-	}else{
-		ret = def;
-		/*fprintf(fout, "    [parser] %s = (%g, %g)\t[default value]\n", name, GSL_REAL(ret), GSL_IMAG(ret));*/
-	}
-	return ret;
+    symrec *ptr;
+    gsl_complex ret;
+
+    ptr = getsym(name);
+    if (ptr && ptr->type == S_CMPLX)
+    {
+        ret = ptr->value.c;
+        /*fprintf(fout, "    [parser] %s = (%g, %g)\n", name, GSL_REAL(ret),
+         * GSL_IMAG(ret));*/
+    }
+    else
+    {
+        ret = def;
+        /*fprintf(fout, "    [parser] %s = (%g, %g)\t[default value]\n", name,
+         * GSL_REAL(ret), GSL_IMAG(ret));*/
+    }
+    return ret;
 }
 
 char *parse_string(char *name, char *def)
 {
-	symrec *ptr;
-	char *ret;
-
-	ptr = getsym(name);	
-	if(ptr && ptr->type == S_STR){
-		ret = ptr->value.str;
-		/*fprintf(fout, "    [parser] %s = \"%s\"\n", name, ret);*/
-	}else{
-		ret = def;
-		/*fprintf(fout, "    [parser] %s = \"%s\"\t[default value]\n", name, ret);*/
-	}
-	return ret;
+    symrec *ptr;
+    char *ret;
+
+    ptr = getsym(name);
+    if (ptr && ptr->type == S_STR)
+    {
+        ret = ptr->value.str;
+        /*fprintf(fout, "    [parser] %s = \"%s\"\n", name, ret);*/
+    }
+    else
+    {
+        ret = def;
+        /*fprintf(fout, "    [parser] %s = \"%s\"\t[default value]\n", name,
+         * ret);*/
+    }
+    return ret;
 }
 
 int parse_block_n(char *name)
 {
-	symrec *ptr;
-
-	ptr = getsym(name);
-	if(ptr && ptr->type == S_BLOCK){
-		return ptr->value.block->n;
-	}else
-		return 0;
+    symrec *ptr;
+
+    ptr = getsym(name);
+    if (ptr && ptr->type == S_BLOCK)
+    {
+        return ptr->value.block->n;
+    }
+    else
+    {
+        return 0;
+    }
 }
 
 static int parse_block_work(char *name, int l, int col, parse_result *r)
 {
-	symrec *ptr;
-
-	ptr = getsym(name);
-	if(ptr && ptr->type == S_BLOCK){
-		if(l < 0 || l >= ptr->value.block->n)
-			return -2; /* dimension error */
-		if(col < 0 || col >= ptr->value.block->lines[l].n)
-			return -2;
-
-		return parse_exp(ptr->value.block->lines[l].fields[col], r);
-	}else
-		return -1;
+    symrec *ptr;
+
+    ptr = getsym(name);
+    if (ptr && ptr->type == S_BLOCK)
+    {
+        if (l < 0 || l >= ptr->value.block->n)
+        {
+            return -2; /* dimension error */
+        }
+        if (col < 0 || col >= ptr->value.block->lines[l].n)
+        {
+            return -2;
+        }
+
+        return parse_exp(ptr->value.block->lines[l].fields[col], r);
+    }
+    else
+    {
+        return -1;
+    }
 }
 
 int parse_block_int(char *name, int l, int col, int *r)
 {
-	int o;
-	parse_result pr;
-
-	o = parse_block_work(name, l, col, &pr);
-
-	if(o == 0 && pr.type == PR_CMPLX){
-		*r = (int)rint(GSL_REAL(pr.value.c));
-		/*fprintf(fout, "    [parser] %s(%d, %d) = %d\n", name, l, col, *r);*/
-		return 0;
-	}else
-		return o;
+    int o;
+    parse_result pr;
+
+    o = parse_block_work(name, l, col, &pr);
+
+    if (o == 0 && pr.type == PR_CMPLX)
+    {
+        *r = (int)rint(GSL_REAL(pr.value.c));
+        /*fprintf(fout, "    [parser] %s(%d, %d) = %d\n", name, l, col, *r);*/
+        return 0;
+    }
+    else
+    {
+        return o;
+    }
 }
 
 int parse_block_double(char *name, int l, int col, double *r)
 {
-	int o;
-	parse_result pr;
-
-	o = parse_block_work(name, l, col, &pr);
-
-	if(o == 0 && pr.type == PR_CMPLX){
-		*r = GSL_REAL(pr.value.c);
-		/*fprintf(fout, "    [parser] %s(%d, %d) = %g\n", name, l, col, *r);*/
-		return 0;
-	}else
-		return o;
+    int o;
+    parse_result pr;
+
+    o = parse_block_work(name, l, col, &pr);
+
+    if (o == 0 && pr.type == PR_CMPLX)
+    {
+        *r = GSL_REAL(pr.value.c);
+        /*fprintf(fout, "    [parser] %s(%d, %d) = %g\n", name, l, col, *r);*/
+        return 0;
+    }
+    else
+    {
+        return o;
+    }
 }
 
 int parse_block_complex(char *name, int l, int col, gsl_complex *r)
 {
-	int o;
-	parse_result pr;
-
-	o = parse_block_work(name, l, col, &pr);
-
-	if(o == 0 && pr.type == PR_CMPLX){
-		*r = pr.value.c;
-		/*fprintf(fout, "    [parser] %s(%d, %d) = (%g,%g)\n", name, l, col, GSL_REAL(*r), GSL_IMAG(*r));*/
-		return 0;
-	}else
-		return o;
+    int o;
+    parse_result pr;
+
+    o = parse_block_work(name, l, col, &pr);
+
+    if (o == 0 && pr.type == PR_CMPLX)
+    {
+        *r = pr.value.c;
+        /*fprintf(fout, "    [parser] %s(%d, %d) = (%g,%g)\n", name, l, col,
+         * GSL_REAL(*r), GSL_IMAG(*r));*/
+        return 0;
+    }
+    else
+    {
+        return o;
+    }
 }
 
 int parse_block_string(char *name, int l, int col, char **r)
 {
-	int o;
-	parse_result pr;
-
-	o = parse_block_work(name, l, col, &pr);
-
-	if(o == 0 && pr.type == PR_STR){
-		*r = pr.value.s;
-		/*fprintf(fout, "    [parser] %s(%d, %d) = \"%s\"\n", name, l, col, *r);*/
-		return 0;
-	}else
-		return o;
+    int o;
+    parse_result pr;
+
+    o = parse_block_work(name, l, col, &pr);
+
+    if (o == 0 && pr.type == PR_STR)
+    {
+        *r = pr.value.s;
+        /*fprintf(fout, "    [parser] %s(%d, %d) = \"%s\"\n", name, l, col,
+         * *r);*/
+        return 0;
+    }
+    else
+    {
+        return o;
+    }
 }
diff --git a/src/parser/PARSER_exp.c b/src/parser/PARSER_exp.c
index 97c0ff1b03..500b756519 100644
--- a/src/parser/PARSER_exp.c
+++ b/src/parser/PARSER_exp.c
@@ -21,20 +21,20 @@
 #include <gsl_complex.h>
 #include <gsl_complex_math.h>
 #include <parser.h>
-#include <symbols.h>
 #include <stdlib.h>
 #include <string.h>
+#include <symbols.h>
 
 static char *par_string;
 static int par_pos;
 parse_result par_res;
 
 int yylex();
-int yyerror (char *s)  /* Called by yyparse on error */
+int yyerror(char *s) /* Called by yyparse on error */
 {
-  /* Do nothing */
-	/* printf("%s\n", s); */
-	return 0;
+    /* Do nothing */
+    /* printf("%s\n", s); */
+    return 0;
 }
 
 /* include the parser */
@@ -42,99 +42,135 @@ int yyerror (char *s)  /* Called by yyparse on error */
 
 int parse_exp(char *exp, parse_result *r)
 {
-	int o;
-
-	par_string = exp;
-	par_pos = 0;
-
-	o = yyparse();
-	if(o == 0){
-		r->type = par_res.type;
-		if(r->type == PR_CMPLX)
-			r->value.c = par_res.value.c;
-		else
-			r->value.s = par_res.value.s;
-	}
-	return o;
+    int o;
+
+    par_string = exp;
+    par_pos = 0;
+
+    o = yyparse();
+    if (o == 0)
+    {
+        r->type = par_res.type;
+        if (r->type == PR_CMPLX)
+        {
+            r->value.c = par_res.value.c;
+        }
+        else
+        {
+            r->value.s = par_res.value.s;
+        }
+    }
+    return o;
 }
 
 int get_real(char *s, double *d)
 {
-	int n=0;
-	sscanf(s, "%lg", d);
-	while(*s && (isdigit(*s) || *s=='.' || *s=='e' || *s=='E')){
-		if((*s=='e' || *s=='E') && (*(s+1)=='+' || *(s+1)=='-')) {s++; n++;}
-		s++; n++;
-	}
-	return n;
+    int n = 0;
+    sscanf(s, "%lg", d);
+    while (*s && (isdigit(*s) || *s == '.' || *s == 'e' || *s == 'E'))
+    {
+        if ((*s == 'e' || *s == 'E') && (*(s + 1) == '+' || *(s + 1) == '-'))
+        {
+            s++;
+            n++;
+        }
+        s++;
+        n++;
+    }
+    return n;
 }
 
-int yylex (){
-	int c;
-	static char *symbuf = 0;
-	static int length = 0;
-     
-	/* Ignore whitespace, get first nonwhite character.  */
-	while ((c = par_string[par_pos++]) == ' ' || c == '\t');
-	
-	if (c == '\0')
-		return '\n';
-	
-	/* Char starts a number => parse the number.         */
-	if (c == '.' || isdigit (c)){
-		par_pos--;
-		par_pos += get_real(&par_string[par_pos], &GSL_REAL(yylval.val));
-		return NUM;
-	}
-     
-	/* Char starts an identifier => read the name.       */
-	if (isalpha (c) || c == '\'' || c == '\"'){
-		symrec *s;
-		char startc = c;
-		int i;
-		
-		/* Initially make the buffer long enough
-			 for a 40-character symbol name.  */
-		if (length == 0)
-			length = 40, symbuf = (char *)malloc (length + 1);
-		
-		if(startc == '\'' || startc == '\"')
-			c = par_string[par_pos++];
-		else
-			startc = 0; /* false */
-
-		i = 0;
-		do{
-			/* If buffer is full, make it bigger.        */
-			if (i == length){
-				length *= 2;
-				symbuf = (char *)realloc (symbuf, length + 1);
-			}
-			/* Add this character to the buffer.         */
-			symbuf[i++] = c;
-			/* Get another character.                    */
-			c = par_string[par_pos++];
-		}while (c != '\0' && ((startc && c!=startc) || 
-				      (!startc && (isalnum(c) || c == '_' ))));
-		
-		if(!startc) par_pos--;
-		symbuf[i] = '\0';
-		
-		if(!startc){
-			s = getsym (symbuf);
-			if (s == 0)
-				s = putsym (symbuf, S_CMPLX);
-			yylval.tptr = s;
-			if(s->type == S_CMPLX)
-				return VAR;
-			else
-				return FNCT;
-		}else{
-			yylval.str = strdup(symbuf);
-			return STR;
-		}
-	}
-	
-	/* Any other character is a token by itself.        */
-	return c;
+int yylex()
+{
+    int c;
+    static char *symbuf = 0;
+    static int length = 0;
+
+    /* Ignore whitespace, get first nonwhite character.  */
+    while ((c = par_string[par_pos++]) == ' ' || c == '\t');
+
+    if (c == '\0')
+    {
+        return '\n';
+    }
+
+    /* Char starts a number => parse the number.         */
+    if (c == '.' || isdigit(c))
+    {
+        par_pos--;
+        par_pos += get_real(&par_string[par_pos], &GSL_REAL(yylval.val));
+        return NUM;
+    }
+
+    /* Char starts an identifier => read the name.       */
+    if (isalpha(c) || c == '\'' || c == '\"')
+    {
+        symrec *s;
+        char startc = c;
+        int i;
+
+        /* Initially make the buffer long enough
+                 for a 40-character symbol name.  */
+        if (length == 0)
+        {
+            length = 40, symbuf = (char *)malloc(length + 1);
+        }
+
+        if (startc == '\'' || startc == '\"')
+        {
+            c = par_string[par_pos++];
+        }
+        else
+        {
+            startc = 0; /* false */
+        }
+
+        i = 0;
+        do
+        {
+            /* If buffer is full, make it bigger.        */
+            if (i == length)
+            {
+                length *= 2;
+                symbuf = (char *)realloc(symbuf, length + 1);
+            }
+            /* Add this character to the buffer.         */
+            symbuf[i++] = c;
+            /* Get another character.                    */
+            c = par_string[par_pos++];
+        } while (c != '\0' && ((startc && c != startc) ||
+                               (!startc && (isalnum(c) || c == '_'))));
+
+        if (!startc)
+        {
+            par_pos--;
+        }
+        symbuf[i] = '\0';
+
+        if (!startc)
+        {
+            s = getsym(symbuf);
+            if (s == 0)
+            {
+                s = putsym(symbuf, S_CMPLX);
+            }
+            yylval.tptr = s;
+            if (s->type == S_CMPLX)
+            {
+                return VAR;
+            }
+            else
+            {
+                return FNCT;
+            }
+        }
+        else
+        {
+            yylval.str = strdup(symbuf);
+            return STR;
+        }
+    }
+
+    /* Any other character is a token by itself.        */
+    return c;
 }
diff --git a/src/parser/PARSER_interface.c b/src/parser/PARSER_interface.c
index 79f4220259..152bbffe5e 100644
--- a/src/parser/PARSER_interface.c
+++ b/src/parser/PARSER_interface.c
@@ -17,104 +17,100 @@
  02111-1307, USA.
 */
 
-#include <string.h>
-#include <wrapper.h>
 #include <ctype.h>
 #include <math.h>
 #include <parser.h>
+#include <string.h>
 #include <symbols.h>
+#include <wrapper.h>
 
 /* Interface to the parsing routines */
-int C_FUNC(iparse_init, IPARSE_INIT)
-		 (char *in, char *out)
-{ 
-	return parse_init(in, out); 
+int C_FUNC(iparse_init, IPARSE_INIT)(char *in, char *out)
+{
+    return parse_init(in, out);
 }
 
-void C_FUNC(iparse_end, IPARSE_END)
-		 ()
-{ 
-	parse_end(); 
-}
+void C_FUNC(iparse_end, IPARSE_END)() { parse_end(); }
 
-int C_FUNC(iparse_isdef, IPARSE_ISDEF)
-		 (char *name)
-{ 
-	return parse_isdef(name); 
-}
+int C_FUNC(iparse_isdef, IPARSE_ISDEF)(char *name) { return parse_isdef(name); }
 
-void C_FUNC(iparse_int, IPARSE_INT)
-		 (char *name, int *def, int *res)
-{ 
-	*res = parse_int(name, *def); 
+void C_FUNC(iparse_int, IPARSE_INT)(char *name, int *def, int *res)
+{
+    *res = parse_int(name, *def);
 }
 
-void C_FUNC(iparse_double, IPARSE_DOUBLE)
-		 (char *name, double *def, double *res)
+void C_FUNC(iparse_double, IPARSE_DOUBLE)(char *name, double *def, double *res)
 {
-	*res = parse_double(name, *def); 
+    *res = parse_double(name, *def);
 }
 
-void C_FUNC(iparse_complex, IPARSE_COMPLEX)
-		 (char *name, gsl_complex *def, gsl_complex *res)
+void C_FUNC(iparse_complex, IPARSE_COMPLEX)(char *name, gsl_complex *def,
+                                            gsl_complex *res)
 {
-	*res = parse_complex(name, *def); 
+    *res = parse_complex(name, *def);
 }
 
-void C_FUNC(iparse_string, IPARSE_STRING)
-		 (char *name, char *def, char *res)
+void C_FUNC(iparse_string, IPARSE_STRING)(char *name, char *def, char *res)
 {
-	char *c = parse_string(name, def);
-	int len = strlen(c);
-	strcpy(res, c);
-	res[len] = res[len + 1];
-        /*fprintf(stderr, " %s %d %s",name,len,c);*/
+    char *c = parse_string(name, def);
+    int len = strlen(c);
+    strcpy(res, c);
+    res[len] = res[len + 1];
+    /*fprintf(stderr, " %s %d %s",name,len,c);*/
 }
 
-static void parse_block_error(char *type, char *name, int l, int c){
-	/*fprintf(stderr, "Error: block \"%s\" does not contain a %s in line %d and col %d",
-					name, type, l, c);
-	exit(1);*/
+static void parse_block_error(char *type, char *name, int l, int c)
+{
+    /*fprintf(stderr, "Error: block \"%s\" does not contain a %s in line %d and
+    col %d", name, type, l, c); exit(1);*/
 }
 
-int C_FUNC(iparse_block_n, IPARSE_BLOCK_N)
-		 (char *name)
+int C_FUNC(iparse_block_n, IPARSE_BLOCK_N)(char *name)
 {
-	return parse_block_n(name);
+    return parse_block_n(name);
 }
 
-void C_FUNC(iparse_block_int, IPARSE_BLOCK_INT)
-		 (char *name, int *l, int *c, int *res)
+void C_FUNC(iparse_block_int, IPARSE_BLOCK_INT)(char *name, int *l, int *c,
+                                                int *res)
 {
-	if(parse_block_int(name, *l, *c, res) != 0)
-		parse_block_error("int", name, *l, *c);
+    if (parse_block_int(name, *l, *c, res) != 0)
+    {
+        parse_block_error("int", name, *l, *c);
+    }
 }
 
-void C_FUNC(iparse_block_double, IPARSE_BLOCK_DOUBLE)
-		 (char *name, int *l, int *c, double *res)
+void C_FUNC(iparse_block_double, IPARSE_BLOCK_DOUBLE)(char *name, int *l,
+                                                      int *c, double *res)
 {
-	if(parse_block_double(name, *l, *c, res) != 0)
-		parse_block_error("double", name, *l, *c);
+    if (parse_block_double(name, *l, *c, res) != 0)
+    {
+        parse_block_error("double", name, *l, *c);
+    }
 }
 
-void C_FUNC(iparse_block_complex, IPARSE_BLOCK_COMPLEX)
-		 (char *name, int *l, int *c, gsl_complex *res)
+void C_FUNC(iparse_block_complex,
+            IPARSE_BLOCK_COMPLEX)(char *name, int *l, int *c, gsl_complex *res)
 {
-	if(parse_block_complex(name, *l, *c, res) != 0)
-		parse_block_error("complex", name, *l, *c);
+    if (parse_block_complex(name, *l, *c, res) != 0)
+    {
+        parse_block_error("complex", name, *l, *c);
+    }
 }
 
-void C_FUNC(iparse_block_string, IPARSE_BLOCK_STRING)
-		 (char *name, int *l, int *c, char *res)
+void C_FUNC(iparse_block_string, IPARSE_BLOCK_STRING)(char *name, int *l,
+                                                      int *c, char *res)
 {
-	char *s;
-	int len;
-
-	if(parse_block_string(name, *l, *c, &s) != 0)
-		parse_block_error("string", name, *l, *c);
-	else{
-		len = strlen(s);
-		strcpy(res, s);
-		res[len] = res[len + 1];
-	}
+    char *s;
+    int len;
+
+    if (parse_block_string(name, *l, *c, &s) != 0)
+    {
+        parse_block_error("string", name, *l, *c);
+    }
+    else
+    {
+        len = strlen(s);
+        strcpy(res, s);
+        res[len] = res[len + 1];
+    }
 }
diff --git a/src/parser/PARSER_math.c b/src/parser/PARSER_math.c
index 9078f41821..6b94fa1ae2 100644
--- a/src/parser/PARSER_math.c
+++ b/src/parser/PARSER_math.c
@@ -1,17 +1,17 @@
 /* complex/math.c
- * 
+ *
  * Copyright (C) 1996, 1997, 1998, 1999, 2000 Jorma Olavi T�htinen, Brian Gough
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or (at
  * your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
@@ -41,957 +41,896 @@
  *   4.4.37, 4.4.38, 4.4.39
  */
 
-#include <math.h>
 #include <gsl_complex.h>
 #include <gsl_complex_math.h>
+#include <math.h>
 
 /**********************************************************************
  * Complex numbers
  **********************************************************************/
 
 #ifndef HIDE_INLINE_STATIC
-gsl_complex
-gsl_complex_rect (double x, double y)
-{				/* return z = x + i y */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, x, y);
-  return z;
+gsl_complex gsl_complex_rect(double x, double y)
+{ /* return z = x + i y */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, x, y);
+    return z;
 }
 #endif
 
-gsl_complex
-gsl_complex_polar (double r, double theta)
-{				/* return z = r exp(i theta) */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, r * cos (theta), r * sin (theta));
-  return z;
+gsl_complex gsl_complex_polar(double r, double theta)
+{ /* return z = r exp(i theta) */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, r * cos(theta), r * sin(theta));
+    return z;
 }
 
 /**********************************************************************
  * Properties of complex numbers
  **********************************************************************/
 
-double
-gsl_complex_arg (gsl_complex z)
-{				/* return arg(z),  -pi < arg(z) <= +pi */
-  return atan2 (GSL_IMAG (z), GSL_REAL (z));
+double gsl_complex_arg(gsl_complex z)
+{ /* return arg(z),  -pi < arg(z) <= +pi */
+    return atan2(GSL_IMAG(z), GSL_REAL(z));
 }
 
-double
-gsl_complex_abs (gsl_complex z)
-{				/* return |z| */
-  return hypot (GSL_REAL (z), GSL_IMAG (z));
+double gsl_complex_abs(gsl_complex z)
+{ /* return |z| */
+    return hypot(GSL_REAL(z), GSL_IMAG(z));
 }
 
-double
-gsl_complex_abs2 (gsl_complex z)
-{				/* return |z|^2 */
-  double x = GSL_REAL (z);
-  double y = GSL_IMAG (z);
+double gsl_complex_abs2(gsl_complex z)
+{ /* return |z|^2 */
+    double x = GSL_REAL(z);
+    double y = GSL_IMAG(z);
 
-  return (x * x + y * y);
+    return (x * x + y * y);
 }
 
-double
-gsl_complex_logabs (gsl_complex z)
-{				/* return log|z| */
-  double xabs = fabs (GSL_REAL (z));
-  double yabs = fabs (GSL_IMAG (z));
-  double max, u;
+double gsl_complex_logabs(gsl_complex z)
+{ /* return log|z| */
+    double xabs = fabs(GSL_REAL(z));
+    double yabs = fabs(GSL_IMAG(z));
+    double max, u;
 
-  if (xabs >= yabs)
+    if (xabs >= yabs)
     {
-      max = xabs;
-      u = yabs / xabs;
+        max = xabs;
+        u = yabs / xabs;
     }
-  else
+    else
     {
-      max = yabs;
-      u = xabs / yabs;
+        max = yabs;
+        u = xabs / yabs;
     }
 
-  /* Handle underflow when u is close to 0 */
+    /* Handle underflow when u is close to 0 */
 
-  return log (max) + 0.5 * log1p (u * u);
+    return log(max) + 0.5 * log1p(u * u);
 }
 
-
 /***********************************************************************
  * Complex arithmetic operators
  ***********************************************************************/
 
-gsl_complex
-gsl_complex_add (gsl_complex a, gsl_complex b)
-{				/* z=a+b */
-  double ar = GSL_REAL (a), ai = GSL_IMAG (a);
-  double br = GSL_REAL (b), bi = GSL_IMAG (b);
+gsl_complex gsl_complex_add(gsl_complex a, gsl_complex b)
+{ /* z=a+b */
+    double ar = GSL_REAL(a), ai = GSL_IMAG(a);
+    double br = GSL_REAL(b), bi = GSL_IMAG(b);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, ar + br, ai + bi);
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, ar + br, ai + bi);
+    return z;
 }
 
-gsl_complex
-gsl_complex_add_real (gsl_complex a, double x)
-{				/* z=a+x */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_REAL (a) + x, GSL_IMAG (a));
-  return z;
+gsl_complex gsl_complex_add_real(gsl_complex a, double x)
+{ /* z=a+x */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_REAL(a) + x, GSL_IMAG(a));
+    return z;
 }
 
-gsl_complex
-gsl_complex_add_imag (gsl_complex a, double y)
-{				/* z=a+iy */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_REAL (a), GSL_IMAG (a) + y);
-  return z;
+gsl_complex gsl_complex_add_imag(gsl_complex a, double y)
+{ /* z=a+iy */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_REAL(a), GSL_IMAG(a) + y);
+    return z;
 }
 
+gsl_complex gsl_complex_sub(gsl_complex a, gsl_complex b)
+{ /* z=a-b */
+    double ar = GSL_REAL(a), ai = GSL_IMAG(a);
+    double br = GSL_REAL(b), bi = GSL_IMAG(b);
 
-gsl_complex
-gsl_complex_sub (gsl_complex a, gsl_complex b)
-{				/* z=a-b */
-  double ar = GSL_REAL (a), ai = GSL_IMAG (a);
-  double br = GSL_REAL (b), bi = GSL_IMAG (b);
-
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, ar - br, ai - bi);
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, ar - br, ai - bi);
+    return z;
 }
 
-gsl_complex
-gsl_complex_sub_real (gsl_complex a, double x)
-{				/* z=a-x */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_REAL (a) - x, GSL_IMAG (a));
-  return z;
+gsl_complex gsl_complex_sub_real(gsl_complex a, double x)
+{ /* z=a-x */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_REAL(a) - x, GSL_IMAG(a));
+    return z;
 }
 
-gsl_complex
-gsl_complex_sub_imag (gsl_complex a, double y)
-{				/* z=a-iy */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_REAL (a), GSL_IMAG (a) - y);
-  return z;
+gsl_complex gsl_complex_sub_imag(gsl_complex a, double y)
+{ /* z=a-iy */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_REAL(a), GSL_IMAG(a) - y);
+    return z;
 }
 
-gsl_complex
-gsl_complex_mul (gsl_complex a, gsl_complex b)
-{				/* z=a*b */
-  double ar = GSL_REAL (a), ai = GSL_IMAG (a);
-  double br = GSL_REAL (b), bi = GSL_IMAG (b);
+gsl_complex gsl_complex_mul(gsl_complex a, gsl_complex b)
+{ /* z=a*b */
+    double ar = GSL_REAL(a), ai = GSL_IMAG(a);
+    double br = GSL_REAL(b), bi = GSL_IMAG(b);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, ar * br - ai * bi, ar * bi + ai * br);
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, ar * br - ai * bi, ar * bi + ai * br);
+    return z;
 }
 
-gsl_complex
-gsl_complex_mul_real (gsl_complex a, double x)
-{				/* z=a*x */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, x * GSL_REAL (a), x * GSL_IMAG (a));
-  return z;
+gsl_complex gsl_complex_mul_real(gsl_complex a, double x)
+{ /* z=a*x */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, x * GSL_REAL(a), x * GSL_IMAG(a));
+    return z;
 }
 
-gsl_complex
-gsl_complex_mul_imag (gsl_complex a, double y)
-{				/* z=a*iy */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, -y * GSL_IMAG (a), y * GSL_REAL (a));
-  return z;
+gsl_complex gsl_complex_mul_imag(gsl_complex a, double y)
+{ /* z=a*iy */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, -y * GSL_IMAG(a), y * GSL_REAL(a));
+    return z;
 }
 
-gsl_complex
-gsl_complex_div (gsl_complex a, gsl_complex b)
-{				/* z=a/b */
-  double ar = GSL_REAL (a), ai = GSL_IMAG (a);
-  double br = GSL_REAL (b), bi = GSL_IMAG (b);
+gsl_complex gsl_complex_div(gsl_complex a, gsl_complex b)
+{ /* z=a/b */
+    double ar = GSL_REAL(a), ai = GSL_IMAG(a);
+    double br = GSL_REAL(b), bi = GSL_IMAG(b);
 
-  double s = 1.0 / gsl_complex_abs (b);
+    double s = 1.0 / gsl_complex_abs(b);
 
-  double sbr = s * br;
-  double sbi = s * bi;
+    double sbr = s * br;
+    double sbi = s * bi;
 
-  double zr = (ar * sbr + ai * sbi) * s;
-  double zi = (ai * sbr - ar * sbi) * s;
+    double zr = (ar * sbr + ai * sbi) * s;
+    double zi = (ai * sbr - ar * sbi) * s;
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, zr, zi);
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, zr, zi);
+    return z;
 }
 
-gsl_complex
-gsl_complex_div_real (gsl_complex a, double x)
-{				/* z=a/x */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_REAL (a) / x, GSL_IMAG (a) / x);
-  return z;
+gsl_complex gsl_complex_div_real(gsl_complex a, double x)
+{ /* z=a/x */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_REAL(a) / x, GSL_IMAG(a) / x);
+    return z;
 }
 
-gsl_complex
-gsl_complex_div_imag (gsl_complex a, double y)
-{				/* z=a/(iy) */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_IMAG (a) / y,  - GSL_REAL (a) / y);
-  return z;
+gsl_complex gsl_complex_div_imag(gsl_complex a, double y)
+{ /* z=a/(iy) */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_IMAG(a) / y, -GSL_REAL(a) / y);
+    return z;
 }
 
-gsl_complex
-gsl_complex_conjugate (gsl_complex a)
-{				/* z=conj(a) */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, GSL_REAL (a), -GSL_IMAG (a));
-  return z;
+gsl_complex gsl_complex_conjugate(gsl_complex a)
+{ /* z=conj(a) */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, GSL_REAL(a), -GSL_IMAG(a));
+    return z;
 }
 
-gsl_complex
-gsl_complex_negative (gsl_complex a)
-{				/* z=-a */
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, -GSL_REAL (a), -GSL_IMAG (a));
-  return z;
+gsl_complex gsl_complex_negative(gsl_complex a)
+{ /* z=-a */
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, -GSL_REAL(a), -GSL_IMAG(a));
+    return z;
 }
 
-gsl_complex
-gsl_complex_inverse (gsl_complex a)
-{				/* z=1/a */
-  double s = 1.0 / gsl_complex_abs (a);
+gsl_complex gsl_complex_inverse(gsl_complex a)
+{ /* z=1/a */
+    double s = 1.0 / gsl_complex_abs(a);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, (GSL_REAL (a) * s) * s, -(GSL_IMAG (a) * s) * s);
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, (GSL_REAL(a) * s) * s, -(GSL_IMAG(a) * s) * s);
+    return z;
 }
 
 /**********************************************************************
  * Elementary complex functions
  **********************************************************************/
 
-gsl_complex
-gsl_complex_sqrt (gsl_complex a)
-{				/* z=sqrt(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_sqrt(gsl_complex a)
+{ /* z=sqrt(a) */
+    gsl_complex z;
 
-  if (GSL_REAL (a) == 0.0 && GSL_IMAG (a) == 0.0)
+    if (GSL_REAL(a) == 0.0 && GSL_IMAG(a) == 0.0)
     {
-      GSL_SET_COMPLEX (&z, 0, 0);
+        GSL_SET_COMPLEX(&z, 0, 0);
     }
-  else
+    else
     {
-      double x = fabs (GSL_REAL (a));
-      double y = fabs (GSL_IMAG (a));
-      double w;
+        double x = fabs(GSL_REAL(a));
+        double y = fabs(GSL_IMAG(a));
+        double w;
 
-      if (x >= y)
-	{
-	  double t = y / x;
-	  w = sqrt (x) * sqrt (0.5 * (1.0 + sqrt (1.0 + t * t)));
-	}
-      else
-	{
-	  double t = x / y;
-	  w = sqrt (y) * sqrt (0.5 * (t + sqrt (1.0 + t * t)));
-	}
+        if (x >= y)
+        {
+            double t = y / x;
+            w = sqrt(x) * sqrt(0.5 * (1.0 + sqrt(1.0 + t * t)));
+        }
+        else
+        {
+            double t = x / y;
+            w = sqrt(y) * sqrt(0.5 * (t + sqrt(1.0 + t * t)));
+        }
 
-      if (GSL_REAL (a) >= 0.0)
-	{
-	  double ai = GSL_IMAG (a);
-	  GSL_SET_COMPLEX (&z, w, ai / (2.0 * w));
-	}
-      else
-	{
-	  double ai = GSL_IMAG (a);
-	  double vi = (ai >= 0) ? w : -w;
-	  GSL_SET_COMPLEX (&z, ai / (2.0 * vi), vi);
-	}
+        if (GSL_REAL(a) >= 0.0)
+        {
+            double ai = GSL_IMAG(a);
+            GSL_SET_COMPLEX(&z, w, ai / (2.0 * w));
+        }
+        else
+        {
+            double ai = GSL_IMAG(a);
+            double vi = (ai >= 0) ? w : -w;
+            GSL_SET_COMPLEX(&z, ai / (2.0 * vi), vi);
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_sqrt_real (double x)
-{				/* z=sqrt(x) */
-  gsl_complex z;
+gsl_complex gsl_complex_sqrt_real(double x)
+{ /* z=sqrt(x) */
+    gsl_complex z;
 
-  if (x >= 0)
+    if (x >= 0)
     {
-      GSL_SET_COMPLEX (&z, sqrt (x), 0.0);
+        GSL_SET_COMPLEX(&z, sqrt(x), 0.0);
     }
-  else
+    else
     {
-      GSL_SET_COMPLEX (&z, 0.0, sqrt (-x));
+        GSL_SET_COMPLEX(&z, 0.0, sqrt(-x));
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_exp (gsl_complex a)
-{				/* z=exp(a) */
-  double rho = exp (GSL_REAL (a));
-  double theta = GSL_IMAG (a);
+gsl_complex gsl_complex_exp(gsl_complex a)
+{ /* z=exp(a) */
+    double rho = exp(GSL_REAL(a));
+    double theta = GSL_IMAG(a);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, rho * cos (theta), rho * sin (theta));
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, rho * cos(theta), rho * sin(theta));
+    return z;
 }
 
-gsl_complex
-gsl_complex_pow (gsl_complex a, gsl_complex b)
-{				/* z=a^b */
-  gsl_complex z;
+gsl_complex gsl_complex_pow(gsl_complex a, gsl_complex b)
+{ /* z=a^b */
+    gsl_complex z;
 
-  if (GSL_REAL (a) == 0 && GSL_IMAG (a) == 0.0)
+    if (GSL_REAL(a) == 0 && GSL_IMAG(a) == 0.0)
     {
-      GSL_SET_COMPLEX (&z, 0.0, 0.0);
+        GSL_SET_COMPLEX(&z, 0.0, 0.0);
     }
-  else
+    else
     {
-      double logr = gsl_complex_logabs (a);
-      double theta = gsl_complex_arg (a);
+        double logr = gsl_complex_logabs(a);
+        double theta = gsl_complex_arg(a);
 
-      double br = GSL_REAL (b), bi = GSL_IMAG (b);
+        double br = GSL_REAL(b), bi = GSL_IMAG(b);
 
-      double rho = exp (logr * br - bi * theta);
-      double beta = theta * br + bi * logr;
+        double rho = exp(logr * br - bi * theta);
+        double beta = theta * br + bi * logr;
 
-      GSL_SET_COMPLEX (&z, rho * cos (beta), rho * sin (beta));
+        GSL_SET_COMPLEX(&z, rho * cos(beta), rho * sin(beta));
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_pow_real (gsl_complex a, double b)
-{				/* z=a^b */
-  gsl_complex z;
+gsl_complex gsl_complex_pow_real(gsl_complex a, double b)
+{ /* z=a^b */
+    gsl_complex z;
 
-  if (GSL_REAL (a) == 0 && GSL_IMAG (a) == 0)
+    if (GSL_REAL(a) == 0 && GSL_IMAG(a) == 0)
     {
-      GSL_SET_COMPLEX (&z, 0, 0);
+        GSL_SET_COMPLEX(&z, 0, 0);
     }
-  else
+    else
     {
-      double logr = gsl_complex_logabs (a);
-      double theta = gsl_complex_arg (a);
-      double rho = exp (logr * b);
-      double beta = theta * b;
-      GSL_SET_COMPLEX (&z, rho * cos (beta), rho * sin (beta));
+        double logr = gsl_complex_logabs(a);
+        double theta = gsl_complex_arg(a);
+        double rho = exp(logr * b);
+        double beta = theta * b;
+        GSL_SET_COMPLEX(&z, rho * cos(beta), rho * sin(beta));
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_log (gsl_complex a)
-{				/* z=log(a) */
-  double logr = gsl_complex_logabs (a);
-  double theta = gsl_complex_arg (a);
+gsl_complex gsl_complex_log(gsl_complex a)
+{ /* z=log(a) */
+    double logr = gsl_complex_logabs(a);
+    double theta = gsl_complex_arg(a);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, logr, theta);
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, logr, theta);
+    return z;
 }
 
-gsl_complex
-gsl_complex_log10 (gsl_complex a)
-{				/* z = log10(a) */
-  return gsl_complex_mul_real (gsl_complex_log (a), 1 / log (10.));
+gsl_complex gsl_complex_log10(gsl_complex a)
+{ /* z = log10(a) */
+    return gsl_complex_mul_real(gsl_complex_log(a), 1 / log(10.));
 }
 
-gsl_complex
-gsl_complex_log_b (gsl_complex a, gsl_complex b)
+gsl_complex gsl_complex_log_b(gsl_complex a, gsl_complex b)
 {
-  return gsl_complex_div (gsl_complex_log (a), gsl_complex_log (b));
+    return gsl_complex_div(gsl_complex_log(a), gsl_complex_log(b));
 }
 
 /***********************************************************************
  * Complex trigonometric functions
  ***********************************************************************/
 
-gsl_complex
-gsl_complex_sin (gsl_complex a)
-{				/* z = sin(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
+gsl_complex gsl_complex_sin(gsl_complex a)
+{ /* z = sin(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
 
-  gsl_complex z;
+    gsl_complex z;
 
-  if (I == 0.0) 
+    if (I == 0.0)
     {
-      /* avoid returing negative zero (-0.0) for the imaginary part  */
+        /* avoid returing negative zero (-0.0) for the imaginary part  */
 
-      GSL_SET_COMPLEX (&z, sin (R), 0.0);  
-    } 
-  else 
+        GSL_SET_COMPLEX(&z, sin(R), 0.0);
+    }
+    else
     {
-      GSL_SET_COMPLEX (&z, sin (R) * cosh (I), cos (R) * sinh (I));
+        GSL_SET_COMPLEX(&z, sin(R) * cosh(I), cos(R) * sinh(I));
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_cos (gsl_complex a)
-{				/* z = cos(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
+gsl_complex gsl_complex_cos(gsl_complex a)
+{ /* z = cos(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
 
-  gsl_complex z;
+    gsl_complex z;
 
-  if (I == 0.0) 
+    if (I == 0.0)
     {
-      /* avoid returing negative zero (-0.0) for the imaginary part  */
+        /* avoid returing negative zero (-0.0) for the imaginary part  */
 
-      GSL_SET_COMPLEX (&z, cos (R), 0.0);  
-    } 
-  else 
+        GSL_SET_COMPLEX(&z, cos(R), 0.0);
+    }
+    else
     {
-      GSL_SET_COMPLEX (&z, cos (R) * cosh (I), sin (R) * sinh (-I));
+        GSL_SET_COMPLEX(&z, cos(R) * cosh(I), sin(R) * sinh(-I));
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_tan (gsl_complex a)
-{				/* z = tan(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
+gsl_complex gsl_complex_tan(gsl_complex a)
+{ /* z = tan(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
 
-  gsl_complex z;
+    gsl_complex z;
 
-  if (fabs (I) < 1)
+    if (fabs(I) < 1)
     {
-      double D = pow (cos (R), 2.0) + pow (sinh (I), 2.0);
+        double D = pow(cos(R), 2.0) + pow(sinh(I), 2.0);
 
-      GSL_SET_COMPLEX (&z, 0.5 * sin (2 * R) / D, 0.5 * sinh (2 * I) / D);
+        GSL_SET_COMPLEX(&z, 0.5 * sin(2 * R) / D, 0.5 * sinh(2 * I) / D);
     }
-  else
+    else
     {
-      double u = exp (-I);
-      double C = 2 * u / (1 - pow (u, 2.0));
-      double D = 1 + pow (cos (R), 2.0) * pow (C, 2.0);
+        double u = exp(-I);
+        double C = 2 * u / (1 - pow(u, 2.0));
+        double D = 1 + pow(cos(R), 2.0) * pow(C, 2.0);
 
-      double S = pow (C, 2.0);
-      double T = 1.0 / tanh (I);
+        double S = pow(C, 2.0);
+        double T = 1.0 / tanh(I);
 
-      GSL_SET_COMPLEX (&z, 0.5 * sin (2 * R) * S / D, T / D);
+        GSL_SET_COMPLEX(&z, 0.5 * sin(2 * R) * S / D, T / D);
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_sec (gsl_complex a)
-{				/* z = sec(a) */
-  gsl_complex z = gsl_complex_cos (a);
-  return gsl_complex_inverse (z);
+gsl_complex gsl_complex_sec(gsl_complex a)
+{ /* z = sec(a) */
+    gsl_complex z = gsl_complex_cos(a);
+    return gsl_complex_inverse(z);
 }
 
-gsl_complex
-gsl_complex_csc (gsl_complex a)
-{				/* z = csc(a) */
-  gsl_complex z = gsl_complex_sin (a);
-  return gsl_complex_inverse(z);
+gsl_complex gsl_complex_csc(gsl_complex a)
+{ /* z = csc(a) */
+    gsl_complex z = gsl_complex_sin(a);
+    return gsl_complex_inverse(z);
 }
 
-
-gsl_complex
-gsl_complex_cot (gsl_complex a)
-{				/* z = cot(a) */
-  gsl_complex z = gsl_complex_tan (a);
-  return gsl_complex_inverse (z);
+gsl_complex gsl_complex_cot(gsl_complex a)
+{ /* z = cot(a) */
+    gsl_complex z = gsl_complex_tan(a);
+    return gsl_complex_inverse(z);
 }
 
 /**********************************************************************
  * Inverse Complex Trigonometric Functions
  **********************************************************************/
 
-gsl_complex
-gsl_complex_arcsin (gsl_complex a)
-{				/* z = arcsin(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
-  gsl_complex z;
+gsl_complex gsl_complex_arcsin(gsl_complex a)
+{ /* z = arcsin(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
+    gsl_complex z;
 
-  if (I == 0)
+    if (I == 0)
     {
-      z = gsl_complex_arcsin_real (R);
+        z = gsl_complex_arcsin_real(R);
     }
-  else
+    else
     {
-      double x = fabs (R), y = fabs (I);
-      double r = hypot (x + 1, y), s = hypot (x - 1, y);
-      double A = 0.5 * (r + s);
-      double B = x / A;
-      double y2 = y * y;
+        double x = fabs(R), y = fabs(I);
+        double r = hypot(x + 1, y), s = hypot(x - 1, y);
+        double A = 0.5 * (r + s);
+        double B = x / A;
+        double y2 = y * y;
+
+        double real, imag;
 
-      double real, imag;
+        const double A_crossover = 1.5, B_crossover = 0.6417;
 
-      const double A_crossover = 1.5, B_crossover = 0.6417;
+        if (B <= B_crossover)
+        {
+            real = asin(B);
+        }
+        else
+        {
+            if (x <= 1)
+            {
+                double D = 0.5 * (A + x) * (y2 / (r + x + 1) + (s + (1 - x)));
+                real = atan(x / sqrt(D));
+            }
+            else
+            {
+                double Apx = A + x;
+                double D = 0.5 * (Apx / (r + x + 1) + Apx / (s + (x - 1)));
+                real = atan(x / (y * sqrt(D)));
+            }
+        }
+
+        if (A <= A_crossover)
+        {
+            double Am1;
+
+            if (x < 1)
+            {
+                Am1 = 0.5 * (y2 / (r + (x + 1)) + y2 / (s + (1 - x)));
+            }
+            else
+            {
+                Am1 = 0.5 * (y2 / (r + (x + 1)) + (s + (x - 1)));
+            }
+
+            imag = log1p(Am1 + sqrt(Am1 * (A + 1)));
+        }
+        else
+        {
+            imag = log(A + sqrt(A * A - 1));
+        }
 
-      if (B <= B_crossover)
-	{
-	  real = asin (B);
-	}
-      else
-	{
-	  if (x <= 1)
-	    {
-	      double D = 0.5 * (A + x) * (y2 / (r + x + 1) + (s + (1 - x)));
-	      real = atan (x / sqrt (D));
-	    }
-	  else
-	    {
-	      double Apx = A + x;
-	      double D = 0.5 * (Apx / (r + x + 1) + Apx / (s + (x - 1)));
-	      real = atan (x / (y * sqrt (D)));
-	    }
-	}
-
-      if (A <= A_crossover)
-	{
-	  double Am1;
-
-	  if (x < 1)
-	    {
-	      Am1 = 0.5 * (y2 / (r + (x + 1)) + y2 / (s + (1 - x)));
-	    }
-	  else
-	    {
-	      Am1 = 0.5 * (y2 / (r + (x + 1)) + (s + (x - 1)));
-	    }
-
-	  imag = log1p (Am1 + sqrt (Am1 * (A + 1)));
-	}
-      else
-	{
-	  imag = log (A + sqrt (A * A - 1));
-	}
-
-      GSL_SET_COMPLEX (&z, (R >= 0) ? real : -real, (I >= 0) ? imag : -imag);
-    }
-
-  return z;
+        GSL_SET_COMPLEX(&z, (R >= 0) ? real : -real, (I >= 0) ? imag : -imag);
+    }
+
+    return z;
 }
 
-gsl_complex
-gsl_complex_arcsin_real (double a)
-{				/* z = arcsin(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arcsin_real(double a)
+{ /* z = arcsin(a) */
+    gsl_complex z;
 
-  if (fabs (a) <= 1.0)
+    if (fabs(a) <= 1.0)
     {
-      GSL_SET_COMPLEX (&z, asin (a), 0.0);
-    }
-  else
+        GSL_SET_COMPLEX(&z, asin(a), 0.0);
+    }
+    else
     {
-      if (a < 0.0)
-	{
-	  GSL_SET_COMPLEX (&z, -M_PI_2, acosh (-a));
-	}
-      else
-	{
-	  GSL_SET_COMPLEX (&z, M_PI_2, -acosh (a));
-	}
+        if (a < 0.0)
+        {
+            GSL_SET_COMPLEX(&z, -M_PI_2, acosh(-a));
+        }
+        else
+        {
+            GSL_SET_COMPLEX(&z, M_PI_2, -acosh(a));
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arccos (gsl_complex a)
-{				/* z = arccos(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
-  gsl_complex z;
+gsl_complex gsl_complex_arccos(gsl_complex a)
+{ /* z = arccos(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
+    gsl_complex z;
 
-  if (I == 0)
+    if (I == 0)
     {
-      z = gsl_complex_arccos_real (R);
+        z = gsl_complex_arccos_real(R);
     }
-  else
+    else
     {
-      double x = fabs (R), y = fabs (I);
-      double r = hypot (x + 1, y), s = hypot (x - 1, y);
-      double A = 0.5 * (r + s);
-      double B = x / A;
-      double y2 = y * y;
+        double x = fabs(R), y = fabs(I);
+        double r = hypot(x + 1, y), s = hypot(x - 1, y);
+        double A = 0.5 * (r + s);
+        double B = x / A;
+        double y2 = y * y;
 
-      double real, imag;
+        double real, imag;
 
-      const double A_crossover = 1.5, B_crossover = 0.6417;
+        const double A_crossover = 1.5, B_crossover = 0.6417;
 
-      if (B <= B_crossover)
-	{
-	  real = acos (B);
-	}
-      else
-	{
-	  if (x <= 1)
-	    {
-	      double D = 0.5 * (A + x) * (y2 / (r + x + 1) + (s + (1 - x)));
-	      real = atan (sqrt (D) / x);
-	    }
-	  else
-	    {
-	      double Apx = A + x;
-	      double D = 0.5 * (Apx / (r + x + 1) + Apx / (s + (x - 1)));
-	      real = atan ((y * sqrt (D)) / x);
-	    }
-	}
-
-      if (A <= A_crossover)
-	{
-	  double Am1;
-
-	  if (x < 1)
-	    {
-	      Am1 = 0.5 * (y2 / (r + (x + 1)) + y2 / (s + (1 - x)));
-	    }
-	  else
-	    {
-	      Am1 = 0.5 * (y2 / (r + (x + 1)) + (s + (x - 1)));
-	    }
+        if (B <= B_crossover)
+        {
+            real = acos(B);
+        }
+        else
+        {
+            if (x <= 1)
+            {
+                double D = 0.5 * (A + x) * (y2 / (r + x + 1) + (s + (1 - x)));
+                real = atan(sqrt(D) / x);
+            }
+            else
+            {
+                double Apx = A + x;
+                double D = 0.5 * (Apx / (r + x + 1) + Apx / (s + (x - 1)));
+                real = atan((y * sqrt(D)) / x);
+            }
+        }
+
+        if (A <= A_crossover)
+        {
+            double Am1;
+
+            if (x < 1)
+            {
+                Am1 = 0.5 * (y2 / (r + (x + 1)) + y2 / (s + (1 - x)));
+            }
+            else
+            {
+                Am1 = 0.5 * (y2 / (r + (x + 1)) + (s + (x - 1)));
+            }
 
-	  imag = log1p (Am1 + sqrt (Am1 * (A + 1)));
-	}
-      else
-	{
-	  imag = log (A + sqrt (A * A - 1));
-	}
+            imag = log1p(Am1 + sqrt(Am1 * (A + 1)));
+        }
+        else
+        {
+            imag = log(A + sqrt(A * A - 1));
+        }
 
-      GSL_SET_COMPLEX (&z, (R >= 0) ? real : M_PI - real, (I >= 0) ? -imag : imag);
+        GSL_SET_COMPLEX(&z, (R >= 0) ? real : M_PI - real,
+                        (I >= 0) ? -imag : imag);
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arccos_real (double a)
-{				/* z = arccos(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arccos_real(double a)
+{ /* z = arccos(a) */
+    gsl_complex z;
 
-  if (fabs (a) <= 1.0)
+    if (fabs(a) <= 1.0)
     {
-      GSL_SET_COMPLEX (&z, acos (a), 0);
+        GSL_SET_COMPLEX(&z, acos(a), 0);
     }
-  else
+    else
     {
-      if (a < 0.0)
-	{
-	  GSL_SET_COMPLEX (&z, M_PI, -acosh (-a));
-	}
-      else
-	{
-	  GSL_SET_COMPLEX (&z, 0, acosh (a));
-	}
+        if (a < 0.0)
+        {
+            GSL_SET_COMPLEX(&z, M_PI, -acosh(-a));
+        }
+        else
+        {
+            GSL_SET_COMPLEX(&z, 0, acosh(a));
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arctan (gsl_complex a)
-{				/* z = arctan(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
-  gsl_complex z;
+gsl_complex gsl_complex_arctan(gsl_complex a)
+{ /* z = arctan(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
+    gsl_complex z;
 
-  if (I == 0)
+    if (I == 0)
     {
-      GSL_SET_COMPLEX (&z, atan (R), 0);
+        GSL_SET_COMPLEX(&z, atan(R), 0);
     }
-  else
+    else
     {
-      /* FIXME: This is a naive implementation which does not fully
-         take into account cancellation errors, overflow, underflow
-         etc.  It would benefit from the Hull et al treatment. */
+        /* FIXME: This is a naive implementation which does not fully
+           take into account cancellation errors, overflow, underflow
+           etc.  It would benefit from the Hull et al treatment. */
 
-      double r = hypot (R, I);
+        double r = hypot(R, I);
 
-      double imag;
+        double imag;
 
-      double u = 2 * I / (1 + r * r);
+        double u = 2 * I / (1 + r * r);
 
-      /* FIXME: the following cross-over should be optimized but 0.1
-         seems to work ok */
+        /* FIXME: the following cross-over should be optimized but 0.1
+           seems to work ok */
 
-      if (fabs (u) < 0.1)
-	{
-	  imag = 0.25 * (log1p (u) - log1p (-u));
-	}
-      else
-	{
-	  double A = hypot (R, I + 1);
-	  double B = hypot (R, I - 1);
-	  imag = 0.5 * log (A / B);
-	}
+        if (fabs(u) < 0.1)
+        {
+            imag = 0.25 * (log1p(u) - log1p(-u));
+        }
+        else
+        {
+            double A = hypot(R, I + 1);
+            double B = hypot(R, I - 1);
+            imag = 0.5 * log(A / B);
+        }
 
-      if (R == 0)
-	{
-	  if (I > 1)
-	    {
-	      GSL_SET_COMPLEX (&z, M_PI_2, imag);
-	    }
-	  else if (I < -1)
-	    {
-	      GSL_SET_COMPLEX (&z, -M_PI_2, imag);
-	    }
-	  else
-	    {
-	      GSL_SET_COMPLEX (&z, 0, imag);
-	    };
-	}
-      else
-	{
-	  GSL_SET_COMPLEX (&z, 0.5 * atan2 (2 * R, ((1 + r) * (1 - r))), imag);
-	}
+        if (R == 0)
+        {
+            if (I > 1)
+            {
+                GSL_SET_COMPLEX(&z, M_PI_2, imag);
+            }
+            else if (I < -1)
+            {
+                GSL_SET_COMPLEX(&z, -M_PI_2, imag);
+            }
+            else
+            {
+                GSL_SET_COMPLEX(&z, 0, imag);
+            };
+        }
+        else
+        {
+            GSL_SET_COMPLEX(&z, 0.5 * atan2(2 * R, ((1 + r) * (1 - r))), imag);
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arcsec (gsl_complex a)
-{				/* z = arcsec(a) */
-  gsl_complex z = gsl_complex_inverse (a);
-  return gsl_complex_arccos (z);
+gsl_complex gsl_complex_arcsec(gsl_complex a)
+{ /* z = arcsec(a) */
+    gsl_complex z = gsl_complex_inverse(a);
+    return gsl_complex_arccos(z);
 }
 
-gsl_complex
-gsl_complex_arcsec_real (double a)
-{				/* z = arcsec(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arcsec_real(double a)
+{ /* z = arcsec(a) */
+    gsl_complex z;
 
-  if (a <= -1.0 || a >= 1.0)
+    if (a <= -1.0 || a >= 1.0)
     {
-      GSL_SET_COMPLEX (&z, acos (1 / a), 0.0);
+        GSL_SET_COMPLEX(&z, acos(1 / a), 0.0);
     }
-  else
+    else
     {
-      if (a >= 0.0)
-	{
-	  GSL_SET_COMPLEX (&z, 0, acosh (1 / a));
-	}
-      else
-	{
-	  GSL_SET_COMPLEX (&z, M_PI, -acosh (-1 / a));
-	}
+        if (a >= 0.0)
+        {
+            GSL_SET_COMPLEX(&z, 0, acosh(1 / a));
+        }
+        else
+        {
+            GSL_SET_COMPLEX(&z, M_PI, -acosh(-1 / a));
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arccsc (gsl_complex a)
-{				/* z = arccsc(a) */
-  gsl_complex z = gsl_complex_inverse (a);
-  return gsl_complex_arcsin (z);
+gsl_complex gsl_complex_arccsc(gsl_complex a)
+{ /* z = arccsc(a) */
+    gsl_complex z = gsl_complex_inverse(a);
+    return gsl_complex_arcsin(z);
 }
 
-gsl_complex
-gsl_complex_arccsc_real (double a)
-{				/* z = arccsc(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arccsc_real(double a)
+{ /* z = arccsc(a) */
+    gsl_complex z;
 
-  if (a <= -1.0 || a >= 1.0)
+    if (a <= -1.0 || a >= 1.0)
     {
-      GSL_SET_COMPLEX (&z, asin (1 / a), 0.0);
+        GSL_SET_COMPLEX(&z, asin(1 / a), 0.0);
     }
-  else
+    else
     {
-      if (a >= 0.0)
-	{
-	  GSL_SET_COMPLEX (&z, M_PI_2, -acosh (1 / a));
-	}
-      else
-	{
-	  GSL_SET_COMPLEX (&z, -M_PI_2, -acosh (-1 / a));
-	}
+        if (a >= 0.0)
+        {
+            GSL_SET_COMPLEX(&z, M_PI_2, -acosh(1 / a));
+        }
+        else
+        {
+            GSL_SET_COMPLEX(&z, -M_PI_2, -acosh(-1 / a));
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arccot (gsl_complex a)
-{				/* z = arccot(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arccot(gsl_complex a)
+{ /* z = arccot(a) */
+    gsl_complex z;
 
-  if (GSL_REAL (a) == 0.0 && GSL_IMAG (a) == 0.0)
+    if (GSL_REAL(a) == 0.0 && GSL_IMAG(a) == 0.0)
     {
-      GSL_SET_COMPLEX (&z, M_PI_2, 0);
+        GSL_SET_COMPLEX(&z, M_PI_2, 0);
     }
-  else
+    else
     {
-      z = gsl_complex_inverse (a);
-      z = gsl_complex_arctan (z);
+        z = gsl_complex_inverse(a);
+        z = gsl_complex_arctan(z);
     }
 
-  return z;
+    return z;
 }
 
 /**********************************************************************
  * Complex Hyperbolic Functions
  **********************************************************************/
 
-gsl_complex
-gsl_complex_sinh (gsl_complex a)
-{				/* z = sinh(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
+gsl_complex gsl_complex_sinh(gsl_complex a)
+{ /* z = sinh(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, sinh (R) * cos (I), cosh (R) * sin (I));
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, sinh(R) * cos(I), cosh(R) * sin(I));
+    return z;
 }
 
-gsl_complex
-gsl_complex_cosh (gsl_complex a)
-{				/* z = cosh(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
+gsl_complex gsl_complex_cosh(gsl_complex a)
+{ /* z = cosh(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
 
-  gsl_complex z;
-  GSL_SET_COMPLEX (&z, cosh (R) * cos (I), sinh (R) * sin (I));
-  return z;
+    gsl_complex z;
+    GSL_SET_COMPLEX(&z, cosh(R) * cos(I), sinh(R) * sin(I));
+    return z;
 }
 
-gsl_complex
-gsl_complex_tanh (gsl_complex a)
-{				/* z = tanh(a) */
-  double R = GSL_REAL (a), I = GSL_IMAG (a);
+gsl_complex gsl_complex_tanh(gsl_complex a)
+{ /* z = tanh(a) */
+    double R = GSL_REAL(a), I = GSL_IMAG(a);
 
-  gsl_complex z;
+    gsl_complex z;
 
-  if (fabs(R) < 1.0) 
+    if (fabs(R) < 1.0)
     {
-      double D = pow (cos (I), 2.0) + pow (sinh (R), 2.0);
-      
-      GSL_SET_COMPLEX (&z, sinh (R) * cosh (R) / D, 0.5 * sin (2 * I) / D);
+        double D = pow(cos(I), 2.0) + pow(sinh(R), 2.0);
+
+        GSL_SET_COMPLEX(&z, sinh(R) * cosh(R) / D, 0.5 * sin(2 * I) / D);
     }
-  else
+    else
     {
-      double D = pow (cos (I), 2.0) + pow (sinh (R), 2.0);
-      double F = 1 + pow (cos (I) / sinh (R), 2.0);
+        double D = pow(cos(I), 2.0) + pow(sinh(R), 2.0);
+        double F = 1 + pow(cos(I) / sinh(R), 2.0);
 
-      GSL_SET_COMPLEX (&z, 1.0 / (tanh (R) * F), 0.5 * sin (2 * I) / D);
+        GSL_SET_COMPLEX(&z, 1.0 / (tanh(R) * F), 0.5 * sin(2 * I) / D);
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_sech (gsl_complex a)
-{				/* z = sech(a) */
-  gsl_complex z = gsl_complex_cosh (a);
-  return gsl_complex_inverse (z);
+gsl_complex gsl_complex_sech(gsl_complex a)
+{ /* z = sech(a) */
+    gsl_complex z = gsl_complex_cosh(a);
+    return gsl_complex_inverse(z);
 }
 
-gsl_complex
-gsl_complex_csch (gsl_complex a)
-{				/* z = csch(a) */
-  gsl_complex z = gsl_complex_sinh (a);
-  return gsl_complex_inverse (z);
+gsl_complex gsl_complex_csch(gsl_complex a)
+{ /* z = csch(a) */
+    gsl_complex z = gsl_complex_sinh(a);
+    return gsl_complex_inverse(z);
 }
 
-gsl_complex
-gsl_complex_coth (gsl_complex a)
-{				/* z = coth(a) */
-  gsl_complex z = gsl_complex_tanh (a);
-  return gsl_complex_inverse (z);
+gsl_complex gsl_complex_coth(gsl_complex a)
+{ /* z = coth(a) */
+    gsl_complex z = gsl_complex_tanh(a);
+    return gsl_complex_inverse(z);
 }
 
 /**********************************************************************
  * Inverse Complex Hyperbolic Functions
  **********************************************************************/
 
-gsl_complex
-gsl_complex_arcsinh (gsl_complex a)
-{				/* z = arcsinh(a) */
-  gsl_complex z = gsl_complex_mul_imag(a, 1.0);
-  z = gsl_complex_arcsin (z);
-  z = gsl_complex_mul_imag (z, -1.0);
-  return z;
+gsl_complex gsl_complex_arcsinh(gsl_complex a)
+{ /* z = arcsinh(a) */
+    gsl_complex z = gsl_complex_mul_imag(a, 1.0);
+    z = gsl_complex_arcsin(z);
+    z = gsl_complex_mul_imag(z, -1.0);
+    return z;
 }
 
-gsl_complex
-gsl_complex_arccosh (gsl_complex a)
-{				/* z = arccosh(a) */
-  gsl_complex z = gsl_complex_arccos (a);
-  z = gsl_complex_mul_imag (z, GSL_IMAG(z) > 0 ? -1.0 : 1.0);
-  return z;
+gsl_complex gsl_complex_arccosh(gsl_complex a)
+{ /* z = arccosh(a) */
+    gsl_complex z = gsl_complex_arccos(a);
+    z = gsl_complex_mul_imag(z, GSL_IMAG(z) > 0 ? -1.0 : 1.0);
+    return z;
 }
 
-gsl_complex
-gsl_complex_arccosh_real (double a)
-{				/* z = arccosh(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arccosh_real(double a)
+{ /* z = arccosh(a) */
+    gsl_complex z;
 
-  if (a >= 1)
+    if (a >= 1)
     {
-      GSL_SET_COMPLEX (&z, acosh (a), 0);
+        GSL_SET_COMPLEX(&z, acosh(a), 0);
     }
-  else
+    else
     {
-      if (a >= -1.0)
-	{
-	  GSL_SET_COMPLEX (&z, 0, acos (a));
-	}
-      else
-	{
-	  GSL_SET_COMPLEX (&z, acosh (-a), M_PI);
-	}
+        if (a >= -1.0)
+        {
+            GSL_SET_COMPLEX(&z, 0, acos(a));
+        }
+        else
+        {
+            GSL_SET_COMPLEX(&z, acosh(-a), M_PI);
+        }
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arctanh (gsl_complex a)
-{				/* z = arctanh(a) */
-  if (GSL_IMAG (a) == 0.0)
+gsl_complex gsl_complex_arctanh(gsl_complex a)
+{ /* z = arctanh(a) */
+    if (GSL_IMAG(a) == 0.0)
     {
-      return gsl_complex_arctanh_real (GSL_REAL (a));
+        return gsl_complex_arctanh_real(GSL_REAL(a));
     }
-  else
+    else
     {
-      gsl_complex z = gsl_complex_mul_imag(a, 1.0);
-      z = gsl_complex_arctan (z);
-      z = gsl_complex_mul_imag (z, -1.0);
-      return z;
+        gsl_complex z = gsl_complex_mul_imag(a, 1.0);
+        z = gsl_complex_arctan(z);
+        z = gsl_complex_mul_imag(z, -1.0);
+        return z;
     }
 }
 
-gsl_complex
-gsl_complex_arctanh_real (double a)
-{				/* z = arctanh(a) */
-  gsl_complex z;
+gsl_complex gsl_complex_arctanh_real(double a)
+{ /* z = arctanh(a) */
+    gsl_complex z;
 
-  if (a > -1.0 && a < 1.0)
+    if (a > -1.0 && a < 1.0)
     {
-      GSL_SET_COMPLEX (&z, atanh (a), 0);
+        GSL_SET_COMPLEX(&z, atanh(a), 0);
     }
-  else
+    else
     {
-      GSL_SET_COMPLEX (&z, atanh (1 / a), (a < 0) ? M_PI_2 : -M_PI_2);
+        GSL_SET_COMPLEX(&z, atanh(1 / a), (a < 0) ? M_PI_2 : -M_PI_2);
     }
 
-  return z;
+    return z;
 }
 
-gsl_complex
-gsl_complex_arcsech (gsl_complex a)
-{				/* z = arcsech(a); */
-  gsl_complex t = gsl_complex_inverse (a);
-  return gsl_complex_arccosh (t);
+gsl_complex gsl_complex_arcsech(gsl_complex a)
+{ /* z = arcsech(a); */
+    gsl_complex t = gsl_complex_inverse(a);
+    return gsl_complex_arccosh(t);
 }
 
-gsl_complex
-gsl_complex_arccsch (gsl_complex a)
-{				/* z = arccsch(a) */
-  gsl_complex t = gsl_complex_inverse (a);
-  return gsl_complex_arcsinh (t);
+gsl_complex gsl_complex_arccsch(gsl_complex a)
+{ /* z = arccsch(a) */
+    gsl_complex t = gsl_complex_inverse(a);
+    return gsl_complex_arcsinh(t);
 }
 
-gsl_complex
-gsl_complex_arccoth (gsl_complex a)
-{				/* z = arccoth(a) */
-  gsl_complex t = gsl_complex_inverse (a);
-  return gsl_complex_arctanh (t);
+gsl_complex gsl_complex_arccoth(gsl_complex a)
+{ /* z = arccoth(a) */
+    gsl_complex t = gsl_complex_inverse(a);
+    return gsl_complex_arctanh(t);
 }
diff --git a/src/parser/PARSER_symbols.c b/src/parser/PARSER_symbols.c
index 6c7104578b..8ad36146f8 100644
--- a/src/parser/PARSER_symbols.c
+++ b/src/parser/PARSER_symbols.c
@@ -17,13 +17,14 @@
  02111-1307, USA.
 */
 
-#include <string.h>
-#include <strings.h>
-#include <stdio.h>
-#include <stdlib.h>
 #include <ctype.h>
-#include <math.h>
 #include <gsl_complex_math.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
 #include "symbols.h"
 
 /* The symbol table: a chain of `struct symrec'.  */
@@ -31,195 +32,226 @@ symrec *sym_table = (symrec *)0;
 
 char *str_tolower(char *in)
 {
-	char *s;
-	for(s=in; *s; s++)
-		*s = tolower(*s);
-	return in;
+    char *s;
+    for (s = in; *s; s++)
+    {
+        *s = tolower(*s);
+    }
+    return in;
 }
 
-symrec *putsym (char *sym_name, symrec_type sym_type)
+symrec *putsym(char *sym_name, symrec_type sym_type)
 {
-	symrec *ptr;
-	ptr = (symrec *)malloc(sizeof(symrec));
-
-	/* names are always lowercase */
-	ptr->name = strdup(sym_name);
-	str_tolower(ptr->name);
-
-	ptr->type = sym_type;
-	GSL_SET_COMPLEX(&ptr->value.c, 0, 0); /* set value to 0 even if fctn.  */
-	ptr->next = (struct symrec *)sym_table;
-	sym_table = ptr;
-	return ptr;
+    symrec *ptr;
+    ptr = (symrec *)malloc(sizeof(symrec));
+
+    /* names are always lowercase */
+    ptr->name = strdup(sym_name);
+    str_tolower(ptr->name);
+
+    ptr->type = sym_type;
+    GSL_SET_COMPLEX(&ptr->value.c, 0, 0); /* set value to 0 even if fctn.  */
+    ptr->next = (struct symrec *)sym_table;
+    sym_table = ptr;
+    return ptr;
 }
 
-symrec *getsym (char *sym_name)
+symrec *getsym(char *sym_name)
 {
-	symrec *ptr;
-	for (ptr = sym_table; ptr != (symrec *) 0;
-			 ptr = (symrec *)ptr->next)
-		if (strcasecmp(ptr->name,sym_name) == 0)
-			return ptr;
-	return (symrec *) 0;
+    symrec *ptr;
+    for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
+    {
+        if (strcasecmp(ptr->name, sym_name) == 0)
+        {
+            return ptr;
+        }
+    }
+    return (symrec *)0;
 }
 
-int rmsym (char *sym_name)
+int rmsym(char *sym_name)
 {
-	symrec *ptr, *prev;
-	for (prev = (symrec *) 0, ptr = sym_table; ptr != (symrec *) 0;
-			 prev = ptr, ptr = ptr->next)
-		if (strcasecmp(ptr->name,sym_name) == 0){
-			if(prev == (symrec *) 0)
-				sym_table = ptr->next;
-			else
-				prev->next = ptr->next;
-			free(ptr);
-
-			return 1;
-		}
-
-	return 0;
+    symrec *ptr, *prev;
+    for (prev = (symrec *)0, ptr = sym_table; ptr != (symrec *)0;
+         prev = ptr, ptr = ptr->next)
+    {
+        if (strcasecmp(ptr->name, sym_name) == 0)
+        {
+            if (prev == (symrec *)0)
+            {
+                sym_table = ptr->next;
+            }
+            else
+            {
+                prev->next = ptr->next;
+            }
+            free(ptr);
+
+            return 1;
+        }
+    }
+
+    return 0;
 }
 
-struct init_fntc{
-	char *fname;
-	gsl_complex (*fnct)(gsl_complex);
+struct init_fntc
+{
+    char *fname;
+    gsl_complex (*fnct)(gsl_complex);
 };
 
-static struct init_fntc arith_fncts[] = {
-	{"sqrt",  gsl_complex_sqrt},
-	{"exp",   gsl_complex_exp},
-	{"ln",    gsl_complex_log},
-	{"log",   gsl_complex_log},
-	{"log10", gsl_complex_log10},
-
-	{"sin",  gsl_complex_sin},
-	{"cos",  gsl_complex_cos},
-	{"tan",  gsl_complex_tan},
-	{"sec",  gsl_complex_sec},
-	{"csc",  gsl_complex_csc},
-	{"cot",  gsl_complex_cot},
-
-	{"asin", gsl_complex_arcsin},
-	{"acos", gsl_complex_arccos},
-	{"atan", gsl_complex_arctan},
-	{"asec", gsl_complex_arcsec},
-	{"acsc", gsl_complex_arccsc},
-	{"acot", gsl_complex_arccot},
-
-	{"sinh", gsl_complex_sinh},
-	{"cosh", gsl_complex_cosh},
-	{"tanh", gsl_complex_tanh},
-	{"sech", gsl_complex_sech},
-	{"csch", gsl_complex_csch},
-	{"coth", gsl_complex_coth},
-	
-	{"asinh", gsl_complex_arcsinh},
-	{"acosh", gsl_complex_arccosh},
-	{"atanh", gsl_complex_arctanh},
-	{"asech", gsl_complex_arcsech},
-	{"acsch", gsl_complex_arccsch},
-	{"acoth", gsl_complex_arccoth},	
-
-	{0, 0}
-};
+static struct init_fntc arith_fncts[] = {{"sqrt", gsl_complex_sqrt},
+                                         {"exp", gsl_complex_exp},
+                                         {"ln", gsl_complex_log},
+                                         {"log", gsl_complex_log},
+                                         {"log10", gsl_complex_log10},
 
-struct init_cnst{
-	char *fname;
-	double c;
-};
+                                         {"sin", gsl_complex_sin},
+                                         {"cos", gsl_complex_cos},
+                                         {"tan", gsl_complex_tan},
+                                         {"sec", gsl_complex_sec},
+                                         {"csc", gsl_complex_csc},
+                                         {"cot", gsl_complex_cot},
 
-static struct init_cnst arith_cnts[] = {
-	{"pi",    M_PI}, {"e",     M_E},
-	{"true",  1}, {"t",     1}, {"yes",   1},
-	{"false", 0}, {"f",     0}, {"no",    0},
-	{"sphere", 1}, {"cylinder", 2}, {"minimum", 3}, {"parallelepiped", 4},
-	{"real_space", 0}, {"fourier_space", 1},
-	{0, 0}
-};
+                                         {"asin", gsl_complex_arcsin},
+                                         {"acos", gsl_complex_arccos},
+                                         {"atan", gsl_complex_arctan},
+                                         {"asec", gsl_complex_arcsec},
+                                         {"acsc", gsl_complex_arccsc},
+                                         {"acot", gsl_complex_arccot},
+
+                                         {"sinh", gsl_complex_sinh},
+                                         {"cosh", gsl_complex_cosh},
+                                         {"tanh", gsl_complex_tanh},
+                                         {"sech", gsl_complex_sech},
+                                         {"csch", gsl_complex_csch},
+                                         {"coth", gsl_complex_coth},
+
+                                         {"asinh", gsl_complex_arcsinh},
+                                         {"acosh", gsl_complex_arccosh},
+                                         {"atanh", gsl_complex_arctanh},
+                                         {"asech", gsl_complex_arcsech},
+                                         {"acsch", gsl_complex_arccsch},
+                                         {"acoth", gsl_complex_arccoth},
 
-static char *reserved_symbols[] = {
-	"x", "y", "z", "r", 0
+                                         {0, 0}};
+
+struct init_cnst
+{
+    char *fname;
+    double c;
 };
 
-void sym_init_table ()  /* puts arithmetic functions in table. */
+static struct init_cnst arith_cnts[] = {{"pi", M_PI},
+                                        {"e", M_E},
+                                        {"true", 1},
+                                        {"t", 1},
+                                        {"yes", 1},
+                                        {"false", 0},
+                                        {"f", 0},
+                                        {"no", 0},
+                                        {"sphere", 1},
+                                        {"cylinder", 2},
+                                        {"minimum", 3},
+                                        {"parallelepiped", 4},
+                                        {"real_space", 0},
+                                        {"fourier_space", 1},
+                                        {0, 0}};
+
+static char *reserved_symbols[] = {"x", "y", "z", "r", 0};
+
+void sym_init_table() /* puts arithmetic functions in table. */
 {
-	int i;
-	symrec *ptr;
-	for (i = 0; arith_fncts[i].fname != 0; i++){
-		ptr = putsym (arith_fncts[i].fname, S_FNCT);
-		ptr->value.fnctptr = arith_fncts[i].fnct;
-	}
-
-	/* now the constants */
-	for (i = 0; arith_cnts[i].fname != 0; i++){
-		ptr = putsym(arith_cnts[i].fname, S_CMPLX);
-		GSL_SET_COMPLEX(&ptr->value.c, arith_cnts[i].c, 0);
-	}
+    int i;
+    symrec *ptr;
+    for (i = 0; arith_fncts[i].fname != 0; i++)
+    {
+        ptr = putsym(arith_fncts[i].fname, S_FNCT);
+        ptr->value.fnctptr = arith_fncts[i].fnct;
+    }
+
+    /* now the constants */
+    for (i = 0; arith_cnts[i].fname != 0; i++)
+    {
+        ptr = putsym(arith_cnts[i].fname, S_CMPLX);
+        GSL_SET_COMPLEX(&ptr->value.c, arith_cnts[i].c, 0);
+    }
 }
 
 void sym_clear_reserved()
 {
-	int i;
-	for (i = 0; reserved_symbols[i] != 0; i++){
-		rmsym(reserved_symbols[i]);
-	}
+    int i;
+    for (i = 0; reserved_symbols[i] != 0; i++)
+    {
+        rmsym(reserved_symbols[i]);
+    }
 }
 
 void sym_end_table()
 {
-	symrec *ptr, *ptr2;
-	int l, col;
-
-	for (ptr = sym_table; ptr != NULL;){
-		free(ptr->name);
-		switch(ptr->type){
-		case S_STR:
-			free(ptr->value.str);
-			break;
-		case S_BLOCK:
-			if(ptr->value.block->n > 0){
-				for(l = 0; l < ptr->value.block->n; l++){
-					if(ptr->value.block->lines[l].n > 0){
-						for(col = 0; col < ptr->value.block->lines[l].n; col++)
-							free(ptr->value.block->lines[l].fields[col]);
-						free(ptr->value.block->lines[l].fields);
-					}
-				}
-				free(ptr->value.block->lines);
-			}
-			free(ptr->value.block);
-			break;
-		case S_CMPLX:
-		case S_FNCT:
-			break;
-		}
-		ptr2 = ptr->next;
-		free(ptr);
-		ptr = ptr2;
-	}
-
-	sym_table = NULL;
+    symrec *ptr, *ptr2;
+    int l, col;
+
+    for (ptr = sym_table; ptr != NULL;)
+    {
+        free(ptr->name);
+        switch (ptr->type)
+        {
+            case S_STR:
+                free(ptr->value.str);
+                break;
+            case S_BLOCK:
+                if (ptr->value.block->n > 0)
+                {
+                    for (l = 0; l < ptr->value.block->n; l++)
+                    {
+                        if (ptr->value.block->lines[l].n > 0)
+                        {
+                            for (col = 0; col < ptr->value.block->lines[l].n;
+                                 col++)
+                            {
+                                free(ptr->value.block->lines[l].fields[col]);
+                            }
+                            free(ptr->value.block->lines[l].fields);
+                        }
+                    }
+                    free(ptr->value.block->lines);
+                }
+                free(ptr->value.block);
+                break;
+            case S_CMPLX:
+            case S_FNCT:
+                break;
+        }
+        ptr2 = ptr->next;
+        free(ptr);
+        ptr = ptr2;
+    }
+
+    sym_table = NULL;
 }
 
 void sym_output_table()
 {
-	symrec *ptr;
-	for(ptr = sym_table; ptr != NULL; ptr = ptr->next){
-		printf("%s", ptr->name);
-		switch(ptr->type){
-		case S_CMPLX:
-			printf(" = (%lf,%lf)\n", GSL_REAL(ptr->value.c), GSL_IMAG(ptr->value.c));
-			break;
-		case S_STR:
-			printf(" = \"%s\"\n", ptr->value.str);
-			break;
-		case S_BLOCK:
-			printf("%s\n", " <= BLOCK");
-			break;
-		case S_FNCT:
-			printf("%s\n", " <= FUNCTION");
-			break;
-		}
-	}
+    symrec *ptr;
+    for (ptr = sym_table; ptr != NULL; ptr = ptr->next)
+    {
+        printf("%s", ptr->name);
+        switch (ptr->type)
+        {
+            case S_CMPLX:
+                printf(" = (%lf,%lf)\n", GSL_REAL(ptr->value.c),
+                       GSL_IMAG(ptr->value.c));
+                break;
+            case S_STR:
+                printf(" = \"%s\"\n", ptr->value.str);
+                break;
+            case S_BLOCK:
+                printf("%s\n", " <= BLOCK");
+                break;
+            case S_FNCT:
+                printf("%s\n", " <= FUNCTION");
+                break;
+        }
+    }
 }
diff --git a/src/parser/grammar.c b/src/parser/grammar.c
index 7918926f01..760b106315 100644
--- a/src/parser/grammar.c
+++ b/src/parser/grammar.c
@@ -17,26 +17,25 @@
  02111-1307, USA.
 */
 
-
 /*  A Bison parser, made from grammar.y
     by GNU Bison version 1.28  */
 
-#define YYBISON 1  /* Identify Bison output.  */
+#define YYBISON 1 /* Identify Bison output.  */
 
-#define	NUM	257
-#define	STR	258
-#define	VAR	259
-#define	FNCT	260
-#define	NEG	261
+#define NUM 257
+#define STR 258
+#define VAR 259
+#define FNCT 260
+#define NEG 261
 
 #line 2 "grammar.y"
 
-
 #line 5 "grammar.y"
-typedef union {
-	gsl_complex val;  /* For returning numbers.               */
-	char *str;        /* For strings                          */
-	symrec  *tptr;    /* For returning symbol-table pointers  */
+typedef union
+{
+    gsl_complex val; /* For returning numbers.               */
+    char *str;       /* For strings                          */
+    symrec *tptr;    /* For returning symbol-table pointers  */
 } YYSTYPE;
 #include <stdio.h>
 
@@ -46,139 +45,86 @@ typedef union {
 #endif
 #endif
 
-
-
-#define	YYFINAL		41
-#define	YYFLAG		-32768
-#define	YYNTBASE	18
+#define YYFINAL 41
+#define YYFLAG -32768
+#define YYNTBASE 18
 
 #define YYTRANSLATE(x) ((unsigned)(x) <= 261 ? yytranslate[x] : 22)
 
-static const char yytranslate[] = {     0,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,    14,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,    15,
-    16,    10,     9,    17,     8,     2,    11,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     7,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,    13,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     2,     2,     2,     2,     2,
-     2,     2,     2,     2,     2,     1,     3,     4,     5,     6,
-    12
-};
+static const char yytranslate[] = {
+    0,  2, 2, 2,  2, 2, 2,  2, 2, 2, 14, 2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 15, 16, 10, 9,
+    17, 8, 2, 11, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 7, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 13, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,  2,  2,  2,
+    2,  2, 2, 2,  2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 1, 3, 4, 5, 6,  12};
 
 #if YYDEBUG != 0
-static const short yyprhs[] = {     0,
-     0,     1,     4,     6,     9,    12,    15,    17,    19,    23,
-    28,    32,    36,    40,    44,    47,    51,    57,    61,    63
-};
+static const short yyprhs[] = {0,  0,  1,  4,  6,  9,  12, 15, 17, 19, 23,
+                               28, 32, 36, 40, 44, 47, 51, 57, 61, 63};
 
-static const short yyrhs[] = {    -1,
-    18,    19,     0,    14,     0,    20,    14,     0,    21,    14,
-     0,     1,    14,     0,     3,     0,     5,     0,     5,     7,
-    20,     0,     6,    15,    20,    16,     0,    20,     9,    20,
-     0,    20,     8,    20,     0,    20,    10,    20,     0,    20,
-    11,    20,     0,     8,    20,     0,    20,    13,    20,     0,
-    15,    20,    17,    20,    16,     0,    15,    20,    16,     0,
-     4,     0,     5,     7,     4,     0
-};
+static const short yyrhs[] = {
+    -1, 18, 19, 0,  14, 0, 20, 14, 0,  21, 14, 0,  1, 14, 0,  3,  0,
+    5,  0,  5,  7,  20, 0, 6,  15, 20, 16, 0,  20, 9, 20, 0,  20, 8,
+    20, 0,  20, 10, 20, 0, 20, 11, 20, 0,  8,  20, 0, 20, 13, 20, 0,
+    15, 20, 17, 20, 16, 0, 15, 20, 16, 0,  4,  0,  5, 7,  4,  0};
 
 #endif
 
 #if YYDEBUG != 0
-static const short yyrline[] = { 0,
-    27,    28,    31,    33,    34,    35,    38,    39,    40,    41,
-    42,    43,    44,    45,    46,    47,    48,    49,    51,    52
-};
-#endif
-
-
-#if YYDEBUG != 0 || defined (YYERROR_VERBOSE)
-
-static const char * const yytname[] = {   "$","error","$undefined.","NUM","STR",
-"VAR","FNCT","'='","'-'","'+'","'*'","'/'","NEG","'^'","'\\n'","'('","')'","','",
-"input","line","exp","string", NULL
-};
-#endif
-
-static const short yyr1[] = {     0,
-    18,    18,    19,    19,    19,    19,    20,    20,    20,    20,
-    20,    20,    20,    20,    20,    20,    20,    20,    21,    21
-};
-
-static const short yyr2[] = {     0,
-     0,     2,     1,     2,     2,     2,     1,     1,     3,     4,
-     3,     3,     3,     3,     2,     3,     5,     3,     1,     3
-};
-
-static const short yydefact[] = {     1,
-     0,     0,     7,    19,     8,     0,     0,     3,     0,     2,
-     0,     0,     6,     0,     0,     8,    15,     0,     0,     0,
-     0,     0,     0,     4,     5,    20,     9,     0,     0,    18,
-     0,    12,    11,    13,    14,    16,    10,     0,    17,     0,
-     0
-};
-
-static const short yydefgoto[] = {     1,
-    10,    27,    12
-};
-
-static const short yypact[] = {-32768,
-    23,   -13,-32768,-32768,     9,    10,    40,-32768,    40,-32768,
-    66,    -5,-32768,    36,    40,    25,     4,    -6,    40,    40,
-    40,    40,    40,-32768,-32768,-32768,    73,    48,    40,-32768,
-    40,     2,     2,     4,     4,     4,-32768,    57,-32768,    33,
--32768
-};
-
-static const short yypgoto[] = {-32768,
--32768,    -1,-32768
-};
-
-
-#define	YYLAST		86
-
-
-static const short yytable[] = {    11,
-    13,    19,    20,    21,    22,    17,    23,    18,    25,    30,
-    31,    21,    22,    28,    23,    14,    23,    32,    33,    34,
-    35,    36,    40,     2,    15,     3,     4,     5,     6,    38,
-     7,    29,    41,     0,     0,     0,     8,     9,     3,    26,
-    16,     6,     3,     7,    16,     6,     0,     7,     0,     0,
-     9,     0,     0,     0,     9,    19,    20,    21,    22,     0,
-    23,     0,     0,    37,    19,    20,    21,    22,     0,    23,
-     0,     0,    39,    19,    20,    21,    22,     0,    23,    24,
-    19,    20,    21,    22,     0,    23
-};
-
-static const short yycheck[] = {     1,
-    14,     8,     9,    10,    11,     7,    13,     9,    14,    16,
-    17,    10,    11,    15,    13,     7,    13,    19,    20,    21,
-    22,    23,     0,     1,    15,     3,     4,     5,     6,    31,
-     8,     7,     0,    -1,    -1,    -1,    14,    15,     3,     4,
-     5,     6,     3,     8,     5,     6,    -1,     8,    -1,    -1,
-    15,    -1,    -1,    -1,    15,     8,     9,    10,    11,    -1,
-    13,    -1,    -1,    16,     8,     9,    10,    11,    -1,    13,
-    -1,    -1,    16,     8,     9,    10,    11,    -1,    13,    14,
-     8,     9,    10,    11,    -1,    13
-};
+static const short yyrline[] = {0,  27, 28, 31, 33, 34, 35, 38, 39, 40, 41,
+                                42, 43, 44, 45, 46, 47, 48, 49, 51, 52};
+#endif
+
+#if YYDEBUG != 0 || defined(YYERROR_VERBOSE)
+
+static const char *const yytname[] = {
+    "$",   "error", "$undefined.", "NUM",  "STR", "VAR",    "FNCT",  "'='",
+    "'-'", "'+'",   "'*'",         "'/'",  "NEG", "'^'",    "'\\n'", "'('",
+    "')'", "','",   "input",       "line", "exp", "string", NULL};
+#endif
+
+static const short yyr1[] = {0,  18, 18, 19, 19, 19, 19, 20, 20, 20, 20,
+                             20, 20, 20, 20, 20, 20, 20, 20, 21, 21};
+
+static const short yyr2[] = {0, 0, 2, 1, 2, 2, 2, 1, 1, 3, 4,
+                             3, 3, 3, 3, 2, 3, 5, 3, 1, 3};
+
+static const short yydefact[] = {
+    1, 0, 0, 7, 19, 8,  0, 0, 3, 0,  2, 0,  0,  6,  0,  0,  8,  15, 0,  0, 0,
+    0, 0, 0, 4, 5,  20, 9, 0, 0, 18, 0, 12, 11, 13, 14, 16, 10, 0,  17, 0, 0};
+
+static const short yydefgoto[] = {1, 10, 27, 12};
+
+static const short yypact[] = {
+    -32768, 23, -13,    -32768, -32768, 9,  10,     40, -32768, 40, -32768,
+    66,     -5, -32768, 36,     40,     25, 4,      -6, 40,     40, 40,
+    40,     40, -32768, -32768, -32768, 73, 48,     40, -32768, 40, 2,
+    2,      4,  4,      4,      -32768, 57, -32768, 33, -32768};
+
+static const short yypgoto[] = {-32768, -32768, -1, -32768};
+
+#define YYLAST 86
+
+static const short yytable[] = {
+    11, 13, 19, 20, 21, 22, 17, 23, 18, 25, 30, 31, 21, 22, 28, 23, 14, 23,
+    32, 33, 34, 35, 36, 40, 2,  15, 3,  4,  5,  6,  38, 7,  29, 41, 0,  0,
+    0,  8,  9,  3,  26, 16, 6,  3,  7,  16, 6,  0,  7,  0,  0,  9,  0,  0,
+    0,  9,  19, 20, 21, 22, 0,  23, 0,  0,  37, 19, 20, 21, 22, 0,  23, 0,
+    0,  39, 19, 20, 21, 22, 0,  23, 24, 19, 20, 21, 22, 0,  23};
+
+static const short yycheck[] = {
+    1,  14, 8,  9,  10, 11, 7,  13, 9,  14, 16, 17, 10, 11, 15, 13, 7,  13,
+    19, 20, 21, 22, 23, 0,  1,  15, 3,  4,  5,  6,  31, 8,  7,  0,  -1, -1,
+    -1, 14, 15, 3,  4,  5,  6,  3,  8,  5,  6,  -1, 8,  -1, -1, 15, -1, -1,
+    -1, 15, 8,  9,  10, 11, -1, 13, -1, -1, 16, 8,  9,  10, 11, -1, 13, -1,
+    -1, 16, 8,  9,  10, 11, -1, 13, 14, 8,  9,  10, 11, -1, 13};
 /* -*-C-*-  Note some compilers choke on comments on `#line' lines.  */
 #line 3 "/usr/lib/bison.simple"
 /* This file comes from bison-1.28.  */
@@ -219,16 +165,17 @@ static const short yycheck[] = {     1,
 #define YYSTACK_USE_ALLOCA
 #define alloca __builtin_alloca
 #else /* not GNU C.  */
-#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) || (defined (__sun) && defined (__i386))
+#if (!defined(__STDC__) && defined(sparc)) || defined(__sparc__) || \
+    defined(__sparc) || defined(__sgi) || (defined(__sun) && defined(__i386))
 #define YYSTACK_USE_ALLOCA
 #include <alloca.h>
 #else /* not sparc */
 /* We think this test detects Watcom and Microsoft C.  */
 /* This used to test MSDOS, but that is a bad idea
    since that symbol is in the user namespace.  */
-#if (defined (_MSDOS) || defined (_MSDOS_)) && !defined (__TURBOC__)
-#if 0 /* No need for malloc.h, which pollutes the namespace;
-	 instead, just don't use alloca.  */
+#if (defined(_MSDOS) || defined(_MSDOS_)) && !defined(__TURBOC__)
+#if 0 /* No need for malloc.h, which pollutes the namespace; \
+         instead, just don't use alloca.  */
 #include <malloc.h>
 #endif
 #else /* not MSDOS, or __TURBOC__ */
@@ -236,12 +183,12 @@ static const short yycheck[] = {     1,
 /* I don't know what this was needed for, but it pollutes the namespace.
    So I turned it off.   rms, 2 May 1997.  */
 /* #include <malloc.h>  */
- #pragma alloca
+#pragma alloca
 #define YYSTACK_USE_ALLOCA
 #else /* not MSDOS, or __TURBOC__, or _AIX */
 #if 0
-#ifdef __hpux /* haible@ilog.fr says this works for HPUX 9.05 and up,
-		 and on HPUX 10.  Eventually we can turn this on.  */
+#ifdef __hpux /* haible@ilog.fr says this works for HPUX 9.05 and up, \
+                 and on HPUX 10.  Eventually we can turn this on.  */
 #define YYSTACK_USE_ALLOCA
 #define alloca __builtin_alloca
 #endif /* __hpux */
@@ -263,50 +210,54 @@ static const short yycheck[] = {     1,
    It is replaced by the list of actions, each action
    as one case of the switch.  */
 
-#define yyerrok		(yyerrstatus = 0)
-#define yyclearin	(yychar = YYEMPTY)
-#define YYEMPTY		-2
-#define YYEOF		0
-#define YYACCEPT	goto yyacceptlab
-#define YYABORT 	goto yyabortlab
-#define YYERROR		goto yyerrlab1
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY -2
+#define YYEOF 0
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrlab1
 /* Like YYERROR except do call yyerror.
    This remains here temporarily to ease the
    transition to the new meaning of YYERROR, for GCC.
    Once GCC version 2 has supplanted version 1, this can go.  */
-#define YYFAIL		goto yyerrlab
-#define YYRECOVERING()  (!!yyerrstatus)
-#define YYBACKUP(token, value) \
-do								\
-  if (yychar == YYEMPTY && yylen == 1)				\
-    { yychar = (token), yylval = (value);			\
-      yychar1 = YYTRANSLATE (yychar);				\
-      YYPOPSTACK;						\
-      goto yybackup;						\
-    }								\
-  else								\
-    { yyerror ("syntax error: cannot back up"); YYERROR; }	\
-while (0)
-
-#define YYTERROR	1
-#define YYERRCODE	256
+#define YYFAIL goto yyerrlab
+#define YYRECOVERING() (!!yyerrstatus)
+#define YYBACKUP(token, value)                       \
+    do                                               \
+        if (yychar == YYEMPTY && yylen == 1)         \
+        {                                            \
+            yychar = (token), yylval = (value);      \
+            yychar1 = YYTRANSLATE(yychar);           \
+            YYPOPSTACK;                              \
+            goto yybackup;                           \
+        }                                            \
+        else                                         \
+        {                                            \
+            yyerror("syntax error: cannot back up"); \
+            YYERROR;                                 \
+        }                                            \
+    while (0)
+
+#define YYTERROR 1
+#define YYERRCODE 256
 
 #ifndef YYPURE
-#define YYLEX		yylex()
+#define YYLEX yylex()
 #endif
 
 #ifdef YYPURE
 #ifdef YYLSP_NEEDED
 #ifdef YYLEX_PARAM
-#define YYLEX		yylex(&yylval, &yylloc, YYLEX_PARAM)
+#define YYLEX yylex(&yylval, &yylloc, YYLEX_PARAM)
 #else
-#define YYLEX		yylex(&yylval, &yylloc)
+#define YYLEX yylex(&yylval, &yylloc)
 #endif
 #else /* not YYLSP_NEEDED */
 #ifdef YYLEX_PARAM
-#define YYLEX		yylex(&yylval, YYLEX_PARAM)
+#define YYLEX yylex(&yylval, YYLEX_PARAM)
 #else
-#define YYLEX		yylex(&yylval)
+#define YYLEX yylex(&yylval)
 #endif
 #endif /* not YYLSP_NEEDED */
 #endif
@@ -315,27 +266,27 @@ while (0)
 
 #ifndef YYPURE
 
-int	yychar;			/*  the lookahead symbol		*/
-YYSTYPE	yylval;			/*  the semantic value of the		*/
-				/*  lookahead symbol			*/
+int yychar;     /*  the lookahead symbol		*/
+YYSTYPE yylval; /*  the semantic value of the		*/
+/*  lookahead symbol			*/
 
 #ifdef YYLSP_NEEDED
-YYLTYPE yylloc;			/*  location data for the lookahead	*/
-				/*  symbol				*/
+YYLTYPE yylloc; /*  location data for the lookahead	*/
+/*  symbol				*/
 #endif
 
-int yynerrs;			/*  number of parse errors so far       */
-#endif  /* not YYPURE */
+int yynerrs; /*  number of parse errors so far       */
+#endif       /* not YYPURE */
 
 #if YYDEBUG != 0
-int yydebug;			/*  nonzero means print parse trace	*/
+int yydebug; /*  nonzero means print parse trace	*/
 /* Since this is uninitialized, it does not stop multiple parsers
    from coexisting.  */
 #endif
 
 /*  YYINITDEPTH indicates the initial size of the parser's stacks	*/
 
-#ifndef	YYINITDEPTH
+#ifndef YYINITDEPTH
 #define YYINITDEPTH 200
 #endif
 
@@ -349,51 +300,52 @@ int yydebug;			/*  nonzero means print parse trace	*/
 #ifndef YYMAXDEPTH
 #define YYMAXDEPTH 10000
 #endif
-
+
 /* Define __yy_memcpy.  Note that the size argument
    should be passed with type unsigned int, because that is what the non-GCC
    definitions require.  With GCC, __builtin_memcpy takes an arg
    of type size_t, but it can handle unsigned int.  */
 
-#if __GNUC__ > 1		/* GNU C and GNU C++ define this.  */
-#define __yy_memcpy(TO,FROM,COUNT)	__builtin_memcpy(TO,FROM,COUNT)
-#else				/* not GNU C or C++ */
+#if __GNUC__ > 1 /* GNU C and GNU C++ define this.  */
+#define __yy_memcpy(TO, FROM, COUNT) __builtin_memcpy(TO, FROM, COUNT)
+#else /* not GNU C or C++ */
 #ifndef __cplusplus
 
 /* This is the most reliable way to avoid incompatibilities
    in available built-in functions on various systems.  */
-static void
-__yy_memcpy (to, from, count)
-     char *to;
-     char *from;
-     unsigned int count;
+static void __yy_memcpy(to, from, count) char *to;
+char *from;
+unsigned int count;
 {
-  register char *f = from;
-  register char *t = to;
-  register int i = count;
+    register char *f = from;
+    register char *t = to;
+    register int i = count;
 
-  while (i-- > 0)
-    *t++ = *f++;
+    while (i-- > 0)
+    {
+        *t++ = *f++;
+    }
 }
 
 #else /* __cplusplus */
 
 /* This is the most reliable way to avoid incompatibilities
    in available built-in functions on various systems.  */
-static void
-__yy_memcpy (char *to, char *from, unsigned int count)
+static void __yy_memcpy(char *to, char *from, unsigned int count)
 {
-  register char *t = to;
-  register char *f = from;
-  register int i = count;
+    register char *t = to;
+    register char *f = from;
+    register int i = count;
 
-  while (i-- > 0)
-    *t++ = *f++;
+    while (i-- > 0)
+    {
+        *t++ = *f++;
+    }
 }
 
 #endif
 #endif
-
+
 #line 217 "/usr/lib/bison.simple"
 
 /* The user can define YYPARSE_PARAM as the name of an argument to be passed
@@ -410,7 +362,7 @@ __yy_memcpy (char *to, char *from, unsigned int count)
 #define YYPARSE_PARAM_ARG YYPARSE_PARAM
 #define YYPARSE_PARAM_DECL void *YYPARSE_PARAM;
 #endif /* not __cplusplus */
-#else /* not YYPARSE_PARAM */
+#else  /* not YYPARSE_PARAM */
 #define YYPARSE_PARAM_ARG
 #define YYPARSE_PARAM_DECL
 #endif /* not YYPARSE_PARAM */
@@ -418,76 +370,79 @@ __yy_memcpy (char *to, char *from, unsigned int count)
 /* Prevent warning if -Wstrict-prototypes.  */
 #ifdef __GNUC__
 #ifdef YYPARSE_PARAM
-int yyparse (void *);
+int yyparse(void *);
 #else
-int yyparse (void);
+int yyparse(void);
 #endif
 #endif
 
-int
-yyparse(YYPARSE_PARAM_ARG)
-     YYPARSE_PARAM_DECL
+int yyparse(YYPARSE_PARAM_ARG) YYPARSE_PARAM_DECL
 {
-  register int yystate;
-  register int yyn;
-  register short *yyssp;
-  register YYSTYPE *yyvsp;
-  int yyerrstatus;	/*  number of tokens to shift before error messages enabled */
-  int yychar1 = 0;		/*  lookahead token as an internal (translated) token number */
-
-  short	yyssa[YYINITDEPTH];	/*  the state stack			*/
-  YYSTYPE yyvsa[YYINITDEPTH];	/*  the semantic value stack		*/
-
-  short *yyss = yyssa;		/*  refer to the stacks thru separate pointers */
-  YYSTYPE *yyvs = yyvsa;	/*  to allow yyoverflow to reallocate them elsewhere */
+    register int yystate;
+    register int yyn;
+    register short *yyssp;
+    register YYSTYPE *yyvsp;
+    int yyerrstatus; /*  number of tokens to shift before error messages enabled
+                      */
+    int yychar1 =
+        0; /*  lookahead token as an internal (translated) token number */
+
+    short yyssa[YYINITDEPTH];   /*  the state stack			*/
+    YYSTYPE yyvsa[YYINITDEPTH]; /*  the semantic value stack		*/
+
+    short *yyss = yyssa; /*  refer to the stacks thru separate pointers */
+    YYSTYPE *yyvs =
+        yyvsa; /*  to allow yyoverflow to reallocate them elsewhere */
 
 #ifdef YYLSP_NEEDED
-  YYLTYPE yylsa[YYINITDEPTH];	/*  the location stack			*/
-  YYLTYPE *yyls = yylsa;
-  YYLTYPE *yylsp;
+    YYLTYPE yylsa[YYINITDEPTH]; /*  the location stack			*/
+    YYLTYPE *yyls = yylsa;
+    YYLTYPE *yylsp;
 
-#define YYPOPSTACK   (yyvsp--, yyssp--, yylsp--)
+#define YYPOPSTACK (yyvsp--, yyssp--, yylsp--)
 #else
-#define YYPOPSTACK   (yyvsp--, yyssp--)
+#define YYPOPSTACK (yyvsp--, yyssp--)
 #endif
 
-  int yystacksize = YYINITDEPTH;
-  int yyfree_stacks = 0;
+    int yystacksize = YYINITDEPTH;
+    int yyfree_stacks = 0;
 
 #ifdef YYPURE
-  int yychar;
-  YYSTYPE yylval;
-  int yynerrs;
+    int yychar;
+    YYSTYPE yylval;
+    int yynerrs;
 #ifdef YYLSP_NEEDED
-  YYLTYPE yylloc;
+    YYLTYPE yylloc;
 #endif
 #endif
 
-  YYSTYPE yyval;		/*  the variable used to return		*/
-				/*  semantic values from the action	*/
-				/*  routines				*/
+    YYSTYPE yyval; /*  the variable used to return		*/
+                   /*  semantic values from the action	*/
+                   /*  routines				*/
 
-  int yylen;
+    int yylen;
 
 #if YYDEBUG != 0
-  if (yydebug)
-    fprintf(stderr, "Starting parse\n");
+    if (yydebug)
+    {
+        fprintf(stderr, "Starting parse\n");
+    }
 #endif
 
-  yystate = 0;
-  yyerrstatus = 0;
-  yynerrs = 0;
-  yychar = YYEMPTY;		/* Cause a token to be read.  */
+    yystate = 0;
+    yyerrstatus = 0;
+    yynerrs = 0;
+    yychar = YYEMPTY; /* Cause a token to be read.  */
 
-  /* Initialize stack pointers.
-     Waste one element of value and location stack
-     so that they stay on the same level as the state stack.
-     The wasted elements are never initialized.  */
+    /* Initialize stack pointers.
+       Waste one element of value and location stack
+       so that they stay on the same level as the state stack.
+       The wasted elements are never initialized.  */
 
-  yyssp = yyss - 1;
-  yyvsp = yyvs;
+    yyssp = yyss - 1;
+    yyvsp = yyvs;
 #ifdef YYLSP_NEEDED
-  yylsp = yyls;
+    yylsp = yyls;
 #endif
 
 /* Push a new state, which is found in  yystate  .  */
@@ -495,434 +450,547 @@ yyparse(YYPARSE_PARAM_ARG)
    have just been pushed. so pushing a state here evens the stacks.  */
 yynewstate:
 
-  *++yyssp = yystate;
+    *++yyssp = yystate;
 
-  if (yyssp >= yyss + yystacksize - 1)
+    if (yyssp >= yyss + yystacksize - 1)
     {
-      /* Give user a chance to reallocate the stack */
-      /* Use copies of these so that the &'s don't force the real ones into memory. */
-      YYSTYPE *yyvs1 = yyvs;
-      short *yyss1 = yyss;
+        /* Give user a chance to reallocate the stack */
+        /* Use copies of these so that the &'s don't force the real ones into
+         * memory. */
+        YYSTYPE *yyvs1 = yyvs;
+        short *yyss1 = yyss;
 #ifdef YYLSP_NEEDED
-      YYLTYPE *yyls1 = yyls;
+        YYLTYPE *yyls1 = yyls;
 #endif
 
-      /* Get the current used size of the three stacks, in elements.  */
-      int size = yyssp - yyss + 1;
+        /* Get the current used size of the three stacks, in elements.  */
+        int size = yyssp - yyss + 1;
 
 #ifdef yyoverflow
-      /* Each stack pointer address is followed by the size of
-	 the data in use in that stack, in bytes.  */
+        /* Each stack pointer address is followed by the size of
+           the data in use in that stack, in bytes.  */
 #ifdef YYLSP_NEEDED
-      /* This used to be a conditional around just the two extra args,
-	 but that might be undefined if yyoverflow is a macro.  */
-      yyoverflow("parser stack overflow",
-		 &yyss1, size * sizeof (*yyssp),
-		 &yyvs1, size * sizeof (*yyvsp),
-		 &yyls1, size * sizeof (*yylsp),
-		 &yystacksize);
+        /* This used to be a conditional around just the two extra args,
+           but that might be undefined if yyoverflow is a macro.  */
+        yyoverflow("parser stack overflow", &yyss1, size * sizeof(*yyssp),
+                   &yyvs1, size * sizeof(*yyvsp), &yyls1, size * sizeof(*yylsp),
+                   &yystacksize);
 #else
-      yyoverflow("parser stack overflow",
-		 &yyss1, size * sizeof (*yyssp),
-		 &yyvs1, size * sizeof (*yyvsp),
-		 &yystacksize);
+        yyoverflow("parser stack overflow", &yyss1, size * sizeof(*yyssp),
+                   &yyvs1, size * sizeof(*yyvsp), &yystacksize);
 #endif
 
-      yyss = yyss1; yyvs = yyvs1;
+        yyss = yyss1;
+        yyvs = yyvs1;
 #ifdef YYLSP_NEEDED
-      yyls = yyls1;
+        yyls = yyls1;
 #endif
 #else /* no yyoverflow */
-      /* Extend the stack our own way.  */
-      if (yystacksize >= YYMAXDEPTH)
-	{
-	  yyerror("parser stack overflow");
-	  if (yyfree_stacks)
-	    {
-	      free (yyss);
-	      free (yyvs);
+        /* Extend the stack our own way.  */
+        if (yystacksize >= YYMAXDEPTH)
+        {
+            yyerror("parser stack overflow");
+            if (yyfree_stacks)
+            {
+                free(yyss);
+                free(yyvs);
 #ifdef YYLSP_NEEDED
-	      free (yyls);
-#endif
-	    }
-	  return 2;
-	}
-      yystacksize *= 2;
-      if (yystacksize > YYMAXDEPTH)
-	yystacksize = YYMAXDEPTH;
+                free(yyls);
+#endif
+            }
+            return 2;
+        }
+        yystacksize *= 2;
+        if (yystacksize > YYMAXDEPTH)
+        {
+            yystacksize = YYMAXDEPTH;
+        }
 #ifndef YYSTACK_USE_ALLOCA
-      yyfree_stacks = 1;
-#endif
-      yyss = (short *) YYSTACK_ALLOC (yystacksize * sizeof (*yyssp));
-      __yy_memcpy ((char *)yyss, (char *)yyss1,
-		   size * (unsigned int) sizeof (*yyssp));
-      yyvs = (YYSTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yyvsp));
-      __yy_memcpy ((char *)yyvs, (char *)yyvs1,
-		   size * (unsigned int) sizeof (*yyvsp));
+        yyfree_stacks = 1;
+#endif
+        yyss = (short *)YYSTACK_ALLOC(yystacksize * sizeof(*yyssp));
+        __yy_memcpy((char *)yyss, (char *)yyss1,
+                    size * (unsigned int)sizeof(*yyssp));
+        yyvs = (YYSTYPE *)YYSTACK_ALLOC(yystacksize * sizeof(*yyvsp));
+        __yy_memcpy((char *)yyvs, (char *)yyvs1,
+                    size * (unsigned int)sizeof(*yyvsp));
 #ifdef YYLSP_NEEDED
-      yyls = (YYLTYPE *) YYSTACK_ALLOC (yystacksize * sizeof (*yylsp));
-      __yy_memcpy ((char *)yyls, (char *)yyls1,
-		   size * (unsigned int) sizeof (*yylsp));
+        yyls = (YYLTYPE *)YYSTACK_ALLOC(yystacksize * sizeof(*yylsp));
+        __yy_memcpy((char *)yyls, (char *)yyls1,
+                    size * (unsigned int)sizeof(*yylsp));
 #endif
 #endif /* no yyoverflow */
 
-      yyssp = yyss + size - 1;
-      yyvsp = yyvs + size - 1;
+        yyssp = yyss + size - 1;
+        yyvsp = yyvs + size - 1;
 #ifdef YYLSP_NEEDED
-      yylsp = yyls + size - 1;
+        yylsp = yyls + size - 1;
 #endif
 
 #if YYDEBUG != 0
-      if (yydebug)
-	fprintf(stderr, "Stack size increased to %d\n", yystacksize);
+        if (yydebug)
+        {
+            fprintf(stderr, "Stack size increased to %d\n", yystacksize);
+        }
 #endif
 
-      if (yyssp >= yyss + yystacksize - 1)
-	YYABORT;
+        if (yyssp >= yyss + yystacksize - 1)
+        {
+            YYABORT;
+        }
     }
 
 #if YYDEBUG != 0
-  if (yydebug)
-    fprintf(stderr, "Entering state %d\n", yystate);
+    if (yydebug)
+    {
+        fprintf(stderr, "Entering state %d\n", yystate);
+    }
 #endif
 
-  goto yybackup;
- yybackup:
+    goto yybackup;
+yybackup:
 
-/* Do appropriate processing given the current state.  */
-/* Read a lookahead token if we need one and don't already have one.  */
-/* yyresume: */
+    /* Do appropriate processing given the current state.  */
+    /* Read a lookahead token if we need one and don't already have one.  */
+    /* yyresume: */
 
-  /* First try to decide what to do without reference to lookahead token.  */
+    /* First try to decide what to do without reference to lookahead token.  */
 
-  yyn = yypact[yystate];
-  if (yyn == YYFLAG)
-    goto yydefault;
+    yyn = yypact[yystate];
+    if (yyn == YYFLAG)
+    {
+        goto yydefault;
+    }
 
-  /* Not known => get a lookahead token if don't already have one.  */
+    /* Not known => get a lookahead token if don't already have one.  */
 
-  /* yychar is either YYEMPTY or YYEOF
-     or a valid token in external form.  */
+    /* yychar is either YYEMPTY or YYEOF
+       or a valid token in external form.  */
 
-  if (yychar == YYEMPTY)
+    if (yychar == YYEMPTY)
     {
 #if YYDEBUG != 0
-      if (yydebug)
-	fprintf(stderr, "Reading a token: ");
+        if (yydebug)
+        {
+            fprintf(stderr, "Reading a token: ");
+        }
 #endif
-      yychar = YYLEX;
+        yychar = YYLEX;
     }
 
-  /* Convert token to internal form (in yychar1) for indexing tables with */
+    /* Convert token to internal form (in yychar1) for indexing tables with */
 
-  if (yychar <= 0)		/* This means end of input. */
+    if (yychar <= 0) /* This means end of input. */
     {
-      yychar1 = 0;
-      yychar = YYEOF;		/* Don't call YYLEX any more */
+        yychar1 = 0;
+        yychar = YYEOF; /* Don't call YYLEX any more */
 
 #if YYDEBUG != 0
-      if (yydebug)
-	fprintf(stderr, "Now at end of input.\n");
+        if (yydebug)
+        {
+            fprintf(stderr, "Now at end of input.\n");
+        }
 #endif
     }
-  else
+    else
     {
-      yychar1 = YYTRANSLATE(yychar);
+        yychar1 = YYTRANSLATE(yychar);
 
 #if YYDEBUG != 0
-      if (yydebug)
-	{
-	  fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]);
-	  /* Give the individual parser a way to print the precise meaning
-	     of a token, for further debugging info.  */
+        if (yydebug)
+        {
+            fprintf(stderr, "Next token is %d (%s", yychar, yytname[yychar1]);
+            /* Give the individual parser a way to print the precise meaning
+               of a token, for further debugging info.  */
 #ifdef YYPRINT
-	  YYPRINT (stderr, yychar, yylval);
+            YYPRINT(stderr, yychar, yylval);
 #endif
-	  fprintf (stderr, ")\n");
-	}
+            fprintf(stderr, ")\n");
+        }
 #endif
     }
 
-  yyn += yychar1;
-  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1)
-    goto yydefault;
+    yyn += yychar1;
+    if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1)
+    {
+        goto yydefault;
+    }
 
-  yyn = yytable[yyn];
+    yyn = yytable[yyn];
 
-  /* yyn is what to do for this token type in this state.
-     Negative => reduce, -yyn is rule number.
-     Positive => shift, yyn is new state.
-       New state is final state => don't bother to shift,
-       just return success.
-     0, or most negative number => error.  */
+    /* yyn is what to do for this token type in this state.
+       Negative => reduce, -yyn is rule number.
+       Positive => shift, yyn is new state.
+         New state is final state => don't bother to shift,
+         just return success.
+       0, or most negative number => error.  */
 
-  if (yyn < 0)
+    if (yyn < 0)
     {
-      if (yyn == YYFLAG)
-	goto yyerrlab;
-      yyn = -yyn;
-      goto yyreduce;
+        if (yyn == YYFLAG)
+        {
+            goto yyerrlab;
+        }
+        yyn = -yyn;
+        goto yyreduce;
+    }
+    else if (yyn == 0)
+    {
+        goto yyerrlab;
     }
-  else if (yyn == 0)
-    goto yyerrlab;
 
-  if (yyn == YYFINAL)
-    YYACCEPT;
+    if (yyn == YYFINAL)
+    {
+        YYACCEPT;
+    }
 
-  /* Shift the lookahead token.  */
+    /* Shift the lookahead token.  */
 
 #if YYDEBUG != 0
-  if (yydebug)
-    fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]);
+    if (yydebug)
+    {
+        fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]);
+    }
 #endif
 
-  /* Discard the token being shifted unless it is eof.  */
-  if (yychar != YYEOF)
-    yychar = YYEMPTY;
+    /* Discard the token being shifted unless it is eof.  */
+    if (yychar != YYEOF)
+    {
+        yychar = YYEMPTY;
+    }
 
-  *++yyvsp = yylval;
+    *++yyvsp = yylval;
 #ifdef YYLSP_NEEDED
-  *++yylsp = yylloc;
+    *++yylsp = yylloc;
 #endif
 
-  /* count tokens shifted since error; after three, turn off error status.  */
-  if (yyerrstatus) yyerrstatus--;
+    /* count tokens shifted since error; after three, turn off error status.  */
+    if (yyerrstatus)
+    {
+        yyerrstatus--;
+    }
 
-  yystate = yyn;
-  goto yynewstate;
+    yystate = yyn;
+    goto yynewstate;
 
 /* Do the default action for the current state.  */
 yydefault:
 
-  yyn = yydefact[yystate];
-  if (yyn == 0)
-    goto yyerrlab;
+    yyn = yydefact[yystate];
+    if (yyn == 0)
+    {
+        goto yyerrlab;
+    }
 
 /* Do a reduction.  yyn is the number of a rule to reduce with.  */
 yyreduce:
-  yylen = yyr2[yyn];
-  if (yylen > 0)
-    yyval = yyvsp[1-yylen]; /* implement default value of the action */
+    yylen = yyr2[yyn];
+    if (yylen > 0)
+    {
+        yyval = yyvsp[1 - yylen]; /* implement default value of the action */
+    }
 
 #if YYDEBUG != 0
-  if (yydebug)
+    if (yydebug)
     {
-      int i;
+        int i;
 
-      fprintf (stderr, "Reducing via rule %d (line %d), ",
-	       yyn, yyrline[yyn]);
+        fprintf(stderr, "Reducing via rule %d (line %d), ", yyn, yyrline[yyn]);
 
-      /* Print the symbols being reduced, and their result.  */
-      for (i = yyprhs[yyn]; yyrhs[i] > 0; i++)
-	fprintf (stderr, "%s ", yytname[yyrhs[i]]);
-      fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]);
+        /* Print the symbols being reduced, and their result.  */
+        for (i = yyprhs[yyn]; yyrhs[i] > 0; i++)
+        {
+            fprintf(stderr, "%s ", yytname[yyrhs[i]]);
+        }
+        fprintf(stderr, " -> %s\n", yytname[yyr1[yyn]]);
     }
 #endif
 
-
-  switch (yyn) {
-
-case 4:
+    switch (yyn)
+    {
+        case 4:
 #line 33 "grammar.y"
-{ par_res.value.c = yyvsp[-1].val; par_res.type = PR_CMPLX; YYACCEPT;;
-    break;}
-case 5:
+        {
+            par_res.value.c = yyvsp[-1].val;
+            par_res.type = PR_CMPLX;
+            YYACCEPT;
+            ;
+            break;
+        }
+        case 5:
 #line 34 "grammar.y"
-{ par_res.value.s = yyvsp[-1].str; par_res.type = PR_STR;   YYACCEPT;;
-    break;}
-case 6:
+        {
+            par_res.value.s = yyvsp[-1].str;
+            par_res.type = PR_STR;
+            YYACCEPT;
+            ;
+            break;
+        }
+        case 6:
 #line 35 "grammar.y"
-{ yyerrok; YYABORT;;
-    break;}
-case 7:
+        {
+            yyerrok;
+            YYABORT;
+            ;
+            break;
+        }
+        case 7:
 #line 38 "grammar.y"
-{ yyval.val = yyvsp[0].val;                         ;
-    break;}
-case 8:
+        {
+            yyval.val = yyvsp[0].val;
+            ;
+            break;
+        }
+        case 8:
 #line 39 "grammar.y"
-{ yyval.val = yyvsp[0].tptr->value.c;                ;
-    break;}
-case 9:
+        {
+            yyval.val = yyvsp[0].tptr->value.c;
+            ;
+            break;
+        }
+        case 9:
 #line 40 "grammar.y"
-{ yyval.val = yyvsp[0].val; yyvsp[-2].tptr->value.c = yyvsp[0].val; yyvsp[-2].tptr->type = S_CMPLX;;
-    break;}
-case 10:
+        {
+            yyval.val = yyvsp[0].val;
+            yyvsp[-2].tptr->value.c = yyvsp[0].val;
+            yyvsp[-2].tptr->type = S_CMPLX;
+            ;
+            break;
+        }
+        case 10:
 #line 41 "grammar.y"
-{ yyval.val = (*(yyvsp[-3].tptr->value.fnctptr))(yyvsp[-1].val); ;
-    break;}
-case 11:
+        {
+            yyval.val = (*(yyvsp[-3].tptr->value.fnctptr))(yyvsp[-1].val);
+            ;
+            break;
+        }
+        case 11:
 #line 42 "grammar.y"
-{ yyval.val = gsl_complex_add(yyvsp[-2].val, yyvsp[0].val);    ;
-    break;}
-case 12:
+        {
+            yyval.val = gsl_complex_add(yyvsp[-2].val, yyvsp[0].val);
+            ;
+            break;
+        }
+        case 12:
 #line 43 "grammar.y"
-{ yyval.val = gsl_complex_sub(yyvsp[-2].val, yyvsp[0].val);    ;
-    break;}
-case 13:
+        {
+            yyval.val = gsl_complex_sub(yyvsp[-2].val, yyvsp[0].val);
+            ;
+            break;
+        }
+        case 13:
 #line 44 "grammar.y"
-{ yyval.val = gsl_complex_mul(yyvsp[-2].val, yyvsp[0].val);    ;
-    break;}
-case 14:
+        {
+            yyval.val = gsl_complex_mul(yyvsp[-2].val, yyvsp[0].val);
+            ;
+            break;
+        }
+        case 14:
 #line 45 "grammar.y"
-{ yyval.val = gsl_complex_div(yyvsp[-2].val, yyvsp[0].val);    ;
-    break;}
-case 15:
+        {
+            yyval.val = gsl_complex_div(yyvsp[-2].val, yyvsp[0].val);
+            ;
+            break;
+        }
+        case 15:
 #line 46 "grammar.y"
-{ yyval.val = gsl_complex_negative(yyvsp[0].val);   ;
-    break;}
-case 16:
+        {
+            yyval.val = gsl_complex_negative(yyvsp[0].val);
+            ;
+            break;
+        }
+        case 16:
 #line 47 "grammar.y"
-{ yyval.val = gsl_complex_pow(yyvsp[-2].val, yyvsp[0].val);    ;
-    break;}
-case 17:
+        {
+            yyval.val = gsl_complex_pow(yyvsp[-2].val, yyvsp[0].val);
+            ;
+            break;
+        }
+        case 17:
 #line 48 "grammar.y"
-{ GSL_SET_COMPLEX (&yyval.val, GSL_REAL(yyvsp[-3].val), GSL_REAL(yyvsp[-1].val)); ;
-    break;}
-case 18:
+        {
+            GSL_SET_COMPLEX(&yyval.val, GSL_REAL(yyvsp[-3].val),
+                            GSL_REAL(yyvsp[-1].val));
+            ;
+            break;
+        }
+        case 18:
 #line 49 "grammar.y"
-{ yyval.val = yyvsp[-1].val;                         ;
-    break;}
-case 19:
+        {
+            yyval.val = yyvsp[-1].val;
+            ;
+            break;
+        }
+        case 19:
 #line 51 "grammar.y"
-{ yyval.str = yyvsp[0].str; ;
-    break;}
-case 20:
+        {
+            yyval.str = yyvsp[0].str;
+            ;
+            break;
+        }
+        case 20:
 #line 52 "grammar.y"
-{ yyval.str = yyvsp[0].str; yyvsp[-2].tptr->value.str = yyvsp[0].str; yyvsp[-2].tptr->type = S_STR; ;
-    break;}
-}
-   /* the action file gets copied in in place of this dollarsign */
+        {
+            yyval.str = yyvsp[0].str;
+            yyvsp[-2].tptr->value.str = yyvsp[0].str;
+            yyvsp[-2].tptr->type = S_STR;
+            ;
+            break;
+        }
+    }
+        /* the action file gets copied in in place of this dollarsign */
 #line 543 "/usr/lib/bison.simple"
-
-  yyvsp -= yylen;
-  yyssp -= yylen;
+
+    yyvsp -= yylen;
+    yyssp -= yylen;
 #ifdef YYLSP_NEEDED
-  yylsp -= yylen;
+    yylsp -= yylen;
 #endif
 
 #if YYDEBUG != 0
-  if (yydebug)
+    if (yydebug)
     {
-      short *ssp1 = yyss - 1;
-      fprintf (stderr, "state stack now");
-      while (ssp1 != yyssp)
-	fprintf (stderr, " %d", *++ssp1);
-      fprintf (stderr, "\n");
+        short *ssp1 = yyss - 1;
+        fprintf(stderr, "state stack now");
+        while (ssp1 != yyssp)
+        {
+            fprintf(stderr, " %d", *++ssp1);
+        }
+        fprintf(stderr, "\n");
     }
 #endif
 
-  *++yyvsp = yyval;
+    *++yyvsp = yyval;
 
 #ifdef YYLSP_NEEDED
-  yylsp++;
-  if (yylen == 0)
+    yylsp++;
+    if (yylen == 0)
     {
-      yylsp->first_line = yylloc.first_line;
-      yylsp->first_column = yylloc.first_column;
-      yylsp->last_line = (yylsp-1)->last_line;
-      yylsp->last_column = (yylsp-1)->last_column;
-      yylsp->text = 0;
+        yylsp->first_line = yylloc.first_line;
+        yylsp->first_column = yylloc.first_column;
+        yylsp->last_line = (yylsp - 1)->last_line;
+        yylsp->last_column = (yylsp - 1)->last_column;
+        yylsp->text = 0;
     }
-  else
+    else
     {
-      yylsp->last_line = (yylsp+yylen-1)->last_line;
-      yylsp->last_column = (yylsp+yylen-1)->last_column;
+        yylsp->last_line = (yylsp + yylen - 1)->last_line;
+        yylsp->last_column = (yylsp + yylen - 1)->last_column;
     }
 #endif
 
-  /* Now "shift" the result of the reduction.
-     Determine what state that goes to,
-     based on the state we popped back to
-     and the rule number reduced by.  */
+    /* Now "shift" the result of the reduction.
+       Determine what state that goes to,
+       based on the state we popped back to
+       and the rule number reduced by.  */
 
-  yyn = yyr1[yyn];
+    yyn = yyr1[yyn];
 
-  yystate = yypgoto[yyn - YYNTBASE] + *yyssp;
-  if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp)
-    yystate = yytable[yystate];
-  else
-    yystate = yydefgoto[yyn - YYNTBASE];
+    yystate = yypgoto[yyn - YYNTBASE] + *yyssp;
+    if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+    {
+        yystate = yytable[yystate];
+    }
+    else
+    {
+        yystate = yydefgoto[yyn - YYNTBASE];
+    }
 
-  goto yynewstate;
+    goto yynewstate;
 
-yyerrlab:   /* here on detecting error */
+yyerrlab: /* here on detecting error */
 
-  if (! yyerrstatus)
+    if (!yyerrstatus)
     /* If not already recovering from an error, report this error.  */
     {
-      ++yynerrs;
+        ++yynerrs;
 
 #ifdef YYERROR_VERBOSE
-      yyn = yypact[yystate];
-
-      if (yyn > YYFLAG && yyn < YYLAST)
-	{
-	  int size = 0;
-	  char *msg;
-	  int x, count;
-
-	  count = 0;
-	  /* Start X at -yyn if nec to avoid negative indexes in yycheck.  */
-	  for (x = (yyn < 0 ? -yyn : 0);
-	       x < (sizeof(yytname) / sizeof(char *)); x++)
-	    if (yycheck[x + yyn] == x)
-	      size += strlen(yytname[x]) + 15, count++;
-	  msg = (char *) malloc(size + 15);
-	  if (msg != 0)
-	    {
-	      strcpy(msg, "parse error");
-
-	      if (count < 5)
-		{
-		  count = 0;
-		  for (x = (yyn < 0 ? -yyn : 0);
-		       x < (sizeof(yytname) / sizeof(char *)); x++)
-		    if (yycheck[x + yyn] == x)
-		      {
-			strcat(msg, count == 0 ? ", expecting `" : " or `");
-			strcat(msg, yytname[x]);
-			strcat(msg, "'");
-			count++;
-		      }
-		}
-	      yyerror(msg);
-	      free(msg);
-	    }
-	  else
-	    yyerror ("parse error; also virtual memory exceeded");
-	}
-      else
+        yyn = yypact[yystate];
+
+        if (yyn > YYFLAG && yyn < YYLAST)
+        {
+            int size = 0;
+            char *msg;
+            int x, count;
+
+            count = 0;
+            /* Start X at -yyn if nec to avoid negative indexes in yycheck.  */
+            for (x = (yyn < 0 ? -yyn : 0);
+                 x < (sizeof(yytname) / sizeof(char *)); x++)
+            {
+                if (yycheck[x + yyn] == x)
+                {
+                    size += strlen(yytname[x]) + 15, count++;
+                }
+            }
+            msg = (char *)malloc(size + 15);
+            if (msg != 0)
+            {
+                strcpy(msg, "parse error");
+
+                if (count < 5)
+                {
+                    count = 0;
+                    for (x = (yyn < 0 ? -yyn : 0);
+                         x < (sizeof(yytname) / sizeof(char *)); x++)
+                    {
+                        if (yycheck[x + yyn] == x)
+                        {
+                            strcat(msg, count == 0 ? ", expecting `" : " or `");
+                            strcat(msg, yytname[x]);
+                            strcat(msg, "'");
+                            count++;
+                        }
+                    }
+                }
+                yyerror(msg);
+                free(msg);
+            }
+            else
+            {
+                yyerror("parse error; also virtual memory exceeded");
+            }
+        }
+        else
 #endif /* YYERROR_VERBOSE */
-	yyerror("parse error");
+            yyerror("parse error");
     }
 
-  goto yyerrlab1;
-yyerrlab1:   /* here on error raised explicitly by an action */
+    goto yyerrlab1;
+yyerrlab1: /* here on error raised explicitly by an action */
 
-  if (yyerrstatus == 3)
+    if (yyerrstatus == 3)
     {
-      /* if just tried and failed to reuse lookahead token after an error, discard it.  */
+        /* if just tried and failed to reuse lookahead token after an error,
+         * discard it.  */
 
-      /* return failure if at end of input */
-      if (yychar == YYEOF)
-	YYABORT;
+        /* return failure if at end of input */
+        if (yychar == YYEOF)
+        {
+            YYABORT;
+        }
 
 #if YYDEBUG != 0
-      if (yydebug)
-	fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]);
+        if (yydebug)
+        {
+            fprintf(stderr, "Discarding token %d (%s).\n", yychar,
+                    yytname[yychar1]);
+        }
 #endif
 
-      yychar = YYEMPTY;
+        yychar = YYEMPTY;
     }
 
-  /* Else will try to reuse lookahead token
-     after shifting the error token.  */
+    /* Else will try to reuse lookahead token
+       after shifting the error token.  */
 
-  yyerrstatus = 3;		/* Each real token shifted decrements this */
+    yyerrstatus = 3; /* Each real token shifted decrements this */
 
-  goto yyerrhandle;
+    goto yyerrhandle;
 
-yyerrdefault:  /* current state does not do anything special for the error token. */
+yyerrdefault: /* current state does not do anything special for the error token.
+               */
 
 #if 0
   /* This is wrong; only states that explicitly want error tokens
@@ -931,86 +999,102 @@ case 20:
   if (yyn) goto yydefault;
 #endif
 
-yyerrpop:   /* pop the current state because it cannot handle the error token */
+yyerrpop: /* pop the current state because it cannot handle the error token */
 
-  if (yyssp == yyss) YYABORT;
-  yyvsp--;
-  yystate = *--yyssp;
+    if (yyssp == yyss)
+    {
+        YYABORT;
+    }
+    yyvsp--;
+    yystate = *--yyssp;
 #ifdef YYLSP_NEEDED
-  yylsp--;
+    yylsp--;
 #endif
 
 #if YYDEBUG != 0
-  if (yydebug)
+    if (yydebug)
     {
-      short *ssp1 = yyss - 1;
-      fprintf (stderr, "Error: state stack now");
-      while (ssp1 != yyssp)
-	fprintf (stderr, " %d", *++ssp1);
-      fprintf (stderr, "\n");
+        short *ssp1 = yyss - 1;
+        fprintf(stderr, "Error: state stack now");
+        while (ssp1 != yyssp)
+        {
+            fprintf(stderr, " %d", *++ssp1);
+        }
+        fprintf(stderr, "\n");
     }
 #endif
 
 yyerrhandle:
 
-  yyn = yypact[yystate];
-  if (yyn == YYFLAG)
-    goto yyerrdefault;
+    yyn = yypact[yystate];
+    if (yyn == YYFLAG)
+    {
+        goto yyerrdefault;
+    }
 
-  yyn += YYTERROR;
-  if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR)
-    goto yyerrdefault;
+    yyn += YYTERROR;
+    if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR)
+    {
+        goto yyerrdefault;
+    }
 
-  yyn = yytable[yyn];
-  if (yyn < 0)
+    yyn = yytable[yyn];
+    if (yyn < 0)
     {
-      if (yyn == YYFLAG)
-	goto yyerrpop;
-      yyn = -yyn;
-      goto yyreduce;
+        if (yyn == YYFLAG)
+        {
+            goto yyerrpop;
+        }
+        yyn = -yyn;
+        goto yyreduce;
+    }
+    else if (yyn == 0)
+    {
+        goto yyerrpop;
     }
-  else if (yyn == 0)
-    goto yyerrpop;
 
-  if (yyn == YYFINAL)
-    YYACCEPT;
+    if (yyn == YYFINAL)
+    {
+        YYACCEPT;
+    }
 
 #if YYDEBUG != 0
-  if (yydebug)
-    fprintf(stderr, "Shifting error token, ");
+    if (yydebug)
+    {
+        fprintf(stderr, "Shifting error token, ");
+    }
 #endif
 
-  *++yyvsp = yylval;
+    *++yyvsp = yylval;
 #ifdef YYLSP_NEEDED
-  *++yylsp = yylloc;
+    *++yylsp = yylloc;
 #endif
 
-  yystate = yyn;
-  goto yynewstate;
+    yystate = yyn;
+    goto yynewstate;
 
- yyacceptlab:
-  /* YYACCEPT comes here.  */
-  if (yyfree_stacks)
+yyacceptlab:
+    /* YYACCEPT comes here.  */
+    if (yyfree_stacks)
     {
-      free (yyss);
-      free (yyvs);
+        free(yyss);
+        free(yyvs);
 #ifdef YYLSP_NEEDED
-      free (yyls);
+        free(yyls);
 #endif
     }
-  return 0;
+    return 0;
 
- yyabortlab:
-  /* YYABORT comes here.  */
-  if (yyfree_stacks)
+yyabortlab:
+    /* YYABORT comes here.  */
+    if (yyfree_stacks)
     {
-      free (yyss);
-      free (yyvs);
+        free(yyss);
+        free(yyvs);
 #ifdef YYLSP_NEEDED
-      free (yyls);
+        free(yyls);
 #endif
     }
-  return 1;
+    return 1;
 }
 #line 54 "grammar.y"
-
diff --git a/src/parser/mod_itm.F b/src/parser/mod_itm.F
index a42d011529..ae3ad4eb2f 100644
--- a/src/parser/mod_itm.F
+++ b/src/parser/mod_itm.F
@@ -41,7 +41,7 @@ module it_m
  integer, parameter :: V_all=99
  !
  integer, parameter :: maxrnlvls=60
- integer, parameter :: maxflags =60
+ integer, parameter :: maxflags =70
  integer, parameter :: maxi1vars=100
  integer, parameter :: maxi2vars=50
  integer, parameter :: maxi3vars=50
@@ -50,7 +50,7 @@ module it_m
  integer, parameter :: maxr3vars=100
  integer, parameter :: maxr4vars=50
  integer, parameter :: maxc1vars=50
- integer, parameter :: maxchvars=100
+ integer, parameter :: maxchvars=150
  integer, parameter :: nvars=(maxi1vars+2*maxi2vars+2*maxi3vars+&
 &                             maxr1vars+2*maxr2vars+2*maxr3vars+4*maxr4vars+&
 &                             maxc1vars+maxchvars)
diff --git a/src/pol_function/OPTICS_driver.F b/src/pol_function/OPTICS_driver.F
index da0c91b927..2a50ba932a 100644
--- a/src/pol_function/OPTICS_driver.F
+++ b/src/pol_function/OPTICS_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM, DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OPTICS_driver(Xen,Xk,q,wv,X,Dip)
  ! 
  ! Optics driver
@@ -27,8 +31,9 @@ subroutine OPTICS_driver(Xen,Xk,q,wv,X,Dip)
  use TDDFT,         ONLY:F_xc_gspace,FXC_K_diagonal
  use interfaces,    ONLY:WF_load,WF_free,WF_load
  use matrix,        ONLY:MATRIX_reset
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: Xen
  type(bz_samp) :: Xk,q
@@ -51,20 +56,20 @@ subroutine OPTICS_driver(Xen,Xk,q,wv,X,Dip)
  !
  allocate(X_par(1))
  !
- do iq=X%iq(1),X%iq(2)
-   !
-   ! PARALLEL indexes
-   !==================
-   call PARALLEL_global_indexes(Xen,Xk,q,"Response_G_space",X=X)
-   !
-   ! WF distribution 
-   !=================
-   call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,B_index=PAR_IND_CON_BANDS_X(X%whoami),&
+ ! PARALLEL indexes
+ !==================
+ call PARALLEL_global_indexes(Xen,Xk,q,"Response_G_space",X=X)
+ !
+ ! WF distribution 
+ !=================
+ call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,B_index=PAR_IND_CON_BANDS_X(X%whoami),&
 &                              Bp_index=PAR_IND_VAL_BANDS_X(X%whoami),CLEAN_UP=.TRUE.)
-   !
-   call PARALLEL_WF_index(COMM=PAR_COM_Q_A2A)
-   !
-   if (WF_buffered_IO) call WF_load(WF_buffer,0,1,X%ib,(/1,Xk%nibz/),space='B',title='-BUFFER')
+ !
+ call PARALLEL_WF_index(COMM=PAR_COM_Q_A2A)
+ !
+ if (WF_buffered_IO) call WF_load(WF_buffer,0,1,X%ib,(/1,Xk%nibz/),space='B',title='-BUFFER')
+ !
+ do iq=X%iq(1),X%iq(2)
    !
    ! TDDFT setup
    call TDDFT_do_X_W_typs(iq,X,wv)
@@ -162,6 +167,8 @@ subroutine OPTICS_driver(Xen,Xk,q,wv,X,Dip)
    !
  enddo
  !
+ call PARALLEL_global_reset("Response_G_space",X%whoami)
+ !
  ! CLEAN
  !
  deallocate(X_par)
diff --git a/src/pol_function/X_AVERAGE_do_it.F b/src/pol_function/X_AVERAGE_do_it.F
index 64d98f9ffd..176fe7735e 100644
--- a/src/pol_function/X_AVERAGE_do_it.F
+++ b/src/pol_function/X_AVERAGE_do_it.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<dev_defs.h>
+!
 subroutine X_AVERAGE_do_it(what,X_par)
  !
  ! The procedures to copy from Host to Device, that are included here, maybe not needded.
@@ -17,7 +21,6 @@ subroutine X_AVERAGE_do_it(what,X_par)
  use devxlib,       ONLY:devxlib_memcpy_h2d,devxlib_memcpy_d2d,devxlib_memset_d
  use matrix,        ONLY:PAR_matrix,MATRIX_reset
  !
-#include<dev_defs.h>
  !
  implicit none
  !
diff --git a/src/pol_function/X_Double_Grid_setup.F b/src/pol_function/X_Double_Grid_setup.F
index eee7b704b8..c19374b2a9 100644
--- a/src/pol_function/X_Double_Grid_setup.F
+++ b/src/pol_function/X_Double_Grid_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine X_Double_Grid_setup(iq,k,k_FGbz,nTrans_percent,Radius)
  !
  ! Define which kpts I will use in a consistent way between q=0 and q/=0
@@ -29,8 +33,9 @@ subroutine X_Double_Grid_setup(iq,k,k_FGbz,nTrans_percent,Radius)
 #if defined _RT
  use real_time,     ONLY:l_RT_uses_E_FineGd
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer                       :: iq
  type(bz_samp)                 :: k
diff --git a/src/pol_function/X_GreenF_analytical.F b/src/pol_function/X_GreenF_analytical.F
index 3c5f92811c..6c17d99791 100644
--- a/src/pol_function/X_GreenF_analytical.F
+++ b/src/pol_function/X_GreenF_analytical.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine X_GreenF_analytical(iq,transitions,Xw,Xen,Xk,GreenF,ordering,space,no_occupations,X_terminator)
  !
  ! Compute the GreenF in frequency space starting from it analytical
@@ -23,8 +27,9 @@ subroutine X_GreenF_analytical(iq,transitions,Xw,Xen,Xk,GreenF,ordering,space,no
  use R_lattice,  ONLY:qindx_X,bz_samp
  use frequency,  ONLY:w_samp
  use parallel_m, ONLY:PAR_Xk_bz_index
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer      :: iq,transitions(4)
  type(w_samp) :: Xw
diff --git a/src/pol_function/X_GreenF_remap.F b/src/pol_function/X_GreenF_remap.F
index ea48aa34a4..a0236e171e 100644
--- a/src/pol_function/X_GreenF_remap.F
+++ b/src/pol_function/X_GreenF_remap.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine X_GreenF_remap(bands,E,W)
  !
  use pars,          ONLY:SP,pi
@@ -12,7 +16,9 @@ subroutine X_GreenF_remap(bands,E,W)
  use LIVE_t,        ONLY:live_timing
  use electrons,     ONLY:levels,n_sp_pol,E_reset
  use frequency,     ONLY:w_samp,W_reset
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  integer      :: bands(2)
  type(w_samp) :: W
  type(levels) :: E
diff --git a/src/pol_function/X_delta_part.F b/src/pol_function/X_delta_part.F
index 675aae8ba4..5eae9ff7ea 100644
--- a/src/pol_function/X_delta_part.F
+++ b/src/pol_function/X_delta_part.F
@@ -52,14 +52,18 @@ subroutine X_delta_part(NG,NW,X,Gamp)
  complex(SP):: Xt(NG,NG)
  !
  if (i_space_inv==1) then
-   forall(i1=1:NG,i2=1:NG,i3=1:NW) &
-&        X(i1,i2,i3)=aimag(X(i1,i2,i3))*Gamp(i1,i2)
+   forall(i1=1:NG,i2=1:NG,i3=1:NW) 
+     X(i1,i2,i3)=aimag(X(i1,i2,i3))*Gamp(i1,i2)
+   end forall
  else
    do i3=1,NW
-     forall(i1=1:NG,i2=1:NG) Xt(i1,i2)  =X(i1,i2,i3)*Gamp(i1,i2)
-     forall(i1=1:NG,i2=1:NG) X(i1,i2,i3)=&
-&                                    0.5*aimag(Xt(i1,i2)+Xt(i2,i1))-&
-&                               (0.,0.5)* real(Xt(i1,i2)-Xt(i2,i1))
+     forall(i1=1:NG,i2=1:NG) 
+       Xt(i1,i2)  =X(i1,i2,i3)*Gamp(i1,i2)
+     end forall
+     forall(i1=1:NG,i2=1:NG) 
+       X(i1,i2,i3)=0.5_SP*aimag(Xt(i1,i2)+Xt(i2,i1))-&
+&                 (0._SP,0.5_SP)* real(Xt(i1,i2)-Xt(i2,i1))
+     end forall
    enddo
  endif
  !
diff --git a/src/pol_function/X_dielectric_matrix.F b/src/pol_function/X_dielectric_matrix.F
index eab77a26a5..f0043e109a 100644
--- a/src/pol_function/X_dielectric_matrix.F
+++ b/src/pol_function/X_dielectric_matrix.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
  ! 
  ! Calculates and stores on file the dielectric matrix.
@@ -32,8 +36,9 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
  use interfaces,    ONLY:WF_load,WF_free
  use QP_m,          ONLY:QP_n_W_freqs_redux
  use matrix,        ONLY:MATRIX_reset,MATRIX_copy
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::Xen 
  type(bz_samp) ::Xk,q
@@ -62,6 +67,10 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
  !
  call X_AVERAGE_setup("INIT",-1,X%q0)
  !
+ ! Some check
+ !
+ if(X%ib(2)>Xen%nb) call warning("More bands required in X than available in the current SAVE.")
+ !
  l_X_terminator = X_terminator_Kind/='none'
  if (l_X_terminator.and.trim(X_terminator_Kind)/='BG') then
    call error(' [X] unknown X_terminator_Kind = '//trim(X_terminator_Kind))
@@ -185,14 +194,12 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
      call FREQUENCIES_reset(Xw,"all")
      if (X%iq(1)==X%iq(2)) X_dielectric_matrix=X%iq(1)
      return
+   else
+     if(X%ib(2)>Xen%nb) call error("Too many bands required in X. Run DFT with more bands!") 
    endif
    !
  endif
  !
- ! Some check
- !
- if(X%ib(2)>Xen%nb) call error("Too many bands required in X. Run DFT with more bands!")
- !
  ! Parallel distribution
  !=======================
  if (.not.(l_life.and.X%iq(1)>1)) then
@@ -205,9 +212,6 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
  if (.not.io_RESPONSE) X_MEM_n_tot_freqs=X_MEM_n_freqs*PAR_nQ_ibz
  if (.not.allocated(X_par)) then
    if (     io_RESPONSE) allocate(X_par(1))
-!#if !defined _PAR_IO
-!   if (     io_RESPONSE) call X_ALLOC_elemental('X',(/X%ng,X%ng,X_MEM_n_tot_freqs/))
-!#endif
    if (.not.io_RESPONSE) allocate(X_par(PAR_nQ_ibz))
    if (.not.io_RESPONSE) call X_ALLOC_elemental('X',(/X%ng,X%ng,X_MEM_n_tot_freqs/))
  endif
@@ -308,16 +312,13 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
      !
      call X_irredux(iq,"Xo"//trim(X_what),X_par(iq_mem),Xen,Xk,Xw,X,Dip)  
      !
-     if (l_write_disk_Xo.and.n_OPTICAL_dir_to_eval==1) then
-       call X_mat_filling(iq_mem,X,Xw,X_MEM_n_freqs,X_MEM_n_tot_freqs,X_mat)
-       call elemental_IO( iq , .TRUE. )
-     endif
-     !
      if (.not.l_rpa_IP) call X_redux(iq,"X"//trim(X_what),X_par(iq_mem),Xw,X)
      !
      if (n_OPTICAL_dir_to_eval>1.and.iq_dir==1) call MATRIX_copy(X_par(iq_mem),X_par_average,.TRUE.)
      if (n_OPTICAL_dir_to_eval>1)               call X_AVERAGE_do_it("ACCUMULATE",X_par(iq_mem))
      !
+     if (iq_dir<n_OPTICAL_dir_to_eval.and.io_RESPONSE) call MATRIX_reset(X_par(1))
+     !
    enddo
    !
    if (n_OPTICAL_dir_to_eval>1) then
@@ -325,32 +326,12 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
      call X_AVERAGE_setup("CLOSE",iq,X%q0)
    endif
    !
-   ! DS MERGE <
-   ! The following lines were after the call to X_redux and before the call to io_X before the merge
-   ! I moved them here after the merge
-   !
-   ! allocation of X_mat
-   if (io_RESPONSE.and..not.allocated(X_mat)) then
-#if ! defined _PAR_IO
-     call X_ALLOC_elemental('X',(/X%ng,X%ng,Xw%n_freqs/))
-#endif
-   endif
-   !
-   ! DS MERGE >
-   !
-   ! X_par => X_mat
+   ! X_par => X_mat : this is needed when the I/O is off
    !
    call X_mat_filling(iq_mem,X,Xw,X_MEM_n_freqs,X_MEM_n_tot_freqs,X_mat)
    !
    call elemental_IO( iq , .FALSE. )
-   !
-   ! X_mat (and other few quantities) deallocate
-   !
-   if (io_RESPONSE.and.allocated(X_mat)) then
-#if ! defined _PAR_IO
-     call X_ALLOC_elemental('X')
-#endif
-   endif
+   call elemental_IO( iq , .TRUE. )
    !
    ! CLEAN (1)
    ! Frequencies must not be cleaned in the case of lifetimes calculations when the EM1d
@@ -363,7 +344,7 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
    endif
    !
    ! CLEAN (2)
-   if (io_RESPONSE) call MATRIX_reset(X_par(iq_mem))
+   if (io_RESPONSE) call MATRIX_reset(X_par(1))
    !
    if (use_X_DbGd) then
      YAMBO_FREE(X_DbGd_nkpt_at_q)
@@ -377,9 +358,6 @@ integer function X_dielectric_matrix(Xen,Xk,q,X,Xw,Dip,SILENT_MODE,CHILD)
  ! CLEAN (3)
  !
  if (io_RESPONSE) then
-#if !defined _PAR_IO
-   YAMBO_FREE(X_mat)
-#endif
    deallocate(X_par)
  endif
  if (allocated(F_xc_gspace)) then
@@ -440,7 +418,7 @@ subroutine elemental_IO(iq_,this_is_Xo)
      SEC           =(/1,1/)
      if (iq_>0) SEC=(/2*iq_,2*iq_+1/)
      !
-     call io_control(ACTION=OP_APP_CL,COM=COM,SEC=SEC,ID=ID(idb),COMM=local_yMPI_COMM,DO_IT=condition)
+     call io_control(ACTION=OP_APP_CL,COM=COM,SEC=SEC,ID=ID(idb),COMM=LOCAL_yMPI_COMM,DO_IT=condition)
      i_err=io_X(X,Xw,ID(idb))
      !
      if (this_is_Xo) then
diff --git a/src/pol_function/X_eh_setup.F b/src/pol_function/X_eh_setup.F
index f5d551fba9..c1ade61e72 100644
--- a/src/pol_function/X_eh_setup.F
+++ b/src/pol_function/X_eh_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function X_eh_setup(iq,X,Xen,Xk,minmax_ehe)
  !
  use pars,          ONLY:SP
@@ -13,8 +17,9 @@ integer function X_eh_setup(iq,X,Xen,Xk,minmax_ehe)
  use electrons,     ONLY:levels,n_sp_pol,spin_occ
  use frequency,     ONLY:ordered_grid_index
  use parallel_m,    ONLY:PAR_IND_CON_BANDS_X,PAR_IND_Xk_bz,PAR_IND_VAL_BANDS_X
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::Xen 
  type(bz_samp) ::Xk
diff --git a/src/pol_function/X_half_mat_check.F b/src/pol_function/X_half_mat_check.F
index ffdc2ba157..f0e7cf6aa9 100644
--- a/src/pol_function/X_half_mat_check.F
+++ b/src/pol_function/X_half_mat_check.F
@@ -19,8 +19,6 @@ subroutine X_half_mat_check(Xen,Xk,Xw)
  type(bz_samp),intent(in)   :: Xk
  type(w_samp) ,intent(in)   :: Xw
  !
- call WF_spatial_inversion(Xen,Xk)
- !
  if (i_space_inv==1) then
    X_FILL_UP_matrix_only= .TRUE.
  else
diff --git a/src/pol_function/X_irredux.F b/src/pol_function/X_irredux.F
index 60a9173882..79b2f0157e 100644
--- a/src/pol_function/X_irredux.F
+++ b/src/pol_function/X_irredux.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AMDSAFFA
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 !> @callgraph
 !> @callergraph
 subroutine X_irredux(iq,what,X_par,Xen,Xk,Xw,X,Dip)
@@ -55,9 +60,9 @@ subroutine X_irredux(iq,what,X_par,Xen,Xk,Xw,X,Dip)
  use gpu_m,         ONLY:have_gpu
  use devxlib,       ONLY:devxlib_memcpy_d2d,devxlib_memcpy_d2h,devxlib_memcpy_h2d
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type(PAR_matrix), target :: X_par
  type(levels)         :: Xen
diff --git a/src/pol_function/X_irredux_residuals.F b/src/pol_function/X_irredux_residuals.F
index 2b1048959e..ba54a9eb3d 100644
--- a/src/pol_function/X_irredux_residuals.F
+++ b/src/pol_function/X_irredux_residuals.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): DS AM AF IM
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 !> @callgraph
 !> @callergraph
 subroutine X_irredux_residuals(Xen,Xk,X,Dip,i_cg,iq,Xo_res,Xo_scatt,&
@@ -112,7 +115,7 @@ subroutine X_irredux_residuals(Xen,Xk,X,Dip,i_cg,iq,Xo_res,Xo_scatt,&
    !
    Z_eh_occ = Xen%f(iv,ikp,i_spin)*(spin_occ-Xen%f(ic,ik,i_spin))/spin_occ/real(Xk%nbz,SP)/DL_vol*real(Z_)
    !
-   if (iq==1.and.abs(coarse_grid_Pt(i_cg))<Dip%Energy_treshold) Z_eh_occ=1._SP
+   if (iq==1.and.abs(coarse_grid_Pt(i_cg))<Dip%Energy_threshold) Z_eh_occ=1._SP
    !
    if (l_X_term_vv) Z_eh_occ = Xen%f(iv,ikp,i_spin)*Xen%f(ic,ik,i_spin)/spin_occ/real(Xk%nbz,SP)/DL_vol*real(Z_)
    !
@@ -204,10 +207,7 @@ subroutine X_irredux_residuals(Xen,Xk,X,Dip,i_cg,iq,Xo_res,Xo_scatt,&
      !
    endif
    !
-   ! Filling the upper triangular part of the residual here ! 
-   !-------------^^^^^---------------------------------------
-   !   
-   ! AF: in this implementation we just store the data in rhotw_mp
+   ! AF: Here we store the data in rhotw_mp
    !     Later on, we multiply rhotw_mp*rhotw_mp^dag to build 
    !     the whole residue matrix (both upper and lower parts)
    !
@@ -330,7 +330,7 @@ subroutine X_irredux_residuals(Xen,Xk,X,Dip,i_cg,iq,Xo_res,Xo_scatt,&
  !
  ! finalize
  !
- if (iq==1.and.abs(coarse_grid_Pt(i_cg))<Dip%Energy_treshold) then
+ if (iq==1.and.abs(coarse_grid_Pt(i_cg))<Dip%Energy_threshold) then
     !DEV_ACC_DEBUG data present(Xo_res)
     !DEV_ACC serial
     !DEV_OMPGPU target map(present,alloc:Xo_res)
diff --git a/src/pol_function/X_pre_setup.F b/src/pol_function/X_pre_setup.F
index 66034c1eb7..5ba74f3d3f 100644
--- a/src/pol_function/X_pre_setup.F
+++ b/src/pol_function/X_pre_setup.F
@@ -7,7 +7,7 @@
 !
 subroutine X_pre_setup(Xen,Xk,X)
  !
- use pars,          ONLY:SP
+ use pars,          ONLY:SP,lchlen
  use X_m,           ONLY:X_t,use_X_DbGd,X_DbGd_nkpts,l_X_terminator,X_use_lin_sys,&
 &                        X_use_gpu,Chi_linalg_mode,X_ng,l_write_disk_Xo
  use electrons,     ONLY:levels,n_sp_pol
@@ -30,6 +30,8 @@ subroutine X_pre_setup(Xen,Xk,X)
  ! Work Space
  !
  real(SP), external :: G2E
+ integer            :: Xng_tmp
+ character(lchlen)  :: mesg
  !
  if (X%ib(1)<0) X%ib(1)=1
  if (X%ib(2)<0) X%ib(2)=1
@@ -59,10 +61,16 @@ subroutine X_pre_setup(Xen,Xk,X)
  ! response function is accessed in G-G' mode. Here I ensure that all G-G' are in the
  ! range provided externally
  !
+ if (l_cohsex) call parser("UseEbands",COHSEX_use_empties)
+ !
  if ((l_cohsex.and..not.COHSEX_use_empties).or.l_X_terminator) then
-     X%ng=eval_G_minus_G(X%ng,0,COMM=PAR_COM_WORLD)
-     call warning(' Response block size reduced to '//intc(X%ng) &
-&     //'RL ('//intc(int(1000.*G2E(X%ng)))//'mHa)')
+   Xng_tmp=eval_G_minus_G(X%ng,0,COMM=PAR_COM_WORLD)
+   if (Xng_tmp/=X%ng) then
+     mesg=' Response block size reduced from '//intc(X%ng)//' to '//intc(Xng_tmp) //'RL'
+     mesg=trim(mesg)//' ('//intc(int(1000.*G2E(X%ng)))//' -->'//intc(int(1000.*G2E(Xng_tmp)))//' mHa)'
+     call warning(trim(mesg))
+     X%ng=Xng_tmp
+   endif
  endif
  !
  ! Fxc Checks
diff --git a/src/pol_function/X_redux.F b/src/pol_function/X_redux.F
index cfc0551978..f70b834f17 100644
--- a/src/pol_function/X_redux.F
+++ b/src/pol_function/X_redux.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM AF 
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine X_redux(iq,what,X_par,Xw,X)
  !
@@ -13,7 +17,7 @@ subroutine X_redux(iq,what,X_par,Xw,X)
  !
  ! More precisely the routine handles:
  !
- !  1. Xo(XUP/XDN/SER): INPUT
+ !  1. Xo(SER)        : INPUT
  !  2. Dyson(SER/SLK) : SOLVER
  !  3. X(PAR/SER)     : OUTPUT
  !
@@ -41,8 +45,9 @@ subroutine X_redux(iq,what,X_par,Xw,X)
                          devxlib_memcpy_d2h
  use devxlib_async, ONLY:devxlib_async_synchronize
  use gpu_m
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(PAR_matrix) :: X_par
  type(X_t)        :: X
diff --git a/src/qp/QED_Sigma_c.F b/src/qp/QED_Sigma_c.F
index ab57619861..424b708ac2 100644
--- a/src/qp/QED_Sigma_c.F
+++ b/src/qp/QED_Sigma_c.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): PM AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QED_Sigma_c(E,k,q,qp)
  !
  use pars,                ONLY:SP,pi,cI,rZERO
@@ -12,7 +16,7 @@ subroutine QED_Sigma_c(E,k,q,qp)
  use drivers,             ONLY:l_elel_corr,l_use_collisions,l_elph_corr,l_elphoton_scatt
  use electrons,           ONLY:levels,spin_occ,n_met_bands
  use D_lattice,           ONLY:DL_vol
- use R_lattice,           ONLY:qindx_S,bz_samp,nqbz,RL_vol,RIM_n_rand_pts
+ use R_lattice,           ONLY:qindx_S,bz_samp,nqbz,RL_vol,RIM_sphe_n_rand_pts
  use QP_m,                ONLY:QP_t,QP_n_G_bands,QP_Sc,QP_QED_ng,QP_nb,QP_states_simmetrize,&
 &                              QP_dSc,QP_time_order_sign,QP_table,QP_n_states
  use IO_m,                ONLY:io_COLLs
@@ -28,8 +32,9 @@ subroutine QED_Sigma_c(E,k,q,qp)
  use parallel_m,          ONLY:PAR_IND_Xk_ibz,PAR_IND_G_b,PAR_IND_QP,PAR_IND_Q_ibz,PAR_nQ_ibz,PAR_nQP
  use functions,           ONLY:theta_function
  use timing_m,            ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)              ::E
  type(bz_samp)             ::k,q
@@ -67,7 +72,7 @@ subroutine QED_Sigma_c(E,k,q,qp)
  !
  ! RIM 
  !=====
- if (RIM_n_rand_pts==0) RIM_n_rand_pts=1000000
+ if (RIM_sphe_n_rand_pts==0) RIM_sphe_n_rand_pts=1000000
  !
  call k_ibz2bz(q,'i',.TRUE.)
  !
diff --git a/src/qp/QP_W2Sc.F b/src/qp/QP_W2Sc.F
index ef05a41b47..145ce620d5 100644
--- a/src/qp/QP_W2Sc.F
+++ b/src/qp/QP_W2Sc.F
@@ -62,9 +62,11 @@ subroutine QP_W2Sc(iqbz,k,E,Xw,Sc_W)
      !
      ! 1st term: (spin_occ-f_os+fbose)
      !
-     forall(i_w=1:Xw%n_freqs) QP_W_here(i_w)=QP_W(i_qp_mem,i_q_mem,i_b_mem,i_w)*    &
+     do i_w=1,Xw%n_freqs 
+       QP_W_here(i_w)=QP_W(i_qp_mem,i_q_mem,i_b_mem,i_w)*    &
 &                        (spin_occ-E%f(os(1),os(2),os(3))+bose_f(real(Xw%p(i_w))))* &
 &                         bose_decay(real(Xw%p(i_w)))
+     enddo
      !
      call Kramers_Kronig(QP_W_here,real(Xw%p(:)),QP_n_W_freqs,dSc,          &
 &            real(Sc_W(i_qp)%p(:))-QP_time_order_sign*cI*aimag(Sc_W(i_qp)%p(:)),         &
@@ -72,9 +74,11 @@ subroutine QP_W2Sc(iqbz,k,E,Xw,Sc_W)
      !
      ! 2nd term: (f_os+fbose)
      !
-     forall(i_w=1:Xw%n_freqs) QP_W_here(i_w)=QP_W(i_qp_mem,i_q_mem,i_b_mem,i_w)* &
+     do i_w=1,Xw%n_freqs 
+       QP_W_here(i_w)=QP_W(i_qp_mem,i_q_mem,i_b_mem,i_w)* &
 &                         (E%f(os(1),os(2),os(3))+bose_f(real(Xw%p(i_w))))*      &
 &                         bose_decay(real(Xw%p(i_w)))
+     enddo
      !
      call Kramers_Kronig(-QP_W_here,real(Xw%p(:)),QP_n_W_freqs,dSc,         &
 &            -real(Sc_W(i_qp)%p(:))+cI*aimag(Sc_W(i_qp)%p(:)),              &
@@ -82,7 +86,9 @@ subroutine QP_W2Sc(iqbz,k,E,Xw,Sc_W)
      !
    enddo
    !
-   forall(i_w=1:Sc_W(i_qp)%n_freqs) QP_Sc(i_qp,i_w)=QP_Sc(i_qp,i_w)+dSc(i_w)
+   do i_w=1,Sc_W(i_qp)%n_freqs 
+     QP_Sc(i_qp,i_w)=QP_Sc(i_qp,i_w)+dSc(i_w)
+   enddo
    !
    deallocate(dSc)
    !
diff --git a/src/qp/QP_driver.F b/src/qp/QP_driver.F
index 9cd663fd19..7ec29b01bc 100644
--- a/src/qp/QP_driver.F
+++ b/src/qp/QP_driver.F
@@ -4,6 +4,10 @@
 ! Copyright (C) 2006 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): AM
+!
+! headers
+!
+#include<y_memory.h>
 ! 
 subroutine QP_driver(X,Xen,Xk,en,k,q,Xw,Dip)
  !
@@ -35,8 +39,9 @@ subroutine QP_driver(X,Xen,Xk,en,k,q,Xw,Dip)
  use parallel_m,    ONLY:PAR_COM_WORLD,PAR_IND_WF_linear
  use stderr,        ONLY:intc
  use descriptors,   ONLY:IO_desc,IO_desc_reset,IO_desc_add
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::en,Xen
  type(bz_samp) ::Xk,k,q
@@ -434,7 +439,9 @@ subroutine check_qp_states(QP_table,QP_state,qp,en,Xen)
   !  
   use QP_m,          ONLY:QP_t
   use electrons,     ONLY:n_sp_pol,levels
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
   !
   !
   type(QP_t), intent(in)      :: qp
diff --git a/src/qp/QP_expand.F b/src/qp/QP_expand.F
index 38852cfc85..634f3a26f9 100644
--- a/src/qp/QP_expand.F
+++ b/src/qp/QP_expand.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_expand(k,qp,qp_expanded)
  !
  use pars,          ONLY:SP
@@ -12,8 +16,9 @@ subroutine QP_expand(k,qp,qp_expanded)
  use R_lattice,     ONLY:bz_samp
  use electrons,     ONLY:n_sp_pol
  use descriptors,   ONLY:IO_desc_duplicate
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)    ::k
  type(QP_t)       ::qp,qp_expanded
diff --git a/src/qp/QP_interpolate_W.F b/src/qp/QP_interpolate_W.F
index 78a79b55c1..6fa12630db 100644
--- a/src/qp/QP_interpolate_W.F
+++ b/src/qp/QP_interpolate_W.F
@@ -5,13 +5,18 @@
 !
 ! Authors (see AUTHORS file for details): AG
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 subroutine QP_interpolate_W(X,Xw,q,mode)
  !
  use pars,          ONLY:SP,pi,zero_dfl,schlen,DP,rZERO
  use com,           ONLY:msg
  use R_lattice,     ONLY:bz_samp,RIM_W_ng,b,k_grid_b,bare_qpg,&
 &                        RIM_W_is_diagonal,RIM_W,RIM_W_d,f_coeff,idir,RIM_W_E,&
-&                        RIM_id_epsm1_reference,RIM_epsm1,RIM_qpg
+&                        RIM_id_epsm1_reference,RIM_epsm1,RIM_qpg,eps_env
  use vec_operate,   ONLY:c2a,v_norm
  use ALLOC,         ONLY:X_ALLOC_elemental
  use X_m,           ONLY:X_mat,X_t
@@ -23,9 +28,9 @@ subroutine QP_interpolate_W(X,Xw,q,mode)
  use D_lattice,     ONLY:alat,a
  use timing_m,      ONLY:timing
  use parallel_m,    ONLY:master_cpu
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type(X_t)    :: X
  type(w_samp) :: Xw
@@ -94,6 +99,7 @@ subroutine QP_interpolate_W(X,Xw,q,mode)
 #ifdef _GPU
    YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(RIM_W),RIM_W)
 #endif
+   !RIM_W=RIM_W/eps_env
    return
  endif
  !
@@ -378,6 +384,7 @@ subroutine QP_interpolate_W(X,Xw,q,mode)
    io_err=io_RIM_W(ID,mode,Xw)
  endif
  !
+ !RIM_W=RIM_W/eps_env
 #ifdef _GPU
  YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(RIM_W),RIM_W)
 #endif
@@ -390,8 +397,9 @@ subroutine find_q_nns(q,q_grid_b_iku,idx_q,idx_is,idx_G)
  use pars,          ONLY:SP
  use R_lattice,     ONLY:bz_samp,RIM_W_ng,g_vec,ng_vec,rl_sop
  use vec_operate,   ONLY:v_is_zero
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp):: q
  real(SP)     :: q_grid_b_iku(3,2)
diff --git a/src/qp/QP_life_transitions.F b/src/qp/QP_life_transitions.F
index 0d16058495..48358e01ea 100644
--- a/src/qp/QP_life_transitions.F
+++ b/src/qp/QP_life_transitions.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 integer function QP_life_transitions(iqibz_in,E,k,q,X_life_W)
  !
  use pars,          ONLY:SP
@@ -13,8 +17,9 @@ integer function QP_life_transitions(iqibz_in,E,k,q,X_life_W)
  use frequency,     ONLY:w_samp,bare_grid_N,coarse_grid_N,coarse_grid_Pt,ordered_grid_index
  use electrons,     ONLY:levels,spin
  use R_lattice,     ONLY:qindx_S,bz_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(levels) ::E
  type(bz_samp)::q,k
  type(w_samp) ::X_life_W
diff --git a/src/qp/QP_mpa.F b/src/qp/QP_mpa.F
index beae81406d..358a097e8b 100644
--- a/src/qp/QP_mpa.F
+++ b/src/qp/QP_mpa.F
@@ -4,8 +4,12 @@
 ! Copyright (C) 2006 The Yambo Team
 !
 ! Authors (see AUTHORS file for details): DALV AF AM
-! 
+!
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+! 
 !
 subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  !
@@ -47,20 +51,22 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  use functions,     ONLY:bose_E_cut
  use devxlib,       ONLY:devxlib_memcpy_h2d,devxlib_memcpy_d2h,devxlib_memcpy_d2d
  use gpu_m,         ONLY:have_gpu
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
- type(levels)        ::E
- type(bz_samp)       ::Xk,k,q
- type(X_t)           ::X
- type(QP_t)          ::qp
- type(w_samp)        ::Xw
- integer             ::GW_iter
+ type(levels) ::E
+ type(bz_samp)::Xk,k,q
+ type(X_t)    ::X
+ type(QP_t)   ::qp
+ type(w_samp) ::Xw
+ integer      ::GW_iter
  !
  ! Work Space
  !
- integer          :: i_qp,i_w,iqbz,iqibz,ib,ig1,ig2,iqs,i_qp_to_start,iq_to_start,is,&
-&                    iq_mem,X_range(2),io_err,ID,IO_ACT,timing_steps
+ integer          :: i_qp,i_w,iqbz,iqibz,ib,i_v,i_c,i_k,i_kmq,i_kmq_Go,i_kmq_bz,i_kmq_s,&
+&                    i_sp_pol,ig1,ig2,iqs,i_qp_to_start,iq_to_start,is,iq_mem,&
+&                    X_range(2),io_err,ID,IO_ACT,timing_steps
  !
  complex(SP), allocatable          :: W_(:),dc(:)
  type(elemental_collision), target :: isc,iscp
@@ -113,16 +119,16 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  iq_to_start  =1
  QP_Sc        =cZERO
  !
- ! for G & Sigma
  ! Sc Energy points (1 type each QP state !)
  !-------------------------------------------
- !
  if (trim(QP_solver)=='n') then
    !
    do i_qp=1,qp%n_states
      Sc_W(i_qp)%n_freqs=QP_dSc_steps
      YAMBO_ALLOC(Sc_W(i_qp)%p,(Sc_W(i_qp)%n_freqs))
-     forall (i_w=1:QP_dSc_steps) Sc_W(i_qp)%p(i_w)=qp%E_bare(i_qp)+(i_w-1)*QP_dSc_delta
+     do i_w=1,QP_dSc_steps 
+       Sc_W(i_qp)%p(i_w)=qp%E_bare(i_qp)+(i_w-1)*QP_dSc_delta
+     enddo
    enddo
    !
  else if (trim(QP_solver)=='g') then
@@ -130,7 +136,7 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
    QP_dSc_steps=QP_Sc_steps
    do i_qp=1,qp%n_states
      call W_reset(Sc_W(i_qp))
-     Sc_W(i_qp)%n_freqs=QP_Sc_steps
+     Sc_W(i_qp)%n_freqs =QP_Sc_steps
      call FREQUENCIES_Green_Function(i_qp,Sc_W(i_qp),E%E,.FALSE.)
      !
      if(Sc_W(i_qp)%grid_type=="ra") then
@@ -173,13 +179,14 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  ! Redefine iq_to_start to be CPU-dependent
  !
  do iqbz=1,q%nbz 
-   if (PAR_IND_Q_ibz%element_1D(q%sstar(iqbz,1))) then
+   iqibz=q%sstar(iqbz,1)
+   if (PAR_IND_Q_ibz%element_1D(iqibz)) then
      iq_to_start=iqbz
      exit
    endif
  enddo
  !
- if( .not.l_sc_run ) then
+ if ( .not.l_sc_run ) then
    call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,B_index=PAR_IND_G_b,CLEAN_UP=.TRUE.)
    call PARALLEL_WF_distribute(QP_index=PAR_IND_QP)
    call PARALLEL_WF_index( )
@@ -221,8 +228,7 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  !------------
  !
  ! MB: we load X_par and not X_mat (refer to QP_ppa_cohsex)
- !     Here deallocate X_mat
- !
+ ! deallocate X_mat
  call X_ALLOC_elemental("X")
  !
  if (.not.io_RESPONSE) call error("[MPA] .not. io_RESPONSE untested with MPA")
@@ -230,8 +236,8 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  if (.not.allocated(X_par).and.io_RESPONSE) then
    allocate(X_par(1))
  endif
- if (io_RESPONSE) call X_ALLOC_parallel(X_par(1),X%ng,2*X%mpa_npoles,"X")
  !
+ if (io_RESPONSE) call X_ALLOC_parallel(X_par(1),X%ng,2*X%mpa_npoles,"X")
  X_rows(1:2)=X_par(1)%rows(1:2)
  X_cols(1:2)=X_par(1)%cols(1:2)
  !
@@ -255,11 +261,13 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  ! for the fitting of poles and residues, aimed at distributing
  ! G vectors. This parallelism is incompatible at present with the
  ! global G_parallelism of the self-energy
- !
- call PP_indexes_reset(px)
- l_para_G_MPA=.not.(PAR_COM_RL_INDEX%n_cpu>1)
+ ! 
+ ! 2025/10/19 DS: This is buggy. Switchin it off
+ !l_para_G_MPA=.not.(PAR_COM_RL_INDEX%n_cpu>1)
+ l_para_G_MPA=.false.
  !
  if (l_para_G_MPA) then
+    call PP_indexes_reset(px)
     call PARALLEL_index(px,uplim=X_cols(2:2),low_range=X_cols(1:1),&
 &                       COMM=PAR_COM_X_WORLD,CONSECUTIVE=.true.)
     ig_first=px%first_of_1D(PAR_COM_X_WORLD%cpu_id+1)
@@ -291,7 +299,7 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  endif
  !
  if(io_MULTIPOLE) then
-   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID2,COMM=local_yMPI_COMM,DO_IT=condition)
+   call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),ID=ID2,COMM=LOCAL_yMPI_COMM,DO_IT=condition)
    io_err=io_MPA(X,Xw,ID2)
    call IO_and_Messaging_switch("SAVE")
    call IO_and_Messaging_switch("+io_out",CONDITION=condition)
@@ -301,14 +309,14 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
  !
  call timing('GW(mpa)_init',OPR='stop')
  !
- Q_loop: do iqbz=iq_to_start,q%nbz 
+ Q_loop: do iqbz=iq_to_start,q%nbz
    !
-   if (.not.PAR_IND_Q_ibz%element_1D(q%sstar(iqbz,1))) cycle
+   iqs  =q%sstar(iqbz,2)
+   iqibz=q%sstar(iqbz,1)
    !
+   if (.not.PAR_IND_Q_ibz%element_1D(iqibz)) cycle
    !
-   isc%qs(2:)=(/q%sstar(iqbz,1),q%sstar(iqbz,2)/)
-   iqibz=isc%qs(2)
-   iqs  =isc%qs(3)
+   isc%qs(2:3)=(/iqibz,iqs/)
    !
    if (iqibz/=isc%iqref) then
      !
@@ -437,7 +445,7 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
        MP_Qerr=MP_Qerr/X%ng**2
        cond_numQ(:)=cond_numQ(:)/X%ng**2
        !
-       call io_control(ACTION=OP_APP_CL,COM=REP,SEC=(/2*iqibz,2*iqibz+1/),ID=ID2,COMM=local_yMPI_COMM,DO_IT=condition)
+       call io_control(ACTION=OP_APP_CL,COM=REP,SEC=(/2*iqibz,2*iqibz+1/),ID=ID2,COMM=LOCAL_yMPI_COMM,DO_IT=condition)
        io_err=io_MPA(X,Xw,ID2)
        call msg('r',' :: PP cond fix/tot      ',PPcond_Qrate)
        call msg('r',' :: Mean np reduction    ',MPred_rate)
@@ -494,12 +502,22 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
      !
      if (i_qp==QP_n_states) i_qp_to_start=1
      !
+     i_v=QP_table(i_qp,1)
+     i_c=QP_table(i_qp,2)
+     i_k=QP_table(i_qp,3)
+     i_sp_pol=spin(QP_table(i_qp,:))
+     !
+     i_kmq_Go=qindx_S(i_k,iqbz,2)
+     i_kmq_bz=qindx_S(i_k,iqbz,1)
+     !
+     i_kmq  =k%sstar(i_kmq_bz,1)
+     i_kmq_s=k%sstar(i_kmq_bz,2)
      !
-     isc%is=(/QP_table(i_qp,1),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
-     isc%os(2:)=(/k%sstar(qindx_S(isc%is(2),iqbz,1),:),spin(QP_table(i_qp,:))/)
-     isc%qs(1)=qindx_S(QP_table(i_qp,3),iqbz,2)
+     isc%is=(/i_v,i_k,  1,      i_sp_pol/)
+     isc%os=(/0,  i_kmq,i_kmq_s,i_sp_pol/)
+     isc%qs(1)=i_kmq_Go
      !
-     iscp%is=(/QP_table(i_qp,2),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
+     iscp%is=(/i_c,i_k,1,i_sp_pol/)
      iscp%qs=isc%qs
      !
      ! DALV: here the grid is centered in E0
@@ -509,15 +527,15 @@ subroutine QP_mpa(X,Xk,E,k,q,qp,Xw,GW_iter)
        !
        if (.not.PAR_IND_G_b%element_1D(ib)) cycle
        !
-       if (q%sstar(iqbz,2)==1) call live_timing(steps=1)
+       if (iqs==1) call live_timing(steps=1)
        !
        isc%os(1)=ib
        !
        call scatter_Bamp_gpu(isc)
        iscp%os=isc%os
        !
-       if (any(isc%is/=iscp%is))  then
-         call scatter_Bamp(iscp)
+       if (any(isc%is/=iscp%is)) then
+         call scatter_Bamp_gpu(iscp)
        else
          if (     have_gpu) call devxlib_memcpy_d2d(iscp_rhotw_p, isc_rhotw_p)
          if (.not.have_gpu) iscp_rhotw_p=isc_rhotw_p
diff --git a/src/qp/QP_ppa_cohsex.F b/src/qp/QP_ppa_cohsex.F
index 38b4b47e62..462f4f684b 100644
--- a/src/qp/QP_ppa_cohsex.F
+++ b/src/qp/QP_ppa_cohsex.F
@@ -2,6 +2,11 @@
 ! License-Identifier: GPL
 !
 ! Authors (see AUTHORS file for details): AM [AF,IM,MB]
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
 ! 
 ! This file is distributed under the terms of the GNU 
 ! General Public License. You can redistribute it and/or 
@@ -17,7 +22,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  !
@@ -34,7 +43,7 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
 &                        l_rt_carriers_in_use,l_rim_w
  use parallel_int,  ONLY:PP_wait,PP_redux_wait,PARALLEL_global_indexes,PARALLEL_WF_index,&
 &                        PARALLEL_WF_distribute
- use parallel_m,    ONLY:PAR_IND_Xk_ibz,PAR_IND_G_b,PAR_IND_QP,&
+ use parallel_m,    ONLY:PAR_IND_Xk_ibz,PAR_IND_G_b,PAR_IND_QP,PAR_COM_QP_A2A,&
 &                        PAR_IND_Q_ibz,PAR_IND_Q_ibz_ID,PAR_IND_QP_ID,&
 &                        PAR_Q_ibz_index,n_WF_bands_to_load,HEAD_QP_cpu
  use collision_el,  ONLY:elemental_collision,elemental_collision_free,elemental_collision_alloc
@@ -66,8 +75,9 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  use drivers,       ONLY:Finite_Tel
  use D_lattice,     ONLY:Bose_Temp
  use functions,     ONLY:bose_E_cut
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E
  type(bz_samp)::Xk,k,q
@@ -79,14 +89,15 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  ! Work Space
  !
  type(w_samp)     :: Sc_W(qp%n_states)
- integer          :: i_qp,i_w,iqbz,iqibz,ib,ig1,ig2,alloc_err,iqs,iscs_save(2,4),&
-&                    i_qp_to_start,iq_to_start,is,iq_mem,io_err,ID,IO_ACT,timing_steps
+ integer          :: i_qp,i_w,iqbz,iqibz,ib,i_v,i_c,i_k,i_kmq,i_kmq_Go,i_kmq_bz,i_kmq_s,&
+&                    i_sp_pol,ig1,ig2,iqs,i_qp_to_start,iq_to_start,is,iq_mem,&
+&                    alloc_err,iscs_save(2,4),io_err,ID,IO_ACT,timing_steps
  integer          :: iw
  integer          :: X_range1,X_range2
  integer          :: X_rows1,X_rows2,X_cols1,X_cols2,X_nrows,X_ncols
  !
  character(schlen):: ch,SECTION_name,W_name
- logical          :: X_is_TR_rotated,l_X_ALLOC_elemental,l_RIM_W_g
+ logical          :: X_is_TR_rotated,l_X_ALLOC_elemental,fill_QP_sc,l_RIM_W_g
  real(SP)         :: eet_cutoff0_sp(n_sp_pol),eet_cutoff0,eet_cutoff1_sp(n_sp_pol),eet_cutoff1
  real(SP)         :: X_ppaE,bose_PPA_E,eet_energy
  real(SP)         :: E_kmq,f_kmq,PPA_E
@@ -126,7 +137,9 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
    do i_qp=1,qp%n_states
      Sc_W(i_qp)%n_freqs=QP_dSc_steps
      YAMBO_ALLOC(Sc_W(i_qp)%p,(Sc_W(i_qp)%n_freqs))
-     forall (i_w=1:QP_dSc_steps) Sc_W(i_qp)%p(i_w)=qp%E_bare(i_qp)+(i_w-1)*QP_dSc_delta
+     do i_w=1,QP_dSc_steps 
+       Sc_W(i_qp)%p(i_w)=qp%E_bare(i_qp)+(i_w-1)*QP_dSc_delta
+     enddo
    enddo
    !
  else if (trim(QP_solver)=='g') then
@@ -197,27 +210,25 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  ! WF distributed & load
  !=======================
  !
- if ( .not.l_sc_run ) call PARALLEL_global_indexes(E,k,q,"Self_Energy")
+ if ( .not.l_sc_run ) then
+   call PARALLEL_global_indexes(E,k,q,"Self_Energy")
+   call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,B_index=PAR_IND_G_b,CLEAN_UP=.TRUE.)
+   call PARALLEL_WF_distribute(QP_index=PAR_IND_QP)
+   call PARALLEL_WF_index( )
+ endif
+ !
+ if(.not.l_sc_run) ch='-GW'
+ if(     l_sc_run) ch='-SC'
  !
  ! Redefine iq_to_start to be CPU-dependent
  !
  do iqbz=1,q%nbz 
-   if (PAR_IND_Q_ibz%element_1D(q%sstar(iqbz,1))) then
+   iqibz=q%sstar(iqbz,1)
+   if (PAR_IND_Q_ibz%element_1D(iqibz)) then
      iq_to_start=iqbz
      exit
    endif
  enddo
- !
- if( .not.l_sc_run ) then
-   call PARALLEL_WF_distribute(K_index=PAR_IND_Xk_ibz,B_index=PAR_IND_G_b,CLEAN_UP=.TRUE.)
-   call PARALLEL_WF_distribute(QP_index=PAR_IND_QP)
-   call PARALLEL_WF_index( )
- endif
- !
- ch='-GW'
-#if defined _SC
- if(l_sc_run) ch='-SC'
-#endif
  !
  ! wf and collisions dimension
  !-----------------------------
@@ -237,10 +248,6 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  io_err=io_X(X,Xw,ID)
  if (io_err<0.and.io_RESPONSE) call error('Incomplete and/or broken PPA/Static diel. fun. database')
  !
- ! Test the spatial Inversion
- !   
- call WF_spatial_inversion(E,Xk)
- !
  ! ALLOCATION
  !------------
  !
@@ -265,7 +272,6 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  !
  ! Elemental Collisions
  !====================== 
- isc%iqref=0
  call elemental_collision_alloc(isc,NG=isc%ngrho,TITLE="GW")
  call elemental_collision_alloc(iscp,NG=isc%ngrho,TITLE="GW")
  !
@@ -279,9 +285,9 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
 &             PAR_IND_Q_ibz%n_of_elements(PAR_IND_Q_ibz_ID+1)*&
 &             count( PAR_IND_G_b%element_1D(QP_n_G_bands(1):QP_n_G_bands(2)) )
  !
- ch=trim(SECTION_name)
+ if (.not.l_sc_run) ch=trim(SECTION_name)
 #if defined _SC
- if (l_sc_run) ch=trim(SECTION_name)//'@it'//trim(intc(it_now))
+ if (     l_sc_run) ch=trim(SECTION_name)//'@it'//trim(intc(it_now))
 #endif
  call live_timing(trim(ch),timing_steps)
  !
@@ -299,11 +305,12 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  !
  Q_loop: do iqbz=iq_to_start,q%nbz
    !
-   if (.not.PAR_IND_Q_ibz%element_1D(q%sstar(iqbz,1))) cycle
+   iqs  =q%sstar(iqbz,2)
+   iqibz=q%sstar(iqbz,1)
+   !
+   if (.not.PAR_IND_Q_ibz%element_1D(iqibz)) cycle
    !
-   isc%qs(2:)=(/q%sstar(iqbz,1),q%sstar(iqbz,2)/)
-   iqibz=isc%qs(2)
-   iqs  =isc%qs(3)
+   isc%qs(2:)=(/iqibz,iqs/)
    !
    if (iqibz/=isc%iqref) then
      !
@@ -465,6 +472,17 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
      !
      if (i_qp==QP_n_states) i_qp_to_start=1
      !
+     i_v=QP_table(i_qp,1)
+     i_c=QP_table(i_qp,2)
+     i_k=QP_table(i_qp,3)
+     i_sp_pol=spin(QP_table(i_qp,:))
+     !
+     i_kmq_Go=qindx_S(i_k,iqbz,2)
+     i_kmq_bz=qindx_S(i_k,iqbz,1)
+     !
+     i_kmq  =k%sstar(i_kmq_bz,1)
+     i_kmq_s=k%sstar(i_kmq_bz,2)
+     !
 !#if defined _SC  
 !     !
 !     ! In OEP only vc matrix elements so ... no cc' no vv' 
@@ -473,11 +491,11 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
 !     if (l_sc_srpa.and.(QP_table(i_qp,1)>E%nbf.or.QP_table(i_qp,2)<=E%nbf)) cycle
 !#endif
      !
-     isc%is=(/QP_table(i_qp,1),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
-     isc%os(2:)=(/k%sstar(qindx_S(isc%is(2),iqbz,1),:),spin(QP_table(i_qp,:))/)
-     isc%qs(1)=qindx_S(QP_table(i_qp,3),iqbz,2)
+     isc%is=(/i_v,i_k,  1,      i_sp_pol/)
+     isc%os=(/0,  i_kmq,i_kmq_s,i_sp_pol/)
+     isc%qs(1)=i_kmq_Go
      !
-     iscp%is=(/QP_table(i_qp,2),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
+     iscp%is=(/i_c,i_k,1,i_sp_pol/)
      iscp%qs=isc%qs
      !
      dc=cZERO
@@ -485,10 +503,13 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
      ! COH (using completeness relation)
      !
      if (((l_sc_srpa.or.l_sc_coh.or.l_cohsex).and..not.COHSEX_use_empties).or.l_GW_terminator) then
+       !
+       ! Why this is inside a iq loop if nothing depends on q ??
        !
        iscs_save(1,: )=isc%os
        iscs_save(2,:3)=isc%qs
-       isc%os=(/QP_table(i_qp,2),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
+       !
+       isc%os=(/i_c,i_k,1,i_sp_pol/)
        isc%qs=(/1,1,1/)
        !
        call scatter_Bamp_gpu(isc)
@@ -620,16 +641,10 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
          ! AF: for the time being, the SC case is treated as if g-parallelism is
          ! disabled
          !
-#if defined _SC
-         if (l_sc_run) then
-           if (PAR_COM_QP_A2A%CPU_id==0)    QP_Sc(i_qp,:)=QP_Sc(i_qp,:)+dc(1)
-         else
-#endif
-!          if (HEAD_QP_cpu)                 QP_Sc(i_qp,:)=QP_Sc(i_qp,:)+dc(1)
-           if (PAR_COM_G_b_INDEX%CPU_id==0) QP_Sc(i_qp,:)=QP_Sc(i_qp,:)+dc(1)
-#if defined _SC
-         endif
-#endif
+         if (     l_sc_run) fill_QP_Sc=PAR_COM_QP_A2A%CPU_id==0
+         if (.not.l_sc_run) fill_QP_Sc=PAR_COM_G_b_INDEX%CPU_id==0
+         !
+         if (fill_QP_Sc) QP_Sc(i_qp,:)=QP_Sc(i_qp,:)+dc(1)
          !
          dc=cZERO
          !
@@ -644,7 +659,7 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
        !
        if (.not.PAR_IND_G_b%element_1D(ib)) cycle
        !
-       if (q%sstar(iqbz,2)==1) call live_timing(steps=1)
+       if (iqs==1) call live_timing(steps=1)
        !
        isc%os(1)=ib
        !
@@ -685,10 +700,12 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
          endif
          !
          ! DALV: here the grid is center in E0
-         forall (i_w=1:QP_dSc_steps) W_(i_w)=Sc_W(i_qp)%p(i_w)+cI*QP_G_damp
+         do i_w=1,QP_dSc_steps 
+           W_(i_w)=Sc_W(i_qp)%p(i_w)+cI*QP_G_damp
+         enddo
          !
-         E_kmq=E%E(isc%os(1),isc%os(2),isc%os(4))
-         f_kmq=E%f(isc%os(1),isc%os(2),isc%os(4))
+         E_kmq=E%E(ib,i_kmq,i_sp_pol)
+         f_kmq=E%f(ib,i_kmq,i_sp_pol)
          !
          !
          do i_w=1,QP_dSc_steps
@@ -776,7 +793,7 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
          !
          ! SEX
          !
-         if (l_sc_sex.or.l_cohsex) dc(1)=-4._SP/spin_occ*pi*pre_factor*e%f(isc%os(1),isc%os(2),isc%os(4))
+         if (l_sc_sex.or.l_cohsex) dc(1)=-4._SP/spin_occ*pi*pre_factor*e%f(ib,i_kmq,i_sp_pol)
          !
          ! COH (when no empties are used the COH part is indeed calculated above)
          !
@@ -793,7 +810,9 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
      if(l_ppa.and.l_GW_terminator) then
        !
        ! DALV: here the grid is center in E0
-       forall (i_w=1:QP_dSc_steps) W_(i_w)=Sc_W(i_qp)%p(i_w)+cI*QP_G_damp
+       do i_w=1,QP_dSc_steps 
+         W_(i_w)=Sc_W(i_qp)%p(i_w)+cI*QP_G_damp
+       enddo
        !
        do i_w=1,QP_dSc_steps
          !
@@ -811,7 +830,6 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
          !DEV_OMPGPU &           reduction(+:dp_dummy_r,dp_dummy_i)
          !DEV_OMP parallel do default(shared), private(ig1,ig2,PPA_E,PPA_R,QP_ppa_EET,ctmp), &
          !DEV_OMP &        reduction(+:dp_dummy), collapse(2)
-         !
          do ig2=X_cols1,X_cols2
          do ig1=X_rows1,X_rows2
            !
@@ -824,7 +842,7 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
            if (Finite_Tel) lW=conjg(W_i)
            QP_ppa_EET=real(spin_occ)/(lW-eet_energy-PPA_E)
 #else
-           QP_ppa_EET= QP_ppa_EET_terminator(W_i,E,isc%is,PPA_E,ig1,ig2,isc%qs(2),&
+           QP_ppa_EET= QP_ppa_EET_terminator(W_i,E,isc%is,PPA_E,ig1,ig2,iqs,&
 &                                            eet_cutoff0,eet_cutoff1)
 #endif
            !
@@ -873,10 +891,8 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  YAMBO_FREE_GPU(conjg_iscp_rhotw)
  YAMBO_FREE(conjg_iscp_rhotw)
  !
- l_X_ALLOC_elemental=.true.
-#if defined _SC
- l_X_ALLOC_elemental=l_sc_run.and.io_RESPONSE
-#endif
+ if(.not.l_sc_run) l_X_ALLOC_elemental=.true.
+ if(     l_sc_run) l_X_ALLOC_elemental=io_RESPONSE
  if(l_X_ALLOC_elemental) then
    do iq_mem = 1,size(X_par)
      call MATRIX_reset(X_par(iq_mem))
@@ -901,7 +917,7 @@ subroutine QP_ppa_cohsex(X,Xk,E,k,q,qp,Xw,GW_iter)
  ! AM, Sept 2019. The COH potential seems to break (in some case of a large amount) the
  ! energy degenerations. 
  !
- if (.not.l_sc_run.and..not.l_rt_carriers_in_use.and.l_QP_symmetrize) then
+ if (.not.l_sc_run.and.(.not.l_rt_carriers_in_use).and.l_QP_symmetrize) then
    do i_w=1,QP_dSc_steps
      if (i_w==1) call QP_states_simmetrize(E,what="COHSEX Sc",V_complex=QP_Sc(:,1),warn_me=.TRUE.)
      if (i_w> 1) call QP_states_simmetrize(E,V_complex=QP_Sc(:,i_w))
diff --git a/src/qp/QP_real_axis.F b/src/qp/QP_real_axis.F
index b17335ea63..4f3e4aee2c 100644
--- a/src/qp/QP_real_axis.F
+++ b/src/qp/QP_real_axis.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 !@brief GW self-energy on the real axis
 !
 subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
@@ -48,8 +52,9 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
  use wrapper,       ONLY:Vstar_dot_V,V_dot_V
  use wrapper_omp,   ONLY:Vstar_dot_V_omp,V_dot_V_omp
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)   :: en,Xen
  type(bz_samp)  :: Xk,k,q
@@ -64,8 +69,9 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
  type(elemental_collision) ::scattering_main,scattering_prime
  type(w_samp)     :: Sc_W(qp%n_states),X_life_W(q%nibz)
  integer          :: i1,i2,i3,i4,i_or,iqbz,iqibz,ib,i_err,i_q_to_start,iqs,&
+&                    i_v,i_c,i_k,i_kmq,i_kmq_bz,i_kmq_Go,i_kmq_s,i_sp_pol,&
 &                    io_err,X_id,WID,IO_ACT,timing_steps,i_q_mem,X_range(2),i_qp,i_qp_mem,i_q_W_mem,i_b_mem
- integer          :: X_rows1,X_rows2,X_cols1,X_cols2
+ integer          :: X_rows1,X_rows2,X_cols1,X_cols2,n_freqs
  complex(SP)      :: lrhotw(X%ng),W_dummy
  real(SP)            ::life_Fe,life_Fh
  integer, allocatable::life_W_table(:,:)
@@ -262,19 +268,25 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
    QP_W=cZERO
  endif
  !
- ! Test the spatial Inversion
- !
- call WF_spatial_inversion(en,Xk)
- !
  ! ALLOCATION (scattering_main)
  !
- call X_ALLOC_elemental('X',(/X%ng,X%ng,QP_n_W_freqs_redux/))
+ if (io_RESPONSE) then
+   !
+   call X_ALLOC_elemental('X',(/X%ng,X%ng,QP_n_W_freqs_redux/))
+   !
+   ! Replace X_mat with X_mat TO BE completed
+   ! At present X_par is not used
+   ! Here X_par is allocated just to define X_par(1)%rows
+   !
+   allocate(X_par(1))
+   call X_ALLOC_parallel(X_par(1),X%ng,Xw%n_freqs,"X INIT_ONLY")
+   !
+ endif
  !
  call elemental_collision_alloc(scattering_main,NG=X%ng,GAMP_NG=(/X%ng,X%ng/),TITLE="GW")
  call elemental_collision_alloc(scattering_prime,NG=X%ng,TITLE="GW")
  !
  QP_Sc=cZERO
- scattering_main%iqref=0
  !
  ! Main Loop
  !===========
@@ -299,6 +311,7 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
    io_err=io_X(X,Xw,X_id)
  endif
  !
+ !
  do iqbz=i_q_to_start,q%nbz
    !
    if (.not.l_life.and.io_SCREEN) QP_W=cZERO
@@ -344,11 +357,9 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
        !
      endif
      !
-     if (l_life) then
-       call X_delta_part(X%ng,X_life_W(iqibz)%n_freqs,X_mat(:,:,X_range(1):X_range(2)),scattering_main%gamp)
-     else
-       call X_delta_part(X%ng,Xw%n_freqs,X_mat(:,:,X_range(1):X_range(2)),scattering_main%gamp)
-     endif
+     n_freqs=Xw%n_freqs
+     if (l_life) n_freqs=X_life_W(iqibz)%n_freqs
+     call X_delta_part(X%ng,n_freqs,X_mat(:,:,X_range(1):X_range(2)),scattering_main%gamp)
      !
      if (l_life) QP_n_W_freqs=0
      !
@@ -364,12 +375,6 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
      X_is_TR_rotated=.true.
    endif
    !
-   ! mapping of X_par from X_mat
-   ! TO BE completed (avoiding reference to X_mat in the following)
-   !
-   if (.not. allocated(X_par) ) allocate(X_par(1))
-   call X_ALLOC_parallel(X_par(1),X%ng,Xw%n_freqs,"X")
-   !
    ! Useful when GPU porting will be introduced
    ! X_blc_p=> DEV_VAR(X_par%blc)
    !
@@ -383,11 +388,22 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
      !
      i_qp_mem=PAR_QP_index(i_qp)
      !
-     scattering_main%is=(/QP_table(i_qp,1),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
-     scattering_main%os(2:)=(/k%sstar(qindx_S(scattering_main%is(2),iqbz,1),:),spin(QP_table(i_qp,:))/)
-     scattering_prime%is=(/QP_table(i_qp,2),QP_table(i_qp,3),1,spin(QP_table(i_qp,:))/)
+     i_v=QP_table(i_qp,1)
+     i_c=QP_table(i_qp,2)
+     i_k=QP_table(i_qp,3)
+     i_sp_pol=spin(QP_table(i_qp,:))
+     !
+     i_kmq_Go=qindx_S(i_k,iqbz,2)
+     i_kmq_bz=qindx_S(i_k,iqbz,1)
      !
-     scattering_main%qs(1)=qindx_S(QP_table(i_qp,3),iqbz,2)
+     i_kmq  =k%sstar(i_kmq_bz,1)
+     i_kmq_s=k%sstar(i_kmq_bz,2)
+     !
+     scattering_main%is=(/i_v,i_k,1,i_sp_pol/)
+     scattering_main%os=(/0,i_kmq,i_kmq_s,i_sp_pol/)
+     scattering_prime%is=(/i_c,i_k,1,i_sp_pol/)
+     !
+     scattering_main%qs(1)=i_kmq_Go
      scattering_prime%qs=scattering_main%qs
      !
      if (.not.PAR_IND_QP%element_1D(i_qp)) then
@@ -398,11 +414,10 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
        !
        if (l_life) then
          do ib=QP_n_G_bands(1),QP_n_G_bands(2)
-           scattering_main%os(1)=ib
-           i_or=IOR(e2et((/scattering_main%is(:2),scattering_main%is(4)/),            &
-&                        (/scattering_main%os(:2),scattering_main%os(4)/),en,life_Fe),&
-&                   h2ht((/scattering_main%is(:2),scattering_main%is(4)/),            &
-&                        (/scattering_main%os(:2),scattering_main%os(4)/),en,life_Fh))
+           i_or=IOR(e2et((/i_v,i_k,i_sp_pol/),            &
+&                        (/ ib,i_kmq,i_sp_pol/),en,life_Fe),&
+&                   h2ht((/i_v,i_k,i_sp_pol/),            &
+&                        (/ ib,i_kmq,i_sp_pol/),en,life_Fh))
            QP_n_W_freqs=QP_n_W_freqs+i_or
          enddo
        endif
@@ -416,10 +431,10 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
        i_b_mem=PAR_G_bands_index(ib)
        !
        if (l_life) then
-         i_or=IOR(e2et((/scattering_main%is(:2),scattering_main%is(4)/),            &
-&                      (/scattering_main%os(:2),scattering_main%os(4)/),en,life_Fe),&
-&                 h2ht((/scattering_main%is(:2),scattering_main%is(4)/),            &
-&                      (/scattering_main%os(:2),scattering_main%os(4)/),en,life_Fh))
+         i_or=IOR(e2et((/i_v,i_k,i_sp_pol/),            &
+&                      (/ ib,i_kmq,i_sp_pol/),en,life_Fe),&
+&                 h2ht((/i_v,i_k,i_sp_pol/),            &
+&                      (/ ib,i_kmq,i_sp_pol/),en,life_Fh))
          QP_n_W_freqs=QP_n_W_freqs+i_or
        endif
        !
@@ -496,9 +511,6 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
      enddo bands_loop
    enddo qp_loop
    !
-   call MATRIX_reset(X_par(1))
-   deallocate(X_par)
-   !
    if (.not.l_life) then
      !
      QP_W_er(iqbz,:)=Xw%er
@@ -560,7 +572,10 @@ subroutine QP_real_axis(X,Xen,Xk,en,k,q,qp,Xw,Dip,GW_iter)
    enddo
  endif
  !
- if (allocated(X_par)) deallocate(X_par)
+ if (io_RESPONSE) then
+   call MATRIX_reset(X_par(1))
+   deallocate(X_par)
+ endif
  !
  call X_ALLOC_elemental('X')
  call WF_free(WF)
diff --git a/src/qp/QP_secant_driver.F b/src/qp/QP_secant_driver.F
index 04695aa67c..703921838d 100644
--- a/src/qp/QP_secant_driver.F
+++ b/src/qp/QP_secant_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_secant_driver(X,Xen,Xk,en,k,q,qp,Xw,Dip)
  !
  !Procedure for non-perturbative solution of Dyson equation.
@@ -47,8 +51,9 @@ subroutine QP_secant_driver(X,Xen,Xk,en,k,q,qp,Xw,Dip)
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:NONE,RD,RD_CL,OP_RD,OP_RD_CL,io_SCREEN
  use stderr,        ONLY:intc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(levels)::en,Xen
  type(bz_samp)::Xk,k,q
  type(X_t):: X
@@ -90,8 +95,10 @@ subroutine QP_secant_driver(X,Xen,Xk,en,k,q,qp,Xw,Dip)
  !
  !Brackets
  !
- forall(i1=1:qp%n_states) Eqp_raxis(i1,1)=qp%E_bare(i1)
- forall(i1=1:qp%n_states) Eqp_raxis(i1,2)=Eqp_raxis(i1,1)+1./HA2EV
+ do i1=1,qp%n_states 
+   Eqp_raxis(i1,1)=qp%E_bare(i1)
+   Eqp_raxis(i1,2)=Eqp_raxis(i1,1)+1./HA2EV
+ enddo
  !
  QP_solver_state=1
  call W2Sc_local_call()
@@ -157,7 +164,9 @@ subroutine QP_secant_driver(X,Xen,Xk,en,k,q,qp,Xw,Dip)
    call W_reset(Sc_W(i1))
    Sc_W(i1)%n_freqs=QP_dSc_steps
    YAMBO_ALLOC(Sc_W(i1)%p,(QP_dSc_steps))
-   forall (i2=1:QP_dSc_steps) Sc_W(i1)%p(i2)=Eqp_raxis(i1,1)+(i2-1)*QP_dSc_delta
+   do i2=1,QP_dSc_steps 
+     Sc_W(i1)%p(i2)=Eqp_raxis(i1,1)+(i2-1)*QP_dSc_delta
+   enddo
  enddo
  YAMBO_FREE(QP_solver_state)
  !
@@ -166,7 +175,7 @@ subroutine QP_secant_driver(X,Xen,Xk,en,k,q,qp,Xw,Dip)
  do i1=1,QP_n_states
    do i2=1,QP_dSc_steps-1
      QP_dSc(i1,i2)=(QP_Sc(i1,i2+1)-QP_Sc(i1,i2))/QP_dSc_delta
-     Z(i2)=1./(1.-QP_dSc(i1,i2))
+     Z(i2)=1._SP/(1._SP-QP_dSc(i1,i2))
      Eqp(i2)=Eqp_raxis(i1,1)+cmplx(0.,real(Z(i2))*aimag(QP_Sc(i1,1)),SP)
    enddo
    qp%E(i1)=Eqp(1)
diff --git a/src/qp/QP_states_simmetrize.F b/src/qp/QP_states_simmetrize.F
index f7bc4cbe5f..3295b862af 100644
--- a/src/qp/QP_states_simmetrize.F
+++ b/src/qp/QP_states_simmetrize.F
@@ -14,7 +14,7 @@ subroutine QP_states_simmetrize(en,what,V_real,V_complex,state_is_2do,warn_me)
  use QP_m,          ONLY:QP_n_states,QP_table
  use units,         ONLY:HA2EV
  use vec_operate,   ONLY:sort,degeneration_finder
- use electrons,     ONLY:levels,spin,n_sp_pol
+ use electrons,     ONLY:levels,spin,n_sp_pol,deg_threshold
  use stderr,        ONLY:real2ch,intc
  !
  implicit none
@@ -33,7 +33,7 @@ subroutine QP_states_simmetrize(en,what,V_real,V_complex,state_is_2do,warn_me)
 &              first_el(QP_n_states),n_of_el(QP_n_states),n_deg_grp
  character(schlen)   :: title=" ",states_ch=" "
  logical             :: check_the_diffs=.FALSE.
- real(SP), parameter :: E_deg_treshold=0.0001_SP/HA2EV,V_deg_percentual_treshold=0.01
+ real(SP), parameter :: V_deg_percentual_treshold=0.01
  !
  if (present(what))    title=what
  if (present(warn_me)) check_the_diffs=warn_me
@@ -45,7 +45,7 @@ subroutine QP_states_simmetrize(en,what,V_real,V_complex,state_is_2do,warn_me)
  enddo
  !
  call sort(Eo_sorted,indx=Eo_sorted_index)
- call degeneration_finder(Eo_sorted,QP_n_states,first_el,n_of_el,n_deg_grp,E_deg_treshold)
+ call degeneration_finder(QP_n_states,first_el,n_of_el,n_deg_grp,Er=Eo_sorted,deg_accuracy=deg_threshold)
  !
  if (present(state_is_2do)) then
    state_is_2do=.TRUE.
diff --git a/src/qp/XCo_Hartree_Fock.F b/src/qp/XCo_Hartree_Fock.F
index b1f94d7a03..1b8ce1f340 100644
--- a/src/qp/XCo_Hartree_Fock.F
+++ b/src/qp/XCo_Hartree_Fock.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine XCo_Hartree_Fock(E,k,Xk,q,mode)
  !
@@ -39,8 +43,9 @@ subroutine XCo_Hartree_Fock(E,k,Xk,q,mode)
  use pseudo,        ONLY:becp, pp_is_uspp
  use qe_pseudo_m,   ONLY:beccopy
  use devxlib,       ONLY:devxlib_memcpy_d2d
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E       
  type(bz_samp)::k,Xk,q
@@ -53,7 +58,7 @@ subroutine XCo_Hartree_Fock(E,k,Xk,q,mode)
  complex(SP), pointer DEV_ATTR     :: isc_rhotw_p(:), iscp_rhotw_p(:)
  !
  integer                  :: iq,iq_ibz,is_ibz,ib,ibp,jb,i_qp
- integer                  :: ik,ikp,iGp,i_sp_pol  ! QP list
+ integer                  :: ik,ikp,ikp_Go,ikp_s,ikp_bz,i_sp_pol
  integer                  :: Sx_lower_band,Sx_upper_band(2),n_lt_bands,n_lt_steps
  integer                  :: ig_first,ig_last
  complex(DP)              :: DP_Sx, DP_Sx_l
@@ -153,7 +158,7 @@ subroutine XCo_Hartree_Fock(E,k,Xk,q,mode)
    !  !!! num_threads(n_outthr)
    !DEV_OMP parallel default(shared), &
    !DEV_OMP          private(isc,iscp,gamp_p,isc_rhotw_p,iscp_rhotw_p,&
-   !DEV_OMP &                iq,iq_ibz,is_ibz,ikp,iGp,jb,DP_Sx_l), reduction(+:DP_sx)
+   !DEV_OMP &                iq,iq_ibz,is_ibz,ikp_Go,ikp_s,ikp_bz,jb,DP_Sx_l), reduction(+:DP_sx)
    !
    call OPENMP_update(master_thread)
    !
@@ -204,14 +209,17 @@ subroutine XCo_Hartree_Fock(E,k,Xk,q,mode)
      ! m   =QP_table(i_qp,2)
      ! k   =QP_table(i_qp,3)
      !
-     ikp=k%sstar(qindx_S(ik,iq,1),1)
-     iGp=k%sstar(qindx_S(ik,iq,1),2)
+     ikp_Go=qindx_S(ik,iq,2)
+     ikp_bz=qindx_S(ik,iq,1)
+     !
+     ikp  =k%sstar(ikp_bz,1)
+     ikp_s=k%sstar(ikp_bz,2)
      !
-     isc%is =(/ib, ik , 1 ,i_sp_pol/)
-     isc%os(2:)=(/ ikp,iGp,i_sp_pol/)
-     iscp%is=(/ibp,ik , 1 ,i_sp_pol/)
+     isc%is =(/ib, ik , 1   ,i_sp_pol/)
+     isc%os(2:)=(/ ikp,ikp_s,i_sp_pol/)
+     iscp%is=(/ibp,ik , 1   ,i_sp_pol/)
      !
-     isc%qs(1)=qindx_S(ik,iq,2)
+     isc%qs(1)=ikp_Go
      iscp%qs=isc%qs
      !
      if (pp_is_uspp) then
diff --git a/src/qp/XCo_driver.F b/src/qp/XCo_driver.F
index 594595b017..685f1b107f 100644
--- a/src/qp/XCo_driver.F
+++ b/src/qp/XCo_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine XCo_driver(E,k,Xk,q)
  !
  use zeros,         ONLY:zero_dfl
@@ -24,8 +28,9 @@ subroutine XCo_driver(E,k,Xk,q)
  use hamiltonian,   ONLY:l_sc_V_is_local
  use global_XC,     ONLY:H_SE_FOCK,H_SE_HARTREE,H_SE_COH,H_SE_SEX
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E       
  type(bz_samp)::k,Xk,q
diff --git a/src/qp/XCo_local.F b/src/qp/XCo_local.F
index 9b27bd033e..6fccd37edc 100644
--- a/src/qp/XCo_local.F
+++ b/src/qp/XCo_local.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine XCo_local(E,Xk)
  !
  ! Local V_xc
@@ -36,8 +40,9 @@ subroutine XCo_local(E,Xk)
  use SC,            ONLY:load_SC_components,SC_fft_size,compatible_SC_DB
 #endif
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E       
  type(bz_samp)::Xk
@@ -129,7 +134,7 @@ subroutine XCo_local(E,Xk)
        endif
        !
        if (.not.PAR_IND_WF_k%element_1D(ik)) cycle
-       if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1)-1 )) cycle
+       if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1) )) cycle
        !
      else
 #endif
@@ -147,11 +152,11 @@ subroutine XCo_local(E,Xk)
      ! 
      !$omp parallel do default(shared), private(ifft)
      do ifft=1,fft_size
-       c_ws(ifft,1)=V_xc(ifft,1)*WF%c(ifft,1,iwfp)
+       c_ws(ifft,1)=V_xc(ifft,1)*WF%r(ifft,1,iwfp)
      enddo
      !$omp end parallel do
      !
-     QP_Vxc(i1)=Vstar_dot_V_omp(fft_size*n_spinor,WF%c(:,:,iwf),c_ws)
+     QP_Vxc(i1)=Vstar_dot_V_omp(fft_size*n_spinor,cmplx(WF%r(:,:,iwf),kind=SP),c_ws(:,:))
      !
      if (pp_is_uspp) then
        call PP_vloc_augment(QP_Vxc(i1),V_xc,ik,ib,ibp)
@@ -189,7 +194,7 @@ subroutine XCo_local(E,Xk)
 #if defined _RT
      if (l_real_time) then
        if (.not.PAR_IND_WF_k%element_1D(ik)) cycle
-       if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1)-1 )) cycle
+       if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib-H_ref_bands(1)+1,ibp-H_ref_bands(1)+1) )) cycle
      else
 #endif
        if (.not.PAR_IND_WF_linear%element_2D(ib,ik)) cycle
@@ -202,9 +207,9 @@ subroutine XCo_local(E,Xk)
        iwf =WF%index(ib,ik,i_sp_pol)
        iwfp=WF%index(ibp,ik,i_sp_pol)
        !
-       c_ws(:,1)=V_xc_mat(:,i_sp_pol,i_sp_pol)*WF%c(:,1,iwfp)
+       c_ws(:,1)=V_xc_mat(:,i_sp_pol,i_sp_pol)*WF%r(:,1,iwfp)
        !
-       QP_Vxc(i1)=Vstar_dot_V_omp(fft_size*n_spinor,WF%c(:,:,iwf),c_ws)
+       QP_Vxc(i1)=Vstar_dot_V_omp(fft_size*n_spinor,cmplx(WF%r(:,:,iwf),kind=SP),c_ws(:,:))
        !
        if (pp_is_uspp) then
          call error('[PPs] USPP: nspin=2 and n_spinor=1 not implemented')
@@ -215,10 +220,10 @@ subroutine XCo_local(E,Xk)
        iwf=WF%index(ib,ik,1)
        iwfp=WF%index(ibp,ik,1)
        !
-       c_ws(:,1)=V_xc_mat(:,1,1)*WF%c(:,1,iwfp)+V_xc_mat(:,1,2)*WF%c(:,2,iwfp)
-       c_ws(:,2)=V_xc_mat(:,2,1)*WF%c(:,2,iwfp)+V_xc_mat(:,2,2)*WF%c(:,2,iwfp)
+       c_ws(:,1)=V_xc_mat(:,1,1)*WF%r(:,1,iwfp)+V_xc_mat(:,1,2)*WF%r(:,2,iwfp)
+       c_ws(:,2)=V_xc_mat(:,2,1)*WF%r(:,2,iwfp)+V_xc_mat(:,2,2)*WF%r(:,2,iwfp)
        !
-       QP_Vxc(i1)=Vstar_dot_V_omp(fft_size*n_spinor,WF%c(:,:,iwf),c_ws)
+       QP_Vxc(i1)=Vstar_dot_V_omp(fft_size*n_spinor,cmplx(WF%r(:,:,iwf),kind=SP),c_ws(:,:))
        !
        if (pp_is_uspp) then
          call error('[PPs] USPP: nspin=2 and n_spinor>1 not implemented')
diff --git a/src/qp/XCo_report.F b/src/qp/XCo_report.F
index 4373be70e2..013f6b39bc 100644
--- a/src/qp/XCo_report.F
+++ b/src/qp/XCo_report.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine XCo_report(qp,E,k)
  !
  use drivers,       ONLY:l_real_time,l_gw0
@@ -18,8 +22,9 @@ subroutine XCo_report(qp,E,k)
  use com,           ONLY:msg
  use stderr,        ONLY:intc,real2ch
  use xc_functionals,ONLY:E_xc_val
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(QP_t)    ::qp
  type(levels)  ::E
diff --git a/src/qp_control/QP_DBs_add_me.F b/src/qp_control/QP_DBs_add_me.F
index a730742794..1f66bccb5d 100644
--- a/src/qp_control/QP_DBs_add_me.F
+++ b/src/qp_control/QP_DBs_add_me.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_DBs_add_me(qp_base,qp2add,plan_EWZG,SAME_states,SAME_desc,OP,PRE_FAC)
  !
  ! This routine just adds to qp_base qp with the plan plan_EWZG
@@ -12,8 +16,9 @@ subroutine QP_DBs_add_me(qp_base,qp2add,plan_EWZG,SAME_states,SAME_desc,OP,PRE_F
  use pars,           ONLY:cZERO,rZERO,cI,SP,rONE
  use QP_m,           ONLY:QP_t,QP_reset,QP_copy,QP_alloc
  use electrons,      ONLY:n_sp_pol
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(QP_t)         ::qp_base,qp2add
  logical            ::plan_EWZG(4),SAME_states,SAME_desc
diff --git a/src/qp_control/QP_DBs_merge.F b/src/qp_control/QP_DBs_merge.F
index b6a5402577..a6e26499f4 100644
--- a/src/qp_control/QP_DBs_merge.F
+++ b/src/qp_control/QP_DBs_merge.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_DBs_merge(n_dbs,qp2merge,plan_EWZG,n_GROUNDS,qp,OP,PRE_FAC,FILES)
  !
  use pars,           ONLY:SP
@@ -14,8 +18,9 @@ subroutine QP_DBs_merge(n_dbs,qp2merge,plan_EWZG,n_GROUNDS,qp,OP,PRE_FAC,FILES)
  use C_driver,       ONLY:code_bin
  use vec_operate,    ONLY:v_is_zero
  use descriptors,    ONLY:IO_desc_duplicate
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer              :: n_dbs,n_GROUNDS
  type(QP_t)           :: qp2merge(n_dbs)
diff --git a/src/qp_control/QP_apply.F b/src/qp_control/QP_apply.F
index 504b170681..d02560ca76 100644
--- a/src/qp_control/QP_apply.F
+++ b/src/qp_control/QP_apply.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_apply(bands,en,k,object,msg_fmt,qp_impose,QP_ctl_impose,main_section)
  !
  ! This routine manages the external qp corrections.
@@ -37,8 +41,9 @@ subroutine QP_apply(bands,en,k,object,msg_fmt,qp_impose,QP_ctl_impose,main_secti
 &                        QP_ctl_ID,QP_ctl_t,QP_ctl_msg_fmt,QP_ctl_xc_STRING,QP_ctl_what,&
 &                        At_least_one_band_corrected
  use global_XC,     ONLY:correct_global_XC
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer       ::bands(2)
  type(levels)  ::en
diff --git a/src/qp_control/QP_apply_QP.F b/src/qp_control/QP_apply_QP.F
index db7d27168b..84a0302e3d 100644
--- a/src/qp_control/QP_apply_QP.F
+++ b/src/qp_control/QP_apply_QP.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_apply_QP(i_ACTION,bands,en,k,qp)
  !
  use pars,          ONLY:rZERO
@@ -12,8 +16,9 @@ subroutine QP_apply_QP(i_ACTION,bands,en,k,qp)
  use R_lattice,     ONLY:bz_samp
  use electrons,     ONLY:levels,n_sp_pol
  use QP_CTL_m,      ONLY:QP_action
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)    ::en
  type(QP_t)      ::qp
diff --git a/src/qp_control/QP_apply_dump_user_CTLs.F b/src/qp_control/QP_apply_dump_user_CTLs.F
index fe6abe6197..ff3cb22947 100644
--- a/src/qp_control/QP_apply_dump_user_CTLs.F
+++ b/src/qp_control/QP_apply_dump_user_CTLs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_apply_dump_user_CTLs( E, object )
  !
  use pars,          ONLY:schlen
@@ -14,8 +18,9 @@ subroutine QP_apply_dump_user_CTLs( E, object )
  use QP_CTL_m,      ONLY:QP_action,n_QP_actions,QP_ctl_what,QP_ctl_DB_user,&
 &                        QP_ctl_ID,QP_CTL_is_active,QP_CTL_fill
  use stderr,        ONLY:STRING_split,STRING_same
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type (levels) ::E
  character(1)  ::object
diff --git a/src/qp_control/QP_apply_interpolate_QP.F b/src/qp_control/QP_apply_interpolate_QP.F
index 6ec69d2481..e304927964 100644
--- a/src/qp_control/QP_apply_interpolate_QP.F
+++ b/src/qp_control/QP_apply_interpolate_QP.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_apply_interpolate_QP(i_ACTION,bands,en,k,qp)
  !
  ! Extend the QP correction from the kpt in qp to the kpt in k
@@ -30,8 +34,9 @@ subroutine QP_apply_interpolate_QP(i_ACTION,bands,en,k,qp)
  use QP_CTL_m,      ONLY:QP_action,QP_ctl_msg_fmt,QP_ctl_xc_STRING,At_least_one_band_corrected
  use interpolate,   ONLY:NN_n_of_nearest,INTERP_obj,INTERPOLATE_msg_fmt,&
 &                        INTERPOLATION_driver_seed,INTERPOLATION_driver_do,INTERP_shell_factor
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::en
  type(QP_t)    ::qp
diff --git a/src/qp_control/QP_apply_the_ACTION.F b/src/qp_control/QP_apply_the_ACTION.F
index 577b8e1599..d37d5ffbc6 100644
--- a/src/qp_control/QP_apply_the_ACTION.F
+++ b/src/qp_control/QP_apply_the_ACTION.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_apply_the_ACTION(i_ACTION,bands,E,k)
  !
  ! Here I apply the strecth for both QP_action.
@@ -13,8 +17,9 @@ subroutine QP_apply_the_ACTION(i_ACTION,bands,E,k)
  use electrons,     ONLY:levels,n_sp_pol
  use R_lattice,     ONLY:bz_samp
  use QP_CTL_m,      ONLY:QP_action,QP_ctl_xc_STRING,At_least_one_band_corrected
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)    ::E
  type(bz_samp)   ::k
diff --git a/src/qp_control/QP_load_DB.F b/src/qp_control/QP_load_DB.F
index d4af4933f6..3160259597 100644
--- a/src/qp_control/QP_load_DB.F
+++ b/src/qp_control/QP_load_DB.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_load_DB(i_ACTION,bands,en,k)
  !
  use drivers,       ONLY:l_chi,l_bss
@@ -14,8 +18,9 @@ subroutine QP_load_DB(i_ACTION,bands,en,k)
  use QP_CTL_m,      ONLY:QP_action
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD_CL,DUMP,REP
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)    ::en
  type(bz_samp)   ::k
diff --git a/src/qp_control/QP_load_GFs.F b/src/qp_control/QP_load_GFs.F
index 435e952631..07a6217743 100644
--- a/src/qp_control/QP_load_GFs.F
+++ b/src/qp_control/QP_load_GFs.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_load_GFs(bands,qp,E)
  !
  ! Using qp%GreenF/qp%GreenF_W I define here the Green Functions
  !
  use electrons,     ONLY:n_sp_pol,spin,levels
  use QP_m,          ONLY:QP_t,GF_is_causal
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E
  type(QP_t)   ::qp
diff --git a/src/real_time_control/.objects b/src/real_time_control/.objects
index 52d48c2a5e..e46d6cdb05 100644
--- a/src/real_time_control/.objects
+++ b/src/real_time_control/.objects
@@ -3,9 +3,9 @@ objs = RT_alloc.o RT_free.o RT_Dynamics_is_over.o RT_RESTART_database_copy.o $(d
        RT_Lifetimes_merge_intra_k.o RT_Lifetimes_merge_inter_k.o RT_Lifetimes_merge_driver.o \
        RT_output_INIT.o RT_output.o RT_databases_IO.o \
        RT_Observables.o RT_mean_lifetimes_and_temperatures.o RT_average_operator.o RT_occupations_FIT.o \
-       RT_carriers_object.o RT_k_grid.o RT_apply.o \
+       RT_carriers_object.o RT_k_grid.o RT_apply.o RT_small_a.o \
        RT_interpolation_driver.o RT_interpolation_doit_and_transfer.o \
        RT_update_TIME_CONF_descriptions.o \
        RT_entropy.o RT_carriers_number.o RT_carriers_temperature.o RT_energy_components.o  RT_output_and_IO_driver.o \
-       RT_Fluence_and_PI_kind.o RT_Rabi_frequency.o RT_Rabi_frequency_ALL_and_print.o
+       RT_Fluence_and_PI_kind.o RT_Rabi_frequency.o RT_Rabi_frequency_ALL_and_print.o RT_field_from_file.o
 #endif
diff --git a/src/real_time_control/DOUBLE_project.dep b/src/real_time_control/DOUBLE_project.dep
index db06478179..07a445dff3 100644
--- a/src/real_time_control/DOUBLE_project.dep
+++ b/src/real_time_control/DOUBLE_project.dep
@@ -18,6 +18,7 @@
  RT_do_it.o
  RT_energy_components.o
  RT_entropy.o
+ RT_field_from_file.o
  RT_free.o
  RT_interpolation_doit_and_transfer.o
  RT_interpolation_driver.o
@@ -27,5 +28,6 @@
  RT_output.o
  RT_output_INIT.o
  RT_output_and_IO_driver.o
+ RT_small_a.o
  RT_update_TIME_CONF_descriptions.o
 
diff --git a/src/real_time_control/NL_project.dep b/src/real_time_control/NL_project.dep
new file mode 100644
index 0000000000..bbbad1cf24
--- /dev/null
+++ b/src/real_time_control/NL_project.dep
@@ -0,0 +1,2 @@
+ RT_databases_IO.o
+
diff --git a/src/real_time_control/RT_Fluence_and_PI_kind.F b/src/real_time_control/RT_Fluence_and_PI_kind.F
index b7b1142f35..e4a77cacef 100644
--- a/src/real_time_control/RT_Fluence_and_PI_kind.F
+++ b/src/real_time_control/RT_Fluence_and_PI_kind.F
@@ -24,7 +24,7 @@ subroutine RT_Fluence_and_PI_kind(En,i_field)
  type(gauge_field) :: A
  !
  logical      :: l_print_rabi
- integer      :: it0,it,last_it_evaluated,NE_i_time_save
+ integer      :: it0,itmax,it,last_it_evaluated,NE_i_time_save
  real(SP)     :: I(NE_steps),Omega(NE_steps),T(NE_steps)
  !
  real(SP),external :: RIntegrate,RT_Rabi_frequency
@@ -58,12 +58,21 @@ subroutine RT_Fluence_and_PI_kind(En,i_field)
  Efield(i_field)%pi_kind=0._SP
  !
  it0=Efield(i_field)%t_initial_indx
+ itmax=nint(Efield(i_field)%width/RT_step)
+ !
+ if ( index(Efield(i_field)%ef_name,'QSSIN')>0   .or.  &
+    & index(Efield(i_field)%ef_name,'QSFIELD')>0 .or.  &
+    & index(Efield(i_field)%ef_name,'GAUSS')>0 ) then
+   itmax=10*itmax
+ else
+   itmax=itmax+10
+ endif
  !
  do it=it0,NE_steps
    !
    T(it)=it*RT_step
    !
-   if ((it-it0)>6*nint(Efield(i_field)%width/RT_step)) exit
+   if ((it-it0)>itmax) exit
    !
    NE_i_time=it
    !
diff --git a/src/real_time_control/RT_Observables.F b/src/real_time_control/RT_Observables.F
index 8b3e250b0d..1d2b89638a 100644
--- a/src/real_time_control/RT_Observables.F
+++ b/src/real_time_control/RT_Observables.F
@@ -14,7 +14,7 @@ subroutine RT_Observables(en,k,dG_lesser,OBSERVABLES)
  !
  use pars,           ONLY:SP
  use R_lattice,      ONLY:bz_samp
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,n_sp_pol
  use real_time,      ONLY:RT_nk,RT_bands
  use fields,         ONLY:A_tot
  use timing_m,       ONLY:timing
@@ -23,7 +23,7 @@ subroutine RT_Observables(en,k,dG_lesser,OBSERVABLES)
  !
  type(levels),  intent(inout) :: en
  type(bz_samp), intent(in)    :: k
- complex(SP),   intent(in)    :: dG_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk)
+ complex(SP),   intent(in)    :: dG_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol)
  real(SP),      intent(inout) :: OBSERVABLES(25)
  !
  ! Workspace
diff --git a/src/real_time_control/RT_Rabi_frequency.F b/src/real_time_control/RT_Rabi_frequency.F
index 5c6688ab4c..8e5f5d8eb7 100644
--- a/src/real_time_control/RT_Rabi_frequency.F
+++ b/src/real_time_control/RT_Rabi_frequency.F
@@ -13,9 +13,9 @@ function RT_Rabi_frequency(En,i_field,l_print_rabi)
  use pars,           ONLY:SP,rZERO,cZERO,cI
  use units,          ONLY:HA2EV,PS2AUT
  use R_lattice,      ONLY:nkibz
- use electrons,      ONLY:n_sp_pol,levels
+ use electrons,      ONLY:n_sp_pol,levels,deg_threshold
  use X_m,            ONLY:global_gauge
- use fields,         ONLY:Efield,field_frequency
+ use fields,         ONLY:Efield
  use real_time,      ONLY:RT_bands,RT_nbands
  use dipoles,        ONLY:DIP_iR,DIP_v
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_Xk_ibz_index
@@ -40,11 +40,7 @@ function RT_Rabi_frequency(En,i_field,l_print_rabi)
  len_gauge=trim(global_gauge)=="length"
  vel_gauge=trim(global_gauge)=="velocity"
  !
- effective_frequency=0._SP
- do i_fr=1,Efield(i_field)%n_frequencies
-   effective_frequency=effective_frequency+field_frequency(Efield(i_field),i_fr)
- enddo
- effective_frequency=effective_frequency/real(Efield(i_field)%n_frequencies,SP)
+ effective_frequency=Efield(i_field)%frequency
  !
  RT_Rabi_frequency=rZERO
  E_dist=0.1_SP
@@ -74,8 +70,8 @@ function RT_Rabi_frequency(En,i_field,l_print_rabi)
    do ik=1,nkibz
      if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
      ik_mem=PAR_Xk_ibz_index(ik)
-     call degeneration_finder(En%E(RT_bands(1):RT_bands(2),ik,i_sp_pol),RT_nbands,first_el,n_of_el,n_deg_grp, &
-     &                        0.0001_SP/HA2EV,Include_single_values=.true.)
+     call degeneration_finder(RT_nbands,first_el,n_of_el,n_deg_grp,Er=En%E(RT_bands(1):RT_bands(2),ik,i_sp_pol), &
+     &                        deg_accuracy=deg_threshold,Include_single_values=.true.)
      do i_grp1=1,n_deg_grp
        ib1=first_el(i_grp1)+RT_bands(1)-1
        do i_grp2=i_grp1+1,n_deg_grp
diff --git a/src/real_time_control/RT_Rabi_frequency_ALL_and_print.F b/src/real_time_control/RT_Rabi_frequency_ALL_and_print.F
index 4d93e55f52..c3344b4d50 100644
--- a/src/real_time_control/RT_Rabi_frequency_ALL_and_print.F
+++ b/src/real_time_control/RT_Rabi_frequency_ALL_and_print.F
@@ -13,9 +13,9 @@ subroutine RT_Rabi_frequency_ALL_and_print(En,i_field)
  use pars,           ONLY:SP,cZERO,cI
  use units,          ONLY:HA2EV,PS2AUT
  use R_lattice,      ONLY:nkibz
- use electrons,      ONLY:n_sp_pol,levels
+ use electrons,      ONLY:n_sp_pol,levels,deg_threshold
  use X_m,            ONLY:global_gauge
- use fields,         ONLY:Efield,field_frequency
+ use fields,         ONLY:Efield
  use real_time,      ONLY:RT_bands,RT_nbands
  use dipoles,        ONLY:DIP_iR,DIP_v
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_Xk_ibz_index
@@ -41,18 +41,14 @@ subroutine RT_Rabi_frequency_ALL_and_print(En,i_field)
  !
  call OUTPUT_driver('rabi_frequencies',action="append")
  !
- effective_frequency=0._SP
- do i_fr=1,Efield(i_field)%n_frequencies
-   effective_frequency=effective_frequency+field_frequency(Efield(i_field),i_fr)
- enddo
- effective_frequency=effective_frequency/real(Efield(i_field)%n_frequencies,SP)
+ effective_frequency=Efield(i_field)%frequency
  !
  do i_sp_pol=1,n_sp_pol
    do ik=1,nkibz
      if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
      ik_mem=PAR_Xk_ibz_index(ik)
-     call degeneration_finder(En%E(RT_bands(1):RT_bands(2),ik,i_sp_pol),RT_nbands,first_el,n_of_el,n_deg_grp, &
-     &                        0.0001_SP/HA2EV,Include_single_values=.true.)
+     call degeneration_finder(RT_nbands,first_el,n_of_el,n_deg_grp,Er=En%E(RT_bands(1):RT_bands(2),ik,i_sp_pol), &
+     &                        deg_accuracy=deg_threshold,Include_single_values=.true.)
      do i_grp1=1,n_deg_grp
        ib1=first_el(i_grp1)+RT_bands(1)-1
        do i_grp2=i_grp1+1,n_deg_grp
diff --git a/src/real_time_control/RT_alloc.F b/src/real_time_control/RT_alloc.F
index 53141f5f30..464a3f1ff0 100644
--- a/src/real_time_control/RT_alloc.F
+++ b/src/real_time_control/RT_alloc.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_alloc(en,what)
  !
  use pars,           ONLY:SP,cZERO
@@ -22,12 +26,13 @@ subroutine RT_alloc(en,what)
 &                         RT_RTA_H_occ_infty,l_RT_relax_time_approx
  use RT_lifetimes,   ONLY:RT_EL_lifetimes_alloc,RT_PH_lifetimes_alloc
  use real_time,      ONLY:RT_levels,RT_is_dynamically_dephased,&
-&                         Ho_plus_Sigma,H_EQ,H_pseudo_eq,RT_Vnl_xc,HARTREE0,XC0,G_lesser_reference,&
+&                         Ho_plus_Sigma,H_EQ,H_field,H_pseudo_eq,RT_Vnl_xc,HARTREE0,XC0,G_lesser_reference,&
 &                         dG_lesser,G_lesser,MEM_pointer,G_MEM_steps,REF_V_xc_sc,REF_V_hartree_sc,rho_reference,&
 &                         magn_reference,RT_bands,RT_nk,l_RT_is_WFfree,l_RT_rotate_DM,&
-&                         l_velocity_gauge_corr
+&                         l_velocity_gauge_corr,dG_avg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(in) :: en
  character(*), intent(in) :: what
@@ -72,13 +77,17 @@ subroutine RT_alloc(en,what)
    !
    YAMBO_ALLOC(H_EQ,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nk(1):nk(2),n_sp_pol))
    !
+   YAMBO_ALLOC(H_field,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nk(1):nk(2),n_sp_pol))
+   !
    if (l_velocity_gauge_corr) then
      YAMBO_ALLOC(H_pseudo_eq,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
    endif
    !
    if(l_use_Hxc_collisions) then
      YAMBO_ALLOC(RT_Vnl_xc,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz,n_sp_pol))
-     RT_Vnl_xc=(0._SP,0._SP)
+     RT_Vnl_xc=cZERO
+     YAMBO_ALLOC(dG_avg,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz,n_sp_pol))
+     dG_avg=cZERO
    endif
    !
    YAMBO_ALLOC(HARTREE0,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nXkibz,n_sp_pol))
@@ -88,9 +97,9 @@ subroutine RT_alloc(en,what)
    XC0=cZERO
    !
    ! G's ...
-   YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
-   YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nk(1):nk(2),G_MEM_steps))
-   YAMBO_ALLOC(G_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nk(1):nk(2),G_MEM_steps))
+   YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
+   YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nk(1):nk(2),n_sp_pol,G_MEM_steps))
+   YAMBO_ALLOC(G_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nk(1):nk(2),n_sp_pol,G_MEM_steps))
    G_lesser_reference=cZERO
    dG_lesser         =cZERO
    G_lesser          =cZERO
diff --git a/src/real_time_control/RT_average_operator.F b/src/real_time_control/RT_average_operator.F
index 51537b9635..979564e530 100644
--- a/src/real_time_control/RT_average_operator.F
+++ b/src/real_time_control/RT_average_operator.F
@@ -26,7 +26,7 @@ subroutine RT_average_operator(N_V,O_VALUE,V_VALUE,O_REAL,O_QP,O_MATRIX,&
  !
  type(bz_samp), intent(in) :: k
  type(levels),  intent(in) :: en
- complex(SP),   intent(in) :: G(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP),   intent(in) :: G(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  integer,       intent(in) :: nK_mem,N_V
  !
  ! Optionals
@@ -44,7 +44,7 @@ subroutine RT_average_operator(N_V,O_VALUE,V_VALUE,O_REAL,O_QP,O_MATRIX,&
  !
  integer     :: ib,ibp,ik,RT_bands2,ik_ibz,i_sp_pol,iE_fg,i_cmp
  real(SP)    :: deltaE,deltaE_fg
- complex(SP) :: Hk(N_V,RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2)),VALUE_tmp(N_V), &
+ complex(SP) :: Hk(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),N_V),VALUE_tmp(N_V), &
 &               Hk_tmp(RT_bands(2),RT_bands(2)),                                    &
 &               Hk_vec_tmp(N_V,RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  logical     :: l_DbGd_energy,l_DbGd_dipole
@@ -81,33 +81,33 @@ subroutine RT_average_operator(N_V,O_VALUE,V_VALUE,O_REAL,O_QP,O_MATRIX,&
          if(l_DbGd_energy.and.RTibz%k_range(ik_ibz,1)/=ik) then
            iE_fg=RTibz%E_map(ik)
            do ib=RT_bands(1),RT_bands(2)
-             Hk(1,ib,ib)=Hk_tmp(ib,ib)-en%E(ib,ik_ibz,i_sp_pol)+en%FG%E(ib,iE_fg,i_sp_pol)
+             Hk(ib,ib,1)=Hk_tmp(ib,ib)-en%E(ib,ik_ibz,i_sp_pol)+en%FG%E(ib,iE_fg,i_sp_pol)
              do ibp=ib+1,RT_bands(2)
-               Hk(1,ib,ibp)=Hk_tmp(ib,ibp)
-               Hk(1,ibp,ib)=Hk_tmp(ibp,ib)
+               Hk(ib,ibp,1)=Hk_tmp(ib,ibp)
+               Hk(ibp,ib,1)=Hk_tmp(ibp,ib)
              enddo
            enddo
          else if(l_DbGd_dipole.and.RTibz%k_range(ik_ibz,1)/=ik) then
            iE_fg=RTibz%E_map(ik)
            do ib=RT_bands(1),RT_bands(2)
-             Hk(i_cmp,ib,ib)=Hk_vec_tmp(i_cmp,ib,ib)
+             Hk(ib,ib,i_cmp)=Hk_vec_tmp(i_cmp,ib,ib)
              do ibp=ib+1,RT_bands(2)
                deltaE   =en%E(ib,ik_ibz,i_sp_pol)  -en%E(ibp,ik_ibz,i_sp_pol)
                deltaE_fg=en%FG%E(ib,iE_fg,i_sp_pol)-en%FG%E(ibp,iE_fg,i_sp_pol)
                if(abs(deltaE)>1.E-5 .and. abs(deltaE_Fg)>1.E-5) then
-                 Hk(i_cmp,ib,ibp)=Hk_vec_tmp(i_cmp,ib,ibp)*deltaE/deltaE_fg
-                 Hk(i_cmp,ibp,ib)=Hk_vec_tmp(i_cmp,ibp,ib)*deltaE/deltaE_fg
+                 Hk(ib,ibp,i_cmp)=Hk_vec_tmp(i_cmp,ib,ibp)*deltaE/deltaE_fg
+                 Hk(ibp,ib,i_cmp)=Hk_vec_tmp(i_cmp,ibp,ib)*deltaE/deltaE_fg
                else
-                 Hk(i_cmp,ibp,ib)=Hk_vec_tmp(i_cmp,ibp,ib)
+                 Hk(ibp,ib,i_cmp)=Hk_vec_tmp(i_cmp,ibp,ib)
                endif
              enddo
            enddo
          else
-           if(N_V==1) Hk(1,:,:)    =Hk_tmp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
-           if(N_V> 1) Hk(i_cmp,:,:)=Hk_vec_tmp(i_cmp,:,:)
+           if(N_V==1) Hk(:,:,1)    =Hk_tmp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+           if(N_V> 1) Hk(:,:,i_cmp)=Hk_vec_tmp(i_cmp,:,:)
          endif 
          !
-         VALUE_tmp(i_cmp)=VALUE_tmp(i_cmp)-cI*Vstar_dot_V_omp(RT_bands2,Hk(i_cmp,:,:),G(:,:,ik))*RTibz%weights(ik)
+         VALUE_tmp(i_cmp)=VALUE_tmp(i_cmp)-cI*Vstar_dot_V_omp(RT_bands2,Hk(:,:,i_cmp),G(:,:,ik,i_sp_pol))*RTibz%weights(ik)
          !
        enddo
        !
diff --git a/src/real_time_control/RT_carriers_number.F b/src/real_time_control/RT_carriers_number.F
index 2af013e03d..05f4fa4a79 100644
--- a/src/real_time_control/RT_carriers_number.F
+++ b/src/real_time_control/RT_carriers_number.F
@@ -9,7 +9,7 @@ subroutine RT_carriers_number(en,k,Ncarr)
  !
  use pars,           ONLY:SP
  use R_lattice,      ONLY:bz_samp
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,n_sp_pol
  use RT_control,     ONLY:NE_i_start_time
  use RT_occupations, ONLY:RT_el_occ,RT_ho_occ
  use real_time,      ONLY:RTibz,NE_i_time,RT_levels,RT_bands
@@ -22,28 +22,30 @@ subroutine RT_carriers_number(en,k,Ncarr)
  !
  ! Workspace
  !
- integer     :: ib,ik,ik_RT
+ integer     :: ib,ik,ik_RT,i_Sp
  real(SP)    :: VALUE_tmp(2)
  !
  ! Electrons and holes number
  !
  Ncarr = 0._SP
  !
+ do i_sp=1,n_sp_pol
  do ik=1,en%nk
    do ib=RT_bands(1),RT_bands(2)
      VALUE_tmp=0._SP
      do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
-       if ( RT_levels%E(ib,ik_RT,1)<=0._SP ) then
-         if (NE_i_time==NE_i_start_time) VALUE_tmp(1)=VALUE_tmp(1)+RT_ho_occ%N(ib,ik_RT)*RTibz%weights(ik_RT)
-         if (NE_i_time/=NE_i_start_time) VALUE_tmp(1)=VALUE_tmp(1)-RT_el_occ%dN(ib,ik_RT)*RTibz%weights(ik_RT)
+       if ( RT_levels%E(ib,ik_RT,i_sp)<=0._SP ) then
+         if (NE_i_time==NE_i_start_time) VALUE_tmp(1)=VALUE_tmp(1)+RT_ho_occ%N(ib,ik_RT,i_sp)*RTibz%weights(ik_RT)
+         if (NE_i_time/=NE_i_start_time) VALUE_tmp(1)=VALUE_tmp(1)-RT_el_occ%dN(ib,ik_RT,i_sp)*RTibz%weights(ik_RT)
        else
-         if (NE_i_time==NE_i_start_time) VALUE_tmp(2)=VALUE_tmp(2)+RT_el_occ%N(ib,ik_RT)*RTibz%weights(ik_RT)
-         if (NE_i_time/=NE_i_start_time) VALUE_tmp(2)=VALUE_tmp(2)+RT_el_occ%dN(ib,ik_RT)*RTibz%weights(ik_RT)
+         if (NE_i_time==NE_i_start_time) VALUE_tmp(2)=VALUE_tmp(2)+RT_el_occ%N(ib,ik_RT,i_sp)*RTibz%weights(ik_RT)
+         if (NE_i_time/=NE_i_start_time) VALUE_tmp(2)=VALUE_tmp(2)+RT_el_occ%dN(ib,ik_RT,i_sp)*RTibz%weights(ik_RT)
        endif
      enddo
      Ncarr(2:3)=Ncarr(2:3)+VALUE_tmp
    enddo
  enddo
+ enddo
  !
  Ncarr(1)=Ncarr(2)-Ncarr(3)
  !
diff --git a/src/real_time_control/RT_carriers_object.F b/src/real_time_control/RT_carriers_object.F
index d736640658..2d137d3bd0 100644
--- a/src/real_time_control/RT_carriers_object.F
+++ b/src/real_time_control/RT_carriers_object.F
@@ -53,6 +53,7 @@ subroutine RT_carriers_object(RT,E,K,DB_id,WHAT)
      i_b    =RT%table(i_RT,1)
      i_k    =RT%table(i_RT,2)
      i_k_RT =RT%table(i_RT,3)
+     i_spin =RT%table(i_RT,4)
      call RT_lifetime_to_RT_obj(WHAT,i_b,i_k_RT,i_RT,RT_EH_life,RT_EH_ref_life,RT%GAMMA_EH)
      call RT_lifetime_to_RT_obj(WHAT,i_b,i_k_RT,i_RT,RT_EE_life,RT_EE_ref_life,RT%GAMMA_EE)
      call RT_lifetime_to_RT_obj(WHAT,i_b,i_k_RT,i_RT,RT_EP_emit_life,RT_EP_emit_ref_life,RT%GAMMA_EP_emit)
@@ -68,7 +69,7 @@ subroutine RT_carriers_object(RT,E,K,DB_id,WHAT)
      !
      RT%nk=RT_nk
      RT%nb=RT_bands
-     RT%nstates=RT_nk*(RT_bands(2)-RT_bands(1)+1)
+     RT%nstates=RT_nk*(RT_bands(2)-RT_bands(1)+1)*n_sp_pol
      !
      call RT_obj_alloc(RT)
      !
@@ -115,8 +116,8 @@ subroutine RT_carriers_object(RT,E,K,DB_id,WHAT)
        !
        ! Occupations & Energies
        !
-       if (allocated(RT_el_occ%dN)) RT_el_occ%dN(i_b,i_k_RT)      =RT%OCC_el%dN(i_RT)
-       if (allocated(RT_levels%E) ) RT_levels%E(i_b,i_k_RT,i_spin)=RT%delta_E(i_RT)+RT%E_bare(i_RT)
+       if (allocated(RT_el_occ%dN)) RT_el_occ%dN(i_b,i_k_RT,i_spin)=RT%OCC_el%dN(i_RT)
+       if (allocated(RT_levels%E) ) RT_levels%E(i_b,i_k_RT,i_spin) =RT%delta_E(i_RT)+RT%E_bare(i_RT)
        !
      enddo
      !
@@ -131,7 +132,7 @@ subroutine RT_carriers_object(RT,E,K,DB_id,WHAT)
        !
        ! Occupations & Energies
        !
-       RT%OCC_el%dN(i_RT)=RT_el_occ%dN(i_b,i_k_RT)
+       RT%OCC_el%dN(i_RT)=RT_el_occ%dN(i_b,i_k_RT,i_spin)
        RT%delta_E(i_RT)  =RT_levels%E(i_b,i_k_RT,i_spin)-RT%E_bare(i_RT)
        !
        f_occ =RT%OCC_el%N_ref(i_RT)/spin_occ
diff --git a/src/real_time_control/RT_current_AND_polarization.F b/src/real_time_control/RT_current_AND_polarization.F
index c6a039bf83..bf0ec25dc9 100644
--- a/src/real_time_control/RT_current_AND_polarization.F
+++ b/src/real_time_control/RT_current_AND_polarization.F
@@ -23,7 +23,7 @@ subroutine RT_current_AND_polarization(k,E,G_input,A_input,Ncarr)
  use units,          ONLY:SPEED_OF_LIGHT
  use R_lattice,      ONLY:bz_samp
  use D_lattice,      ONLY:DL_vol
- use electrons,      ONLY:nel,levels,n_spin,Spin_magn,Orbital_magn,Itinerant_magn
+ use electrons,      ONLY:nel,levels,n_spin,n_sp_pol,spin_occ,Spin_magn,Orbital_magn,Itinerant_magn
  use real_time,      ONLY:l_NE_dynamics,RT_ind_J,RT_ind_J_prev,RT_bands,&
 &                         RT_P,RT_spin_magn,RT_orbt_magn,RT_P_prev,G_lesser_reference,NE_i_time,&
 &                         RT_dyn_step,l_P_integrating_J,l_velocity_gauge_diam,integrator_step
@@ -37,13 +37,14 @@ subroutine RT_current_AND_polarization(k,E,G_input,A_input,Ncarr)
  !
  type(bz_samp),     intent(in) :: k
  type(levels),      intent(in) :: E
- complex(SP),       intent(in) :: G_input(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
  type(gauge_field), intent(in) :: A_input
- real(SP),          intent(in) :: Ncarr(3)
+ real(SP),     intent(in) :: Ncarr(3)
+ complex(SP),  intent(in) :: G_input(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  !
  ! Work Space
  !
  complex(SP)         :: RT_M_tmp(3)
+ integer             :: nel_effect
  !
  if ( .not. l_NE_dynamics ) return
  !
@@ -64,7 +65,13 @@ subroutine RT_current_AND_polarization(k,E,G_input,A_input,Ncarr)
  !               (it will be computed from the current in the velocity gauge)
  !               The proper value is set in RT_propagate_fields
  !
- if(l_velocity_gauge_diam) RT_ind_J(:)=RT_ind_J(:)+A_input%vecpot(:)*real(nel,SP)/(2._SP*SPEED_OF_LIGHT)
+ if(l_velocity_gauge_diam) then
+   ! The diamagnetic term is needed to obtain a gauge invariant current
+   ! However the below definition of nel_effect would be exact
+   ! only in the limit RT_bands(2) --> \infty
+   nel_effect=nel-(RT_bands(1)-1)*spin_occ
+   RT_ind_J(:)=RT_ind_J(:)+A_input%vecpot(:)*real(nel_effect,SP)/(2._SP*SPEED_OF_LIGHT)
+ endif
  !
  RT_ind_J=RT_ind_J/DL_vol
  !
diff --git a/src/real_time_control/RT_databases_IO.F b/src/real_time_control/RT_databases_IO.F
index 24f4aa0be0..0a938cbac4 100644
--- a/src/real_time_control/RT_databases_IO.F
+++ b/src/real_time_control/RT_databases_IO.F
@@ -8,24 +8,24 @@
 subroutine RT_databases_IO(E,DO_IT)
  !
  use electrons,      ONLY:n_spin,levels
- use RT_control,     ONLY:RT_carriers_object,J_cache,P_cache,Ms_cache,Ml_cache,A_cache,OBS_RT_IO_t,&
+ use RT_control,     ONLY:RT_carriers_object,J_cache,P_cache,Ms_cache,Ml_cache,A_cache,OBS_RT_IO_t,Vbands_RT_IO_t,&
 &                         Gless_RESTART_RT_IO_t,CARR_RT_IO_t,CACHE_OBS_steps_now,CACHE_OBS_steps,    &
 &                         SAVE_G_history,NE_i_start_time,CACHE_OBS_last_point,Gless_RT_IO_t,RT_DB_ID,&
-&                         RT_return_db_ID,RT_DB_name
+&                         RT_return_db_ID,RT_DB_name,SAVE_Vb_history, SAVE_Vb_floquet, Vbands_IO_time
 #if defined _ELPH_ITERATIVE
  use RT_lifetimes,   ONLY:l_RT_iterative_ELPH_lifetimes
 #endif
  use real_time,      ONLY:NE_i_time,NE_steps,RT_nk,RT_ind_J,RT_P,RT_spin_magn,RT_orbt_magn,&
 &                         l_NE_with_fields,RT_step,RT_dyn_step,NE_time,RT_carriers,G_MEM_steps
  use IO_int,         ONLY:io_control,IO_and_Messaging_switch
- use IO_m,           ONLY:manage_action,NONE,io_GF,io_CARRIERs,io_OBSERVABLES,OP_APP_CL,OP_WR_CL,&
+ use IO_m,           ONLY:manage_action,NONE,io_GF,io_CARRIERs,io_OBSERVABLES,io_V_bands,OP_APP_CL,OP_WR_CL,&
 &                         OP_IF_START_APP_CL_IF_END,WR_CL_IF_END,OP_APP,WR_CL,WR,OP_APP,OP_WR
  use parallel_m,     ONLY:PAR_G_k_range,myid,HEAD_k_cpu
  use parallel_int,   ONLY:PP_wait,PP_redux_wait,PP_bcast
  use timing_m,       ONLY:timing
  use fields,         ONLY:A_tot
 #if defined _PAR_IO
- use parallel_m,     ONLY:PAR_COM_Xk_ibz_INDEX
+ use parallel_m,     ONLY:PAR_COM_Xk_ibz_INDEX,master_cpu
 #else
  use real_time,      ONLY:RTibz
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,ncpu,myid
@@ -37,17 +37,18 @@ subroutine RT_databases_IO(E,DO_IT)
  implicit none
  !
  type(levels)      :: E
- logical           :: DO_IT
+ logical           :: DO_IT 
  !
  ! Work Space
  !
  integer           :: i_mem,io_OBS,io_carr,IO_ACT,i_time,i_db
  integer, external :: io_RT_components
- logical, external :: RT_do_it
+ logical, external :: RT_do_it,RT_Dynamics_is_over
  !
  ! ... G_lesser
  !
- integer           :: ik,G_IO_steps,io_G,i_cpu,i_G_db,i_G_k_db,i_G_RES_db,i_G_RES_k_db
+ integer           :: ik,G_IO_steps,io_G,io_Vb,i_cpu,i_G_db,i_G_k_db,i_G_RES_db,i_G_RES_k_db,idb_Vb,idb_Vb_k
+ logical           :: Start_sampling 
 #if defined _ELPH_ITERATIVE
  integer           :: io_THETA
 #endif
@@ -253,8 +254,64 @@ subroutine RT_databases_IO(E,DO_IT)
    !
  endif
  !
+#if defined _NL
+ !
+ ! V_bands
+ !====================
+ if (io_V_bands) then
+   !
+#if !defined _PAR_IO
+   call error(" Vbands restart not coded for serial I/O")
+#endif
+   !
+   idb_Vb     =RT_return_db_ID("V_bands")
+   idb_Vb_k   =RT_return_db_ID("V_bands_K_section")
+   !
+   ! Serial SECTION 1 
+   !
+   if(NE_i_time==NE_i_start_time) then
+     if (NE_i_time==1) IO_ACT=OP_WR_CL
+     if (NE_i_time> 1) IO_ACT=OP_APP_CL
+     call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/1/),ID=RT_DB_ID(idb_Vb))
+     io_Vb=io_RT_components("V_bands",RT_DB_ID(idb_Vb))
+   endif
+   !
+   Start_sampling = .true.
+   if (SAVE_Vb_floquet) then 
+     if (NE_i_time < Vbands_IO_time(1).or.NE_i_time > Vbands_IO_time(2))  Start_sampling = .false. 
+     if (NE_i_time==NE_i_start_time.or.RT_Dynamics_is_over()) Start_sampling = .true.
+     if (NE_i_time ==  Vbands_IO_time(1)) Vbands_RT_IO_t%last_point =  Vbands_IO_time(1) - Vbands_RT_IO_t%INTERVAL_steps
+   endif
+   !
+   if ((RT_do_it('Vb').or.DO_IT) .and.Start_sampling) then
+     !
+     if ((.not.SAVE_Vb_history).and.(.not.SAVE_Vb_floquet)) Vbands_RT_IO_t%N=1
+     if (SAVE_Vb_history.or.SAVE_Vb_floquet) Vbands_RT_IO_t%N=Vbands_RT_IO_t%N+1 
+     !
+     Vbands_RT_IO_t%Time(Vbands_RT_IO_t%N)=NE_time
+     !
+     IO_ACT=manage_action(OP_IF_START_APP_CL_IF_END,NE_i_time,NE_i_start_time,NE_steps)
+     !
+     ! SERIAL SECTION 2 
+     !
+     call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/2/),ID=RT_DB_ID(idb_Vb))
+     io_Vb=io_RT_components("V_bands",RT_DB_ID(idb_Vb))
+     !
+     ! (PARALLEL) k-SECTION 3
+     !
+     !call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/3/),ID=RT_DB_ID(idb_Vb_k),COMM=PAR_COM_Xk_ibz_INDEX,DO_IT=HEAD_k_CPU)
+     call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/3/),ID=RT_DB_ID(idb_Vb_k))
+     io_Vb=io_RT_components("V_bands_K_section",RT_DB_ID(idb_Vb_k))
+     !
+   endif
+   !
+ endif
+ !
+#endif
+ !
  if (RT_do_it('OBS').or.DO_IT ) call PP_bcast( OBS_RT_IO_t%last_point,0)
  if (RT_do_it('CARR').or.DO_IT) call PP_bcast(CARR_RT_IO_t%last_point,0)
+ if (RT_do_it('Vb').or.DO_IT)   call PP_bcast(Vbands_RT_IO_t%last_point,0)
  if (RT_do_it('G').or.DO_IT)    call PP_bcast(Gless_RESTART_RT_IO_t%last_point,0)
  if (RT_do_it('G').or.DO_IT)    call PP_bcast(Gless_RT_IO_t%last_point,0)
  !
diff --git a/src/real_time_control/RT_do_it.F b/src/real_time_control/RT_do_it.F
index a65deb92ed..6fab85cabe 100644
--- a/src/real_time_control/RT_do_it.F
+++ b/src/real_time_control/RT_do_it.F
@@ -7,7 +7,7 @@
 !
 logical function RT_do_it(what)
  !
- use RT_control,     ONLY:OBS_RT_IO_t,Gless_RESTART_RT_IO_t,CARR_RT_IO_t,CACHE_OBS_INTERVAL_time,&
+ use RT_control,     ONLY:OBS_RT_IO_t,Gless_RESTART_RT_IO_t,Vbands_RT_IO_t,CARR_RT_IO_t,CACHE_OBS_INTERVAL_time,&
 &                         CACHE_OBS_last_point,OUTPUT_RT_IO_t,NE_i_start_time,STOP_the_dynamics_NOW
  use real_time,      ONLY:NE_i_time,RT_step,NE_time_step_update_jump,NE_time_step_update_last_point
  !
@@ -41,6 +41,10 @@ logical function RT_do_it(what)
    INTERVAL_steps=Gless_RESTART_RT_IO_t%INTERVAL_steps
    last_point    =Gless_RESTART_RT_IO_t%last_point
  endif
+ if (what=="Vb") then
+   INTERVAL_steps=Vbands_RT_IO_t%INTERVAL_steps
+   last_point    =Vbands_RT_IO_t%last_point
+ endif
  if (what=="OBS_cache") then
    INTERVAL_steps=nint(CACHE_OBS_INTERVAL_time/RT_step)
    last_point    =CACHE_OBS_last_point
@@ -58,7 +62,7 @@ logical function RT_do_it(what)
  !
  ! Do we STOP now ?
  !==================
- if (what=="G".and.RT_do_it) then
+ if ((what=="G".or.what=="Vb").and.RT_do_it) then
    STOP_the_dynamics_NOW=STOP_now(.FALSE.)
  endif
  !
diff --git a/src/real_time_control/RT_energy_components.F b/src/real_time_control/RT_energy_components.F
index f37fcf6c83..f139d1a8c5 100644
--- a/src/real_time_control/RT_energy_components.F
+++ b/src/real_time_control/RT_energy_components.F
@@ -10,7 +10,7 @@ subroutine RT_energy_components(en,k,dG_lesser,Energies,dE_IP,what)
  use pars,           ONLY:SP,cZERO
  use DIPOLES,        ONLY:P_square
  use R_lattice,      ONLY:bz_samp
- use electrons,      ONLY:levels,n_spin
+ use electrons,      ONLY:levels,n_spin,n_sp_pol
  use hamiltonian,    ONLY:V_hartree_sc,Hzero
  use units,          ONLY:HA2EV
  use FFT_m,          ONLY:fft_size
@@ -26,12 +26,12 @@ subroutine RT_energy_components(en,k,dG_lesser,Energies,dE_IP,what)
  !
  type(levels),  intent(in) :: en
  type(bz_samp), intent(in) :: k
- complex(SP),   intent(in) :: dG_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP),   intent(in) :: dG_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  real(SP),      intent(out):: Energies(5),dE_IP
  character(3),  intent(in) :: what
  !
  real(SP) , allocatable    :: E_xc_rt(:,:)
- complex(SP) :: G_full(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2)), &
+ complex(SP) :: G_full(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol), &
 &               dE_tot,dE_kin,dE_ion,dE_h,dE_xc,dE_hxc,dE_h_sc,E_xc_sc,E_xc_ref,dE_xc_sc,dE_hxc_sc
  !
  if(.not.eval_energy) return
@@ -51,7 +51,7 @@ subroutine RT_energy_components(en,k,dG_lesser,Energies,dE_IP,what)
  dE_hxc_sc =cZERO
  !
  if(     trim(what)=='el') G_full=dG_lesser
- if(.not.trim(what)=='el') G_full=dG_lesser+G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2))
+ if(.not.trim(what)=='el') G_full=dG_lesser+G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2),:)
  !
  ! Non-interacting energy variation
  !
diff --git a/src/real_time_control/RT_entropy.F b/src/real_time_control/RT_entropy.F
index 5f54f0c15f..91ab0b5a75 100644
--- a/src/real_time_control/RT_entropy.F
+++ b/src/real_time_control/RT_entropy.F
@@ -10,7 +10,7 @@ subroutine RT_entropy(en,k,dG_lesser,Entr,TEntr,EnEntr,E_IP)
  use pars,           ONLY:SP,cZERO,rZERO
  use R_lattice,      ONLY:bz_samp
  use units,          ONLY:HA2KEL
- use electrons,      ONLY:levels,spin_occ
+ use electrons,      ONLY:levels,spin_occ,n_sp_pol
  use RT_occupations, ONLY:RT_el_occ,RT_ho_occ
  use real_time,      ONLY:RT_bands,eval_entropy,RT_levels,RTibz
  use parallel_m,     ONLY:PAR_G_k_range
@@ -19,18 +19,18 @@ subroutine RT_entropy(en,k,dG_lesser,Entr,TEntr,EnEntr,E_IP)
  !
  type(levels),  intent(in) :: en
  type(bz_samp), intent(in) :: k
- complex(SP),   intent(in) :: dG_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP),   intent(in) :: dG_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  real(SP),      intent(in) :: E_IP
  real(SP),     intent(out) :: TEntr(3)
  real(SP),   intent(inout) :: Entr(3),EnEntr(3)
  !
  ! Workspace
  !
- integer     :: ib1,ib2,ik,ik_RT,i1
+ integer     :: ib1,ib2,ik,ik_RT,i1,i_sp
  real(SP)    :: Energies_e(5),Energies_h(5),tmp_E(2),Entr_prev(3),EnEntr_prev(3),   &
 &               S_e,S_h,VALUE_tmp(3),one_over_T
- complex(SP) :: dG_lesser_h(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2)), &
-&               dG_lesser_e(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP) :: dG_lesser_h(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol), &
+&               dG_lesser_e(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  !
  TEntr=rZERO
  Entr=rZERO
@@ -48,17 +48,19 @@ subroutine RT_entropy(en,k,dG_lesser,Entr,TEntr,EnEntr,E_IP)
  dG_lesser_h=cZERO
  dG_lesser_e=cZERO
  !
+ do i_sp=1,n_sp_pol
  do ik_RT=PAR_G_k_range(1),PAR_G_k_range(2)
    do ib1=RT_bands(1),RT_bands(2)
      do ib2=RT_bands(1),RT_bands(2)
-       if ( RT_levels%E(ib1,ik_RT,1)<=0._SP .and. RT_levels%E(ib2,ik_RT,1)<=0._SP ) then
-         dG_lesser_h(ib1,ib2,ik_RT)=dG_lesser(ib1,ib2,ik_RT)
-       else if ( RT_levels%E(ib1,ik_RT,1)> 0._SP .and. RT_levels%E(ib2,ik_RT,1)> 0._SP ) then
-         dG_lesser_e(ib1,ib2,ik_RT)=dG_lesser(ib1,ib2,ik_RT)
+       if ( RT_levels%E(ib1,ik_RT,i_sp)<=0._SP .and. RT_levels%E(ib2,ik_RT,i_sp)<=0._SP ) then
+         dG_lesser_h(ib1,ib2,ik_RT,i_sp)=dG_lesser(ib1,ib2,ik_RT,i_sp)
+       else if ( RT_levels%E(ib1,ik_RT,i_sp)> 0._SP .and. RT_levels%E(ib2,ik_RT,i_sp)> 0._SP ) then
+         dG_lesser_e(ib1,ib2,ik_RT,i_sp)=dG_lesser(ib1,ib2,ik_RT,i_sp)
        endif
      enddo
    enddo
  enddo
+ enddo
  !
  !
  ! Compute IP energy
@@ -71,22 +73,24 @@ subroutine RT_entropy(en,k,dG_lesser,Entr,TEntr,EnEntr,E_IP)
  !
  Entr= 0._SP
  !
+ do i_sp=1,n_sp_pol
  do ik=1,en%nk
    do ib1=RT_bands(1),RT_bands(2)
      VALUE_tmp=0._SP
      do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
        S_h=0._SP
        S_e=0._SP
-       if(RT_ho_occ%N(ib1,ik_RT)>0._SP) S_h=-RT_ho_occ%N(ib1,ik_RT)*log(RT_ho_occ%N(ib1,ik_RT)/spin_occ)
-       if(RT_el_occ%N(ib1,ik_RT)>0._SP) S_e=-RT_el_occ%N(ib1,ik_RT)*log(RT_el_occ%N(ib1,ik_RT)/spin_occ)
+       if(RT_ho_occ%N(ib1,ik_RT,i_sp)>0._SP) S_h=-RT_ho_occ%N(ib1,ik_RT,i_sp)*log(RT_ho_occ%N(ib1,ik_RT,i_sp)/spin_occ)
+       if(RT_el_occ%N(ib1,ik_RT,i_sp)>0._SP) S_e=-RT_el_occ%N(ib1,ik_RT,i_sp)*log(RT_el_occ%N(ib1,ik_RT,i_sp)/spin_occ)
        VALUE_tmp(1)=VALUE_tmp(1)+S_e+S_h
-       if ( RT_levels%E(ib1,ik_RT,1)<=0._SP ) VALUE_tmp(2)=VALUE_tmp(2)+S_h
-       if ( RT_levels%E(ib1,ik_RT,1)> 0._SP ) VALUE_tmp(3)=VALUE_tmp(3)+S_e
+       if ( RT_levels%E(ib1,ik_RT,i_sp)<=0._SP ) VALUE_tmp(2)=VALUE_tmp(2)+S_h
+       if ( RT_levels%E(ib1,ik_RT,i_sp)> 0._SP ) VALUE_tmp(3)=VALUE_tmp(3)+S_e
        VALUE_tmp=VALUE_tmp*RTibz%weights(ik_RT)
      enddo
      Entr=Entr+VALUE_tmp
    enddo
  enddo
+ enddo
  !
  ! Temperature derived from Entropy to Energy variation ratio
  !
diff --git a/src/real_time_control/RT_field_from_file.F b/src/real_time_control/RT_field_from_file.F
new file mode 100644
index 0000000000..d5a83a9bcc
--- /dev/null
+++ b/src/real_time_control/RT_field_from_file.F
@@ -0,0 +1,155 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!            http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): DS
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine RT_load_field_from_file_init(filename1,filename2,Integr_nfrac)
+ !
+ use pars,       ONLY:SP
+ use com,        ONLY:com_path
+ use fields,     ONLY:field_from_file_steps,field_from_file_dt
+ use real_time,  ONLY:RT_step
+ !
+ implicit none
+ !
+ character(*), intent(in)   :: filename1,filename2
+ integer,      intent(in)   :: Integr_nfrac
+ !
+ integer :: n_time_steps1,n_time_steps2,ndt1
+ real(SP):: dt1,dt2
+ !
+ call warning(" Loading Field from file: "//trim(filename1)//" [experimental]")
+ !
+ open(unit=99,file=trim(filename1))
+ read(99,*) n_time_steps1,dt1
+ close(99)
+ if (filename2/="") then
+   open(unit=99,file=trim(filename2))
+   read(99,*) n_time_steps2,dt2
+   close(99)
+   if(n_time_steps1/=n_time_steps2) call error(" Field from file, polarizations with different dimensions")
+   if(abs(dt1-dt2)/(dt1+dt2)>1.E-4) call error(" Field from file, polarizations with different time step")
+ endif
+ field_from_file_steps=max(field_from_file_steps,n_time_steps1)
+ if (field_from_file_dt> 0._SP) then
+   if(abs(field_from_file_dt-dt1)/(field_from_file_dt+dt1)>1.E-5) &
+   &  call error(" Multiple fields from file must have the same time step")
+ endif
+ if (field_from_file_dt==0._SP) field_from_file_dt=dt1
+ !
+ dt1=RT_step/field_from_file_dt
+ if (abs(dt1-real(nint(dt1),SP))>1.E-5) call error(" Field time step non consistent with time step")
+ if (mod(nint(dt1),Integr_nfrac)/=0) call error("Field time step not consistent with integrator")
+ !
+end subroutine RT_load_field_from_file_init
+!
+!
+subroutine RT_load_field_from_file(filename1,filename2,i_field)
+ !
+ use real_time,  ONLY:RT_step
+ use com,        ONLY:com_path
+ use pars,       ONLY:n_ext_fields_max,SP
+ use fields,     ONLY:Efield,field_from_file,field_from_file_fname, &
+ &                    field_from_file_steps,field_from_file_dt
+ !
+ implicit none
+ !
+ character(*), intent(in)   :: filename1,filename2
+ integer,      intent(in)   :: i_field
+ !
+ integer  :: i1,i_field_file,n_time_steps1,n_time_steps2,idata,i_pol1_data,i_pol2_data
+ real(SP) :: tmp_data(4,2),dt1,dt2
+ !
+ i_field_file=-1
+ do i1=1,n_ext_fields_max
+   if ( field_from_file_fname(i1)/="" ) cycle
+   i_field_file=i1
+   exit
+ enddo
+ !
+ field_from_file_fname(i_field_file)=filename1
+ tmp_data=0._SP
+ !
+ if(i_field_file==1) then
+   allocate(field_from_file(field_from_file_steps,7,n_ext_fields_max))
+   field_from_file=0._SP
+ endif
+ open(unit= 99,file=trim(filename1))
+ read( 99,*) n_time_steps1,dt1
+ if (filename2/="") then
+   open(unit=100,file=trim(filename2))
+   read(100,*) n_time_steps2,dt2
+ endif
+ do i1=1,n_time_steps1
+   read( 99,*) tmp_data(:,1)
+   if (filename2/="") read(100,*) tmp_data(:,2)
+   field_from_file(i1,1,i_field_file)=tmp_data(1,1)
+   do idata=2,4
+     i_pol1_data=2*idata-2
+     i_pol2_data=2*idata-1
+     field_from_file(i1,i_pol1_data,i_field_file)=tmp_data(idata,1)
+     field_from_file(i1,i_pol2_data,i_field_file)=tmp_data(idata,2)
+   enddo
+ enddo
+ close( 99)
+ if (filename2/="") close(100)
+ !
+ i1=nint(RT_step/field_from_file_dt)
+ Efield(i_field)%t_final_indx=(n_time_steps1/i1-4)+Efield(i_field)%t_initial_indx-1
+ Efield(i_field)%t_final=RT_step*(Efield(i_field)%t_final_indx-1)
+ !
+end subroutine RT_load_field_from_file
+!
+!
+subroutine RT_print_field_to_file_init(i_field)
+ !
+ ! I think this subroutine is not used
+ !
+ use stderr,         ONLY:intc
+ use com,            ONLY:com_path
+ use pars,           ONLY:schlen,SP
+ use fields,         ONLY:Efield
+ !
+ implicit none
+ !
+ integer, intent(in)   :: i_field
+ !
+ integer :: n_time_steps
+ real(SP):: dt
+ character(schlen) :: filename1,filename2
+ !
+ filename1="RT_EXTFIELD"//trim(intc(i_field))//"_P1.time"
+ filename2=""
+ if ( trim(Efield(i_field)%ef_pol) == "circular" ) filename2="RT_EXTFIELD"//trim(intc(i_field))//"_P2.time"
+ !
+ n_time_steps=(Efield(i_field)%t_final_indx-Efield(i_field)%t_initial_indx+1)*2+8
+ ! To be fixed !
+ dt=0._SP
+ !
+ open(unit=100+i_field,file=trim(com_path)//"/"//trim(filename1))
+ write(100+i_field,*) n_time_steps,dt
+ !
+ if (filename2/="") then
+   open(unit=200+i_field,file=trim(com_path)//"/"//trim(filename2))
+   write(200+i_field,*) n_time_steps,dt
+ endif
+ !
+end subroutine RT_print_field_to_file_init
diff --git a/src/real_time_control/RT_free.F b/src/real_time_control/RT_free.F
index 0a2ed16823..ea37a6ac22 100644
--- a/src/real_time_control/RT_free.F
+++ b/src/real_time_control/RT_free.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_free( )
  !
  use R_lattice,      ONLY:bz_samp_FineGd_reset
@@ -15,8 +19,8 @@ subroutine RT_free( )
 #endif
  use hamiltonian,    ONLY:H_free
  use xc_functionals, ONLY:E_xc
- use real_time,      ONLY:DbGd_EE_scatt_table,RT_TIME_status,REF_V_xc_sc,&
-&                         Ho_plus_Sigma,H_EQ,H_pseudo_eq,RT_Vnl_xc,HARTREE0,XC0,G_lesser_reference,&
+ use real_time,      ONLY:DbGd_EE_scatt_table,RT_TIME_status,REF_V_xc_sc,dG_avg,&
+&                         Ho_plus_Sigma,H_EQ,H_field,H_pseudo_eq,RT_Vnl_xc,HARTREE0,XC0,G_lesser_reference,&
 &                         dG_lesser,G_lesser,MEM_pointer,REF_V_xc_sc,REF_V_hartree_sc,rho_reference,&
 &                         magn_reference,DbGd_EE_scatt_table,RT_TIME_status,REF_V_xc_sc,&
 &                         RT_levels,I1_matrix,RT_Deph_Matrix,RTibz
@@ -25,8 +29,9 @@ subroutine RT_free( )
  use RT_iterative,   ONLY:THETA_matrix,THETA_matrix_index
 #endif
  use RT_lifetimes,   ONLY:RT_ALL_lifetimes_free,q_weight_RT
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  call H_free()
  !
@@ -46,8 +51,10 @@ subroutine RT_free( )
  !
  YAMBO_FREE(Ho_plus_Sigma)
  YAMBO_FREE(H_EQ)
+ YAMBO_FREE(H_field)
  YAMBO_FREE(H_pseudo_eq)
  YAMBO_FREE(RT_Vnl_xc)
+ YAMBO_FREE(dG_avg)
  !
  YAMBO_FREE(HARTREE0)
  YAMBO_FREE(XC0)
diff --git a/src/real_time_control/RT_interpolation_driver.F b/src/real_time_control/RT_interpolation_driver.F
index 851626749b..07053e1526 100644
--- a/src/real_time_control/RT_interpolation_driver.F
+++ b/src/real_time_control/RT_interpolation_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_interpolation_driver(bands,E,K,what,Time,l_f,l_E,l_W,l_B,NN,VERB)
  !
  use pars,           ONLY:SP,schlen
@@ -15,8 +19,9 @@ subroutine RT_interpolation_driver(bands,E,K,what,Time,l_f,l_E,l_W,l_B,NN,VERB)
  use real_time,      ONLY:RT_carriers,l_TRabs_YPP
  use com,            ONLY:msg
  use interpolate,    ONLY:l_integral_respect,NN_n_of_nearest
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)    :: bands(2),NN
  type(levels),  intent(inout) :: E
diff --git a/src/real_time_control/RT_k_grid.F b/src/real_time_control/RT_k_grid.F
index a67c3fb178..361c4cd9b7 100644
--- a/src/real_time_control/RT_k_grid.F
+++ b/src/real_time_control/RT_k_grid.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_k_grid(E,k,q)
  !
  use pars,           ONLY:SP
@@ -17,8 +21,9 @@ subroutine RT_k_grid(E,k,q)
  use R_lattice,      ONLY:bz_samp,bz_samp_FineGd_alloc,bz_samp_duplicate_Fg,&
 &                         bz_samp_FineGd_default
  use parallel_m,     ONLY:PAR_IND_Xk_ibz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(in)    :: E
  type(bz_samp),intent(in)    :: k
diff --git a/src/real_time_control/RT_output.F b/src/real_time_control/RT_output.F
index 1bd1c9825a..2d6db4477e 100644
--- a/src/real_time_control/RT_output.F
+++ b/src/real_time_control/RT_output.F
@@ -176,6 +176,8 @@ subroutine RT_output(what,VALUES,TIME,E)
      case('total_field')
        data_to_dump(2:4) = -real(A_tot%vecpot_vel(:))/SPEED_OF_LIGHT*AU2VMm1
        data_to_dump(5:7) =-aimag(A_tot%vecpot_vel(:))/SPEED_OF_LIGHT*AU2VMm1
+     case('YPP-Field')
+       data_to_dump(2) = -real(A_ext%vecpot_vel(1))*AU2VMm1 !/SPEED_OF_LIGHT
      case('external_field')
        data_to_dump(2:4) = -real(A_ext%vecpot_vel(:))/SPEED_OF_LIGHT*AU2VMm1
        data_to_dump(5:7) =-aimag(A_ext%vecpot_vel(:))/SPEED_OF_LIGHT*AU2VMm1
diff --git a/src/real_time_control/RT_output_INIT.F b/src/real_time_control/RT_output_INIT.F
index 28b046518f..ca3fe51de0 100644
--- a/src/real_time_control/RT_output_INIT.F
+++ b/src/real_time_control/RT_output_INIT.F
@@ -43,6 +43,7 @@ subroutine RT_output_INIT( )
  call RT_FILE_add(name='probe-polarization',TITLES=(/"Time[fs]","Pol_x   ","Pol_y   ","Pol_z   "/))
  call RT_FILE_add(name='pump-current',TITLES=(/"Time[fs]","j_x     ","j_y     ","j_z     "/))
  call RT_FILE_add(name='pump-polarization',TITLES=(/"Time[fs]","Pol_x   ","Pol_y   ","Pol_z   "/))
+ call RT_FILE_add(name='Field',TITLES=(/"Time[fs]","  E(t)  "/))
  !
  do i_fp=1,3
    if (i_fp==1) dumb_ch="external_field"
diff --git a/src/real_time_control/RT_output_and_IO_driver.F b/src/real_time_control/RT_output_and_IO_driver.F
index d6482a0585..8af05497f1 100644
--- a/src/real_time_control/RT_output_and_IO_driver.F
+++ b/src/real_time_control/RT_output_and_IO_driver.F
@@ -58,6 +58,7 @@ subroutine RT_output_and_IO_driver(what,E,k,OBSERVABLES)
        if(l_RT_induced_field)  call RT_output(what=trim(what)//" tot ind")
      endif
      !
+     !
      if(trim(what)=="close") then
        do i_ID=1,N_RT_databases
          if (RT_DB_ID(i_ID)==0) cycle
diff --git a/src/real_time_control/RT_project.dep b/src/real_time_control/RT_project.dep
index db06478179..07a445dff3 100644
--- a/src/real_time_control/RT_project.dep
+++ b/src/real_time_control/RT_project.dep
@@ -18,6 +18,7 @@
  RT_do_it.o
  RT_energy_components.o
  RT_entropy.o
+ RT_field_from_file.o
  RT_free.o
  RT_interpolation_doit_and_transfer.o
  RT_interpolation_driver.o
@@ -27,5 +28,6 @@
  RT_output.o
  RT_output_INIT.o
  RT_output_and_IO_driver.o
+ RT_small_a.o
  RT_update_TIME_CONF_descriptions.o
 
diff --git a/src/real_time_control/RT_small_a.F b/src/real_time_control/RT_small_a.F
new file mode 100644
index 0000000000..c2f761ca78
--- /dev/null
+++ b/src/real_time_control/RT_small_a.F
@@ -0,0 +1,383 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2006 The Yambo Team
+!
+! Authors (see AUTHORS file for details): AM CA DS
+!
+! External fields:
+!
+! SIN:      E(t)=\theta(t) * sin(t)
+! SOFTSIN:  E(t)= (c*t^2 + b*t^3 + a*t^4 )* sin(t)  and 0 for t<=0
+! DELTA:    E(t)= \delta(t)
+! GAUSS:    E(t)= exp((t-t_o)^2/(2*sigma^2))
+! THETA:    E(t)= \theta(t)
+! RES:      E(t)= \theta(t)*exp(-i\omega t)
+! ANTIRES:  E(t)= \theta(t)*exp(i\omega t)
+! PULSE:    E(t)=(T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4 * cos(w t)
+! QSSIN:    E(t)= exp((t-t_o)^2/(2*sigma^2))*sin(w t)
+! SPULSE:   E(t)=((T-T_0)**2 - sigma**2)**2/sigma**4*cos(w t)
+! PHHG:     E(t)=sin^2(\pi*(T-T_0)/sigma)*cos( w * t) 
+! QSFIELD:  see below
+! FROMFILE: shape of the electric field from file
+!
+! Linear frequency chirp coded for QSSIN pulse: 
+! (see also https://www.rp-photonics.com/chirp.html)
+!
+!    s_sigma_chirp=sigma**2/(2.*(sigma**4+chirp**4))
+!    c_sigma_chirp=chirp**2/(2.*(sigma**4+chirp**4))
+!    sigma_eff=sqrt((sigma**4+chirp**4)/sigma**2)=sqrt(1./(2.*s_sigma_chirp))
+!
+!    sin(omega*(T-T0)+c_sigma_chirp*(T-T_0)**2) * exp(-(T-T_0)**2/(2.*sigma_eff**2) )
+!
+!   = [ exp( cI*(omega*(T-T0))*exp( cI*c_sigma_chirp*(T-T_0)**2) 
+!      -exp(-cI*(omega*(T-T0))*exp(-cI*c_sigma_chirp*(T-T_0)**2) ]
+!      * exp(-(T-T_0)**2 * s_sigma_chirp ) / 2.
+!
+!   = [ exp( cI*(omega*(T-T0))* exp( (s_sigma_chirp+cI*c_sigma_chirp) *(T-T_0)**2)
+!     - exp(-cI*(omega*(T-T0))* exp( (s_sigma_chirp-cI*c_sigma_chirp) *(T-T_0)**2) ]
+!
+!      
+function small_a(T,dt,E_field,order,envelop_only)
+  !
+  ! The vector potential is generally written as
+  !
+  !  order=0  A (t)=-cEo  a (t) theta(t)
+  !  order=1  A'(t)=-cEo (a'(t) theta(t)+a (t) delta(t))
+  !  order=2  A"(t)=-cEo (a"(t) theta(t)+a'(t) delta(t)-a(t) sign(t))
+  !
+  ! the functions theta,delta and sign can be the standard distributions
+  ! or more fancy functions that can mimic the distributions.
+  !
+  ! Note that A is evolved using A''(t) starting from A(0) and A'(0). 
+  !
+  use X_m,       ONLY:global_gauge
+  use pars,      ONLY:SP,cI,cONE,cZERO,pi,schlen
+  use units,     ONLY:FS2AUT
+  use stderr,    ONLY:STRING_split
+  use wrapper,   ONLY:FADEVA
+  use functions, ONLY:theta_function
+  use fields,    ONLY:ext_field,get_field_file_index,field_from_file_dt,&
+  &                   field_from_file,n_fields_defs_max
+  !
+  implicit none
+  !
+  complex(SP), dimension(2) :: small_a
+  !
+  type(ext_field), intent(inout)   ::E_field
+  !
+  real(SP), intent(in)  ::T,dt
+  integer,  intent(in)  ::order
+  logical,  intent(in), optional :: envelop_only
+  !
+  ! Workspace
+  !
+  real(SP)          ::damp_func,a,b,c,sarg,WT,Tl
+  !
+  integer           ::i1,i_fr,i_T,i_field,i_file,n_fields
+  real(SP)          ::Tloc,T_0_fac,T_0,W_0,sigma,chirp,sigma_eff,s_sigma_chirp,c_sigma_chirp,der_fac, &
+  &                   WtimesT,EXPf,fr_shift(2),W_field,W_field_m1,cos_wt,sin_wt
+  complex(SP)       ::f_now,f_t(2),f0t,f1t,cZ,cS,exp_iwt,erfi_z
+  character(schlen) ::field_defs(n_fields_defs_max)
+  logical           ::envelop_only_
+  !
+  ! Zeroing
+  !
+  small_a=cZERO
+  f_t=cZERO
+  damp_func=1._SP
+  !
+  envelop_only_=.false.
+  if(present(envelop_only)) envelop_only_=envelop_only
+  !
+  if(trim(E_field%ef_pol)=="linear"  ) n_fields=1
+  if(trim(E_field%ef_pol)=="circular") n_fields=2
+  !
+  ! Field parameters
+  !
+  sigma=E_field%width
+  chirp=E_field%chirp
+  !
+  s_sigma_chirp=cZERO
+  c_sigma_chirp=cZERO
+  !
+  if (abs(sigma)>0._SP .or. abs(chirp)>0._SP) then
+    s_sigma_chirp=sigma**2/(2._SP*(sigma**4+chirp**4))
+    c_sigma_chirp=chirp**2/(2._SP*(sigma**4+chirp**4))
+  endif
+  !
+  sigma_eff=sqrt((sigma**4+chirp**4)/sigma**2)
+  !
+  field_defs=""
+  call STRING_split(trim(E_field%ef_name),field_defs)
+  !
+  Tloc=T
+  if( index(field_defs(1),"RECT")>0 .and. order==0 .and. abs(T)>=sigma) Tloc=sigma
+  !
+  select case( trim(field_defs(1)) )
+    case('STATIC','RECT','RECTSIN','SIN','DELTA')
+      ! Fields which do not need T_0
+      W_0=0._SP
+      T_0=0._SP
+      damp_func=1._SP
+      ! DEBUG <
+      ! damping for sin 
+      !if (sigma>0._SP) then
+      !  T_0=5*sigma
+      !  damp_func=1._SP
+      !  if (T<T_0)             damp_func=exp( (T-T_0)/sigma)
+      !  if (T>NE_tot_time-T_0) damp_func=exp(-(T-NE_tot_time+T_0)/sigma)
+      !endif
+      ! DEBUG >
+    case('FROM_FILE')
+      i_file=get_field_file_index(field_defs(2))
+      T_0=field_from_file(1,1,i_file)*FS2AUT
+      W_0=0._SP
+      damp_func=1._SP
+    case('SOFTSIN','THETA')
+      ! Fields which do not need T_0 and with damp_func
+      W_0=0._SP
+      T_0=0._SP
+      a =  3._SP/sigma**4 
+      b = -8._SP/sigma**3
+      c =  6._SP/sigma**2
+      damp_func=1._SP
+      if(real(T)<sigma.and.sigma>0._SP) damp_func=(a*T**4 + b*T**3 + c*T**2) 
+    case('GAUSS','QSSIN','QSFIELD','PULSE','SPULSE')
+      ! Fields which need T_0
+      W_0=E_field%frequency
+      T_0_fac=3._SP*sigma_eff
+      if(trim(field_defs(2))=="1SIGMA" .or. trim(field_defs(3))=="1SIGMA") T_0_fac=1._SP*sigma_eff
+      if(trim(field_defs(2))=="2SIGMA" .or. trim(field_defs(3))=="2SIGMA") T_0_fac=2._SP*sigma_eff
+      if(trim(field_defs(2))=="3SIGMA" .or. trim(field_defs(3))=="3SIGMA") T_0_fac=3._SP*sigma_eff
+      if(trim(field_defs(2))=="4SIGMA" .or. trim(field_defs(3))=="4SIGMA") T_0_fac=4._SP*sigma_eff
+      if(trim(field_defs(2))=="5SIGMA" .or. trim(field_defs(3))=="5SIGMA") T_0_fac=5._SP*sigma_eff
+      T_0=pi/W_0*(real(nint(W_0/pi*T_0_fac),SP))
+      if( index(field_defs(1),"PULSE")>0.or.index(field_defs(1),"GAUSS")>0 ) T_0=T_0_fac
+  end select
+  !
+  ! Initial and relative phases control
+  fr_shift(1)=0._SP
+  fr_shift(2)=pi/2._SP
+  do i1=1,n_fields_defs_max
+    if( trim(field_defs(i1))=="PHPI180") fr_shift(:)=fr_shift(:)+pi             ! 180 deg
+    if( trim(field_defs(i1))=="PHPI120") fr_shift(:)=fr_shift(:)+pi*2._SP/3._SP ! 120 deg
+    if( trim(field_defs(i1))=="PHPI90")  fr_shift(:)=fr_shift(:)+pi/2._SP       ! 90 deg
+    if( trim(field_defs(i1))=="PHPI60")  fr_shift(:)=fr_shift(:)+pi/3._SP       ! 60 deg
+    if( trim(field_defs(i1))=="PHPI30")  fr_shift(:)=fr_shift(:)+pi/6._SP       ! 30 deg
+    if( trim(field_defs(i1))=="PHPI20")  fr_shift(:)=fr_shift(:)+pi/9._SP       ! 20 deg
+  enddo
+  !
+  E_field%To=T_0
+  !
+  do i_field=1,n_fields
+    !
+    W_field=E_field%frequency
+    W_field_m1=1._SP/W_field
+    if(W_field> 0._SP) W_field_m1=1._SP/W_field
+    if(W_field==0._SP) W_field_m1=0._SP
+    der_fac=W_field+2._SP*c_sigma_chirp*(Tloc-T_0)
+    !
+    ! The frequency shift is applied in two cases
+    ! (i n_fields=2) to have a circular polarized pulse, and
+    WtimesT=W_field*(Tloc-T_0)+fr_shift(i_field)
+    ! each frequency has a different initial phase
+    if(chirp>0._SP) WtimesT=WtimesT+c_sigma_chirp*(Tloc-T_0)**2
+    !
+    if (envelop_only_) then
+      f0t=cONE ; f1t=cONE
+    else
+      ! CONTROL RES / ANTIRES case
+      cos_wt=cos(WtimesT) ; sin_wt=sin(WtimesT); exp_iwt=cos_wt+cI*sin_wt
+      ! f1t=-f0t'
+      f0t=cmplx(cos_wt,0._SP) ; f1t=cmplx(sin_wt,0._SP)
+      if(trim(field_defs(2))=="ANTIRES")     then ; f0t= 0.5_SP*      exp_iwt  ; f1t=-cI*0.5_SP*      exp_iwt   ; endif
+      if(trim(field_defs(2))=="RES")         then ; f0t= 0.5_SP*conjg(exp_iwt) ; f1t= cI*0.5_SP*conjg(exp_iwt)  ; endif
+    endif
+    !
+    EXPf=exp(-(T-T_0)**2/(2._SP*sigma_eff**2) )
+    !
+    select case( trim(field_defs(1)) )
+    case('FROM_FILE')
+      i_T=nint((T-T_0)/field_from_file_dt)+1
+      if (i_T<=0.or.envelop_only_) then
+        f_now=0
+      else
+        if (order==0 ) f_now=field_from_file(i_T,2+(i_field-1),i_file)
+        if (order==1 ) f_now=field_from_file(i_T,4+(i_field-1),i_file)
+        if (order==2 ) f_now=field_from_file(i_T,6+(i_field-1),i_file)
+      endif
+      !
+    case('STATIC')
+      if (order==0 ) f_now=T
+      if (order==1 ) f_now=1._SP
+      if (order==2 ) f_now=0._SP
+      !
+    case('RECT')
+      if (order==0 ) f_now= Tloc
+      if (order==1 ) f_now= theta_function(sigma-T,dT,0) ! theta function
+      if (order==2 ) f_now=-theta_function(sigma-T,dT,1) ! delta function
+      !
+    case('RECTSIN')
+      if (chirp>0._SP) call error("chirp not implemented with "//trim(field_defs(1)))
+      if (order==0 ) f_now=-(f0t-1._SP)*W_field_m1
+      if (order==1 ) f_now= theta_function(sigma-T,dT,0)*f1t     ! theta function
+      if (order==2 ) f_now=-theta_function(sigma-T,dT,1)*f1t  &  ! delta function
+                          &+theta_function(sigma-T,dT,0)*f0t*der_fac
+      !
+    case('SIN')
+      if (chirp>0._SP) call error("chirp not implemented with "//trim(field_defs(1)))
+      if (order==0 ) f_now=-damp_func*(f0t-1._SP)*W_field_m1
+      if (order==1 ) f_now=+damp_func* f1t
+      if (order==2 ) f_now=+damp_func* f0t       *der_fac
+      !
+    case('SOFTSIN')
+      if (chirp>0._SP) call error("chirp not implemented with "//trim(field_defs(1)))
+      if (order==-1) f_now=-2
+      if (order== 0) f_now=-damp_func*(f0t-1._SP)*W_field_m1
+      if (order== 1) f_now=+damp_func* f1t
+      if (order== 2) f_now=+damp_func* f0t       *der_fac
+      !
+    case('THETA')
+      if (order==0 ) f_now=damp_func*T
+      if (order==1 ) f_now=damp_func
+      if (order==2 ) f_now=0._SP
+      !
+    case('DELTA')
+      if (order==-1) f_now=1._SP
+      if (order==0 ) f_now=1._SP
+      if (order> 0 ) f_now=0._SP
+      !
+    case('PHHG')
+      if (chirp>0._SP) call error("chirp not implemented with "//trim(field_defs(1)))
+      sarg=pi*(T-T_0)/sigma     
+      WT  =W_field*T
+      if(T-T_0<=0.0.or.T-T_0>=sigma.and.order>0) then
+        f_now=0.0
+      elseif(T-T_0>=sigma.and.order==0) then
+        Tl=sigma+T_0
+        WT  =W_field*Tl
+        f_now=-(sigma*sin(((sigma*W_field+2*pi)*Tl-2*pi*T_0)/sigma))/(4*(sigma*W_field+2*pi))  & 
+        & -(sigma*sin(((sigma*W_field-2*pi)*Tl+2*pi*T_0)/sigma))/(4*(sigma*W_field-2*pi))+sin(WT)/(2*W_field)
+      else
+        if (order==0 ) f_now=-(sigma*sin(((sigma*W_field+2*pi)*T-2*pi*T_0)/sigma))/(4*(sigma*W_field+2*pi)) &
+        &  -(sigma*sin(((sigma*W_field-2*pi)*T+2*pi*T_0)/sigma))/(4*(sigma*W_field-2*pi))+sin(WT)/(2*W_field)
+        if (order==1 ) f_now=sin(sarg)**2*cos(WT)
+        if (order==2 ) f_now=(2*pi*cos(WT)*cos(sarg)*sin(sarg))/sigma-W_field*sin(WT)*sin(sarg)**2
+      endif
+      !
+    case('GAUSS')
+      if (order==0 ) f_now= sigma_eff*sqrt(pi/2._SP)* ( erf( (T-T_0)/(sigma_eff*sqrt(2._SP)) )+1._SP )
+      if (order==1 ) f_now= Expf
+      if (order==2 ) f_now=-Expf*(T-T_0)/sigma_eff**2
+      !
+    case('QSSIN')
+      !
+      ! W    =FADEVA function
+      ! W(-z)=exp(-z^2)*(1-  erf (i*z))
+      !      =exp(-z^2)*(1-i*erfi( z ))
+      !
+      ! ERFI(z)=i*(exp(z^2)*W(-z)-1)
+      !
+      if (trim(global_gauge)=="velocity" .and. chirp>0._SP) &
+      &  call error("chirp not implemented with QSSIN and velocity gauge")
+      !
+      cZ=(sigma**2*W_field-cI*(T-T_0))/(sigma*sqrt(2._SP))
+      cS=cmplx(W_field**2*sigma**2/2._SP,fr_shift(i_field),SP)
+      !           
+      if (order==0 .and. trim(global_gauge)=="velocity" ) erfi_z=-sqrt(pi/2._SP)*sigma/2._SP*         &
+      &                       cI*( (exp(      cZ **2-      cS )*FADEVA(-      cZ )-exp(-      cS )) + &
+      &                            (exp(conjg(cZ)**2-conjg(cS))*FADEVA(-conjg(cZ))-exp(-conjg(cS))) )
+      if (order==0 .and. trim(global_gauge)=="length" )   erfi_z=cZERO
+      !
+      if (order==0 ) f_now= erfi_z
+      if (order==1 ) f_now=                     f1t              *EXPf
+      if (order==2 ) f_now=(der_fac*f0t-(T-T_0)*f1t/sigma_eff**2)*EXPf
+      !
+    case('QSFIELD')
+      !
+      if (order==0 ) f_now=                      f1t          *EXPf
+      if (order==1 ) f_now=( der_fac*f0t-(T-T_0)*f1t/sigma**2)*EXPf
+      if (order==2 ) f_now=(-der_fac*f1t-f1t/sigma**2                           &
+                     &             -der_fac*(T-T_0)*f1t/sigma**2                &
+                     &             -(T-T_0)*(der_fac*f0t-(T-T_0)*f1t/sigma**2)/sigma**2)*EXPf
+      f_now=f_now/der_fac
+      !
+    case('PULSE')
+      if (chirp>0._SP) call error("chirp not implemented with "//trim(field_defs(1)))
+      !
+      if(abs((T-T_0))< sigma) then
+        if (order==0 ) f_now= 0._SP
+        if (order==1 ) f_now=       (T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4 *f0t
+        if (order==2 ) f_now=(4._SP*(T-T_0 + sigma)    *(T-T_0 -sigma)**2/sigma**4)*f0t &
+        &                   -(1._SP*(T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4)*W_field*f1t
+      endif
+      !
+    case('SPULSE')
+      if (chirp>0._SP) call error("chirp not implemented with "//trim(field_defs(1)))
+      T_0=sigma
+      W_0=W_field
+      f_now=cZERO
+      if(abs((T-T_0))< sigma) then
+        if (order==0 ) f_now=(4.0*W_0*(T-T_0)*f0t*(-sigma**2*W_0**2+W_0**2*(T-T_0)**2-6.0)   &
+        &+f1t*(sigma**4*W_0**4-2.0*sigma**2*W_0**2*(W_0**2*(T-T_0)**2-2.0)+W_0**4*(T-T_0)**4-12.0* &
+        & W_0**2*(T -T_0)**2+24))/W_0**5/sigma**4+                                                 &
+        &   ((4.0*W_0*sigma*f0t*(-sigma**2*W_0**2+W_0**2*sigma**2-6.0)                             &
+        &+f1t*(sigma**4*W_0**4-2.0*sigma**2*W_0**2*(W_0**2*sigma**2-2.0)+W_0**4*sigma**4-12.0*     &
+        &               W_0**2*sigma**2+24))/W_0**5/sigma**4) 
+        if (order==1 ) f_now=((T-T_0)**2 - sigma**2)**2/sigma**4*f0t
+        if (order==2 ) f_now=(2._SP*(T-T_0 + sigma)    *(T-T_0 -sigma)**2/sigma**4                 &
+        &                          +2._SP*(T-T_0 + sigma)**2 *(T-T_0 -sigma)   /sigma**4 )    *f0t &
+        &                          -      (T-T_0 + sigma)**2 *(T-T_0 -sigma)**2/sigma**4  *W_0*f1t
+      endif
+    end select
+    !
+    f_t(i_field)=f_t(i_field)+f_now
+    !
+  enddo
+  !
+  small_a=f_t
+  !
+end function small_a
+!
+function small_a_frequency(W_i,E_field,iErr)
+  !
+  use pars,      ONLY:SP,cZERO,cI,pi,schlen
+  use stderr,    ONLY:STRING_split
+  use fields,    ONLY:ext_field,n_fields_defs_max
+  !
+  implicit none
+  !
+  complex(SP) :: small_a_frequency
+  !
+  type(ext_field)   :: E_field
+  complex(SP)       :: W_i
+  integer           :: iErr
+  !
+  real(SP)          ::W_0
+  complex(SP)       ::local_a(2)
+  character(schlen) ::field_defs(n_fields_defs_max)
+  !
+  iErr=-1
+  local_a=cZERO
+  !
+  field_defs=""
+  call STRING_split(trim(E_field%ef_name),field_defs)
+  !
+  W_0=E_field%frequency
+  select case( trim(field_defs(1)) )
+  case('SIN')
+    iErr=0
+    local_a(1)=local_a(1)+(1._SP/(W_i-W_0)                 -1._SP/W_0)/2._SP  ! RES
+    local_a(2)=local_a(2)+(               -1._SP/(W_i+W_0) -1._SP/W_0)/2._SP  ! ARES
+  case('DELTA')
+    iErr=0
+    local_a=1._SP/2._SP
+  end select
+  !
+  if(trim(field_defs(2))==    'RES') local_a(2)=0._SP
+  if(trim(field_defs(2))=='ANTIRES') local_a(1)=0._SP
+  !
+  small_a_frequency=local_a(1)+local_a(2)
+  !
+end function small_a_frequency
diff --git a/src/real_time_drivers/ELPH_project.dep b/src/real_time_drivers/ELPH_project.dep
new file mode 100644
index 0000000000..e3d1b42bc9
--- /dev/null
+++ b/src/real_time_drivers/ELPH_project.dep
@@ -0,0 +1,2 @@
+  RT_relaxation.o
+
diff --git a/src/real_time_drivers/RT_driver.F b/src/real_time_drivers/RT_driver.F
index 9dbf7f9f2e..5971283a9f 100644
--- a/src/real_time_drivers/RT_driver.F
+++ b/src/real_time_drivers/RT_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_driver(E,X,k,q,Dip)
  !
  use pars,           ONLY:SP,lchlen
@@ -32,7 +36,7 @@ subroutine RT_driver(E,X,k,q,Dip)
 &                         NE_steps,RT_step,G_lesser,RT_bands,&
 &                         NE_i_time,eval_energy,rho_reference,magn_reference,RT_nk, &
 &                         NE_time,RT_carriers,RT_dyn_step,l_RT_is_WFfree,&
-&                         dG_lesser,NE_tot_time,&
+&                         dG_lesser,NE_tot_time,l_length_grad_k,&
 &                         i_MEM_prev,i_MEM_now,RT_is_dynamically_dephased,&
 &                         l_RT_update_Energies,l_NE_with_fields,l_update_SOC,l_velocity_gauge_corr
  use plasma,          ONLY:PLASMA_free,EH_gas
@@ -44,8 +48,9 @@ subroutine RT_driver(E,X,k,q,Dip)
  use parallel_m,      ONLY:PAR_IND_WF_b,PAR_IND_WF_k,PAR_IND_DIPk_ibz,PAR_IND_B_mat,PAR_COM_Xk_ibz_INDEX
  use units,           ONLY:FS2AUT
  use RT_lifetimes,    ONLY:l_initial_equilibrium
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Notice that E is never changed!
  !
@@ -121,6 +126,7 @@ subroutine RT_driver(E,X,k,q,Dip)
  !
  call DIPOLE_dimensions(E,Dip,RT_bands,(/0._SP,0._SP,0._SP/))
  call DIPOLE_IO(k,E,Dip,'read ',io_err,'RT')
+ if(io_err/=0) call error("Error in Dipoles I/O")
  !
  ! In dipole_driver the IO is k-dictated. Here I want only the master to write
  !
@@ -130,7 +136,12 @@ subroutine RT_driver(E,X,k,q,Dip)
  !
  call section('=','Initialization')
  !#################################
- call RT_initialize(E,k,q)
+ call RT_initialize(E,k,q,Dip)
+ !
+ if (l_length_grad_k) then
+   call OVERLAPS_IO(k,E,Dip,'read',io_err,'RT')
+   if(io_err/=0) call error("Error in Overlaps I/O")
+ endif
  !
  ! Dipoles & WF dimensions
  !=========================
@@ -269,7 +280,7 @@ subroutine RT_driver(E,X,k,q,Dip)
    !
    ! Calculate the RT Hamiltonian Ho_plus_Sigma
    !============================================
-   call RT_Hamiltonian(dG_lesser(:,:,:,i_MEM_now),A_tot,E,k)
+   call RT_Hamiltonian(dG_lesser(:,:,:,:,i_MEM_now),A_tot,E,k)
    !
    ! Rotation in the Coherent Hamiltonian basis
    !============================================
@@ -277,7 +288,7 @@ subroutine RT_driver(E,X,k,q,Dip)
    !
    ! Energy, number of carriers, fit of electronic temperatures
    !=============================================================
-   call RT_Observables(E,k,dG_lesser(:,:,:,i_MEM_now),OBSERVABLES)
+   call RT_Observables(E,k,dG_lesser(:,:,:,:,i_MEM_now),OBSERVABLES)
    !
    ! Calculate all relaxation and dissipation terms
    !===============================================
@@ -307,7 +318,7 @@ subroutine RT_driver(E,X,k,q,Dip)
    !
    ! Integration
    !=============
-   call RT_Integrator(G_lesser(:,:,:,i_MEM_now),dG_lesser(:,:,:,i_MEM_now),dG_lesser(:,:,:,i_MEM_prev),A_tot,A_tot,E,k,q)
+   call RT_Integrator(G_lesser(:,:,:,:,i_MEM_now),dG_lesser(:,:,:,:,i_MEM_now),dG_lesser(:,:,:,:,i_MEM_prev),A_tot,A_tot,E,k,q)
    !
  enddo
  !
@@ -335,7 +346,7 @@ subroutine CLOSE_and_clean( )
    !
    subroutine RT_timing(steps)
      !
-     integer :: steps
+     integer, intent(in) :: steps
      !
      if (steps==0) then
        !
@@ -359,7 +370,7 @@ subroutine RT_timing(steps)
        if (NE_i_time>LT_pump_steps+1.and.LT_pump) then
          LT_pump=.FALSE.
          call live_timing( )
-         call live_timing('[RT] Dynamics (pump-free region) ',NE_steps-LT_pump_steps-NE_i_start_time+1,DEPTH=0.01_SP)
+         call live_timing('[RT] Dynamics (pump-free region) ',NE_steps-max(LT_pump_steps,NE_i_start_time)+1,DEPTH=0.01_SP)
        endif
        call live_timing(steps=steps)
        !
diff --git a/src/real_time_hamiltonian/.objects b/src/real_time_hamiltonian/.objects
index 2574475c03..14f3d23b5b 100644
--- a/src/real_time_hamiltonian/.objects
+++ b/src/real_time_hamiltonian/.objects
@@ -1,3 +1,3 @@
 #if defined _RT
-objs = RT_Hamiltonian.o RT_Hamiltonian_diagonalize.o RT_apply_field.o 
+objs = RT_Hamiltonian.o RT_Hamiltonian_diagonalize.o RT_apply_field.o RT_gradk_rho_times_E.o
 #endif
diff --git a/src/real_time_hamiltonian/DOUBLE_project.dep b/src/real_time_hamiltonian/DOUBLE_project.dep
index bb99b1bcd7..8321226358 100644
--- a/src/real_time_hamiltonian/DOUBLE_project.dep
+++ b/src/real_time_hamiltonian/DOUBLE_project.dep
@@ -1,4 +1,5 @@
  RT_Hamiltonian.o
  RT_Hamiltonian_diagonalize.o
  RT_apply_field.o
+ RT_gradk_rho_times_E.o
 
diff --git a/src/real_time_hamiltonian/RT_Hamiltonian.F b/src/real_time_hamiltonian/RT_Hamiltonian.F
index 550c99009d..cf049586d5 100644
--- a/src/real_time_hamiltonian/RT_Hamiltonian.F
+++ b/src/real_time_hamiltonian/RT_Hamiltonian.F
@@ -36,16 +36,15 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
  !
  use pars,           ONLY:SP,cONE,cZERO
  use wrapper_omp,    ONLY:M_plus_alpha_M_omp
- use electrons,      ONLY:levels,n_spinor,n_spin
+ use electrons,      ONLY:levels,n_spinor,n_spin,n_sp_pol
  use FFT_m,          ONLY:fft_size
  use drivers,        ONLY:l_use_Hxc_collisions
  use R_lattice,      ONLY:bz_samp,nXkibz
  use hamiltonian,    ONLY:V_hartree_sc,H_nl_sc,rho_n,magn_n,V_xc_sc
  use real_time,      ONLY:Ho_plus_Sigma,l_NE_with_fields,REF_V_xc_sc, &
-&                         rho_reference,magn_reference,RT_Vnl_xc,H_EQ,         &
+&                         rho_reference,magn_reference,RT_Vnl_xc,H_field,H_EQ,    &
 &                         l_RT_RWA,eval_HARTREE,eval_DFT,RTibz,RT_potential,      &
-&                         RT_bands,RT_nbands,    &
-&                         l_update_SOC,l_velocity_gauge_corr
+&                         RT_bands,RT_nbands,dG_avg,l_update_SOC,l_velocity_gauge_corr
  use fields,         ONLY:gauge_field
  use wave_func,      ONLY:WF
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_COM_Xk_ibz_A2A,PAR_G_k_range
@@ -60,12 +59,12 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
  !
  type(levels),  intent(inout) :: E
  type(bz_samp), intent(in)    :: k
- complex(SP),   intent(in)    :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP),   intent(in)    :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  type(gauge_field), intent(in):: A_input
  !
  ! Work Space
  !
- integer                  :: ik,ik_RT,ib
+ integer                  :: ik,ik_RT,ib,i_sp_pol
  logical                  :: l_H_contains_Hartree
  real(SP),    allocatable :: delta_rho(:),delta_magn(:,:)
  complex(SP), allocatable :: V_scf(:),V_scf_grad(:,:),V_xc_mat(:,:,:)
@@ -107,7 +106,7 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
    !
    deallocate(delta_rho)
    !
-   if(n_spin>1 .and. eval_DFT) then
+   if(n_spin>1 .and. eval_DFT.and..not.l_use_Hxc_collisions) then
      allocate(delta_magn(fft_size,3))
      call el_magnetization_matrix(dG_in,E,k,delta_magn,RT_bands(1))
      magn_n=magn_reference+delta_magn
@@ -116,7 +115,7 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
    !
    ! The Vxc term
    !--------------
-   if (eval_DFT) then
+   if (eval_DFT.and..not.l_use_Hxc_collisions) then
      call XC_potential_driver(E,k,WF_kind,WF_xc_functional,1)
      V_xc_sc=V_xc-REF_V_xc_sc
    endif
@@ -128,34 +127,36 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
      allocate(V_scf(fft_size))
      V_scf=cZERO
      if(eval_HARTREE) V_scf=V_scf+V_Hartree_sc(:,1)
-     if(eval_DFT)     then
+     if(eval_DFT.and..not.l_use_Hxc_collisions)  then
        allocate(V_xc_mat(fft_size,n_spinor,n_spinor))
        call Build_V_xc_mat(V_xc_sc,V_xc_mat)
        V_scf=V_scf+V_xc_mat(:,1,1)+V_xc_mat(:,2,2)
        deallocate(V_xc_mat)
      endif
      allocate(V_scf_grad(fft_size,3))
-     call eval_Gradient(V_scf,V_scf_grad,1,"potential")
+     call eval_Gradient_rho_pot(V_scf,V_scf_grad)
      deallocate(V_scf)
      !
    endif
    !
    ! Add H_nl_sc to Ho_plus_Sigma 
    !---------------------------------
+   do i_sp_pol=1,n_sp_pol
    do ik=1,nXkibz 
      !
      H_nl_sc=cZERO
      !
      if (.not.PAR_IND_Xk_ibz%element_1D(ik) ) cycle
      !
-     call V_real_space_to_H(ik,1,H_nl_sc,WF,'def+xc',V=V_hartree_sc,Vxc=V_xc_sc)
+     call V_real_space_to_H(ik,i_sp_pol,H_nl_sc,WF,'def+xc',V=V_hartree_sc,Vxc=V_xc_sc)
      if(n_spinor==2.and.l_update_soc) call Vgrad_real_space_to_H(ik,H_nl_sc,WF,V_scf_grad)
      !
      call PP_redux_wait(H_nl_sc,COMM=PAR_COM_Xk_ibz_A2A%COMM)
      !
-     forall(ik_RT=RTibz%k_range(ik,1):RTibz%k_range(ik,2)) Ho_plus_Sigma(:,:,ik_RT,1)=H_nl_sc
+     forall (ik_RT=RTibz%k_range(ik,1):RTibz%k_range(ik,2)) Ho_plus_Sigma(:,:,ik_RT,i_sp_pol)=H_nl_sc
      !
    enddo
+   enddo
    !
    l_H_contains_Hartree=index(trim(RT_potential),"HARTREE")>0
    if(allocated(V_scf_grad)) deallocate(V_scf_grad)
@@ -166,22 +167,26 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
  !======================================================
  if (l_use_Hxc_collisions) then
    !
-   call COLLISIONS_compose_rt(dG_in)
+   call Build_dG_avg(dG_in,dG_avg)
+   call COLLISIONS_compose_rt(dG_avg)
+   !
    !
    ! Add RT_Vnl_xc to Ho_plus_Sigma 
    !---------------------------------
-   do ik=1,nXkibz 
+   do i_sp_pol=1,n_sp_pol
+   do ik=1,nXkibz
      if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
      if (l_H_contains_HARTREE) then
        do ik_RT =RTibz%k_range(ik,1),RTibz%k_range(ik,2)
-         Ho_plus_Sigma(:,:,ik_RT,1)=Ho_plus_Sigma(:,:,ik_RT,1)+RT_Vnl_xc(:,:,ik,1)
+         Ho_plus_Sigma(:,:,ik_RT,i_sp_pol)=Ho_plus_Sigma(:,:,ik_RT,i_sp_pol)+RT_Vnl_xc(:,:,ik,i_sp_pol)
        enddo
      else
        do ik_RT =RTibz%k_range(ik,1),RTibz%k_range(ik,2)
-         Ho_plus_Sigma(:,:,ik_RT,1)=RT_Vnl_xc(:,:,ik,1)
+         Ho_plus_Sigma(:,:,ik_RT,i_sp_pol)=RT_Vnl_xc(:,:,ik,i_sp_pol)
        enddo
      endif
    enddo
+   enddo
    !
  endif
  !
@@ -191,14 +196,20 @@ subroutine RT_Hamiltonian(dG_in,A_input,E,k)
    !
    call timing('RT Apply Field',OPR='start')
    !
+   H_field=cZERO
+   do i_sp_pol=1,n_sp_pol
    do ik=1,nXkibz 
      if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
      H_nl_sc=cZERO
-     call RT_apply_field(ik,1,H_nl_sc,A_input)
+     call RT_apply_field(ik,i_sp_pol,H_nl_sc,A_input)
+     call RT_gradk_rho_times_E(ik,i_sp_pol,H_nl_sc,dG_in,A_input,k,E)
      do ik_RT =RTibz%k_range(ik,1),RTibz%k_range(ik,2)
-       call M_plus_alpha_M_omp(RT_nbands,cONE,H_nl_sc,Ho_plus_sigma(:,:,ik_RT,1))
+       H_field(:,:,ik_RT,i_sp_pol)=H_nl_sc
+       call M_plus_alpha_M_omp(RT_nbands,cONE,H_nl_sc,Ho_plus_sigma(:,:,ik_RT,i_sp_pol))
      enddo
    enddo
+   enddo
+   !
    !
    call timing('RT Apply Field',OPR='stop')
    !
diff --git a/src/real_time_hamiltonian/RT_apply_field.F b/src/real_time_hamiltonian/RT_apply_field.F
index 9f3f762250..9750e51d4e 100644
--- a/src/real_time_hamiltonian/RT_apply_field.F
+++ b/src/real_time_hamiltonian/RT_apply_field.F
@@ -7,7 +7,8 @@
 !
 subroutine RT_apply_field(ik,i_sp_pol,H_nl_sc,A_input)
  !
- ! This subroutine add the vector potential to the Hamiltonian
+ ! This subroutine adds the external field to the Hamiltonian,
+ ! either as A*j (velocity gauge) or as E*r (length gauge)
  !
  use pars,           ONLY:SP,cI
  use drivers,        ONLY:l_nl_optics
@@ -16,7 +17,7 @@ subroutine RT_apply_field(ik,i_sp_pol,H_nl_sc,A_input)
  use fields,         ONLY:gauge_field
  use DIPOLES,        ONLY:DIP_iR,DIP_P,DIP_v,DIP_P_spinor
  use X_m,            ONLY:global_gauge
- use real_time,      ONLY:RT_bands,l_update_SOC,l_velocity_gauge_corr
+ use real_time,      ONLY:RT_bands,RT_bands_frozen,l_update_SOC,l_velocity_gauge_corr
  use vec_operate,    ONLY:cross_product
  use D_lattice,      ONLY:sigma_x,sigma_y,sigma_z
  use parallel_m,     ONLY:PAR_Xk_ibz_index
@@ -41,40 +42,47 @@ subroutine RT_apply_field(ik,i_sp_pol,H_nl_sc,A_input)
    ! Velocity gauge
    !================
    !
+   ! The velocity gauge is defined by
+   ! A=  int dt' E(t')
+   ! V= -1/2 A^2(t)
+   !
+   ! The definition of V is usued to remove the term 1/2A^2 from the Hamiltonian
+   ! It corresponds to a gauge transformation function
+   ! Lambda = -1/2 int dt' A^2(t') dt
+   ! see discussion in sec. 4.A of  J. of Optical Society B, 39(2), 535 (2022)
+   !
    A_vec_pot=A_input%vecpot/SPEED_OF_LIGHT ! Gaussian units
    !
    ! Add the vector potential to the singular part of the Self-Energy
    ! in case of complex Efield puts the correct Hermitian matrix
    ! and the intraband optical matrix elements have been set to zero
    !
-!$omp parallel do default(shared), private(i1,Jtot,A_dot_J)
+   !$omp parallel do default(shared), private(i1,Jtot,A_dot_J)
    do i1=RT_bands(1),RT_bands(2)
+     !
+     if ( RT_bands_frozen(i1)==1 ) cycle
      !
      ! Case A: the pseudo-potential is correctly gauged, so A*p enters the hamiltonian
      if (     l_velocity_gauge_corr) Jtot(1:3)= -DIP_P(1:3,i1,i1,PAR_Xk_ibz_index(ik),i_sp_pol)
      ! Case b: the pseudo-potential is   *not*   gauged, so A*v enters the hamiltonian.
      !         A*v accounts for the linear expansion of the gauge correction to the non local potential
      if (.not.l_velocity_gauge_corr) Jtot(1:3)= -DIP_v(1:3,i1,i1,PAR_Xk_ibz_index(ik),i_sp_pol)
-     !	   
-     if(.not.l_nl_optics) then
-       !	   
-       ! This term can be set to zero with the freedom of the velocity gauge
-       ! see discussion in sec. 4.A of  J. of Optical Society B, 39(2), 535 (2022)
-       ! we do not include it in yambo_nl
-       !	   
-       Jtot(1:3)=Jtot(1:3)+A_vec_pot(1:3)/2._SP
-       !	   
-     endif
      !
      A_dot_J(1)=sum(A_vec_pot(1:3)*      Jtot(1:3) )
      A_dot_J(2)=sum(A_vec_pot(1:3)*conjg(Jtot(1:3)))
      H_nl_sc(i1,i1)=H_nl_sc(i1,i1)-real(A_dot_J(1),SP)
+     !
    enddo
-!$omp end parallel do
+   !$omp end parallel do
    !
-!$omp parallel do default(shared), private(i1,i2,Jtot,A_dot_J)
+   !$omp parallel do default(shared), private(i1,i2,Jtot,A_dot_J)
    do i1=RT_bands(1),RT_bands(2)
+     !
+     if ( RT_bands_frozen(i1)==1 ) cycle
+     !
      do i2=i1+1,RT_bands(2)
+       !
+       if ( RT_bands_frozen(i2)==1 ) cycle
        !
        ! Note that (P)_cv = DIP_P (:,iv,ic,:,:)
        ! the off-diagonal diamagnetic term is zero
@@ -104,6 +112,9 @@ subroutine RT_apply_field(ik,i_sp_pol,H_nl_sc,A_input)
    ! Intraband terms
    !
    do i1=RT_bands(1),RT_bands(2)
+     !
+     if ( RT_bands_frozen(i1)==1 ) cycle
+     !
      POL(:)=cI*DIP_iR(:,i1,i1,PAR_Xk_ibz_index(ik),i_sp_pol)
      E_dot_P(1)=sum(E_vec_pot(1:3)*      POL(1:3) )
      E_dot_P(2)=sum(E_vec_pot(1:3)*conjg(POL(1:3)))
@@ -131,7 +142,12 @@ subroutine RT_apply_field(ik,i_sp_pol,H_nl_sc,A_input)
    ! Inter-band terms
    !
    do i1=RT_bands(1),RT_bands(2)
+     !
+     if ( RT_bands_frozen(i1)==1 ) cycle
+     !
      do i2=i1+1,RT_bands(2)
+       !
+       if ( RT_bands_frozen(i2)==1 ) cycle
        !
        ! Note that (iR)_cv = DIP_iR (:,iv,ic,:,:)
        !
diff --git a/src/real_time_hamiltonian/RT_gradk_rho_times_E.F b/src/real_time_hamiltonian/RT_gradk_rho_times_E.F
new file mode 100644
index 0000000000..9c9666f56b
--- /dev/null
+++ b/src/real_time_hamiltonian/RT_gradk_rho_times_E.F
@@ -0,0 +1,452 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2025 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+subroutine RT_gradk_rho_times_E_wrong(ik,i_sp_pol,H_nl_sc,dG_in,A_input,Xk,Xen)
+ !
+ ! This subroutine computes the k-gradient of the density matrix,
+ ! and multiplies it by the electric field.
+ ! See Phys. Rev. B 76, 035213 (2007)
+ !
+ ! To guage the density matrix it uses the phases matrixi of the dipoles
+ ! defined in Phys. Rev. Lett. 131, 236902 (2023)
+ ! In doing so we move into a gauge where the dipoles projected along E are real.
+ ! Not sure if such gauge really makes sense ...
+ !
+ use pars,           ONLY:SP,cI,cZERO,cONE,pi
+ use units,          ONLY:SPEED_OF_LIGHT,HA2EV
+ use real_time,      ONLY:l_length_grad_k,RT_bands
+ use fields,         ONLY:gauge_field
+ use real_time,      ONLY:RT_bands,NE_i_time,NE_i_last_field
+ use vec_operate,    ONLY:k_periodic_idx
+ use matrix_operate, ONLY:hermitian
+ use DIPOLES,        ONLY:DIP_S,DIP_iR,l_force_SndOrd
+ use electrons,      ONLY:levels,n_sp_pol
+ use R_lattice,      ONLY:bz_samp,k_map
+ use D_lattice,      ONLY:a
+ use parallel_m,     ONLY:PAR_G_k_range
+ use parser_m,       ONLY:parser
+ !
+ implicit none
+ !
+ integer,           intent(in) :: ik,i_sp_pol
+ complex(SP),    intent(inout) :: H_nl_sc(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP),       intent(in) :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ type(gauge_field), intent(in) :: A_input
+ type(bz_samp),     intent(in) :: Xk
+ type(levels),      intent(in) :: Xen
+ !
+ ! Workspace
+ !
+ logical     :: l_dips_term
+ integer     :: id_red,max_step,istep,idx(3),idir,ib1,jb1,nb(2),&
+                ikm1,ikp1,ikbz,ikm1_ibz,ikm1_is,ikp1_ibz,ikp1_is
+ real(SP)    :: q_fac,E_field(3)
+ complex(SP) :: DIP_kp_c(3),DIP_km_c(3),DIP_k_c(3)
+ real(SP)    :: DIP_kp_mod,DIP_km_mod,DIP_k_mod
+ !
+ complex(SP), allocatable :: DIP_kp(:,:),DIP_km(:,:)
+ !
+ complex(SP) :: gradk_rho(RT_Bands(1):RT_bands(2),RT_bands(1):RT_bands(2),3),&
+ &          gradk_rho_red(RT_Bands(1):RT_bands(2),RT_Bands(1):RT_bands(2),2)
+ !
+ ! Warning: kpts parallelization to be fixed, since rho is distributed
+ !
+ if (.not.l_length_grad_k) return
+ !
+ ! Length gauge
+ !===============
+ E_field=-A_input%vecpot_vel/SPEED_OF_LIGHT ! Gaussian units
+ !
+ if (NE_i_time>NE_i_last_field) return
+ !
+ nb=RT_bands
+ call parser("RTgradkDips",l_dips_term)
+ !
+ allocate(DIP_kp(nb(1):nb(2),nb(1):nb(2)))
+ allocate(DIP_km(nb(1):nb(2),nb(1):nb(2)))
+ !
+ ikbz=Xk%k_table(ik,1)
+ !
+ gradk_rho=cZERO
+ !
+ do id_red=1,3  ! loop on reciprocal lattice
+   !
+   if(k_map%max_kdir(id_red)==1) cycle
+   !
+   max_step=1
+   !
+   ! I need at leat 5 k-points in the line to
+   ! use the second order formula
+   !
+   if(k_map%max_kdir(id_red)>=6.and..not.l_force_SndOrd) max_step=2
+   !
+   gradk_rho_red=cZERO
+   !
+   do istep=1,max_step 
+     !
+     idx=k_map%k_map_inv(ikbz,:)
+     idx(id_red)=idx(id_red)-istep
+     idx=k_periodic_idx(idx,k_map)
+     ikm1=k_map%k_map_dir(idx(1),idx(2),idx(3))
+     !
+     ikm1_ibz=Xk%sstar(ikm1,1)
+     ikm1_is =Xk%sstar(ikm1,2)
+     !
+     idx=k_map%k_map_inv(ikbz,:)
+     idx(id_red)=idx(id_red)+istep
+     idx=k_periodic_idx(idx,k_map)
+     ikp1=k_map%k_map_dir(idx(1),idx(2),idx(3))
+     !
+     ikp1_ibz=Xk%sstar(ikp1,1)
+     ikp1_is =Xk%sstar(ikp1,2)
+     !
+     if (l_dips_term) then
+     DIP_km=cZERO
+     DIP_kp=cZERO
+     do ib1=RT_bands(1),RT_bands(2)
+       do jb1=RT_bands(1),RT_bands(2)
+         call DIPOLE_rotate(jb1,ib1,ikm1,i_sp_pol,"DIP_iR",Xk,DIP_km_c)
+         call DIPOLE_rotate(jb1,ib1,ikp1,i_sp_pol,"DIP_iR",Xk,DIP_kp_c)
+         ! DEBUG lines to verify the time gain when non-expanding on the fly
+         !DIP_km_c=DIP_iR(:,ib1,jb1,ikm1_ibz,i_sp_pol)
+         !DIP_kp_c=DIP_iR(:,ib1,jb1,ikp1_ibz,i_sp_pol)
+         ! As always in yambo, band indexes are inverted in the dipoles
+         DIP_km_mod=sqrt(abs(DIP_km_c(1))**2+abs(DIP_km_c(2))**2+abs(DIP_km_c(3))**2)
+         DIP_kp_mod=sqrt(abs(DIP_kp_c(1))**2+abs(DIP_kp_c(2))**2+abs(DIP_kp_c(3))**2)
+         do idir=1,3
+           DIP_km(ib1,jb1)=DIP_km(ib1,jb1)-cI*DIP_km_c(idir)*a(id_red,idir)/DIP_km_mod
+           DIP_kp(ib1,jb1)=DIP_kp(ib1,jb1)-cI*DIP_kp_c(idir)*a(id_red,idir)/DIP_kp_mod
+         enddo
+       enddo
+     enddo
+     else
+       DIP_km=cONE
+       DIP_kp=cONE
+     endif
+     !
+     ! 1/q factor = k_map%max_kdir(id_red)/(4._SP*pi*real(istep,SP))
+     q_fac=k_map%max_kdir(id_red)/(4._SP*pi*real(istep,SP))
+     !
+     do ib1=RT_bands(1),RT_bands(2)
+       do jb1=RT_bands(1),RT_bands(2)
+         gradk_rho_red(ib1,jb1,istep)=gradk_rho_red(ib1,jb1,istep)+  &
+             &  -cI*(dG_in(ib1,jb1,ikp1_ibz,i_sp_pol)*conjg(DIP_km(ib1,jb1))+ &
+             &      -dG_in(ib1,jb1,ikm1_ibz,i_sp_pol)*conjg(DIP_kp(ib1,jb1))  )
+       enddo
+     enddo
+     !
+     gradk_rho_red(:,:,istep)=gradk_rho_red(:,:,istep)*q_fac/2._SP
+     !
+   enddo ! istep
+   !
+   ! Rotate in cartesian coordinates
+   !
+   if(max_step==1) then
+     !      
+     ! First order formula D(dk) 
+     !
+     do idir=1,3
+       gradk_rho(:,:,idir)=gradk_rho(:,:,idir)+transpose(gradk_rho_red(:,:,1))*a(id_red,idir)
+     enddo
+     !
+   else
+     !      
+     ! Second order formula   ( 4*D(dk) - D(2*dk) ) / 3
+     !
+     do idir=1,3
+       gradk_rho(:,:,idir)=gradk_rho(:,:,idir)+transpose(4._SP*gradk_rho_red(:,:,1)-gradk_rho_red(:,:,2))/3._SP*a(id_red,idir)
+     enddo
+     !
+   endif
+   !
+ enddo   ! id_red
+ !
+ l_dips_term=.false.
+ !
+ do ib1=RT_bands(1),RT_bands(2)
+   do jb1=RT_bands(1),RT_bands(2)
+     if (l_dips_term) then
+     call DIPOLE_rotate(jb1,ib1,ikbz,i_sp_pol,"DIP_iR",Xk,DIP_k_c)
+     ! DEBUG lines to verify the time gain when non-expanding on the fly
+     !DIP_km_c=DIP_iR(:,ib1,jb1,ikm1_ibz,i_sp_pol)
+     ! As always in yambo, band indexes are inverted in the dipoles
+     DIP_k_mod =sqrt(abs(DIP_k_c(1))**2+abs(DIP_k_c(2))**2+abs(DIP_k_c(3))**2)
+     DIP_k_c=-cI*DIP_kp_c/DIP_k_mod
+     else
+       DIP_k_c=cONE
+     endif
+     do idir=1,3
+       H_nl_sc(ib1,jb1)=H_nl_sc(ib1,jb1)-gradk_rho(ib1,jb1,idir)*E_field(idir)*DIP_k_c(idir)
+     enddo
+   enddo
+ enddo
+ !
+ !
+end subroutine RT_gradk_rho_times_E_wrong
+!
+!
+subroutine RT_gradk_rho_times_E(ik,i_sp_pol,H_nl_sc,dG_in,A_input,Xk,Xen)
+ !
+ ! This subroutine computes the k-gradient of the density matrix,
+ ! and multiplies it by the electric field.
+ ! See Phys. Rev. B 76, 035213 (2007)
+ !
+ use pars,           ONLY:SP,cI,cZERO,pi
+ use units,          ONLY:SPEED_OF_LIGHT,HA2EV
+ use real_time,      ONLY:l_length_grad_k,RT_bands
+ use fields,         ONLY:gauge_field
+ use real_time,      ONLY:RT_bands,NE_i_time,NE_i_last_field
+ use vec_operate,    ONLY:k_periodic_idx,degeneration_finder
+ use matrix_operate, ONLY:hermitian
+ use DIPOLES,        ONLY:DIP_S,DIP_iR,l_force_SndOrd
+ use electrons,      ONLY:levels,n_sp_pol
+ use R_lattice,      ONLY:bz_samp,k_map
+ use D_lattice,      ONLY:a
+ use parallel_m,     ONLY:PAR_G_k_range
+ use parser_m,       ONLY:parser
+ !
+ implicit none
+ !
+ integer,           intent(in) :: ik,i_sp_pol
+ complex(SP),    intent(inout) :: H_nl_sc(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP),       intent(in) :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ type(gauge_field), intent(in) :: A_input
+ type(bz_samp),     intent(in) :: Xk
+ type(levels),      intent(in) :: Xen
+ !
+ ! Workspace
+ !
+ logical     :: l_dips_term,l_svd
+ integer     :: id_red,max_step,istep,idx(3),idir,ib1,ib2,jb1,jb2,nb(2),&
+                ikm1,ikp1,ikbz,ikm1_ibz,ikm1_is,ikp1_ibz,ikp1_is
+ real(SP)    :: q_fac,E_field(3)
+ complex(SP) :: DIP_kp_c(3),DIP_km_c(3)
+ integer     :: ib,ibp,i_grp,first_el_k(RT_bands(2)-RT_bands(1)+1),n_of_el_k(RT_bands(2)-RT_bands(1)+1),n_deg_grp_k
+ logical     :: deg_table_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ real(SP)    :: deg_thresh
+ !
+ !complex(SP) :: Mp_kmq(RT_bands(2),RT_bands(2),RT_bands(2),RT_bands(2)),&
+ !&              Mm_kpq(RT_bands(2),RT_bands(2),RT_bands(2),RT_bands(2)),&
+ !&              Sp_kmq(RT_bands(2),RT_bands(2)),Sm_kpq(RT_bands(2),RT_bands(2)),&
+ !&              A1p_kmq(RT_bands(2),RT_bands(2)),A1m_kpq(RT_bands(2),RT_bands(2)),&
+ !&              A2p_kmq(RT_bands(2),RT_bands(2)),A2m_kpq(RT_bands(2),RT_bands(2)),&
+ !&              DIP_kp(RT_bands(2),RT_bands(2)),DIP_km(RT_bands(2),RT_bands(2))
+ !
+ complex(SP), allocatable :: Mp_kmq(:,:,:,:),Mm_kpq(:,:,:,:),&
+ &              Sp_kmq(:,:),Sm_kpq(:,:),gp_kmq(:,:),gm_kpq(:,:),&
+ &              A1p_kmq(:,:),A1m_kpq(:,:),A2p_kmq(:,:),A2m_kpq(:,:),&
+ &              DIP_kp(:,:),DIP_km(:,:)
+ !
+ complex(SP) :: gradk_rho(RT_Bands(1):RT_bands(2),RT_bands(1):RT_bands(2),3),&
+ &          gradk_rho_red(RT_Bands(1):RT_bands(2),RT_Bands(1):RT_bands(2),2)
+ !
+ ! Warning: kpts parallelization to be fixed, since rho is distributed
+ !
+ if (.not.l_length_grad_k) return
+ !
+ ! Length gauge
+ !===============
+ E_field=-A_input%vecpot_vel/SPEED_OF_LIGHT ! Gaussian units
+ !
+ if (NE_i_time>NE_i_last_field) return
+ !
+ call parser("RTgradkDips",l_dips_term)
+ l_svd=.true.
+ deg_thresh=1.E-5/HA2EV
+ !
+ nb=RT_bands
+ !
+ allocate(Mp_kmq(nb(1):nb(2),nb(1):nb(2),nb(1):nb(2),nb(1):nb(2)))
+ allocate(Mm_kpq(nb(1):nb(2),nb(1):nb(2),nb(1):nb(2),nb(1):nb(2)))
+ !
+ allocate(Sp_kmq(nb(1):nb(2),nb(1):nb(2)))
+ allocate(Sm_kpq(nb(1):nb(2),nb(1):nb(2)))
+ !
+ allocate(gp_kmq(nb(1):nb(2),nb(1):nb(2)))
+ allocate(gm_kpq(nb(1):nb(2),nb(1):nb(2)))
+ !
+ allocate(A1p_kmq(nb(1):nb(2),nb(1):nb(2)))
+ allocate(A1m_kpq(nb(1):nb(2),nb(1):nb(2)))
+ !
+ allocate(A2p_kmq(nb(1):nb(2),nb(1):nb(2)))
+ allocate(A2m_kpq(nb(1):nb(2),nb(1):nb(2)))
+
+ allocate(DIP_kp(nb(1):nb(2),nb(1):nb(2)))
+ allocate(DIP_km(nb(1):nb(2),nb(1):nb(2)))
+ !
+ ikbz=Xk%k_table(ik,1)
+ !
+ ! define degeneration table at ik
+ !
+ call degeneration_finder(nb(2)-nb(1)+1,first_el_k,n_of_el_k,n_deg_grp_k,&
+ &    Er=Xen%E(nb(1):nb(2),ik,i_sp_pol),deg_accuracy=deg_thresh,Include_single_values=.true.)
+ !
+ deg_table_k=.false.
+ do i_grp=1,n_deg_grp_k
+   do ib=nb(1)+first_el_k(i_grp)-1,nb(1)+first_el_k(i_grp)+n_of_el_k(i_grp)-2
+     do ibp=nb(1)+first_el_k(i_grp)-1,nb(1)+first_el_k(i_grp)+n_of_el_k(i_grp)-2
+       deg_table_k(ib,ibp)=.true.
+       deg_table_k(ibp,ib)=.true.
+     enddo
+   enddo
+ enddo
+ !
+ gradk_rho=cZERO
+ !
+ do id_red=1,3  ! loop on reciprocal lattice
+   !
+   if(k_map%max_kdir(id_red)==1) cycle
+   !
+   max_step=1
+   !
+   ! I need at leat 5 k-points in the line to
+   ! use the second order formula
+   !
+   if(k_map%max_kdir(id_red)>=6.and..not.l_force_SndOrd) max_step=2
+   !
+   gradk_rho_red=cZERO
+   !
+   do istep=1,max_step 
+     !
+     idx=k_map%k_map_inv(ikbz,:)
+     idx(id_red)=idx(id_red)-istep
+     idx=k_periodic_idx(idx,k_map)
+     ikm1=k_map%k_map_dir(idx(1),idx(2),idx(3))
+     !
+     ikm1_ibz=Xk%sstar(ikm1,1)
+     ikm1_is =Xk%sstar(ikm1,2)
+     !
+     idx=k_map%k_map_inv(ikbz,:)
+     idx(id_red)=idx(id_red)+istep
+     idx=k_periodic_idx(idx,k_map)
+     ikp1=k_map%k_map_dir(idx(1),idx(2),idx(3))
+     !
+     ikp1_ibz=Xk%sstar(ikp1,1)
+     ikp1_is =Xk%sstar(ikp1,2)
+     !
+     Sm_kpq(:,:)=hermitian(DIP_S(nb(1):,nb(1):,id_red+(istep-1)*3,ikbz,i_sp_pol))   ! (g-(k+q))= g+(k)\dag
+     Sp_kmq(:,:)=          DIP_S(nb(1):,nb(1):,id_red+(istep-1)*3,ikm1,i_sp_pol)    !  g+(k-q) =(g-(k)\dag)
+     !
+     ! k+q part
+     ! Disconnect non connected blocks
+     call zeroing_disconnected_blocks(Xen,Xk,RT_bands,ikp1,i_sp_pol,&
+     &                         deg_table_k,deg_thresh,Sm_kpq,gm_kpq)
+     ! Perform SVD
+     call SERIAL_SVD(nb(2)-nb(1)+1,Sm_kpq,'uni',0)
+     !
+     ! k-q part
+     ! Disconnect non connected blocks
+     call zeroing_disconnected_blocks(Xen,Xk,RT_Bands,ikm1,i_sp_pol,&
+     &                         deg_table_k,deg_thresh,Sp_kmq,gp_kmq)
+     ! Perform SVD
+     call SERIAL_SVD(nb(2)-nb(1)+1,Sp_kmq,'uni',0)
+     !
+     ! define M matrix
+     do ib1=RT_bands(1),RT_bands(2)
+       do ib2=RT_bands(1),RT_bands(2)
+         Mp_kmq(ib1,ib2,:,:)=conjg(gp_kmq(ib1,ib2))*gp_kmq(:,:)
+         Mm_kpq(ib1,ib2,:,:)=conjg(gm_kpq(ib1,ib2))*gm_kpq(:,:)
+       enddo
+     enddo
+     !
+     if (l_dips_term) then
+       !
+       ! I need to rotate the dipoles in k-space,
+       ! and then project them along the id_red direction
+       ! Note: this term is quitre slow.
+       ! Pre-expanding the dipoles with the old
+       ! DIPOLE expand wuold speed-up this part
+       !
+       DIP_km=cZERO
+       DIP_kp=cZERO
+       do ib1=RT_bands(1),RT_bands(2)
+         do jb1=RT_bands(1),RT_bands(2)
+           call DIPOLE_rotate(jb1,ib1,ikm1,i_sp_pol,"DIP_iR",Xk,DIP_km_c)
+           call DIPOLE_rotate(jb1,ib1,ikp1,i_sp_pol,"DIP_iR",Xk,DIP_kp_c)
+           ! DEBUG lines to verify the time gain when non-expanding on the fly
+           !DIP_km_c=DIP_iR(:,ib1,jb1,ikm1_ibz,i_sp_pol)
+           !DIP_kp_c=DIP_iR(:,ib1,jb1,ikp1_ibz,i_sp_pol)
+           ! As always in yambo, band indexes are inverted in the dipoles
+           do idir=1,3
+             DIP_km(ib1,jb1)=DIP_km(ib1,jb1)-cI*DIP_km_c(idir)*a(id_red,idir)
+             DIP_kp(ib1,jb1)=DIP_kp(ib1,jb1)-cI*DIP_kp_c(idir)*a(id_red,idir)
+           enddo
+         enddo
+       enddo
+       !
+       !
+       do ib1=RT_bands(1),RT_bands(2)
+         do jb1=RT_bands(1),RT_bands(2)
+           do ib2=RT_bands(1),RT_bands(2)
+             !A1_ll' ib1=l jb1=l' ib2=s
+             A1p_kmq(ib1,jb1)=-cI*dG_in(ib2,jb1,ikm1_ibz,i_sp_pol)*conjg(DIP_km(ib2,ib1))
+             A1m_kpq(ib1,jb1)=-cI*dG_in(ib2,jb1,ikp1_ibz,i_sp_pol)*conjg(DIP_kp(ib2,ib1))
+             !A2_ll' ib1=l jb1=l' ib2=s
+             A2p_kmq(ib1,jb1)=-cI*dG_in(ib1,ib2,ikm1_ibz,i_sp_pol)*      DIP_km(ib2,jb1)
+             A2m_kpq(ib1,jb1)=-cI*dG_in(ib1,ib2,ikp1_ibz,i_sp_pol)*      DIP_kp(ib2,jb1)
+           enddo
+         enddo
+       enddo
+       !
+     else
+       !
+       A1p_kmq=cZERO
+       A1m_kpq=cZERO
+       A2p_kmq=cZERO
+       A2m_kpq=cZERO
+       !
+     endif
+     !
+     ! 1/q factor = k_map%max_kdir(id_red)/(4._SP*pi*real(istep,SP))
+     q_fac=k_map%max_kdir(id_red)/(4._SP*pi*real(istep,SP))
+     !
+     do ib1=RT_bands(1),RT_bands(2)
+       do jb1=RT_bands(1),RT_bands(2)
+         do ib2=RT_bands(1),RT_bands(2)
+           do jb2=RT_bands(1),RT_bands(2)
+             ! ib1=n, jb1=m, ib2=l, jb2=l'
+             gradk_rho_red(ib1,jb1,istep)=gradk_rho_red(ib1,jb1,istep)+ &
+             &  -cI*Mm_kpq(ib2,ib1,jb2,jb1)*dG_in(ib2,jb2,ikp1_ibz,i_sp_pol)+    &
+             &  +cI*Mp_kmq(ib2,ib1,jb2,jb1)*dG_in(ib2,jb2,ikm1_ibz,i_sp_pol)+    &
+             &  -cI/q_fac*Mm_kpq(ib2,ib1,jb2,jb1)*(A1m_kpq(ib2,jb2)-A2m_kpq(ib2,jb2))+ &
+             &  -cI/q_fac*Mp_kmq(ib2,ib1,jb2,jb1)*(A1p_kmq(ib2,jb2)-A2p_kmq(ib2,jb2))
+           enddo
+         enddo
+       enddo
+     enddo
+     !
+     gradk_rho_red(:,:,istep)=gradk_rho_red(:,:,istep)*q_fac/2._SP
+     !
+   enddo ! istep
+   !
+   ! Rotate in cartesian coordinates
+   !
+   if(max_step==1) then
+     !      
+     ! First order formula D(dk)      
+     !
+     do idir=1,3
+       gradk_rho(:,:,idir)=gradk_rho(:,:,idir)+transpose(gradk_rho_red(:,:,1))*a(id_red,idir)
+     enddo
+     !
+   else
+     !      
+     ! Second order formula   ( 4*D(dk) - D(2*dk) ) / 3
+     !
+     do idir=1,3
+       gradk_rho(:,:,idir)=gradk_rho(:,:,idir)+transpose(4._SP*gradk_rho_red(:,:,1)-gradk_rho_red(:,:,2))/3._SP*a(id_red,idir)
+     enddo
+     !
+   endif
+   !
+ enddo   ! id_red
+ !
+ do idir=1,3
+   H_nl_sc(:,:)=H_nl_sc(:,:)-gradk_rho(:,:,idir)*E_field(idir)
+ enddo
+ !
+end subroutine RT_gradk_rho_times_E
diff --git a/src/real_time_hamiltonian/RT_project.dep b/src/real_time_hamiltonian/RT_project.dep
index bb99b1bcd7..8321226358 100644
--- a/src/real_time_hamiltonian/RT_project.dep
+++ b/src/real_time_hamiltonian/RT_project.dep
@@ -1,4 +1,5 @@
  RT_Hamiltonian.o
  RT_Hamiltonian_diagonalize.o
  RT_apply_field.o
+ RT_gradk_rho_times_E.o
 
diff --git a/src/real_time_initialize/.objects b/src/real_time_initialize/.objects
index 7f63c40398..11d0e4a376 100644
--- a/src/real_time_initialize/.objects
+++ b/src/real_time_initialize/.objects
@@ -3,6 +3,7 @@ ELPH_objs = RT_ELPH_initialize.o
 #endif
 #if defined _RT
 RT_head_objs = RT_initialize.o RT_start_and_restart.o 
-RT_foot_objs = RT_Field_Commensurable_Frequencies.o RT_Dephasing_Matrix.o RT_G_lesser_init.o RT_occupations_update.o
+RT_foot_objs = RT_Dephasing_Matrix.o RT_G_lesser_init.o \
+               RT_occupations_update.o
 #endif
 objs = $(RT_head_objs) RT_occupations_and_levels_init.o $(RT_foot_objs) $(ELPH_objs)
diff --git a/src/real_time_initialize/DOUBLE_project.dep b/src/real_time_initialize/DOUBLE_project.dep
index c350f2f4de..aa30fd3003 100644
--- a/src/real_time_initialize/DOUBLE_project.dep
+++ b/src/real_time_initialize/DOUBLE_project.dep
@@ -1,6 +1,5 @@
  RT_Dephasing_Matrix.o
  RT_ELPH_initialize.o
- RT_Field_Commensurable_Frequencies.o
  RT_G_lesser_init.o
  RT_initialize.o
  RT_occupations_and_levels_init.o
diff --git a/src/real_time_initialize/RT_Dephasing_Matrix.F b/src/real_time_initialize/RT_Dephasing_Matrix.F
index 05af9b9f53..03188e6982 100644
--- a/src/real_time_initialize/RT_Dephasing_Matrix.F
+++ b/src/real_time_initialize/RT_Dephasing_Matrix.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_Dephasing_Matrix(E)
  !
  use pars,          ONLY:SP
@@ -15,8 +19,9 @@ subroutine RT_Dephasing_Matrix(E)
  use real_time,     ONLY:RT_Deph_Matrix,RT_is_statically_dephased,Phase_LifeTime,&
 &                        RAD_LifeTime,RT_bands,RT_INPUT_Deph_limits,RT_deph_cv_only, &
 &                        RT_deph_deg_thresh
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(in) :: E
  !
diff --git a/src/real_time_initialize/RT_ELPH_initialize.F b/src/real_time_initialize/RT_ELPH_initialize.F
index 29df6cd43f..b883ec6bca 100644
--- a/src/real_time_initialize/RT_ELPH_initialize.F
+++ b/src/real_time_initialize/RT_ELPH_initialize.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_ELPH_initialize(E,k,q)
  ! 
  use parser_m,       ONLY:parser
@@ -13,8 +17,9 @@ subroutine RT_ELPH_initialize(E,k,q)
  use drivers,        ONLY:l_elph_scatt,l_phel_scatt
  use R_lattice,      ONLY:bz_samp
  use ELPH,           ONLY:PH_freqs_sq,PH_W_debye,GKKP_ep_sq,GKKP_pe_sq,l_GKKP_expanded_DB_exists,elph_nQ
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)             :: E
  type(bz_samp),intent(in) :: k,q
@@ -33,7 +38,7 @@ subroutine RT_ELPH_initialize(E,k,q)
  !
  ! El-Ph databases check
  !=======================
- call ELPH_databases_check(RT_bands(2))
+ call ELPH_databases_check(RT_bands)
  if (.not.l_GKKP_expanded_DB_exists) then
    call warning('ELPH (expanded) database not found and/or incompatible OR QP widths not provided. E-P scattering switched off')
    l_elph_scatt   =.FALSE.
diff --git a/src/real_time_initialize/RT_Field_Commensurable_Frequencies.F b/src/real_time_initialize/RT_Field_Commensurable_Frequencies.F
deleted file mode 100644
index 357cde4366..0000000000
--- a/src/real_time_initialize/RT_Field_Commensurable_Frequencies.F
+++ /dev/null
@@ -1,70 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2011 The Yambo Team
-!
-! Authors (see AUTHORS file for details): AM
-!
-subroutine RT_Field_Commensurable_Frequencies()
- !
- ! Check on the PROBE/PUMP frequencies. To be possible to find, in ypp_rt, the coefficients of the 
- ! Fourier series I need ALL the frequencies to be Wn=n deltaW. Therefore frequency(1) and
- ! frequency(2) must be choosed accordingly.
- ! 
- ! Moreover to extract the Fourier coefficients I need to integrate over aa Time range
- ! equal to 2*pi/deltaW. This must be not larger then NE_tot_time 
- !
- use pars,           ONLY:SP,schlen
- use stderr,         ONLY:STRING_split
- use fields,         ONLY:Efield,n_ext_fields,EtoT
- use real_time,      ONLY:NE_tot_time
- !
- implicit none
- !
- integer  :: i_master_field,i_f,i_Pump,i_Probe
- real(SP) :: W_step,T_treshold,T_step
- real(SP), parameter :: treshold=0.8
- character(schlen) ::pump_defs(3),probe_defs(3)
- !
- if (n_ext_fields<2) return
- !
- i_Probe=1
- i_Pump =2
- !
- call STRING_split(trim(Efield(i_Pump)%ef_name),pump_defs)
- call STRING_split(trim(Efield(i_Probe)%ef_name),probe_defs)
- !
- if ( .not.any((/pump_defs(1)=='SIN',probe_defs(1)=='SIN'/)) ) return
- !
- i_master_field=0
- if ( abs( Efield(i_Pump)%frequency(2) -Efield(i_Pump)%frequency(1) ) > epsilon(1._SP)  )  i_master_field=i_Pump
- if ( abs( Efield(i_Probe)%frequency(2)-Efield(i_Probe)%frequency(1))  >&
-&     abs( Efield(i_Pump)%frequency(2) -Efield(i_Pump)%frequency(1) ) ) i_master_field=i_Probe
- !
- if (i_master_field==0) return
- !
- W_step=Efield(i_master_field)%W_step
- !
- T_step=EtoT(E=W_step)
- T_treshold=treshold*NE_tot_time
- if (T_step > treshold*NE_tot_time) then
-   T_step=T_treshold
-   W_step=EtoT(T=T_step)
- endif
- !
- Efield(i_master_field)%W_step=W_step
- !
- do i_f=1,n_ext_fields
-   !
-   Efield(i_f)%frequency(1)=nint(Efield(i_f)%frequency(1)/W_step)*W_step
-   Efield(i_f)%frequency(2)=nint(Efield(i_f)%frequency(2)/W_step)*W_step
-   !
-   if ( abs(Efield(i_f)%frequency(1)) < 1.E-5 ) Efield(i_f)%frequency(1)=W_step
-   !
-   Efield(i_f)%n_frequencies=nint((Efield(i_f)%frequency(2)-Efield(i_f)%frequency(1))/W_step)+1
-   !
-   if ( Efield(i_f)%n_frequencies == 1) Efield(i_f)%W_step=0.
-   !
- enddo 
- !
-end subroutine RT_Field_Commensurable_Frequencies
diff --git a/src/real_time_initialize/RT_G_lesser_init.F b/src/real_time_initialize/RT_G_lesser_init.F
index 6a0edfd630..7a57f49c10 100644
--- a/src/real_time_initialize/RT_G_lesser_init.F
+++ b/src/real_time_initialize/RT_G_lesser_init.F
@@ -12,26 +12,29 @@ subroutine RT_G_lesser_init( )
  use parallel_m,     ONLY:PAR_IND_Xk_ibz
  use real_time,      ONLY:dG_lesser,G_lesser_reference,RTibz,RT_bands,G_lesser
  use R_lattice,      ONLY:nkibz
+ use electrons,      ONLY:n_sp_pol
  !
  implicit none
  !
  ! Work Space
  !
- integer :: ik,ik_RT,ib
+ integer :: ik,ik_RT,ib,i_sp_pol
  !
  do ib=RT_bands(1),RT_bands(2)
+   do i_sp_pol=1,n_sp_pol
    do ik=1,nkibz
      do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
        !
-       G_lesser_reference(ib,ib,ik_RT) = cmplx(rZERO,RT_el_occ%N_ref(ib,ik_RT),SP)
+       G_lesser_reference(ib,ib,ik_RT,i_sp_pol) = cmplx(rZERO,RT_el_occ%N_ref(ib,ik_RT,i_sp_pol),SP)
        !
        if (.not.PAR_IND_Xk_ibz%element_1D(ik) ) cycle
        !
-       G_lesser(ib,ib,ik_RT,1)  = cmplx(rZERO,RT_el_occ%N(ib,ik_RT),SP)
-       dG_lesser(ib,ib,ik_RT,1) = cmplx(rZERO,RT_el_occ%N(ib,ik_RT)-RT_el_occ%N_ref(ib,ik_RT),SP)
+       G_lesser(ib,ib,ik_RT,i_sp_pol,1)  = cmplx(rZERO,RT_el_occ%N(ib,ik_RT,i_sp_pol),SP)
+       dG_lesser(ib,ib,ik_RT,i_sp_pol,1) = cmplx(rZERO,RT_el_occ%N(ib,ik_RT,i_sp_pol)-RT_el_occ%N_ref(ib,ik_RT,i_sp_pol),SP)
        !
      enddo
    enddo
+  enddo
  enddo
  !
 end subroutine RT_G_lesser_init
diff --git a/src/real_time_initialize/RT_initialize.F b/src/real_time_initialize/RT_initialize.F
index d981ed9b37..8770264100 100644
--- a/src/real_time_initialize/RT_initialize.F
+++ b/src/real_time_initialize/RT_initialize.F
@@ -5,14 +5,18 @@
 !
 ! Authors (see AUTHORS file for details):
 !
-subroutine RT_initialize(E,k,q)
+! headers
+!
+#include<y_memory.h>
+!
+subroutine RT_initialize(E,k,q,Dip)
  !
  use drivers,        ONLY:l_use_collisions,l_elph_scatt,l_elel_scatt,l_sc_hartree,l_use_Hxc_collisions,&
 &                         l_sc_is_libDFT,l_sc_sex,l_sc_fock,l_elphoton_scatt,l_rt_carriers_in_use,l_phel_scatt
 #if defined _PHEL
  use drivers,        ONLY:l_phel_scatt
 #endif
- use pars,           ONLY:SP,pi,schlen
+ use pars,           ONLY:SP,pi,schlen,n_fields_defs_max
  use stderr,         ONLY:STRING_split
  use units,          ONLY:HA2EV,HBAR_eVfs,AU2VMm1,AU2KWCMm2,AUT2FS
  use stderr,         ONLY:intc
@@ -20,6 +24,7 @@ subroutine RT_initialize(E,k,q)
  use QP_CTL_m,       ONLY:QP_ctl_user
  use vec_operate,    ONLY:normalize_v
  use plasma,         ONLY:EH_gas
+ use DIPOLES,        ONLY:DIPOLE_t
  use X_m,            ONLY:global_gauge
  use electrons,      ONLY:levels
  use collision_ext,  ONLY:COLLISIONS_have_HARTREE
@@ -30,13 +35,13 @@ subroutine RT_initialize(E,k,q)
 &                         Life_MEM_steps,l_RT_lifetimes_Tfit,l_RT_skip_ph_abs_lifetimes,l_RT_iterative_ELPH_lifetimes
  use real_time,      ONLY:RT_potential,RT_step,l_NE_with_fields,RT_bands,RT_dyn_step,RT_nk,   &
 &                         NE_steps,NE_tot_time,l_RT_induced_field,Gr_kind,       &
-&                         RAD_LifeTime,Phase_LifeTime,         &
+&                         RAD_LifeTime,Phase_LifeTime,l_length_grad_k,NE_i_last_field,        &
 &                         NE_MEM_treshold,G_MEM_steps,l_RT_uses_E_FineGd,DbGd_EE_Nk_table,    &
 &                         l_RT_impose_N_dN,Integrator_name,l_RT_impose_E,DbGd_EE_percent,     &
 &                         l_RT_update_Energies,eval_HARTREE,eval_DFT,l_NE_dynamics,           &
 &                         NE_time_step_update_jump,NE_initial_time_step_update,NE_i_time,     &
-&                         RAD_magnification,NE_step_update_treshold, &
-&                         NE_time_step_update_jump_INPUT,NE_initial_time_step_update,          &
+&                         RAD_magnification,NE_step_update_treshold,Integrator_nfrac,         &
+&                         NE_time_step_update_jump_INPUT,NE_initial_time_step_update,         &
 &                         RT_is_dynamically_dephased,RT_is_statically_dephased,RT_MAX_step,   &
 &                         eval_energy,eval_entropy,l_RT_is_WFfree,l_update_SOC,    &
 &                         l_RT_eq_is_zero_temp,l_RT_include_eq_scatt,  &
@@ -49,17 +54,19 @@ subroutine RT_initialize(E,k,q)
  use parser_m,       ONLY:parser
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_COM_Xk_ibz_INDEX,PAR_G_k_range
  use parallel_int,   ONLY:PP_redux_wait
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(inout) :: E
- type(bz_samp),intent(in)    :: q,k
+ type(bz_samp),   intent(in) :: q,k
+ type(DIPOLE_t),  intent(in) :: Dip
  !
  ! Work space
  !
- integer                  :: i1,N_T_samplings,ik,NK_cpu(PAR_COM_Xk_ibz_INDEX%n_CPU)
- character(schlen)        :: MSG_ch,field_defs(3)
- real(SP)                 :: T_memory
+ integer                  :: i1,i_defs,N_T_samplings,ik,NK_cpu(PAR_COM_Xk_ibz_INDEX%n_CPU)
+ character(schlen)        :: MSG_ch,field_defs(n_fields_defs_max)
+ real(SP)                 :: T_memory,sigma_eff,U_pond
  logical                  :: l_Full_RT_T_evolution,l_skip_it,l_velocity_gauge_tmp
  real(SP), parameter      :: thresh=1.E-5_SP
 #if defined _RT_SCATT
@@ -98,6 +105,7 @@ subroutine RT_initialize(E,k,q)
  call parser('RTzeroTempRef',        l_RT_eq_is_zero_temp)
  call parser('RTEqScatt',            l_RT_include_eq_scatt)
  call parser('RTskipPHabs',          l_RT_skip_ph_abs_lifetimes)
+ call parser('RTgradk',              l_length_grad_k)
  !
  if (.not.l_RT_skip_ph_abs_lifetimes) l_RT_skip_ph_abs_lifetimes=.not.(Bose_Temp>0..or.l_phel_scatt)
  !
@@ -127,6 +135,8 @@ subroutine RT_initialize(E,k,q)
    PAR_G_k_range(2)=RTibz%k_range(PAR_G_k_range(2),2)
  endif
  !
+ if (l_length_grad_k .and. .not.allocated(k%k_table))  call k_build_up_BZ_tables(k)
+ !
  ! Integrator FLAGS
  !==================
  call RT_Integrator_init()
@@ -220,38 +230,51 @@ subroutine RT_initialize(E,k,q)
  !
  l_NE_with_fields=.false.
  !
+ ! Initial check for fields defined from file
+ do i1=1,n_ext_fields
+   field_defs=""
+   Efield(i1)%ef_name=trim(Efield(i1)%ef_name)
+   call STRING_split(Efield(i1)%ef_name,field_defs)
+   if ( field_defs(1)/='FROM_FILE') cycle
+   call RT_load_field_from_file_init(field_defs(2),field_defs(3),Integrator_nfrac)
+ enddo
+ !
+ NE_i_last_field=0
+ !
  do i1=1,n_ext_fields
    !
    Efield(i1)%ef_name=trim(Efield(i1)%ef_name)
    !
    MSG_ch="Field#"//trim(intc(i1))//" "//trim(Efield(i1)%ef_name)
    !
-   if(trim(Efield(i1)%ef_name)/='none') then
+   call STRING_split(Efield(i1)%ef_name,field_defs)
+   !
+   if(trim(field_defs(1))=='none') cycle
      !
-     if (.not.field_is_ok( Efield(i1)) ) call error(trim(MSG_ch)//' not supported or polarization is wrong')
+     if (.not.field_is_ok( Efield(i1) ) ) call error(trim(MSG_ch)//' not supported or polarization is wrong')
      !
      l_NE_with_fields=.true.
      !
-     call STRING_split(Efield(i1)%ef_name,field_defs)
-     !
-     if(field_defs(1)/='DELTA'.and.Efield(i1)%frequency(1)==0._SP) &
+     if( (field_defs(1)/='DELTA'.and.field_defs(1)/='GAUSS'.and.       &
+&         field_defs(1)/='RECT' .and.field_defs(1)/='FROM_FILE')       &
+&       .and. Efield(i1)%frequency==0._SP)                          &
 &      call error(trim(MSG_ch)//" field has zero frequency.")
      !
      if(Efield(i1)%intensity==0._SP) &
 &      call warning(trim(MSG_ch)//" field has zero intensity.")
      !
-     if( all( abs(Efield(i1)%versor(:))<epsilon(1.) ) ) &
+     if( all( abs(Efield(i1)%versor(:))<epsilon(1._SP) ) ) &
 &      call error(trim(MSG_ch)//" field has undefined direction.")
      !
      if ( ((field_defs(1)=='QSSIN') .or. (field_defs(1)=='GAUSS') .or. (field_defs(1)=='PULSE')) &
-&        .and. Efield(i1)%width<epsilon(1.)  ) &
+&        .and. Efield(i1)%width<epsilon(1._SP)  ) &
 &      call error(trim(MSG_ch)//" field has undefined duration.")
      !
      Efield(i1)%versor   =normalize_v(Efield(i1)%versor)
      Efield(i1)%amplitude=Efield_strength(Efield(i1)%intensity,'AU')
-     !
+     ! t_initial_indx cannot be 1, a previous time step is always needed
+     ! see definition of "step_case" in function "theta_function" in src/modules/mod_functions.F
      Efield(i1)%t_initial_indx=max(nint(Efield(i1)%t_initial/RT_step)+1,2)
-     !
      Efield(i1)%t_initial=real(Efield(i1)%t_initial_indx-1)*RT_step
      !
      call Check_symmetries(Efield(i1)%versor,"electric")
@@ -259,16 +282,21 @@ subroutine RT_initialize(E,k,q)
      if ( field_defs(1)=='DELTA' ) then
        Efield(i1)%t_final_indx=Efield(i1)%t_initial_indx+2
        Efield(i1)%t_final=RT_step*(Efield(i1)%t_final_indx-1)
-     else if ( (field_defs(1)=='QSSIN') .or. (field_defs(1)=='QSFIEL') .or. &
-     &         (field_defs(1)=='GAUSS') .or.                                     &
+     else if ( (field_defs(1)=='QSSIN') .or. (field_defs(1)=='QSFIELD') .or. &
+     &         (field_defs(1)=='GAUSS') .or.                                 &
+     &         (field_defs(1)=='RECT')  .or. (field_defs(1)=='RECTSIN') .or. &
      &         (field_defs(1)=='PULSE') .or. (field_defs(1)=='SPULSE')      ) then
+       ! This is a first evaluation of the total fluence
+       ! The time dependent fluence is evaluated during the dynamics
        call RT_Fluence_and_PI_kind(E,i1)
+     else if ( field_defs(1)=='FROM_FILE') then
+       call RT_load_field_from_file(field_defs(2),field_defs(3),i1)
      else
        Efield(i1)%t_final_indx=    NE_steps
        Efield(i1)%t_final=RT_step*(NE_steps-1)
      endif
      !
-   endif
+   NE_i_last_field=max(NE_i_last_field,Efield(i1)%t_final_indx)
    !
  enddo
  !
@@ -276,7 +304,7 @@ subroutine RT_initialize(E,k,q)
  !
  ! Check on the PROBE/PUMP frequencies
  !====================================
- call RT_Field_Commensurable_Frequencies()
+! call RT_Field_Commensurable_Frequencies()
  !
  if ( .not. l_NE_dynamics ) call error('No external fields nor manual pumping provided')
  !
@@ -441,27 +469,25 @@ subroutine RT_initialize(E,k,q)
    call msg( 'nr','---Fields---')
    call msg('r','Gauge                               ',trim(global_gauge))
    !
-   if (any(Efield(:)%W_step>0)) then
-     call msg('r','Pump/Probe periodicity',EtoT(E=maxval(Efield(:)%W_step))*AUT2FS,'[fs]')
-     call msg('r','Pump/Probe energy step',maxval(Efield(:)%W_step)*HA2EV,'[eV]')
-   endif
-   !
    do i1=1,n_ext_fields
      MSG_ch="Field#"//trim(intc(i1))
      call msg('r', trim(MSG_ch)//' field                       ',trim(Efield(i1)%ef_name))
      call msg('r', trim(MSG_ch)//' polarization                ',trim(Efield(i1)%ef_pol))
-     call msg('r', trim(MSG_ch)//' frequency range             ',(/Efield(i1)%frequency*HA2EV,&
+     call msg('r', trim(MSG_ch)//' frequency                   ',(/Efield(i1)%frequency*HA2EV,&
 &                  2._SP*pi*HBAR_evfs/(Efield(i1)%frequency*HA2EV)/),'[eV/fs]')
-     call msg('r', trim(MSG_ch)//' elemental oscillation       ',EtoT(E=Efield(i1)%frequency(2)-&
-&                                                                       Efield(i1)%frequency(1))*AUT2FS,'[fs]')
-     call msg('r', trim(MSG_ch)//' width                       ',Efield(i1)%width*AUT2FS,'[fs]')
+     sigma_eff=sqrt((Efield(i1)%width**4+Efield(i1)%chirp**4)/Efield(i1)%width**2)
+     call msg('r', trim(MSG_ch)//' Fourier limited width       ',Efield(i1)%width*AUT2FS,'[fs]')
+     call msg('r', trim(MSG_ch)//' Real time  width            ',sigma_eff*AUT2FS,'[fs]')
      call msg('r', trim(MSG_ch)//' start time                  ',Efield(i1)%t_initial*AUT2FS,'[fs]')
-     call msg('r', trim(MSG_ch)//' energy steps                ',Efield(i1)%n_frequencies)
      call msg('r', trim(MSG_ch)//' electric field              ',Efield(i1)%amplitude*AU2VMm1,'[V/m]')
      call msg('r', trim(MSG_ch)//' max intensity               ',Efield(i1)%intensity*AU2KWCMm2,'[kW/cm^2]')
      !
+     U_pond=Efield(i1)%amplitude**2/4._SP/Efield(i1)%frequency**2
+     call msg('r', trim(MSG_ch)//' ponderomotive energy        ',U_pond*HA2EV,'[eV/mu]')
+     call msg('r', trim(MSG_ch)//' Adiabatic parameter         ',U_pond/Efield(i1)%frequency,'[1/mu]')
+     !
      if( (field_defs(1)=='QSSIN') .or. (field_defs(1)=='GAUSS') .or. (field_defs(1)=='DELTA') ) then
-       call msg('r', trim(MSG_ch)//' max fluence                 ',Efield(i1)%fluence,'[nJ/cm^2]')
+       call msg('r', trim(MSG_ch)//' max fluence                 ',Efield(i1)%fluence*AU2KWCMm2,'[nJ/cm^2]')
        call msg('r', trim(MSG_ch)//' area at selected Rabi trans.',Efield(i1)%pi_kind )
        call msg('r', trim(MSG_ch)//' final time                  ',Efield(i1)%t_final*AUT2FS,'[fs]')
        call msg('r', trim(MSG_ch)//' half maximum full width     ',Efield(i1)%width*2.*sqrt(2.*log(2.))*AUT2FS,'[fs]')
@@ -479,7 +505,7 @@ subroutine RT_adjust_equilibrium()
      !
      use electrons,          ONLY:spin_occ,n_sp_pol
      !
-     integer :: ik,ik_RT_DbGd,ik_E_fg,ib
+     integer :: i_sp_pol,ik,ik_RT_DbGd,ik_E_fg,ib
      !
      if(.not.l_RT_eq_is_zero_temp) return
      !
@@ -502,30 +528,32 @@ subroutine RT_adjust_equilibrium()
      ! In case RT_apply has been used the reference is
      ! defined from the %fo occupations
      !
-     do ib=RT_bands(1),RT_bands(2)
-       do ik=1,k%nibz
-         do ik_RT_DbGd=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
+     do i_sp_pol=1,n_sp_pol
+     do ik=1,k%nibz
+       do ik_RT_DbGd=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
+         do ib=RT_bands(1),RT_bands(2)
            !
            if (l_RT_uses_E_FineGd) then
              ik_E_fg=RTibz%E_map(ik_RT_DbGd)
-             if(E%FG%E(ib,ik_E_fg,1)<=E%E_Fermi) then
-               E%FG%df(ib,ik_E_fg,1)=(E%FG%f(ib,ik_E_fg,1)-spin_occ)
+             if(E%FG%E(ib,ik_E_fg,i_sp_pol)<=E%E_Fermi) then
+               E%FG%df(ib,ik_E_fg,i_sp_pol)=(E%FG%f(ib,ik_E_fg,i_sp_pol)-spin_occ)
              else
-               E%FG%df(ib,ik_E_fg,1)= E%FG%f(ib,ik_E_fg,1)
+               E%FG%df(ib,ik_E_fg,i_sp_pol)= E%FG%f(ib,ik_E_fg,i_sp_pol)
              endif
            else
-             if(E%E(ib,ik,1)<=E%E_Fermi) then
-               E%df(ib,ik,1)=(E%f(ib,ik,1)-spin_occ)
-               E%fo(ib,ik,1)= spin_occ
+             if(E%E(ib,ik,i_sp_pol)<=E%E_Fermi) then
+               E%df(ib,ik,i_sp_pol)=(E%f(ib,ik,i_sp_pol)-spin_occ)
+               E%fo(ib,ik,i_sp_pol)= spin_occ
              else
-               E%df(ib,ik,1)= E%f(ib,ik,1)
-               E%fo(ib,ik,1)= 0._SP
+               E%df(ib,ik,i_sp_pol)= E%f(ib,ik,i_sp_pol)
+               E%fo(ib,ik,i_sp_pol)= 0._SP
              endif
            endif
            !
          enddo
        enddo
      enddo
+     enddo
      !
    end subroutine RT_adjust_equilibrium
    !
diff --git a/src/real_time_initialize/RT_occupations_and_levels_init.F b/src/real_time_initialize/RT_occupations_and_levels_init.F
index 698ff51556..a8a4c9492d 100644
--- a/src/real_time_initialize/RT_occupations_and_levels_init.F
+++ b/src/real_time_initialize/RT_occupations_and_levels_init.F
@@ -95,8 +95,8 @@ subroutine RT_occupations_and_levels_init(bands,E,k,q,k_FG,E_FG)
        k_FG%pt(ik_RT,:)       = kpt
        !
        ! .. occ
-       RT_el_occ%N_ref(ib,ik_RT) = f_occ_ref(1)
-       RT_ho_occ%N_ref(ib,ik_RT) = spin_occ-f_occ_ref(1)
+       RT_el_occ%N_ref(ib,ik_RT,:) = f_occ_ref(:)
+       RT_ho_occ%N_ref(ib,ik_RT,:) = spin_occ-f_occ_ref(:)
        !
        if (l_RT_relax_time_approx) then
          RT_RTA_E_occ_infty(ib,ik_FG)= Fermi_fnc(  Energy(1)-E%E_CBm(1)-RT_RTA_chem(1) ,RT_RTA_temp(1))*spin_occ
@@ -107,13 +107,13 @@ subroutine RT_occupations_and_levels_init(bands,E,k,q,k_FG,E_FG)
        if (.not.PAR_IND_Xk_ibz%element_1D(ik) ) cycle
 #endif
        !
-       RT_el_occ%dN(ib,ik_RT)   = f_occ(1)-f_occ_ref(1)
-       RT_el_occ%N(ib,ik_RT)    = f_occ(1)
-       RT_ho_occ%N(ib,ik_RT)    = spin_occ-f_occ(1)
+       RT_el_occ%dN(ib,ik_RT,:)   = f_occ(:)-f_occ_ref(:)
+       RT_el_occ%N(ib,ik_RT,:)    = f_occ(:)
+       RT_ho_occ%N(ib,ik_RT,:)    = spin_occ-f_occ(:)
        !
        if (.not.RT_is_dynamically_dephased) cycle
        !
-       RT_life_occ%dN(ib,ik_RT) = f_occ(1)-f_occ_ref(1)
+       RT_life_occ%dN(ib,ik_RT,:) = f_occ(:)-f_occ_ref(:)
        !
      enddo
    enddo
diff --git a/src/real_time_initialize/RT_occupations_update.F b/src/real_time_initialize/RT_occupations_update.F
index 6c1ceb6304..110df9bb56 100644
--- a/src/real_time_initialize/RT_occupations_update.F
+++ b/src/real_time_initialize/RT_occupations_update.F
@@ -11,14 +11,14 @@ subroutine RT_occupations_update( )
  use parallel_int,   ONLY:PP_redux_wait
  use real_time,      ONLY:dG_lesser,G_lesser_reference,RT_bands,RT_is_dynamically_dephased,RTibz
  use RT_occupations, ONLY:RT_el_occ,RT_ho_occ,RT_life_occ,RT_occupation_clean
- use electrons,      ONLY:spin_occ
+ use electrons,      ONLY:spin_occ,n_sp_pol
  use R_lattice,      ONLY:nkibz
  !
  implicit none
  !
  ! Work Space
  !
- integer :: ib,ik,ik_RT
+ integer :: ib,ik,ik_RT,i_sp
  !
  ! El/Ho Occupations are defined from G_lesser and G_lesser_reference
  !===================
@@ -27,25 +27,27 @@ subroutine RT_occupations_update( )
  call RT_occupation_clean(RT_life_occ)
  !
  do ib=RT_bands(1),RT_bands(2)
+   do i_sp=1,n_sp_pol
    do ik=1,nkibz
      do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
        !
-       RT_el_occ%N_ref(ib,ik_RT) =          aimag(G_lesser_reference(ib,ib,ik_RT))
-       RT_ho_occ%N_ref(ib,ik_RT) = spin_occ-aimag(G_lesser_reference(ib,ib,ik_RT))
+       RT_el_occ%N_ref(ib,ik_RT,i_sp) =          aimag(G_lesser_reference(ib,ib,ik_RT,i_sp))
+       RT_ho_occ%N_ref(ib,ik_RT,i_sp) = spin_occ-aimag(G_lesser_reference(ib,ib,ik_RT,i_sp))
        !
        if (.not.PAR_IND_Xk_ibz%element_1D(ik) ) cycle
        !
-       RT_el_occ%dN(ib,ik_RT)= aimag(dG_lesser(ib,ib,ik_RT,1))
+       RT_el_occ%dN(ib,ik_RT,i_sp)= aimag(dG_lesser(ib,ib,ik_RT,i_sp,1))
        !
-       RT_el_occ%N(ib,ik_RT) = RT_el_occ%N_ref(ib,ik_RT)+RT_el_occ%dN(ib,ik_RT)
-       RT_ho_occ%N(ib,ik_RT) = RT_ho_occ%N_ref(ib,ik_RT)-RT_el_occ%dN(ib,ik_RT)
+       RT_el_occ%N(ib,ik_RT,i_sp) = RT_el_occ%N_ref(ib,ik_RT,i_sp)+RT_el_occ%dN(ib,ik_RT,i_sp)
+       RT_ho_occ%N(ib,ik_RT,i_sp) = RT_ho_occ%N_ref(ib,ik_RT,i_sp)-RT_el_occ%dN(ib,ik_RT,i_sp)
        !
        if (.not.RT_is_dynamically_dephased) cycle
        !
-       RT_life_occ%dN(ib,ik_RT) = RT_el_occ%dN(ib,ik_RT)
+       RT_life_occ%dN(ib,ik_RT,i_sp) = RT_el_occ%dN(ib,ik_RT,i_sp)
        !
      enddo
    enddo
+   enddo
  enddo
  !
  call PP_redux_wait(RT_el_occ%N,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
diff --git a/src/real_time_initialize/RT_project.dep b/src/real_time_initialize/RT_project.dep
index 7d13d8b4e4..6ff0d7b761 100644
--- a/src/real_time_initialize/RT_project.dep
+++ b/src/real_time_initialize/RT_project.dep
@@ -1,5 +1,4 @@
  RT_Dephasing_Matrix.o
- RT_Field_Commensurable_Frequencies.o
  RT_G_lesser_init.o
  RT_initialize.o
  RT_occupations_and_levels_init.o
diff --git a/src/real_time_initialize/RT_start_and_restart.F b/src/real_time_initialize/RT_start_and_restart.F
index e9229f2ad2..251b04f884 100644
--- a/src/real_time_initialize/RT_start_and_restart.F
+++ b/src/real_time_initialize/RT_start_and_restart.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_start_and_restart(E,k,q)
  !
  ! This subroutine initialize some variables and restart from SC
@@ -50,8 +54,9 @@ subroutine RT_start_and_restart(E,k,q)
  use parallel_m,     ONLY:PAR_G_k_range,PAR_IND_Xk_ibz
  use parallel_int,   ONLY:PP_wait,PP_redux_wait
  use RT_output_m,    ONLY:RT_desc,TIME_STEP_desc,Gr_desc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)      :: E
  type(bz_samp)     :: k,q
@@ -74,24 +79,26 @@ subroutine RT_start_and_restart(E,k,q)
  ! The Equilibrium Hamiltonian
  !=========================
  H_EQ=cZERO
+ do i_sp_pol=1,n_sp_pol
  do ik=1,nXkibz
    if (.not.PAR_IND_Xk_ibz%element_1D(ik)) cycle
    do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
      do ib=RT_bands(1),RT_bands(2)
        if (ik_RT/=RTibz%k_range(ik,1)) then
          iE_fg=RTibz%E_map(ik_RT)
-         H_EQ(ib,ib,ik_RT,1)=E_reference%FG%E(ib,iE_fg,1)
+         H_EQ(ib,ib,ik_RT,i_sp_pol)=E_reference%FG%E(ib,iE_fg,i_sp_pol)
        else
-         H_EQ(ib,ib,ik_RT,1)=E_reference%E(ib,ik,1)
+         H_EQ(ib,ib,ik_RT,i_sp_pol)=E_reference%E(ib,ik,i_sp_pol)
        endif
      enddo
    enddo
  enddo
+ enddo
  !
  if(.not.l_RT_is_WFfree) then
    !
-   call              el_density_matrix(G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2)),E,k,rho_reference,1)
-   if(n_spin>1) call el_magnetization_matrix(G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2)),E,k,magn_reference,1)
+   call el_density_matrix(G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2),:),E,k,rho_reference,1)
+   if(n_spin>1) call el_magnetization_matrix(G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2),:),E,k,magn_reference,1)
    !
    call Bare_Hamiltonian(E,k,k)
    !
@@ -119,7 +126,7 @@ subroutine RT_start_and_restart(E,k,q)
  ! Perform a first I/O to check which DBs are present.
  !=====================================================
  RESTART=.FALSE.
- do i_db=1,N_RT_databases
+ do i_db=1,N_RT_databases-2
    COM_MODE=NONE
    if (i_db==RT_return_db_ID("ANY_G")) COM_MODE=REP
    call io_control(ACTION=OP_RD_CL,COM=COM_MODE,SEC=(/1/),MODE=VERIFY,ID=RT_DB_ID(i_db))
@@ -171,7 +178,7 @@ subroutine RT_start_and_restart(E,k,q)
    !
    !AMBO_FREE(RT_TIME_status)
    !
-   ! I use G_lesser(:,:,:) to reconstruct everything
+   ! I use G_lesser to reconstruct everything
    ! Note that the databases must be loaded at the RESTART time (NE_time)
    !
    i_db=RT_return_db_ID("G_lesser_RESTART_K_section")
@@ -190,7 +197,7 @@ subroutine RT_start_and_restart(E,k,q)
    ! The G_lesser must be now rebuild from the reference (T=0) and latest dG components
    !
    do i_mem=1,G_MEM_steps
-     G_lesser(:,:,:,i_mem)=G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2))+dG_lesser(:,:,:,i_mem)
+     G_lesser(:,:,:,:,i_mem)=G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2),:)+dG_lesser(:,:,:,:,i_mem)
    enddo
    !
 #if defined _ELPH_ITERATIVE
@@ -250,7 +257,7 @@ subroutine RT_start_and_restart(E,k,q)
  ! Initialize the Vector Potential
  !=================================
  !
- if (l_NE_with_fields) call RT_propagate_fields(E,k,A_tot,A_tot,dG_lesser(:,:,:,1),NE_time-RT_step,RT_step)
+ if (l_NE_with_fields) call RT_propagate_fields(E,k,A_tot,A_tot,dG_lesser(:,:,:,:,1),NE_time-RT_step,RT_step)
  !
  ! Pseudo potential terms needed for velocity gauge
  !==================================================
diff --git a/src/real_time_propagation/RT_Dephasing_step.F b/src/real_time_propagation/RT_Dephasing_step.F
index 6e105c86f2..0c6643005e 100644
--- a/src/real_time_propagation/RT_Dephasing_step.F
+++ b/src/real_time_propagation/RT_Dephasing_step.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
-subroutine RT_Dephasing_step(dG_out,dG_in,ik,deltaT)
+subroutine RT_Dephasing_step(dG_out,dG_in,ik,i_sp,deltaT)
  !
  use pars,           ONLY:SP
  use real_time,      ONLY:RT_is_statically_dephased,RT_Deph_Matrix,RT_deph_range,&
@@ -13,10 +13,10 @@ subroutine RT_Dephasing_step(dG_out,dG_in,ik,deltaT)
  !
  implicit none
  !
- complex(SP), intent(out) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
- complex(SP), intent(in)  :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
- integer,     intent(in)  :: ik
- real(SP),    intent(in)  :: deltaT
+ complex(SP), intent(inout) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP), intent(in)    :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ integer,     intent(in)    :: ik,i_sp
+ real(SP),    intent(in)    :: deltaT
  !
  ! Note that dG_in is ONLY the coherent part of the equation. It must be added
  ! to dG_out if we are not using the EXP integrator. This is done here or in RT_apply_RWA
diff --git a/src/real_time_propagation/RT_EULER_step.F b/src/real_time_propagation/RT_EULER_step.F
index 64d7e91957..a07cb3d47c 100644
--- a/src/real_time_propagation/RT_EULER_step.F
+++ b/src/real_time_propagation/RT_EULER_step.F
@@ -5,68 +5,137 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
-subroutine RT_EULER_step(dG_in,dG_out,ik,nbf,deltaT)
+subroutine RT_EULER_step(dG_in,dG_out,ik,i_sp_pol,nbf,dT)
  !
  use pars,           ONLY:SP,cZERO,cI
- use real_time,      ONLY:l_RT_PERT
- use wrapper_omp,    ONLY:M_by_M_omp
- use real_time,      ONLY:Ho_plus_Sigma,RT_bands,RT_nbands, &
-&                         G_lesser_reference
+ use real_time,      ONLY:l_RT_PERT,l_RT_FRES,l_RT_RWA,&
+ &                        H_EQ,H_field,Ho_plus_Sigma,  &
+ &                        G_lesser_reference,RT_bands
  !
  implicit none
  !
- integer,     intent(in)  :: ik,nbf
- real(SP),    intent(in)  :: deltaT
+ integer,     intent(in)  :: ik,i_sp_pol,nbf(2)
+ real(SP),    intent(in)  :: dT
  complex(SP), intent(in)  :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(out) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  !
  ! Workspace
  !
- complex(SP)  :: I1_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
- complex(SP)  :: I2_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP)  :: G_tmp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP)  :: G_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP)  :: H_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  !
  integer      :: ib1,ib2
  !
- ! I1 = Ho_plus_Sigma*G   
- ! I2 = G*Ho_plus_Sigma
+ if (.not. l_RT_pert) then
+   !
+   G_k=dG_in+G_lesser_reference(:,:,ik,i_sp_pol)
+   H_k=Ho_plus_Sigma(:,:,ik,i_sp_pol)
+   call RT_EULER_step_apply(G_k,H_k,dG_out,nbf(i_sp_pol),dT,"def")
+   !
+   return
+   !
+ endif
  !
- if (.not.l_RT_PERT) G_k=dG_in+G_lesser_reference(:,:,ik)
- if (     l_RT_PERT) G_k=      G_lesser_reference(:,:,ik)
+ ! Part of the hamiltonina multiplied by rho0+rho1
  !
- call M_by_M_omp('n','n',RT_nbands,Ho_plus_Sigma(:,:,ik,1),G_k,I1_k)
- call M_by_M_omp('n','n',RT_nbands,G_k,Ho_plus_Sigma(:,:,ik,1),I2_k)
+ H_k=Ho_plus_Sigma(:,:,ik,i_sp_pol)
+ if(.not.l_RT_RWA) H_k=H_k-H_EQ(:,:,ik,i_sp_pol)
+ if(  l_RT_FRES  ) H_k=H_k-H_field(:,:,ik,i_sp_pol)
  !
- ! dG_out = G(T0+dt) -G(T0)= -i dt * ( I1(T0)-I2(T0) )
+ ! First the rho0 term. This gives rho1.
+ !
+ G_k=G_lesser_reference(:,:,ik,i_sp_pol)
+ !
+ call RT_EULER_step_apply(G_k,H_k,dG_out,nbf(i_sp_pol),dT,"def")
  !
- dG_out=-cI*deltaT*(I1_k-I2_k)
+ ! Then the rho1 term. This gives rho2.
  !
- if (.not.l_RT_PERT) return
+ ! The first order rho is zero in the vv' and cc' channel
+ ! Thus I compute the second order rho, constructed using the 
+ ! first order rho from the cv channel
  !
- I1_k=cZERO
- I2_k=cZERO
+ ! N.B. In the case l_RT_FRES the above sentence is not true anymore
+ !      However I still want only the cv channel to enter here
+ !      to avoid energy shifts due to repulsion
+ !
+ ! Take in input only the cv channel
  !
  G_k=cZERO
- do ib1=RT_bands(1),nbf
-   do ib2=nbf+1,RT_bands(2)
+ do ib1=RT_bands(1),nbf(i_sp_pol)
+   do ib2=nbf(i_sp_pol)+1,RT_bands(2)
      G_k(ib1,ib2)=dG_in(ib1,ib2)
      G_k(ib2,ib1)=dG_in(ib2,ib1)
    enddo
  enddo
  !
- call M_by_M_omp('n','n',RT_nbands,Ho_plus_Sigma(:,:,ik,1),G_k,I1_k)
- call M_by_M_omp('n','n',RT_nbands,G_k,Ho_plus_Sigma(:,:,ik,1),I2_k)
+ call RT_EULER_step_apply(G_k,H_k,dG_out,nbf(i_sp_pol),dT,"pop")
  !
- do ib1=RT_bands(1),nbf
-   do ib2=RT_bands(1),nbf
-     dG_out(ib1,ib2)=-cI*deltaT*(I1_k(ib1,ib2)-I2_k(ib1,ib2))
-   enddo
- enddo
+ if( l_RT_RWA .and. (.not.l_RT_FRES) ) return
  !
- do ib1=nbf+1,RT_bands(2)
-   do ib2=nbf+1,RT_bands(2)
-     dG_out(ib1,ib2)=-cI*deltaT*(I1_k(ib1,ib2)-I2_k(ib1,ib2))
-   enddo
- enddo
+ ! Part of the hamiltonian multiplied by the full rho
+ !
+ H_k=cZERO
+ if(.not.l_RT_RWA) H_k=H_EQ(:,:,ik,i_sp_pol)
+ if(  l_RT_FRES  ) H_k=H_k+H_field(:,:,ik,i_sp_pol)
+ G_k=dG_in+G_lesser_reference(:,:,ik,i_sp_pol)
+ !
+ call RT_EULER_step_apply(G_k,H_k,dG_out,nbf(i_sp_pol),dT,"add")
  !
 end subroutine RT_EULER_step
+!
+!
+subroutine RT_EULER_step_apply(G_k,H_k,dG_out,nbf,deltaT,mode)
+ !
+ use pars,           ONLY:SP,cI
+ use wrapper_omp,    ONLY:M_by_M_omp
+ use real_time,      ONLY:RT_bands,RT_nbands
+ !
+ implicit none
+ !
+ character(*),intent(in)  :: mode
+ integer,     intent(in)  :: nbf
+ real(SP),    intent(in)  :: deltaT
+ complex(SP), intent(in)  :: G_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP), intent(in)  :: H_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP), intent(out) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ !
+ ! Workspace
+ !
+ complex(SP)  :: I1_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ complex(SP)  :: I2_k(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
+ !
+ integer      :: ib1,ib2
+ !
+ ! I1 = Ho_plus_Sigma*G   
+ ! I2 = G*Ho_plus_Sigma
+ !
+ call M_by_M_omp('n','n',RT_nbands,H_k,G_k,I1_k)
+ call M_by_M_omp('n','n',RT_nbands,G_k,H_k,I2_k)
+ !
+ ! dG_out = G(T0+dt) -G(T0)= -i dt * ( I1(T0)-I2(T0) )
+ !
+ if(mode=="def") dG_out=-cI*deltaT*(I1_k-I2_k)
+ !
+ if(mode=="add") dG_out=dG_out-cI*deltaT*(I1_k-I2_k)
+ !
+ if(mode=="pop") then
+   !
+   ! Save in output only the vv' and cc' channels
+   ! This will control the normalization of the residuals
+   !
+   do ib1=RT_bands(1),nbf
+     do ib2=RT_bands(1),nbf
+       dG_out(ib1,ib2)=dG_out(ib1,ib2)-cI*deltaT*(I1_k(ib1,ib2)-I2_k(ib1,ib2))
+     enddo
+   enddo
+   !
+   do ib1=nbf+1,RT_bands(2)
+     do ib2=nbf+1,RT_bands(2)
+       dG_out(ib1,ib2)=dG_out(ib1,ib2)-cI*deltaT*(I1_k(ib1,ib2)-I2_k(ib1,ib2))
+     enddo
+   enddo
+   !
+ endif
+ !
+end subroutine RT_EULER_step_apply
diff --git a/src/real_time_propagation/RT_EXP_step_accurate.F b/src/real_time_propagation/RT_EXP_step_accurate.F
index 0ce62016b9..114abb2244 100644
--- a/src/real_time_propagation/RT_EXP_step_accurate.F
+++ b/src/real_time_propagation/RT_EXP_step_accurate.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
-subroutine RT_EXP_step_accurate(dG_old,dG_inp,dG_out,ik,deltaT)
+subroutine RT_EXP_step_accurate(dG_old,dG_inp,dG_out,ik,i_sp_pol,deltaT)
  !
  ! The EXP integrator is better than the EULER integrator if Ho_plus_Sigma
  ! changes in time more slowly that G
@@ -19,7 +19,7 @@ subroutine RT_EXP_step_accurate(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  implicit none
  !
- integer,     intent(in)  :: ik
+ integer,     intent(in)  :: ik,i_sp_pol
  real(SP),    intent(in)  :: deltaT
  complex(SP), intent(in)  :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(in)  :: dG_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
@@ -34,12 +34,12 @@ subroutine RT_EXP_step_accurate(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  dG_out=cZERO
  !
- G_comm=dG_inp+G_lesser_reference(:,:,ik)
+ G_comm=dG_inp+G_lesser_reference(:,:,ik,i_sp_pol)
  !
  do i_order=1,Integrator_exp_order
    !
-   call M_by_M_omp('n','n',RT_nbands,-cI*deltaT*Ho_plus_Sigma(:,:,ik,1)/real(i_order,SP),G_comm,U1_k)
-   call M_by_M_omp('n','n',RT_nbands,G_comm,+cI*deltaT*Ho_plus_Sigma(:,:,ik,1)/real(i_order,SP),U2_k)
+   call M_by_M_omp('n','n',RT_nbands,-cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)/real(i_order,SP),G_comm,U1_k)
+   call M_by_M_omp('n','n',RT_nbands,G_comm,+cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)/real(i_order,SP),U2_k)
    !
    ! dG_out = G(T0+dt) -G(T0)=  U1(T0)+U2(T0) 
    !
diff --git a/src/real_time_propagation/RT_EXP_step_std.F b/src/real_time_propagation/RT_EXP_step_std.F
index 5fe1bbd3d4..286b02cd5a 100644
--- a/src/real_time_propagation/RT_EXP_step_std.F
+++ b/src/real_time_propagation/RT_EXP_step_std.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
-subroutine RT_EXP_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
+subroutine RT_EXP_step_std(dG_old,dG_inp,dG_out,ik,i_sp_pol,deltaT)
  !
  ! The EXP integrator is better than the EULER integrator if Ho_plus_Sigma
  ! changes in time more slowly that G
@@ -17,7 +17,7 @@ subroutine RT_EXP_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  implicit none
  !
- integer,     intent(in)  :: ik
+ integer,     intent(in)  :: ik,i_sp_pol
  real(SP),    intent(in)  :: deltaT
  complex(SP), intent(in)  :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(in)  :: dG_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
@@ -47,8 +47,8 @@ subroutine RT_EXP_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
  ! U1 = exp ( -i * Ho_plus_Sigma * dt)
  ! U2 = exp ( +i + Ho_plus_Sigma * dt)
  !
- U1_km1=-cI*deltaT*Ho_plus_Sigma(:,:,ik,1)
- U2_km1=+cI*deltaT*Ho_plus_Sigma(:,:,ik,1)
+ U1_km1=-cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
+ U2_km1=+cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !
  if(integrator_exp_order>1) then
    M1_tmp=U2_km1
@@ -82,11 +82,11 @@ subroutine RT_EXP_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
    call M_by_M_omp('n','n',RT_nbands,U1_k,dG_inp,M1_tmp)
    call M_by_M_omp('n','n',RT_nbands,M1_tmp,U2_k,dG_out)
    !
-   call M_by_M_omp('n','n',RT_nbands,U1_km1,G_lesser_reference(:,:,ik),M1_tmp)
-   call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik),U2_km1,M2_tmp)
+   call M_by_M_omp('n','n',RT_nbands,U1_km1,G_lesser_reference(:,:,ik,i_sp_pol),M1_tmp)
+   call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik,i_sp_pol),U2_km1,M2_tmp)
    dG_out=dG_out+(M1_tmp+M2_tmp)
    !
-   call M_by_M_omp('n','n',RT_nbands,U1_km1,G_lesser_reference(:,:,ik),M2_tmp)
+   call M_by_M_omp('n','n',RT_nbands,U1_km1,G_lesser_reference(:,:,ik,i_sp_pol),M2_tmp)
    call M_by_M_omp('n','n',RT_nbands,M2_tmp,U2_km1,M1_tmp)
    dG_out=dG_out+M1_tmp-dG_inp
    !
@@ -94,9 +94,9 @@ subroutine RT_EXP_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
    !
    ! dG_out = G(T0+dt) -G(T0) = U1(T;dt) G(T0) U2(T;-dt) - G(T0)
    !
-   call M_by_M_omp('n','n',RT_nbands,U1_k,dG_inp+G_lesser_reference(:,:,ik),M1_tmp)
+   call M_by_M_omp('n','n',RT_nbands,U1_k,dG_inp+G_lesser_reference(:,:,ik,i_sp_pol),M1_tmp)
    call M_by_M_omp('n','n',RT_nbands,M1_tmp,U2_k,dG_out)
-   dG_out=dG_out-dG_inp-G_lesser_reference(:,:,ik)
+   dG_out=dG_out-dG_inp-G_lesser_reference(:,:,ik,i_sp_pol)
    !
  endif
  !
diff --git a/src/real_time_propagation/RT_Ext_fields.F b/src/real_time_propagation/RT_Ext_fields.F
index ed92e83437..00050a39db 100644
--- a/src/real_time_propagation/RT_Ext_fields.F
+++ b/src/real_time_propagation/RT_Ext_fields.F
@@ -26,10 +26,12 @@ subroutine RT_Ext_fields(A,time,i_f)
  !    the induced current is included in the dynamics ( RT_propagate_fields )
  !
  use pars,           ONLY:SP
- use units,          ONLY:SPEED_OF_LIGHT
+ use units,          ONLY:SPEED_OF_LIGHT,FS2AUT
  use real_time,      ONLY:RT_dyn_step,NE_i_time
  use functions,      ONLY:theta_function
- use fields,         ONLY:Efield,small_a,gauge_field,compute_intensity,compute_envelop,A_vecpot_reset
+ use fields_int,     ONLY:small_a
+ use fields,         ONLY:Efield,gauge_field,A_vecpot_reset,&
+&                         compute_envelop,compute_intensity
  !
  implicit none
  !
@@ -45,7 +47,8 @@ subroutine RT_Ext_fields(A,time,i_f)
  !
  Field_time=time-Efield(i_f)%t_initial
  !
- if (Efield(i_f)%ef_name=="none" .or. Efield(i_f)%t_initial_indx>(NE_i_time+2) ) then
+ if (Efield(i_f)%ef_name=="none" .or. NE_i_time<Efield(i_f)%t_initial_indx-2 .or. &
+&    (index(Efield(i_f)%ef_name,"FROM_FILE")>0 .and. NE_i_time>Efield(i_f)%t_final_indx+2) ) then
    call A_vecpot_reset(A)
    return
  endif
@@ -56,9 +59,9 @@ subroutine RT_Ext_fields(A,time,i_f)
  !
  do ikind=1,2
    !
-   A_of_t=small_a(Field_time,Efield(i_f),0,envelop_only=(ikind==1))
-   E_of_t=small_a(Field_time,Efield(i_f),1,envelop_only=(ikind==1))
-   J_of_t=small_a(Field_time,Efield(i_f),2,envelop_only=(ikind==1))
+   A_of_t=small_a(Field_time,RT_dyn_step,Efield(i_f),0,envelop_only=(ikind==1))
+   E_of_t=small_a(Field_time,RT_dyn_step,Efield(i_f),1,envelop_only=(ikind==1))
+   J_of_t=small_a(Field_time,RT_dyn_step,Efield(i_f),2,envelop_only=(ikind==1))
    !
    select case(Efield(i_f)%ef_pol)
    case("linear")
diff --git a/src/real_time_propagation/RT_Glesser_evolve.F b/src/real_time_propagation/RT_Glesser_evolve.F
index c25d78ec21..ab7d0343a5 100644
--- a/src/real_time_propagation/RT_Glesser_evolve.F
+++ b/src/real_time_propagation/RT_Glesser_evolve.F
@@ -14,7 +14,7 @@ subroutine RT_Glesser_evolve(En,kpt,dG_old,dG_in,dG_out,dT,dTp)
  !  T0+dt = T+dt'
  !
  use pars,           ONLY:SP,cZERO
- use electrons,      ONLY:levels,spin_occ
+ use electrons,      ONLY:levels,spin_occ,n_sp_pol
  use R_lattice,      ONLY:bz_samp
  use parallel_m,     ONLY:PAR_IND_Xk_ibz,PAR_G_k_range
  use real_time,      ONLY:l_RT_EULER,l_RT_EXP,l_RT_INV, &
@@ -27,31 +27,32 @@ subroutine RT_Glesser_evolve(En,kpt,dG_old,dG_in,dG_out,dT,dTp)
  type(levels),   intent(in) :: En
  !
  real(SP),       intent(in) :: dT,dTp
- complex(SP),    intent(in) :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
- complex(SP),    intent(in) :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
- complex(SP),   intent(out) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP),    intent(in) :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ complex(SP),    intent(in) :: dG_in(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ complex(SP),   intent(out) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  !
  ! Work space
  !
  logical :: l_RT_STD
- integer :: ik
+ integer :: ik,i_sp
  !
  dG_out=cZERO
  !
  l_RT_STD=.not.(l_RT_ACC.or.l_RT_DIAG)
  !
+ do i_sp=1,n_sp_pol
  do ik=1,RT_nk
    !
    if( .not.PAR_IND_Xk_ibz%element_1D(RTibz%k_map(ik)) ) cycle
    !
-   if (l_RT_EXP.and.l_RT_ACC ) call RT_EXP_step_accurate(dG_old(:,:,ik),dG_in(:,:,ik),dG_out(:,:,ik),ik,dT)
-   if (l_RT_EXP.and.l_RT_STD ) call RT_EXP_step_std(dG_old(:,:,ik),dG_in(:,:,ik),dG_out(:,:,ik),ik,dT)
+   if (l_RT_EXP.and.l_RT_ACC ) call RT_EXP_step_accurate(dG_old(:,:,ik,i_sp),dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,dT)
+   if (l_RT_EXP.and.l_RT_STD ) call RT_EXP_step_std(dG_old(:,:,ik,i_sp),dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,dT)
    !
-   if (l_RT_EULER            ) call RT_EULER_step(dG_in(:,:,ik),dG_out(:,:,ik),ik,En%nbf,dT)
+   if (l_RT_EULER            ) call RT_EULER_step(dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,En%nbf,dT)
    !
-   if (l_RT_INV.and.l_RT_ACC ) call RT_INV_step_accurate(dG_old(:,:,ik),dG_in(:,:,ik),dG_out(:,:,ik),ik,dT)
-   if (l_RT_INV.and.l_RT_DIAG) call RT_INV_step_diago(dG_old(:,:,ik),dG_in(:,:,ik),dG_out(:,:,ik),ik,dT)
-   if (l_RT_INV.and.l_RT_STD ) call RT_INV_step_std(dG_old(:,:,ik),dG_in(:,:,ik),dG_out(:,:,ik),ik,dT)
+   if (l_RT_INV.and.l_RT_ACC ) call RT_INV_step_accurate(dG_old(:,:,ik,i_sp),dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,dT)
+   if (l_RT_INV.and.l_RT_DIAG) call RT_INV_step_diago(dG_old(:,:,ik,i_sp),dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,dT)
+   if (l_RT_INV.and.l_RT_STD ) call RT_INV_step_std(dG_old(:,:,ik,i_sp),dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,dT)
    !
    ! Here the term G(T0) [dG_old] is added to dG(t)
    !
@@ -59,25 +60,26 @@ subroutine RT_Glesser_evolve(En,kpt,dG_old,dG_in,dG_out,dT,dTp)
    !
    ! Dephasing is integrated analitically in this case.
    !
-   if(     l_RT_RWA)           call RT_apply_RWA(ik,En,kpt,dT,dTp,dG_out(:,:,ik),dG_old(:,:,ik))
+   if(     l_RT_RWA)           call RT_apply_RWA(ik,i_sp,En,kpt,dT,dTp,dG_out(:,:,ik,i_sp),dG_old(:,:,ik,i_sp))
    !
-   if(.not.l_RT_RWA)           call RT_Dephasing_step(dG_out(:,:,ik),dG_old(:,:,ik),ik,dT)
+   if(.not.l_RT_RWA)           call RT_Dephasing_step(dG_out(:,:,ik,i_sp),dG_old(:,:,ik,i_sp),ik,i_sp,dT)
    !
    ! Carriers relaxation step:
    !
    ! G(T0+dt) = G(T0) - i dt * Sigma_relax(T)
    !
-   call RT_Scattering_step(dG_old(:,:,ik),dG_in(:,:,ik),dG_out(:,:,ik),ik,dT)
+   call RT_Scattering_step(dG_old(:,:,ik,i_sp),dG_in(:,:,ik,i_sp),dG_out(:,:,ik,i_sp),ik,i_sp,dT)
    !
-   call RT_G_symmetrization(dG_out(:,:,ik),ik)
+   call RT_G_symmetrization(dG_out(:,:,ik,i_sp),ik,i_sp)
    !
  enddo
+ enddo
  !
  contains
    !
-   subroutine RT_G_symmetrization(G_inout,ik)
+   subroutine RT_G_symmetrization(G_inout,ik,i_sp)
      !
-     integer,     intent(in)    :: ik
+     integer,     intent(in)    :: ik,i_sp
      complex(SP), intent(inout) :: G_inout(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
      !
      integer                :: ib,ibp
@@ -88,11 +90,11 @@ subroutine RT_G_symmetrization(G_inout,ik)
        !
        G_inout(ib,ib)=cmplx(0._SP,aimag(G_inout(ib,ib)))
        !
-       E_occ_tmp= aimag(G_inout(ib,ib))+            aimag(G_lesser_reference(ib,ib,ik))
-       H_occ_tmp=-aimag(G_inout(ib,ib))+ ( spin_occ-aimag(G_lesser_reference(ib,ib,ik)) )
+       E_occ_tmp= aimag(G_inout(ib,ib))+            aimag(G_lesser_reference(ib,ib,ik,i_sp))
+       H_occ_tmp=-aimag(G_inout(ib,ib))+ ( spin_occ-aimag(G_lesser_reference(ib,ib,ik,i_sp)) )
        !
-       if ( E_occ_tmp<0._SP .or. H_occ_tmp>spin_occ ) G_inout(ib,ib)=                      G_lesser_reference(ib,ib,ik)
-       if ( H_occ_tmp<0._SP .or. E_occ_tmp>spin_occ ) G_inout(ib,ib)=cmplx(0._SP,spin_occ)-G_lesser_reference(ib,ib,ik)
+       if ( E_occ_tmp<0._SP .or. H_occ_tmp>spin_occ ) G_inout(ib,ib)=                      G_lesser_reference(ib,ib,ik,i_sp)
+       if ( H_occ_tmp<0._SP .or. E_occ_tmp>spin_occ ) G_inout(ib,ib)=cmplx(0._SP,spin_occ)-G_lesser_reference(ib,ib,ik,i_sp)
        !
        do ibp=ib+1,RT_bands(2)
          !
diff --git a/src/real_time_propagation/RT_INV_step_accurate.F b/src/real_time_propagation/RT_INV_step_accurate.F
index 994d317f40..39b33bc326 100644
--- a/src/real_time_propagation/RT_INV_step_accurate.F
+++ b/src/real_time_propagation/RT_INV_step_accurate.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
-subroutine RT_INV_step_accurate(dG_old,dG_inp,dG_out,ik,deltaT)
+subroutine RT_INV_step_accurate(dG_old,dG_inp,dG_out,ik,i_sp_pol,deltaT)
  !
  ! The last term in the following come from the fact that G0=G_reference
  ! is not included in G(t) nor in G(t+dt)
@@ -31,7 +31,7 @@ subroutine RT_INV_step_accurate(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  implicit none
  !
- integer,     intent(in)  :: ik
+ integer,     intent(in)  :: ik,i_sp_pol
  real(SP),    intent(in)  :: deltaT
  complex(SP), intent(in)  :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(in)  :: dG_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
@@ -49,22 +49,22 @@ subroutine RT_INV_step_accurate(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  complex(SP), pointer :: vec_dG_out(:)
  !
- H1_k=-cI*2*deltaT*Ho_plus_Sigma(:,:,ik,1)
- H2_k=+cI*2*deltaT*Ho_plus_Sigma(:,:,ik,1)
+ H1_k=-cI*2*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
+ H2_k=+cI*2*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !
- call M_by_M_omp('n','n',RT_nbands,H1_k,G_lesser_reference(:,:,ik),M1_tmp)
- call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik),H2_k,M2_tmp)
+ call M_by_M_omp('n','n',RT_nbands,H1_k,G_lesser_reference(:,:,ik,i_sp_pol),M1_tmp)
+ call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik,i_sp_pol),H2_k,M2_tmp)
  G_com=M1_tmp+M2_tmp
  !
- H1_k=I1_matrix-cI*deltaT*Ho_plus_Sigma(:,:,ik,1)
- H2_k=I1_matrix+cI*deltaT*Ho_plus_Sigma(:,:,ik,1)
+ H1_k=I1_matrix-cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
+ H2_k=I1_matrix+cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !
  call M_by_M_omp('n','n',RT_nbands,H1_k,dG_old,M1_tmp)
  call M_by_M_omp('n','n',RT_nbands,dG_old,H2_k,M2_tmp)
  dG_out=G_com+M1_tmp+M2_tmp
  !
  !*******************************************************
- !U_k=Ho_plus_Sigma(:,:,ik,1)
+ !U_k=Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !call SERIAL_HERMITIAN_diagonalization(RT_nbands,U_k,En)
  !!
  !! Generate the matrix  
diff --git a/src/real_time_propagation/RT_INV_step_diago.F b/src/real_time_propagation/RT_INV_step_diago.F
index f940851435..0e89948fcd 100644
--- a/src/real_time_propagation/RT_INV_step_diago.F
+++ b/src/real_time_propagation/RT_INV_step_diago.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
-subroutine RT_INV_step_diago(dG_old,dG_inp,dG_out,ik,deltaT)
+subroutine RT_INV_step_diago(dG_old,dG_inp,dG_out,ik,i_sp_pol,deltaT)
  !
  ! G(T+dt)-G(T)=-idt/2[H,G(T+dt)+G(T)]-idt[H,G0]
  !
@@ -27,7 +27,7 @@ subroutine RT_INV_step_diago(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  implicit none
  !
- integer,     intent(in)  :: ik
+ integer,     intent(in)  :: ik,i_sp_pol
  real(SP),    intent(in)  :: deltaT
  complex(SP), intent(in)  :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(in)  :: dG_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
@@ -46,15 +46,15 @@ subroutine RT_INV_step_diago(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  logical     :: l_use_superindex=.true.
  !
- H1_k=-cI*2*deltaT*Ho_plus_Sigma(:,:,ik,1)
- H2_k=+cI*2*deltaT*Ho_plus_Sigma(:,:,ik,1)
+ H1_k=-cI*2*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
+ H2_k=+cI*2*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !
- call M_by_M_omp('n','n',RT_nbands,H1_k,G_lesser_reference(:,:,ik),M1_tmp)
- call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik),H2_k,M2_tmp)
+ call M_by_M_omp('n','n',RT_nbands,H1_k,G_lesser_reference(:,:,ik,i_sp_pol),M1_tmp)
+ call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik,i_sp_pol),H2_k,M2_tmp)
  G_com=M1_tmp+M2_tmp
  !
- H1_k=I1_matrix-cI*deltaT*Ho_plus_Sigma(:,:,ik,1)
- H2_k=I1_matrix+cI*deltaT*Ho_plus_Sigma(:,:,ik,1)
+ H1_k=I1_matrix-cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
+ H2_k=I1_matrix+cI*deltaT*Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !
  call M_by_M_omp('n','n',RT_nbands,H1_k,dG_old,M1_tmp)
  call M_by_M_omp('n','n',RT_nbands,dG_old,H2_k,M2_tmp)
@@ -62,12 +62,12 @@ subroutine RT_INV_step_diago(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  ! Diagonalize U_k
  !
- U_k=Ho_plus_Sigma(:,:,ik,1)
+ U_k=Ho_plus_Sigma(:,:,ik,i_sp_pol)
  call SERIAL_HERMITIAN_diagonalization(RT_nbands,U_k,En)
  !
  ! Implementation works, and it is much faster than before
  ! However it is not sensitive
- !U_k=cmplx(Ho_plus_Sigma(:,:,ik,1),kind=DP)
+ !U_k=cmplx(Ho_plus_Sigma(:,:,ik,i_sp_pol),kind=DP)
  !call ZHEEV('V','U',RT_nbands,U_k,RT_nbands,En,v_cmplx,lwork,v_real,i_fail)
  !
  ! Rotate G_com in the new basis set
diff --git a/src/real_time_propagation/RT_INV_step_std.F b/src/real_time_propagation/RT_INV_step_std.F
index 9eca8f06f6..921055bf13 100644
--- a/src/real_time_propagation/RT_INV_step_std.F
+++ b/src/real_time_propagation/RT_INV_step_std.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
-subroutine RT_INV_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
+subroutine RT_INV_step_std(dG_old,dG_inp,dG_out,ik,i_sp_pol,deltaT)
  !
  use pars,           ONLY:SP,cI
  use wrapper_omp,    ONLY:M_by_M_omp
@@ -16,7 +16,7 @@ subroutine RT_INV_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  implicit none
  !
- integer,     intent(in)  :: ik
+ integer,     intent(in)  :: ik,i_sp_pol
  real(SP),    intent(in)  :: deltaT
  complex(SP), intent(in)  :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(in)  :: dG_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
@@ -40,7 +40,7 @@ subroutine RT_INV_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  ! Taylor expansion
  !
- H_k=cI*deltaT/2._SP*Ho_plus_Sigma(:,:,ik,1)
+ H_k=cI*deltaT/2._SP*Ho_plus_Sigma(:,:,ik,i_sp_pol)
  !
  if(l_use_lin_sys) then
    !
@@ -95,11 +95,11 @@ subroutine RT_INV_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
    call M_by_M_omp('n','n',RT_nbands,T1_k,dG_old,M1_tmp)
    call M_by_M_omp('n','n',RT_nbands,M1_tmp,T2_k,dG_out)
    !
-   call M_by_M_omp('n','n',RT_nbands,T1_km1,G_lesser_reference(:,:,ik),M1_tmp)
-   call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik),T2_km1,M2_tmp)
+   call M_by_M_omp('n','n',RT_nbands,T1_km1,G_lesser_reference(:,:,ik,i_sp_pol),M1_tmp)
+   call M_by_M_omp('n','n',RT_nbands,G_lesser_reference(:,:,ik,i_sp_pol),T2_km1,M2_tmp)
    dG_out=dG_out+(M1_tmp+M2_tmp)
    !
-   call M_by_M_omp('n','n',RT_nbands,T1_km1,G_lesser_reference(:,:,ik),M2_tmp)
+   call M_by_M_omp('n','n',RT_nbands,T1_km1,G_lesser_reference(:,:,ik,i_sp_pol),M2_tmp)
    call M_by_M_omp('n','n',RT_nbands,M2_tmp,T2_km1,M1_tmp)
    dG_out=dG_out+M1_tmp-dG_old
    !
@@ -107,9 +107,9 @@ subroutine RT_INV_step_std(dG_old,dG_inp,dG_out,ik,deltaT)
    !
    ! dG_out = G(T0+dt) -G(T0) = U1(T;dt) G(T0) U2(T;-dt) - G(T0)
    !
-   call M_by_M_omp('n','n',RT_nbands,T1_k,dG_old+G_lesser_reference(:,:,ik),M1_tmp)
+   call M_by_M_omp('n','n',RT_nbands,T1_k,dG_old+G_lesser_reference(:,:,ik,i_sp_pol),M1_tmp)
    call M_by_M_omp('n','n',RT_nbands,M1_tmp,T2_k,dG_out)
-   dG_out=dG_out-dG_old-G_lesser_reference(:,:,ik)
+   dG_out=dG_out-dG_old-G_lesser_reference(:,:,ik,i_sp_pol)
    !
  endif
  !
diff --git a/src/real_time_propagation/RT_IO_type_time_steps.F b/src/real_time_propagation/RT_IO_type_time_steps.F
index 524e07a1cf..f897aaa165 100644
--- a/src/real_time_propagation/RT_IO_type_time_steps.F
+++ b/src/real_time_propagation/RT_IO_type_time_steps.F
@@ -11,7 +11,7 @@ subroutine RT_IO_type_time_steps(what)
  use RT_control,     ONLY:CARR_RT_IO_t,OBS_RT_IO_t,RT_IO_t,Gless_RESTART_RT_IO_t,OUTPUT_RT_IO_t,&
 &                         SETUP_RT_IO_type,CACHE_OBS_INTERVAL_time_INPUT,TIME_adjust,&
 &                         CACHE_OBS_INTERVAL_time,CACHE_OBS_steps,RT_control_free,&
-&                         RT_control_alloc,Gless_RT_IO_t,SAVE_G_history
+&                         RT_control_alloc,Vbands_RT_IO_T,Gless_RT_IO_t,SAVE_G_history
  ! 
  implicit none
  !
@@ -22,6 +22,7 @@ subroutine RT_IO_type_time_steps(what)
    call SETUP_RT_IO_type(CARR_RT_IO_t,who="CARR",what=what)
    call SETUP_RT_IO_type(OUTPUT_RT_IO_t,who="OUTPUT",what=what)
    call SETUP_RT_IO_type(Gless_RESTART_RT_IO_t,who="GF",what=what)
+   call SETUP_RT_IO_type(Vbands_RT_IO_t,who="Vb",what=what)
    call SETUP_RT_IO_type(OBS_RT_IO_t,who="OBS",what=what)
    if (SAVE_G_history) call SETUP_RT_IO_type(Gless_RT_IO_t,who="GF_history",what=what)
    !
diff --git a/src/real_time_propagation/RT_Integrator.F b/src/real_time_propagation/RT_Integrator.F
index 02d7370f43..d9ea54607e 100644
--- a/src/real_time_propagation/RT_Integrator.F
+++ b/src/real_time_propagation/RT_Integrator.F
@@ -35,7 +35,7 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
  !                G(t),H[G(t)],dt/2        --> G1(t+dt/2)
  !                G(t),H[G1(t+dt/2)],dt/2  --> G2(t+dt/2)
  !                G(t),H[G2(t+dt/2)),dt    --> G3(t+dt)
- !                G(t),H[G3(t+dt)),dtcccc  --> G4(t+dt)
+ !                G(t),H[G3(t+dt)),dt      --> G4(t+dt)
  !                      G(t+dt)=G1(t+dt)/6+G2(t+dt)/3+G3(t+dt)/3+G2(t+dt)/6
  !
  !  In addition the IP oscillations and dephasing are "analytically integrated out"
@@ -47,10 +47,10 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
  use R_lattice,      ONLY:bz_samp
  use RT_control,     ONLY:TIME_adjust
  use real_time,      ONLY:Integrator_step,RT_nk,RT_bands,a_tableau,b_tableau,&
-&                         c_tableau,RWA_table,Integrator_nsteps,G_lesser_reference,&
+&                         c_tableau,RWA_table,Int_Ns=>Integrator_nsteps,G_lesser_reference,&
 &                         NE_time,NE_i_time,RT_step,RT_dyn_step
  use fields,         ONLY:gauge_field
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,n_sp_pol
  use timing_m,       ONLY:timing
  use parallel_m,     ONLY:PAR_G_k_range
  !
@@ -58,9 +58,9 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
  !
  type(bz_samp),        intent(in) :: k,q
  type(levels),      intent(inout) :: E
- complex(SP),       intent(inout) :: G_new(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
- complex(SP),          intent(in) :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
- complex(SP),         intent(out) :: dG_new(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP), intent(inout) :: G_new(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ complex(SP),    intent(in) :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
+ complex(SP),   intent(out) :: dG_new(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  type(gauge_field),    intent(in) :: A_old
  type(gauge_field),   intent(out) :: A_new
  !
@@ -68,18 +68,18 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
  !
  integer     :: i_step
  real(SP)    :: delta_T1,delta_T2,delta_T3
- complex(SP) :: dG_tmp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),Integrator_nsteps+1)
+ complex(SP) :: dG_tmp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol,Int_Ns+1)
  !
- type(gauge_field) :: A_tmp(Integrator_nsteps+1)
+ type(gauge_field) :: A_tmp(Int_Ns+1)
  !
  call timing('RT integrator',OPR='start')
  !
- dG_tmp(:,:,:,1)=dG_old
- A_tmp(1)       =A_old
+ dG_tmp(:,:,:,:,1)=dG_old
+ A_tmp(1)         =A_old
  !
  Integrator_step=1
  !
- do i_step=1,Integrator_nsteps
+ do i_step=1,Int_Ns
    !
    if (a_tableau(i_step)==0._SP) cycle 
    !  
@@ -89,7 +89,7 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
    !
    ! Evaluate @ T+dt1
    !
-   call RT_Glesser_evolve(E,k,dG_old,dG_tmp(:,:,:,i_step-1),dG_tmp(:,:,:,i_step),delta_T1,delta_T3)
+   call RT_Glesser_evolve(E,k,dG_old,dG_tmp(:,:,:,:,i_step-1),dG_tmp(:,:,:,:,i_step),delta_T1,delta_T3)
    !
    ! ... Update of logicals before the occupations update
    !
@@ -97,11 +97,11 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
    call RT_relaxation_logicals( )
 #endif
    !
-   call RT_occupations_eval(dG_tmp(:,:,:,i_step))
+   call RT_occupations_eval(dG_tmp(:,:,:,:,i_step))
    !
    ! Fields @ T+dt2
    !
-   call RT_propagate_fields(E,k,A_old,A_tmp(i_step),dG_tmp(:,:,:,i_step),NE_time,delta_T2)
+   call RT_propagate_fields(E,k,A_old,A_tmp(i_step),dG_tmp(:,:,:,:,i_step),NE_time,delta_T2)
    !
    Integrator_step=Integrator_step+1
    !
@@ -109,7 +109,7 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
    !
    ! Recalculate H and S using G(t+dt1) 
    !
-   call RT_Hamiltonian(dG_tmp(:,:,:,i_step),A_tmp(i_step),E,k)             ! The time specifications here is useless
+   call RT_Hamiltonian(dG_tmp(:,:,:,:,i_step),A_tmp(i_step),E,k)             ! The time specifications here is useless
    !
 #if defined _RT_SCATT
    call RT_relaxation(E,k,q,NE_time+delta_T1)                                 ! since nothing explicitely depends on time
@@ -121,8 +121,8 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
  !
  ! final step @ T+dt
  !
- call RT_Glesser_evolve(E,k,dG_old,dG_tmp(:,:,:,Integrator_nsteps),dG_tmp(:,:,:,Integrator_nsteps+1), &
-&                                               RT_dyn_step,RT_dyn_step*RWA_table(Integrator_nsteps+1))
+ call RT_Glesser_evolve(E,k,dG_old,dG_tmp(:,:,:,:,Int_Ns),dG_tmp(:,:,:,:,Int_Ns+1), &
+&                                        RT_dyn_step,RT_dyn_step*RWA_table(Int_Ns+1))
  !
  call RT_build_dG_new()
  !
@@ -136,7 +136,7 @@ subroutine RT_Integrator(G_new,dG_new,dG_old,A_new,A_old,E,k,q)
  !
  ! Fields @ T+dT
  !
- call RT_propagate_fields(E,k,A_old,A_tmp(Integrator_nsteps+1),dG_new,NE_time,RT_dyn_step)
+ call RT_propagate_fields(E,k,A_old,A_tmp(Int_Ns+1),dG_new,NE_time,RT_dyn_step)
  !
  call RT_build_A_new()
  !
@@ -156,50 +156,53 @@ subroutine RT_build_dG_new()
      !
      use parallel_m,     ONLY:PAR_IND_Xk_ibz
      use real_time,      ONLY:RTibz
+     use electrons,      ONLY:n_sp_pol
      !
      ! Now I remove dG_old from dG_tmp and rescale to a_tableau
      !
-     integer  :: ik
+     integer  :: ik,i_sp
      !
+     do i_sp=1,n_sp_pol
      do ik=1,RT_nk
        !
        if (.not.PAR_IND_Xk_ibz%element_1D(RTibz%k_map(ik)) ) cycle
        !
-       do i_step=1,Integrator_nsteps
+       do i_step=1,Int_Ns
          if (a_tableau(i_step)==0._SP) cycle 
-         dG_tmp(:,:,ik,i_step)=(dG_tmp(:,:,ik,i_step)-dG_old(:,:,ik))/a_tableau(i_step)
+         dG_tmp(:,:,ik,i_sp,i_step)=(dG_tmp(:,:,ik,i_sp,i_step)-dG_old(:,:,ik,i_sp))/a_tableau(i_step)
        enddo
-       dG_tmp(:,:,ik,Integrator_nsteps+1)=dG_tmp(:,:,ik,Integrator_nsteps+1)-dG_old(:,:,ik)
+       dG_tmp(:,:,ik,i_sp,Int_Ns+1)=dG_tmp(:,:,ik,i_sp,Int_Ns+1)-dG_old(:,:,ik,i_sp)
        !
        ! Finally I average using b_tableau
        !
-       dG_new(:,:,ik)=dG_old(:,:,ik)
-       do i_step=1,Integrator_nsteps
-         dG_new(:,:,ik)=dG_new(:,:,ik)+dG_tmp(:,:,ik,i_step+1)*b_tableau(i_step)
+       dG_new(:,:,ik,i_sp)=dG_old(:,:,ik,i_sp)
+       do i_step=1,Int_Ns
+         dG_new(:,:,ik,i_sp)=dG_new(:,:,ik,i_sp)+dG_tmp(:,:,ik,i_sp,i_step+1)*b_tableau(i_step)
        enddo
        !
      enddo
+     enddo
      !
-     G_new=G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2))+dG_new
+     G_new=G_lesser_reference(:,:,PAR_G_k_range(1):PAR_G_k_range(2),:)+dG_new
      !
    end subroutine RT_build_dG_new
    !
    subroutine RT_build_A_new()
      !
-     do i_step=1,Integrator_nsteps
+     do i_step=1,Int_Ns
        if (a_tableau(i_step)==0._SP) cycle 
        A_tmp(i_step)%vecpot    =(A_tmp(i_step)%vecpot    -A_old%vecpot)/a_tableau(i_step)
        A_tmp(i_step)%vecpot_vel=(A_tmp(i_step)%vecpot_vel-A_old%vecpot_vel)/a_tableau(i_step)
        A_tmp(i_step)%vecpot_acc=(A_tmp(i_step)%vecpot_acc-A_old%vecpot_acc)/a_tableau(i_step)
      enddo
-     A_tmp(Integrator_nsteps+1)%vecpot    =A_tmp(Integrator_nsteps+1)%vecpot    -A_old%vecpot
-     A_tmp(Integrator_nsteps+1)%vecpot_vel=A_tmp(Integrator_nsteps+1)%vecpot_vel-A_old%vecpot_vel
-     A_tmp(Integrator_nsteps+1)%vecpot_acc=A_tmp(Integrator_nsteps+1)%vecpot_acc-A_old%vecpot_acc
+     A_tmp(Int_Ns+1)%vecpot    =A_tmp(Int_Ns+1)%vecpot    -A_old%vecpot
+     A_tmp(Int_Ns+1)%vecpot_vel=A_tmp(Int_Ns+1)%vecpot_vel-A_old%vecpot_vel
+     A_tmp(Int_Ns+1)%vecpot_acc=A_tmp(Int_Ns+1)%vecpot_acc-A_old%vecpot_acc
      !
      ! Finally I average using b_tableau
      !
      A_new=A_old
-     do i_step=1,Integrator_nsteps
+     do i_step=1,Int_Ns
         A_new%vecpot    =A_new%vecpot    +A_tmp(i_step+1)%vecpot    *b_tableau(i_step)
         A_new%vecpot_vel=A_new%vecpot_vel+A_tmp(i_step+1)%vecpot_vel*b_tableau(i_step)
         A_new%vecpot_acc=A_new%vecpot_acc+A_tmp(i_step+1)%vecpot_acc*b_tableau(i_step)
diff --git a/src/real_time_propagation/RT_Integrator_init.F b/src/real_time_propagation/RT_Integrator_init.F
index a8e7ab3057..deeadd0552 100644
--- a/src/real_time_propagation/RT_Integrator_init.F
+++ b/src/real_time_propagation/RT_Integrator_init.F
@@ -5,19 +5,25 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_Integrator_init()
  ! 
  use pars,           ONLY:SP,cONE,cZERO
  use com,            ONLY:msg
  use stderr,         ONLY:intc
- use real_time,      ONLY:l_RT_EULER,l_RT_EXP,l_RT_INV,            &
+ use real_time,      ONLY:l_RT_EULER,l_RT_EXP,l_RT_INV,l_RT_FRES,  &
 &                         l_RT_RWA,l_RT_ACC,l_RT_DIAG,l_RT_PERT,   &
 &                         l_RT_HEUN,l_RT_RK2,l_RT_RK4,l_RT_SIMPLE, &
 &                         Integrator_name,integrator_exp_order,    &
 &                         a_tableau,b_tableau,c_tableau,RWA_table, &
-&                         I1_matrix,Integrator_nsteps,RT_bands
+&                         I1_matrix,Integrator_nsteps,RT_bands,    &
+&                         Integrator_nfrac
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer  :: ib
  !
@@ -61,8 +67,11 @@ subroutine RT_Integrator_init()
  l_RT_DIAG=  l_RT_INV                 .and. &
 &           index(Integrator_name,'DIAG' )/=0.or.index(Integrator_name,'diag' )/=0
  !
- l_RT_PERT= (l_RT_EULER.and.l_RT_RWA) .and. &
-&           index(Integrator_name,'PERT'  )/=0.or.index(Integrator_name,'pert' )/=0
+ l_RT_PERT=  l_RT_EULER               .and. &
+&           index(Integrator_name,'PERT' )/=0.or.index(Integrator_name,'pert' )/=0
+ !
+ l_RT_FRES=  l_RT_PERT               .and. &
+&           index(Integrator_name,'FRES' )/=0.or.index(Integrator_name,'fres' )/=0
  !
  ! d) Reset integrator name
  !
@@ -80,6 +89,7 @@ subroutine RT_Integrator_init()
  if (l_RT_ACC   ) Integrator_name = trim(Integrator_name)//" + ACC"
  if (l_RT_DIAG  ) Integrator_name = trim(Integrator_name)//" + DIAG"
  if (l_RT_PERT  ) Integrator_name = trim(Integrator_name)//" + PERT"
+ if (l_RT_FRES  ) Integrator_name = trim(Integrator_name)//" + FRES"
  !
  call msg('s','Integrator '//trim(Integrator_name))
  !
@@ -91,6 +101,7 @@ subroutine RT_Integrator_init()
  !
  if (l_RT_SIMPLE) then
    Integrator_nsteps = 1
+   Integrator_nfrac = 1
    b_tableau(1) = 1._SP
    c_tableau(2) = 1._SP
    RWA_table(2) = 1._SP
@@ -98,6 +109,7 @@ subroutine RT_Integrator_init()
  !
  if (l_RT_RK2   ) then
    Integrator_nsteps = 2
+   Integrator_nfrac = 2
    c_tableau(2) = 1._SP/2._SP
    a_tableau(2) = 1._SP/2._SP
    b_tableau(1:2) = (/0._SP      ,1._SP      /)
@@ -106,6 +118,7 @@ subroutine RT_Integrator_init()
  !
  if (l_RT_HEUN  ) then
    Integrator_nsteps = 2
+   Integrator_nfrac = 2
    c_tableau(2) = 1._SP
    a_tableau(2) = 1._SP
    b_tableau(1:2) = (/1._SP/2._SP,1._SP/2._SP/)
@@ -114,6 +127,7 @@ subroutine RT_Integrator_init()
  !
  if (l_RT_RK4   ) then
    Integrator_nsteps = 4
+   Integrator_nfrac = 6
    c_tableau(2:4) = (/1._SP/2._SP,1._SP/2._SP,1._SP      /)
    a_tableau(2:4) = (/1._SP/2._SP,1._SP/2._SP,1._SP      /)
    b_tableau(1:4) = (/1._SP/6._SP,1._SP/3._SP,1._SP/3._SP,1._SP/6._SP/)
diff --git a/src/real_time_propagation/RT_Scattering_step.F b/src/real_time_propagation/RT_Scattering_step.F
index cb8e9427f8..0f6ecfb1e9 100644
--- a/src/real_time_propagation/RT_Scattering_step.F
+++ b/src/real_time_propagation/RT_Scattering_step.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
-subroutine RT_Scattering_step(dG_old,dG_inp,dG_out,ik,deltaT)
+subroutine RT_Scattering_step(dG_old,dG_inp,dG_out,ik,i_sp_pol,deltaT)
  !
  ! The propagation of the scattering part is done in terms of electrons and holes occupations.
  ! The lifetimes used (computed by RT_relaxation) are all constructed in terms of f_e and f_h
@@ -32,7 +32,7 @@ subroutine RT_Scattering_step(dG_old,dG_inp,dG_out,ik,deltaT)
  ! Here remember that the occupations stored in the module are always equal to dG_inp
  ! 
  use pars,           ONLY:SP,cI,rZERO
- use electrons,      ONLY:spin_occ
+ use electrons,      ONLY:spin_occ,n_sp_pol
  use real_time,      ONLY:RT_bands,RTibz,l_RT_include_eq_scatt,RT_is_statically_dephased,&
 &                         RT_is_dynamically_dephased,RT_Deph_Matrix,G_lesser_reference
  use RT_occupations, ONLY:RT_el_occ,RT_ho_occ,l_RT_relax_time_approx,RT_RTA_E_occ_infty,RT_RTA_H_occ_infty
@@ -43,7 +43,7 @@ subroutine RT_Scattering_step(dG_old,dG_inp,dG_out,ik,deltaT)
  complex(SP), intent(in)    :: dG_old(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(in)    :: dG_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
  complex(SP), intent(inout) :: dG_out(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
- integer,     intent(in)    :: ik
+ integer,     intent(in)    :: ik,i_sp_pol
  real(SP),    intent(in)    :: deltaT
  !
  integer     :: ib
@@ -52,18 +52,20 @@ subroutine RT_Scattering_step(dG_old,dG_inp,dG_out,ik,deltaT)
  !
  if( .not. RT_is_dynamically_dephased) return
  !
+ if (n_sp_pol==2) call error("RT_Scattering step not fully ported for n_sp_pol=2")
+ !
  do ib=RT_bands(1),RT_bands(2)
    !
    ! 1=electrons
    ! 2=holes
    !
-   f_(1)=RT_el_occ%N(ib,ik)
-   f_(2)=RT_ho_occ%N(ib,ik)
+   f_(1)=RT_el_occ%N(ib,ik,i_sp_pol)
+   f_(2)=RT_ho_occ%N(ib,ik,i_sp_pol)
    !
-   f0(1)=         aimag(G_lesser_reference(ib,ib,ik))
-   f0(2)=spin_occ-aimag(G_lesser_reference(ib,ib,ik))
-   df(1)= RT_el_occ%dN(ib,ik)
-   df(2)=-RT_el_occ%dN(ib,ik)
+   f0(1)=         aimag(G_lesser_reference(ib,ib,ik,i_sp_pol))
+   f0(2)=spin_occ-aimag(G_lesser_reference(ib,ib,ik,i_sp_pol))
+   df(1)= RT_el_occ%dN(ib,ik,i_sp_pol)
+   df(2)=-RT_el_occ%dN(ib,ik,i_sp_pol)
    !
    ! The GAMMA0 term could be moved inside RT_apply_RWA.F
    ! For the moment I keep it here, together with the rest of the relaxation term
@@ -111,8 +113,8 @@ subroutine RT_Scattering_step(dG_old,dG_inp,dG_out,ik,deltaT)
      !  f_inf(2)= GAMMA_(1)/GAMMA__TOT*spin_occ
      !endif
      !if( GAMMA__TOT<=0._SP  ) then
-     !  f_inf(1)=         aimag(G_lesser_reference(ib,ib,ik))
-     !  f_inf(2)=spin_occ-aimag(G_lesser_reference(ib,ib,ik))
+     !  f_inf(1)=         aimag(G_lesser_reference(ib,ib,ik,i_sp_pol))
+     !  f_inf(2)=spin_occ-aimag(G_lesser_reference(ib,ib,ik,i_sp_pol))
      !endif
      !df_out(:) = -deltaT*(GAMMA0_TOT*df(:) + GAMMA0_TOT*(f0(:)-f_inf(:)) + &
      !&                    dGAMMA_TOT*df(:) + dGAMMA_TOT*(f0(:)-f_inf(:)) )
diff --git a/src/real_time_propagation/RT_apply_RWA.F b/src/real_time_propagation/RT_apply_RWA.F
index 2b011f2c9e..3c3a06d53f 100644
--- a/src/real_time_propagation/RT_apply_RWA.F
+++ b/src/real_time_propagation/RT_apply_RWA.F
@@ -5,7 +5,7 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
-subroutine RT_apply_RWA(ik,E,k,dT,dTp,dG_out,dG_in)
+subroutine RT_apply_RWA(ik,i_sp,E,k,dT,dTp,dG_out,dG_in)
  !
  use pars,           ONLY:SP,cONE
  use electrons,      ONLY:levels
@@ -15,7 +15,7 @@ subroutine RT_apply_RWA(ik,E,k,dT,dTp,dG_out,dG_in)
  !
  implicit none
  !
- integer,      intent(in)     :: ik
+ integer,      intent(in)     :: ik,i_sp
  type(levels), intent(in)     :: E
  type(bz_samp),intent(in)     :: k
  real(SP),     intent(in)     :: dT,dTp
@@ -44,7 +44,7 @@ subroutine RT_apply_RWA(ik,E,k,dT,dTp,dG_out,dG_in)
      if (NE_time<RT_deph_range(1).and.RT_deph_range(1)>0._SP) sumGAMMA=0._SP
      if (NE_time>RT_deph_range(2).and.RT_deph_range(2)>0._SP) sumGAMMA=0._SP
      !
-     deltaE=RT_levels%E(i_n,ik,1)-RT_levels%E(i_np,ik,1)
+     deltaE=RT_levels%E(i_n,ik,i_sp)-RT_levels%E(i_np,ik,i_sp)
      !
      if (l_RT_EXP) then
        !
@@ -88,7 +88,7 @@ subroutine RT_apply_RWA(ik,E,k,dT,dTp,dG_out,dG_in)
    ! It is higly non satisfactory that
    !     Gamma_neq[f_eq] /= Gamma_eq                              (1)
    ! This explains the need for the special definition
-   !    sumGAMMA=abs(E%W(i_n,ik,1))+abs(E%W(i_np,ik,1))           (2)
+   !    sumGAMMA=abs(E%W(i_n,ik,i_sp))+abs(E%W(i_np,ik,i_sp))           (2)
    ! instead of
    !    sumGAMMA=sum(REF_lifetime(i_n,ik,:))                     (3)
    ! with i_n=i_np.
diff --git a/src/real_time_propagation/RT_occupations_eval.F b/src/real_time_propagation/RT_occupations_eval.F
index 76f066b011..17b5a8ecc8 100644
--- a/src/real_time_propagation/RT_occupations_eval.F
+++ b/src/real_time_propagation/RT_occupations_eval.F
@@ -13,15 +13,15 @@ subroutine RT_occupations_eval(dG)
  use RT_occupations, ONLY:RT_el_occ,RT_ho_occ
  use real_time,      ONLY:RTibz,RT_bands,RT_nk,RT_nbands,G_lesser_reference,l_RT_rotate_DM
  use hamiltonian,    ONLY:H_rotation
- use electrons,      ONLY:spin_occ
+ use electrons,      ONLY:spin_occ,n_sp_pol
  !
  implicit none
  !
- complex(SP), intent(in) :: dG(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP), intent(in) :: dG(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  !
  ! Work Space
  !
- integer                 :: ik,ib
+ integer                 :: ik,ib,i_sp
  real(SP)                :: df_H
  complex(SP)             :: G_rot(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2)),&
 &                           G_ref(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2))
@@ -36,42 +36,44 @@ subroutine RT_occupations_eval(dG)
  RT_ho_occ%N=0._SP
  RT_el_occ%dN=0._SP
  !
+ do i_sp=1,n_sp_pol
  do ik=1,RT_nk
    !
    if (.not.PAR_IND_Xk_ibz%element_1D(RTibz%k_map(ik)) ) cycle
    !
    if (l_RT_rotate_DM.and.(RT_do_it('OUT').or.RT_do_it('CARR'))) then
-     G_rot=G_lesser_reference(:,:,ik)+dG(:,:,ik)
-     call OBS_rotate(H_rotation(:,:,ik,1),G_rot,RT_nbands,1)
-     G_ref=G_lesser_reference(:,:,ik)
-     call OBS_rotate(H_rotation(:,:,ik,1),G_ref,RT_nbands,1)
+     G_rot=G_lesser_reference(:,:,ik,i_sp)+dG(:,:,ik,i_sp)
+     call OBS_rotate(H_rotation(:,:,ik,i_sp),G_rot,RT_nbands,1)
+     G_ref=G_lesser_reference(:,:,ik,i_sp)
+     call OBS_rotate(H_rotation(:,:,ik,i_sp),G_ref,RT_nbands,1)
    endif
    !
    if (l_RT_rotate_DM.and.(RT_do_it('OUT').or.RT_do_it('CARR'))) then
      do ib=RT_bands(1),RT_bands(2)
-       RT_el_occ%N(ib,ik)= aimag(G_rot(ib,ib))
-       RT_ho_occ%N(ib,ik)=-aimag(G_rot(ib,ib))+spin_occ
+       RT_el_occ%N(ib,ik,i_sp)= aimag(G_rot(ib,ib))
+       RT_ho_occ%N(ib,ik,i_sp)=-aimag(G_rot(ib,ib))+spin_occ
        ! This would be the correct definition
        ! RT_el_occ%dN(ib,ik)=aimag(G_rot(ib,ib)-G_ref(ib,ib))
        ! but I use this definition to consistently reconstruct f from df in ypp
-       RT_el_occ%dN(ib,ik)=aimag(G_rot(ib,ib)-G_lesser_reference(ib,ib,ik))
+       RT_el_occ%dN(ib,ik,i_sp)=aimag(G_rot(ib,ib)-G_lesser_reference(ib,ib,ik,i_sp))
      enddo
    else
      do ib=RT_bands(1),RT_bands(2)
        !
-       RT_el_occ%N(ib,ik)= aimag(dG(ib,ib,ik))+aimag(G_lesser_reference(ib,ib,ik))
+       RT_el_occ%N(ib,ik,i_sp)= aimag(dG(ib,ib,ik,i_sp))+aimag(G_lesser_reference(ib,ib,ik,i_sp))
        !
        ! AM, July 2015. Intel compilers nest in ordered way the mathematical parenthesis. By using 
        ! df_H this problem is solved.
        !
-       df_H                   =spin_occ-aimag(G_lesser_reference(ib,ib,ik))
-       RT_ho_occ%N(ib,ik)=-aimag(dG(ib,ib,ik))+ df_H
+       df_H                   =spin_occ-aimag(G_lesser_reference(ib,ib,ik,i_sp))
+       RT_ho_occ%N(ib,ik,i_sp)=-aimag(dG(ib,ib,ik,i_sp))+ df_H
        !
-       RT_el_occ%dN(ib,ik)=aimag(dG(ib,ib,ik))
+       RT_el_occ%dN(ib,ik,i_sp)=aimag(dG(ib,ib,ik,i_sp))
      enddo
    endif
    ! 
  enddo
+ enddo
  !
  call PP_redux_wait(RT_el_occ%N,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
  call PP_redux_wait(RT_ho_occ%N,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
diff --git a/src/real_time_propagation/RT_propagate_fields.F b/src/real_time_propagation/RT_propagate_fields.F
index e81777bb80..604d9dde27 100644
--- a/src/real_time_propagation/RT_propagate_fields.F
+++ b/src/real_time_propagation/RT_propagate_fields.F
@@ -24,14 +24,15 @@ subroutine RT_propagate_fields(E,k,A_old,A_new,G_inp,T,T_step)
  !    the induced current is included in the dynamics ( RT_propagate_fields )
  ! 
  use pars,           ONLY:SP,pi
+ use electrons,      ONLY:spin_occ
  use units,          ONLY:SPEED_OF_LIGHT
- use electrons,      ONLY:levels,nel
+ use electrons,      ONLY:levels,nel,n_sp_pol
  use R_lattice,      ONLY:bz_samp
  use X_m,            ONLY:global_gauge
  use parallel_m,     ONLY:PAR_G_k_range
  use real_time,      ONLY:RT_ind_J_prev,RT_ind_J,RT_P_prev,RT_P,RT_dyn_step,   &
 &                         l_RT_induced_field,Integrator_step,Integrator_nsteps,&
-&                         RT_bands
+&                         RT_bands,l_velocity_gauge_diam
  use fields,         ONLY:gauge_field,A_ind,A_ext,A_vecpot_reset,compute_intensity,compute_envelop
  !
  implicit none
@@ -41,10 +42,11 @@ subroutine RT_propagate_fields(E,k,A_old,A_new,G_inp,T,T_step)
  real(SP),             intent(in) :: T,T_step
  type(gauge_field),    intent(in) :: A_old
  type(gauge_field),   intent(out) :: A_new
- complex(SP),          intent(in) :: G_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2))
+ complex(SP),  intent(in) :: G_inp(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  !
  ! Work space
  !
+ integer           :: nel_effect
  type(gauge_field) :: A_ind_old
  type(gauge_field) :: A_ind_new
  type(gauge_field) :: A_null
@@ -99,7 +101,10 @@ subroutine RT_propagate_fields(E,k,A_old,A_new,G_inp,T,T_step)
      !
      ! Update the current replacing the null vector potential with the real one
      !
-     RT_ind_J=RT_ind_J+(A_ind_new%vecpot(:)+A_ext%vecpot(:))*real(nel,SP)/(2._SP*SPEED_OF_LIGHT)
+     if(l_velocity_gauge_diam) then
+       nel_effect=nel-(RT_bands(1)-1)*spin_occ
+       RT_ind_J=RT_ind_J+(A_ind_new%vecpot(:)+A_ext%vecpot(:))*real(nel_effect,SP)/(2._SP*SPEED_OF_LIGHT)
+     endif
      A_ind_new%vecpot_acc =                        real(RT_ind_J)     *4._SP*pi*SPEED_OF_LIGHT
    endif
    !
diff --git a/src/real_time_propagation/RT_time_step_update.F b/src/real_time_propagation/RT_time_step_update.F
index d1c3e6e40a..45a411ec25 100644
--- a/src/real_time_propagation/RT_time_step_update.F
+++ b/src/real_time_propagation/RT_time_step_update.F
@@ -19,7 +19,7 @@ subroutine RT_time_step_update( E )
  use RT_control,     ONLY:TIME_adjust
  use RT_output_m,    ONLY:TIME_STEP_desc
  use com,            ONLY:msg
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,n_sp_pol
  use parallel_m,     ONLY:PAR_COM_Xk_ibz_INDEX,PAR_IND_Xk_ibz
  use parallel_int,   ONLY:PP_redux_wait
  use interfaces,     ONLY:DESC_write
@@ -32,7 +32,7 @@ subroutine RT_time_step_update( E )
  ! Work Space 
  ! 
  logical           :: l_Update_Time_Step_Manual
- integer           :: ib,ibp,ik,nT_updated,i_desc_ref
+ integer           :: ib,ibp,ik,i_sp_pol,nT_updated,i_desc_ref
  complex(SP)       :: dG_dT_max(2),dG_dT(2),ERROR,dG(3)
  real(SP)          :: dT_now,dT_previous,dT_next
 #if defined _RT_SCATT
@@ -54,19 +54,21 @@ subroutine RT_time_step_update( E )
  !
  if(l_update_time_step_manual) then
    !
+   do i_sp_pol=1,n_sp_pol
    do ik=1,RT_nk
      !
      if( .not.PAR_IND_Xk_ibz%element_1D(RTibz%k_map(ik)) ) cycle
      do ib=RT_bands(1),E%nbf(1)
        do ibp=E%nbf(1)+1,RT_bands(2)
-         dG_lesser(ib,ibp,ik,:)=cZERO
-         dG_lesser(ibp,ib,ik,:)=cZERO
-          G_lesser(ib,ibp,ik,:)=cZERO
-          G_lesser(ibp,ib,ik,:)=cZERO
+         dG_lesser(ib,ibp,ik,i_sp_pol,:)=cZERO
+         dG_lesser(ibp,ib,ik,i_sp_pol,:)=cZERO
+          G_lesser(ib,ibp,ik,i_sp_pol,:)=cZERO
+          G_lesser(ibp,ib,ik,i_sp_pol,:)=cZERO
        enddo
      enddo
      !
    enddo
+   enddo
    !
    dT_next=dT_now*RT_step_manual_prefactor
    !
@@ -77,23 +79,25 @@ subroutine RT_time_step_update( E )
    dG_dT_max   =cZERO
    dG          =cZERO
    !
+   do i_sp_pol=1,n_sp_pol
    do ik=1,RT_nk
      !
      if( .not.PAR_IND_Xk_ibz%element_1D(RTibz%k_map(ik)) ) cycle
      !
      do ib=RT_bands(1),RT_bands(2)
        do ibp=RT_bands(1),RT_bands(2)
-         dG_dT(1)=(dG_lesser(ib,ibp,ik,i_MEM_prev)-dG_lesser(ib,ibp,ik,i_MEM_old))/dT_previous
-         dG_dT(2)=(dG_lesser(ib,ibp,ik,i_MEM_now)-dG_lesser(ib,ibp,ik,i_MEM_prev))/dT_now
+         dG_dT(1)=(dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_prev)-dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_old))/dT_previous
+         dG_dT(2)=(dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_now)-dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_prev))/dT_now
          if ( abs(dG_dT(2)) >  abs(dG_dT_max(2)) ) then
-           dG(1)=dG_lesser(ib,ibp,ik,i_MEM_old)
-           dG(2)=dG_lesser(ib,ibp,ik,i_MEM_prev)
-           dG(3)=dG_lesser(ib,ibp,ik,i_MEM_now)
+           dG(1)=dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_old)
+           dG(2)=dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_prev)
+           dG(3)=dG_lesser(ib,ibp,ik,i_sp_pol,i_MEM_now)
            dG_dT_max=dG_dT
          endif
        enddo
      enddo
    enddo
+   enddo
    !
    call PP_redux_wait(dG,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
    call PP_redux_wait(dG_dT_max,COMM=PAR_COM_Xk_ibz_INDEX%COMM)
diff --git a/src/sc/OEP_ApplySigmaCSX.F b/src/sc/OEP_ApplySigmaCSX.F
index 0cdbeeab30..d361602f03 100644
--- a/src/sc/OEP_ApplySigmaCSX.F
+++ b/src/sc/OEP_ApplySigmaCSX.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OEP_ApplySigmaCSX(X,Xw,E,k,q,U_csx)
  !
  !  Apply the NL local sX+cH operator to the valence wfs.
@@ -42,8 +46,9 @@ subroutine OEP_ApplySigmaCSX(X,Xw,E,k,q,U_csx)
  use X_m,             ONLY:X_mat,X_t
  !
  ! I/O
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  complex(SP),intent(inout)  :: U_csx(nkibz,maxval(n_met_bands),fft_size)
  type(levels), intent(in)   :: E
  type(bz_samp), intent(in)  :: k,q
@@ -67,10 +72,6 @@ subroutine OEP_ApplySigmaCSX(X,Xw,E,k,q,U_csx)
  call elemental_collision_free(isc)
  call PP_indexes_reset(px)
  !
- isc%ngrho=maxval(G_m_G)
- isc%iqref=0
- !
- !
  call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1,2/),MODE=VERIFY,ID=ID)
  io_err=io_X(X,Xw,ID)
  if (io_err<0) call error('Incomplete and/or broken static diel. fun. database')
@@ -79,6 +80,10 @@ subroutine OEP_ApplySigmaCSX(X,Xw,E,k,q,U_csx)
  YAMBO_ALLOC(PPaR_ws,(X%ng,X%ng))
  YAMBO_ALLOC(PPaR,(X%ng,X%ng))
  !
+ ! 2022/06/30 DS:
+ ! should it be NG=maxval(G_m_G)? In the old version of the code,
+ !   isc%ngrho=maxval(G_m_G)
+ ! was set few lines before. However the call to elemental owrites it
  call elemental_collision_alloc(isc,TITLE="OEP_SigmaCSX",NG=X%ng,GAMP_NG=(/X%ng,X%ng/))
  YAMBO_ALLOC(local_u,(fft_size))
  YAMBO_ALLOC(u,(X%ng))
@@ -104,7 +109,7 @@ subroutine OEP_ApplySigmaCSX(X,Xw,E,k,q,U_csx)
      call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/2*iqibz,2*iqibz+1/),ID=ID)
      io_err=io_X(X,Xw,ID)
      !
-     forall(i1=1:X%ng,i2=1:X%ng) PPaR(i1,i2)=X_mat(i1,i2,1)*isc%gamp(i1,i2) ! Poles and Residuals
+     forall (i1=1:X%ng,i2=1:X%ng) PPaR(i1,i2)=X_mat(i1,i2,1)*isc%gamp(i1,i2) ! Poles and Residuals
      !
      PPaR_is_TR_rotated=.false.
      !
@@ -112,7 +117,7 @@ subroutine OEP_ApplySigmaCSX(X,Xw,E,k,q,U_csx)
    !
    if (iqs>nsym/(i_time_rev+1) .and. i_space_inv == 0 .and..not.PPaR_is_TR_rotated) then
      PPaR_is_TR_rotated=.true.
-     forall(i1=1:X%ng,i2=1:X%ng) PPaR_ws(i2,i1)=PPaR(i1,i2)
+     forall (i1=1:X%ng,i2=1:X%ng) PPaR_ws(i2,i1)=PPaR(i1,i2)
      PPaR(:,:)=PPaR_ws(:,:)
    endif
    !
diff --git a/src/sc/OEP_ApplySigmaX.F b/src/sc/OEP_ApplySigmaX.F
index 4dc04ac26f..d7dfb9dd7d 100644
--- a/src/sc/OEP_ApplySigmaX.F
+++ b/src/sc/OEP_ApplySigmaX.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OEP_ApplySigmaX(E,k,q,U_x)
  !
  !  Apply the NL local X operator to the valence wfs:
@@ -35,8 +39,9 @@ subroutine OEP_ApplySigmaX(E,k,q,U_x)
  use SC,              ONLY:it_now
  !
  ! I/O
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  complex(SP),intent(out)  :: U_x(nkibz,maxval(n_met_bands),fft_size)
  type(levels), intent(in) :: E
  type(bz_samp), intent(in):: k,q
diff --git a/src/sc/OEP_app_inversion.F b/src/sc/OEP_app_inversion.F
index 4f48ae8d5e..221014e782 100644
--- a/src/sc/OEP_app_inversion.F
+++ b/src/sc/OEP_app_inversion.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+#include<yambo_wfs.h>
+!
 subroutine OEP_app_inversion(E,k,Vkli,Vslt,U,rho)
  !
  !  Exchange KLI/CEDA
@@ -37,8 +42,10 @@ subroutine OEP_app_inversion(E,k,Vkli,Vslt,U,rho)
  use stderr,          ONLY:intc
  !
  ! I/O
+ use y_memory_alloc
+ !
+ implicit none
  !
-#include<y_memory.h>
  real(SP),    intent(out):: Vkli(fft_size)
  real(SP),    intent(in) :: Vslt(fft_size),rho(fft_size)
  complex(SP), intent(in) :: U(nkibz,maxval(n_met_bands),fft_size)
@@ -47,7 +54,7 @@ subroutine OEP_app_inversion(E,k,Vkli,Vslt,U,rho)
  !
  ! Work Space
  !
- integer                  :: i1,ik,iv1,iv2,is,ig1,ig2,ifft,offt
+ integer                  :: i1,ik,iv1,iv2,is,ig1,ig2,i_wf,o_wf
  real(SP)                 :: EWeight, Prefactor
  complex(SP), allocatable :: rhotw_sum(:),rhotw_sum_TR(:),rho_xc(:),&
 &                            Vkli_RL(:),Ko(:,:),rhotw_prd(:,:),rhotw_rs(:)
@@ -109,13 +116,13 @@ subroutine OEP_app_inversion(E,k,Vkli,Vslt,U,rho)
      if (l_oep_EWeight) EWeight = E%E(iv1,ik,1)+ E%E_Fermi 
      do iv2 = 1, iv1!E%nbf(1)
        if (l_oep_kli.and.(iv2.ne.iv1)) cycle
-       ifft=WF%index(iv1,ik,1)
-       offt=WF%index(iv2,ik,1)
+       i_wf=WF%index(iv1,ik,1)
+       o_wf=WF%index(iv2,ik,1)
        rhotw_sum = (0._SP,0._SP)
        rhotw_sum_TR = (0._SP,0._SP)
        Prefactor = 1.0_SP
        if (iv1.ne.iv2) Prefactor = 2.0_SP
-       SigmaXC = dot_product(WF%c(:,1,ifft),U(ik,iv2,:)) 
+       SigmaXC = dot_product(WF%r(:,1,i_wf),U(ik,iv2,:)) 
        do i1 = 1,k%nstar(ik)
          is  = k%star(ik,i1)
          l_is_TR = is>nsym/(i_time_rev+1)
@@ -124,8 +131,8 @@ subroutine OEP_app_inversion(E,k,Vkli,Vslt,U,rho)
          isc%qs = (/1,1,1/)
          call scatter_Bamp(isc)
          !
-         !rhotw_ds(:)=conjg(WF%c(fft_rot_r(:,is),1,ifft))*WF%c(fft_rot_r(:,is),1,offt)/EWeight
-         rhotw_ds(:)=WF%c(fft_rot_r(:,is),1,ifft)*conjg(WF%c(fft_rot_r(:,is),1,offt))/EWeight
+         !rhotw_ds(:)=conjg(WF%r(fft_rot_r(:,is),1,i_wf))*WF%r(fft_rot_r(:,is),1,o_wf)/EWeight
+         rhotw_ds(:)=WF%r(fft_rot_r(:,is),1,i_wf)*wfconjg(WF%r(fft_rot_r(:,is),1,o_wf))/EWeight
          !
          rhotw_ds = rhotw_ds/(rho*real(fft_size,SP)) 
          rhotw_rs = cZERO
@@ -146,7 +153,7 @@ subroutine OEP_app_inversion(E,k,Vkli,Vslt,U,rho)
          rhotw_rs(1:QP_ng_Sx) = rhotw_ds(fft_g_table(1:QP_ng_Sx,1))
          !forall(ig1=2:QP_ng_Sx,ig2=2:QP_ng_Sx) rhotw_prd(ig1,ig2) = rhotw_prd(ig1,ig2) + &
          !  & Prefactor*E%f(iv1,ik,1)*conjg(rhotw_rs(ig1))*isc%rhotw(ig2)
-         forall(ig1=2:QP_ng_Sx,ig2=2:QP_ng_Sx) rhotw_prd(ig1,ig2) = rhotw_prd(ig1,ig2) + &
+	 forall (ig1=2:QP_ng_Sx,ig2=2:QP_ng_Sx) rhotw_prd(ig1,ig2) = rhotw_prd(ig1,ig2) + &
            & Prefactor*E%f(iv1,ik,1)*rhotw_rs(ig1)*isc%rhotw(ig2)
          !
          if (l_is_TR) then
diff --git a/src/sc/OEP_driver.F b/src/sc/OEP_driver.F
index 473d6507de..9965e9472d 100644
--- a/src/sc/OEP_driver.F
+++ b/src/sc/OEP_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OEP_driver(X,Xw,E,k,q,V_oep)
  !
  ! Driver for OEP potentials. Variables: 
@@ -33,8 +37,9 @@ subroutine OEP_driver(X,Xw,E,k,q,V_oep)
  use drivers,         ONLY:l_oep_exact,l_oep_slater,l_sc_srpa
  use SC,              ONLY:it_now,l_oep_iterative,l_oep_EWeight 
  use frequency,       ONLY:w_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  real(SP),intent(inout)  :: V_oep(fft_size)
  type(X_t),intent(in)    :: X(2)  
  type(w_samp),intent(in) :: Xw
diff --git a/src/sc/OEP_exact_inversion.F b/src/sc/OEP_exact_inversion.F
index 9fb052ceb8..51d3700c42 100644
--- a/src/sc/OEP_exact_inversion.F
+++ b/src/sc/OEP_exact_inversion.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine OEP_exact_inversion(X,E,k,Voep,W_x)
  !
  !  Exact exchange/static RPA potential (voep in direct space)
@@ -31,8 +35,9 @@ subroutine OEP_exact_inversion(X,E,k,Voep,W_x)
  use wave_func,       ONLY:WF
  !
  ! I/O
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  real(SP),intent(out)    :: Voep(fft_size)
  complex(SP),intent(in)  :: W_x(nkibz,maxval(n_met_bands),fft_size)
  type(X_t)    :: X
diff --git a/src/sc/SC_add_XC.F b/src/sc/SC_add_XC.F
index 294707f11a..327f825fb4 100644
--- a/src/sc/SC_add_XC.F
+++ b/src/sc/SC_add_XC.F
@@ -57,12 +57,12 @@ subroutine SC_add_XC(X,Xw,Xk,E,k,q,iteration,V_xc_nm1,ELEC,HOLE,DELTA)
  if (l_use_Hxc_collisions.or.l_use_COH_collisions) then
    !
    G_sex=cZERO
-   forall(ib=SC_bands(1):SC_bands(2),ik=1:E%nk) G_sex(ib,ib,ik)=cI*E%f(ib,ik,1)
+   forall (ib=SC_bands(1):SC_bands(2),ik=1:E%nk) G_sex(ib,ib,ik)=cI*E%f(ib,ik,1)
    G_coh=cZERO
-   forall(ib=SC_bands(1):SC_bands(2),ik=1:E%nk) G_coh(ib,ib,ik)=cI
+   forall (ib=SC_bands(1):SC_bands(2),ik=1:E%nk) G_coh(ib,ib,ik)=cI
    !
    ! Now I build the G_collisions rotating the non interaction one
-   ! G_collisions(ib,ib,ik)=(0.,1)*en%f(ib,ik)
+   ! G_collisions(ib,ib,ik)=(0._SP,1._SP)*en%f(ib,ik)
    !
    do ik=1,E%nk
      call OBS_rotate(H_rotation(:,:,ik,1),G_sex(:,:,ik),SC_nbands,-1)
diff --git a/src/sc/SC_driver.F b/src/sc/SC_driver.F
index 2b0fa65bd2..43dc99fc0a 100644
--- a/src/sc/SC_driver.F
+++ b/src/sc/SC_driver.F
@@ -5,13 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
  !
  use pars,           ONLY:SP,schlen,cZERO,rZERO
  use units,          ONLY:HA2EV
  use drivers,        ONLY:l_sc_coh,l_sc_sex,l_sc_is_libDFT,l_eval_collisions,l_sc_hartree,&
 &                         l_sc_exx,l_sc_electric,l_sc_fock,l_use_collisions
- use electrons,      ONLY:levels,nel,n_met_bands,n_sp_pol,n_spin,spin,n_spinor
+ use electrons,      ONLY:levels,nel,n_met_bands,n_sp_pol,n_spin,spin,n_spinor,deg_threshold
  use R_lattice,      ONLY:bz_samp,nkibz
  use QP_m,           ONLY:QP_nk,QP_Sc,QP_n_states,QP_Vnl_xc,SC_E_threshold
  use SC,             ONLY:SC_bands,SC_iterations,SC_rho_threshold,E_convergence,      &
@@ -48,8 +52,9 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
  use collision_ext,  ONLY:COH_COLL_element,HXC_COLL_element,COLLISIONS_have_HARTREE
  use electrons,      ONLY:Spin_magn
  use electric,       ONLY:ELECTRIC_alloc,ELECTRIC_free,W_electric
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E
  type(bz_samp)::k,q,Xk
@@ -157,9 +162,7 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
  if(.not.l_SC_RESTART.and.l_use_collisions) then
    !      
    H_rotation=cZERO
-   forall(i1=SC_bands(1):SC_bands(2))
-     H_rotation(i1,i1,:,:)=1._SP
-   end forall
+   forall (i1=SC_bands(1):SC_bands(2)) H_rotation(i1,i1,:,:)=1._SP
    !
  else if (.not.l_SC_cycle.and.l_SC_RESTART) then
    !
@@ -219,7 +222,7 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
        IO_ACT=manage_action(RD_CL_IF_END,i_frag,1,QP_nk*n_sp_pol)
        call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/1+i_frag/),ID=ID(2))
        io_WF=io_SC_components('WF',E,ID(2))
-       call WF_rotate(ik,i_sp_pol,WFo,fft_size)
+       call WF_rotate(ik,i_sp_pol,WFo,fft_size,"R")
      enddo
    enddo
    !
@@ -391,7 +394,7 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
          ! I/O [H_nl_sc]
          !
          IO_ACT=manage_action(WR_CL_IF_END,i_frag,1,QP_nk*n_sp_pol)
-	 call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/i_frag+1/),ID=ID(3))
+         call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/i_frag+1/),ID=ID(3))
          io_V=io_SC_components('V',E,ID(3))
          !
          ! Mixing for non-local self-energies
@@ -455,7 +458,7 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
        ! New wave functions 
        !====================
        !
-       call WF_rotate(ik,i_sp_pol,WFo,fft_size)
+       call WF_rotate(ik,i_sp_pol,WFo,fft_size,"R")
        !
        if (l_NSC_shot) call live_timing(steps=1)
        ! 
@@ -528,7 +531,7 @@ subroutine SC_driver(X,Xw,Xk,E,k,q,Dip)
          call WF_load(WF,WF_G_max,WF_Go_indx,SC_bands,(/1,nkibz/),title='-SC',force_WFo=.true.)
          !
          do ik=1,QP_nk
-           call WF_rotate(ik,i_sp_pol,WFo,fft_size)
+           call WF_rotate(ik,i_sp_pol,WFo,fft_size,"R")
          enddo
          !
        endif
@@ -822,8 +825,8 @@ subroutine SC_report()
          !
          ! Check for degenerate bands in the bare energies
          !
-         call degeneration_finder(E_reference%E(SC_bands(1):SC_bands(2),ik,i_sp_pol),&
-         &                             SC_nbands,first_el,n_of_el,n_deg_grp,0.0001/HA2EV)
+         call degeneration_finder(SC_nbands,first_el,n_of_el,n_deg_grp,&
+         &                        Er=E_reference%E(SC_bands(1):SC_bands(2),ik,i_sp_pol),deg_accuracy=deg_threshold)
          !
          do ib=SC_bands(1),SC_bands(2)
            !
@@ -843,7 +846,7 @@ subroutine SC_report()
            ! 
            ! simmetrize the spreads summing over all possible degenerate bands
            !
-           forall (ibp=SC_bands(1):SC_bands(2)) WS_spread(ibp)=abs(H_rotation(ibp,ib,ik,i_sp_pol))**2*100._SP/HA2EV
+	   forall (ibp=SC_bands(1):SC_bands(2)) WS_spread(ibp)=abs(H_rotation(ibp,ib,ik,i_sp_pol))**2*100._SP/HA2EV
            !
            ibp=b_of_max_proj(ib,ik,i_sp_pol)
            spread(ib,ik,i_sp_pol)=WS_spread( ibp )
diff --git a/src/sc/SC_start_and_restart.F b/src/sc/SC_start_and_restart.F
index 5d0b55e2c3..8cc5035380 100644
--- a/src/sc/SC_start_and_restart.F
+++ b/src/sc/SC_start_and_restart.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine SC_start_and_restart(X,E,Xk,Dip)
  !
  use pars,           ONLY:schlen
@@ -34,8 +38,9 @@ subroutine SC_start_and_restart(X,E,Xk,Dip)
  use FFT_m,          ONLY:fft_size
  use collision_ext,  ONLY:COLLISIONS_naming,COLLISIONS_have_HARTREE
  use descriptors,    ONLY:IO_desc_reset
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E
  type(X_t)    ::X(2)  ! Xx Em1s
diff --git a/src/sc/V_Mean_Potential.F b/src/sc/V_Mean_Potential.F
index 39f134fd35..a58a576efd 100644
--- a/src/sc/V_Mean_Potential.F
+++ b/src/sc/V_Mean_Potential.F
@@ -28,7 +28,7 @@ subroutine V_mean_potential(ik,i_sp_pol,WFo)
  !
  ! Work Space
  !
- integer    :: ib,ifft,ir,ibp
+ integer    :: ib,i_wf,ir,ibp
  complex(SP):: Sigma_times_R(SC_bands(1):SC_bands(2),SC_bands(1):SC_bands(2))
  !
  Sigma_Times_R=cZERO
@@ -56,16 +56,16 @@ subroutine V_mean_potential(ik,i_sp_pol,WFo)
    !
    do ibp=SC_bands(1),SC_bands(2)
      !
-     ifft=WFo%index(ibp,ik,1)
+     i_wf=WFo%index(ibp,ik,1)
      !
-     V_mean(:,ib)=V_mean(:,ib)+WFo%c(:,1,ifft)*Sigma_times_R(ibp,ib)
+     V_mean(:,ib)=V_mean(:,ib)+WFo%r(:,1,i_wf)*Sigma_times_R(ibp,ib)
      !
    enddo
    !
-   ifft=WFo%index(ib,ik,1)
+   i_wf=WFo%index(ib,ik,1)
    !
    do ir=1,fft_size
-     V_mean(ir,ib)=V_mean(ir,ib)/WFo%c(ir,1,ifft)
+     V_mean(ir,ib)=V_mean(ir,ib)/WFo%r(ir,1,i_wf)
    enddo
    !
  enddo
diff --git a/src/setup/G_shells_finder.F b/src/setup/G_shells_finder.F
index 649779f8fb..219c571176 100644
--- a/src/setup/G_shells_finder.F
+++ b/src/setup/G_shells_finder.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine G_shells_finder()
  !
@@ -21,13 +25,15 @@ subroutine G_shells_finder()
 &                       g_vec,g_rot,E_of_shell,rl_sop,minus_G
  use R_lattice,    ONLY:DEV_VAR(g_rot),DEV_VAR(minus_G)
  use vec_operate,  ONLY:iku_v_norm,v_is_zero,sort,degeneration_finder
+ use wave_func,    ONLY:wf_nc_k,E_of_WFs
  use IO_int,       ONLY:io_control
  use IO_m,         ONLY:OP_RD_CL,OP_WR_CL,VERIFY
  use zeros,        ONLY:G_iku_zero,G_mod_zero
  use parallel_m,   ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int, ONLY:PP_redux_wait,PARALLEL_index 
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
@@ -41,6 +47,8 @@ subroutine G_shells_finder()
  integer           :: ID,io_err
  integer, external :: io_GROT
  !
+ E_of_WFs=iku_v_norm(g_vec(wf_nc_k(1),:))**2/2._SP
+ !
  ! I/O (read)
  !============
  !
@@ -90,8 +98,8 @@ subroutine G_shells_finder()
  !
  if(G_mod_zero/=1.E-5) call msg('r','Shells accuracy on Gmod is ',G_mod_zero)
  !
- call degeneration_finder(G_mod,ng_closed,first_G_in_shell,ng_in_shell_TMP,n_g_shells,&
-&                         G_mod_zero,Include_single_values=.TRUE.)
+ call degeneration_finder(ng_closed,first_G_in_shell,ng_in_shell_TMP,n_g_shells,&
+&                        Er=G_mod,deg_accuracy=G_mod_zero,Include_single_values=.TRUE.)
  !
  ! Init
  !
diff --git a/src/setup/QP_state_table_setup.F b/src/setup/QP_state_table_setup.F
index 41294b7697..3a3d977159 100644
--- a/src/setup/QP_state_table_setup.F
+++ b/src/setup/QP_state_table_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_state_table_setup(en)
  !
  ! There are 2 objects that command the QP corrections
@@ -32,8 +36,9 @@ subroutine QP_state_table_setup(en)
 #if defined _SC
  use SC,            ONLY:l_SC_diagonal
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(levels)::en
  !
  ! Work Space
diff --git a/src/setup/build_spin_sop.F b/src/setup/build_spin_sop.F
index ab5d750a78..045fc2abbc 100644
--- a/src/setup/build_spin_sop.F
+++ b/src/setup/build_spin_sop.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine build_spin_sop()
  !
@@ -16,8 +20,9 @@ subroutine build_spin_sop()
  use D_lattice,      ONLY:DEV_VAR(spin_sop)
  use electrons,      ONLY:n_spinor
  use gpu_m,          ONLY:have_gpu
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  complex(SP)    ::t_rev(2,2)
  complex(SP)    ::spin_RX_delta(2,2),spin_RY_beta(2,2),spin_RZ_alpha(2,2)
@@ -37,6 +42,7 @@ subroutine build_spin_sop()
  endif
  !
  ! T_rev=(-i sigma_y K0 ) with K0 the complex conjugation.
+ ! NB: (-i sigma_y) K0 = K0 (-i sigma_y)
  t_rev=-cI*sigma_y
  !
  do is=1,nsym
diff --git a/src/setup/eval_Mtot.F b/src/setup/eval_Mtot.F
index 80beaa177d..10b80b17e2 100644
--- a/src/setup/eval_Mtot.F
+++ b/src/setup/eval_Mtot.F
@@ -5,20 +5,25 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine eval_Mtot(en,k,q)
  !
  use pars,           ONLY:SP
+ use parser_m,       ONLY:parser
  use FFT_m,          ONLY:fft_size
  use wave_func,      ONLY:WF
  use R_lattice,      ONLY:bz_samp
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,n_spin,Spin_magn,eval_magn
  use com,            ONLY:msg
- use electrons,      ONLY:eval_magn
  use interfaces,     ONLY:WF_load,el_magnetization
  use parallel_int,   ONLY:PARALLEL_global_indexes,PARALLEL_WF_distribute,&
 &                         PARALLEL_WF_index
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  type(levels)    ::en
  type(bz_samp)   ::k,q
@@ -27,7 +32,11 @@ subroutine eval_Mtot(en,k,q)
  !
  real(SP), allocatable :: magn_tmp(:,:)
  !
- if (eval_magn) return
+ if(n_spin==1) return
+ !
+ call parser("EvalMagn",eval_magn)
+ !
+ if(.not.eval_magn) return
  !
  call msg('r','DL Magnetization [cc]')
  !
@@ -35,12 +44,11 @@ subroutine eval_Mtot(en,k,q)
  call PARALLEL_WF_distribute(CLEAN_UP=.TRUE.)
  call PARALLEL_WF_index( )
  !
- call WF_load(WF,0,1,(/1,en%nbm/),(/1,k%nibz/),space='R',title='-WF')
- !
+ call fft_setup(0,1,.true.)
  YAMBO_ALLOC(magn_tmp,(fft_size,3))
  call el_magnetization(en,k,magn_tmp)
  YAMBO_FREE(magn_tmp)
  !
- call WF_free(WF)
+ call msg('s','Total magnetization (x,y,z)',real(Spin_magn(:),SP),"[Bohr_magneton]")
  !
 end subroutine eval_Mtot
diff --git a/src/setup/eval_minus_G.F b/src/setup/eval_minus_G.F
index d1354a108f..1a4eaed025 100644
--- a/src/setup/eval_minus_G.F
+++ b/src/setup/eval_minus_G.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine eval_minus_G()
  !
@@ -16,8 +20,9 @@ subroutine eval_minus_G()
  use D_lattice,    ONLY:inv_index
  use R_lattice,    ONLY:ng_closed,g_rot,minus_G
  use R_lattice,    ONLY:DEV_VAR(minus_G)
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
diff --git a/src/setup/setup.F b/src/setup/setup.F
index 2c4594d527..9e132e71ae 100644
--- a/src/setup/setup.F
+++ b/src/setup/setup.F
@@ -5,21 +5,26 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine setup(en,Xen,Ken,k,Xk)
+! headers
+!
+#include <y_memory.h>
+!#include <netcdf.inc>
+!
+subroutine setup(en,q,Xen,Ken,k,Xk)
  !
  use pars,           ONLY:SP,pi,schlen
  use units,          ONLY:Da2AU
  use drivers,        ONLY:l_setup
  use C_driver,       ONLY:code_bin
  use parser_m,       ONLY:parser
- use wave_func,      ONLY:wf_ng,wf_ncx,wf_igk
+ use wave_func,      ONLY:wf_ng,wf_ncx,wf_igk,wf_nc_k,E_of_WFs
  use electrons,      ONLY:levels,n_met_bands,n_full_bands,E_duplicate,E_reset
  use D_lattice,      ONLY:a,alat,nsym,i_time_rev,DL_vol,Tel,dl_sop,atom_mass,atom_pos,atoms_map,&
 &                         atoms_spatial_inversion,i_space_inv,input_Tel_is_negative,&
 &                         inv_index,atoms_string,load_PT_elements,n_atoms_species,Z_species,&
 &                         n_atomic_species,PT_elements,non_periodic_directions,lattice,&
 &                         symmetry_group_table,mag_syms,idt_index,symmetry_equiv_atoms
- use R_lattice,      ONLY:RL_vol,b,n_g_shells,ng_in_shell,Xk_grid_new_mapping,FineGd_desc,&
+ use R_lattice,      ONLY:RL_vol,b,n_g_shells,ng_in_shell,Xk_grid_new_mapping,FineGd_desc,g_rot,&
 &                         rl_sop,bz_samp,g_vec,E_of_shell,ng_closed,bz_samp_duplicate_Fg,MAX_kpts_to_report
  use com,            ONLY:msg,fat_log
  use stderr,         ONLY:intc
@@ -43,16 +48,16 @@ subroutine setup(en,Xen,Ken,k,Xk)
  ! For I/O debug, see below
  !use IO_m,           ONLY:netcdf_call
 #endif
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  ! For I/O debug, see below
 #if defined _HDF5_IO
-!#include <netcdf.inc>
 #endif
  !
  type(levels)    ::en,Xen,Ken
- type(bz_samp)   ::k,Xk
+ type(bz_samp)   ::q,k,Xk
  !
  ! Work Space
  !
@@ -242,14 +247,15 @@ subroutine setup(en,Xen,Ken,k,Xk)
  call G_shells_finder()
  !
  if ( wf_ng > maxval(wf_igk) ) then
-   call warning("wf_ng > maxval(wf_igk), probably because FFTGvecs in input. Reducing it")
+   call msg("r","wf_ng > maxval(wf_igk), likely because FFTGvecs was set in input.")
+   call msg("rn","Reducing wf_ng from "//trim(intc(wf_ng))//" to "//trim(intc(maxval(wf_igk))))
    wf_ng=maxval(wf_igk)
  endif
  !
  call msg('r','nG shells         ',n_g_shells)
  call msg('r','nG charge         ',ng_closed)
- call msg('r','nG WFs            ',wf_ng)
- call msg('r','nC WFs            ',wf_ncx)
+ call msg('r','nG WFs            ',(/wf_ng,wf_nc_k(1)/))
+ call msg('rn','nC WFs           ',wf_ncx)
  !
  ! Indexes of -G. minus_G_index(iG)| G_{minus_G_index(iG)}=-G. When there is no Spatial inversion
  ! the map is built in G_shells_finder
@@ -258,25 +264,34 @@ subroutine setup(en,Xen,Ken,k,Xk)
  !
  n_to_print=min(800,n_g_shells)
  if (.not.fat_log) n_to_print=n_to_print/10
+ if (n_to_print>n_g_shells/2) n_to_print=n_g_shells
  !
- call msg('r','G-vecs. in first '//trim(intc(n_to_print))//' shells',' [ Number ] ')
+ call msg('r','G-vecs. in first (and last) '//trim(intc(n_to_print))//' shells',' [ Number ] ')
  do i1=1,n_to_print,9
    call msg('r',' ',ng_in_shell(i1:min(i1+8,n_to_print)) )
  enddo
- if (n_to_print<n_g_shells) call msg('r',"...")
+ if (n_to_print<n_g_shells) then
+   call msg('r',"...")
+   do i1=n_g_shells-n_to_print,n_g_shells,9
+     call msg('r',' ',ng_in_shell(i1:min(i1+8,n_g_shells)) )
+   enddo
+ endif
+ !
+ call msg('nr','E charge         ',E_of_shell(n_g_shells),' [Ha]')
+ call msg('rn','E WFs            ',E_of_WFs              ,' [Ha]')
  !
- call msg('r','Shell energy in first '//trim(intc(n_to_print))//' shells',' [ mHa ] ')
+ call msg('r','Shell energy in first (and last) '//trim(intc(n_to_print))//' shells',' [ mHa ] ')
  do i1=1,n_to_print,9
    call msg('r',' ',E_of_shell(i1:min(i1+8,n_to_print))*1000._SP)
  enddo
- if (n_to_print<n_g_shells) call msg('r',"...")
- !
- ! First simplified implementation of wf_ng_1st_BZ & wf_ng_2st_BZ
- ! A more correct implementation would involve a rotation of the shells
- ! and the evaluation of the resulting wf_ng
+ if (n_to_print<n_g_shells) then
+   call msg('r',"...")
+   do i1=n_g_shells-n_to_print,n_g_shells,9
+     call msg('r',' ',E_of_shell(i1:min(i1+8,n_g_shells))*1000._SP)
+   enddo
+ endif
  !
- wf_ng_1st_BZ=wf_ng
- call PARSER_close_G(wf_ng_1st_BZ,'tRL')
+ wf_ng_1st_BZ=maxval(g_rot(:min(wf_ng,ng_closed),:))
  !
  call section('=','K-grid lattice')
  !=================================
@@ -401,6 +416,14 @@ subroutine setup(en,Xen,Ken,k,Xk)
  ! Once the occupations have been evaluated I dump Xen in Ken
  !
  call E_duplicate(Xen,Ken)
+ ! 
+ ! Test Spatial Inversion for WFs
+ ! Notice that the variable i_space_inv is already defined by
+ ! atoms_spatial_inversion()
+ call WF_spatial_inversion(en,k,q)
+ !
+ ! Magnetization
+ call eval_Mtot(en,k)
  !
 #if defined _SC
  K_kind=X_kind
diff --git a/src/tddft/TDDFT_ALDA_G_space.F b/src/tddft/TDDFT_ALDA_G_space.F
index 4293148a8f..1568ec02cf 100644
--- a/src/tddft/TDDFT_ALDA_G_space.F
+++ b/src/tddft/TDDFT_ALDA_G_space.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine TDDFT_ALDA_G_space(E,k)
  !
  use pars,          ONLY:DP,SP
@@ -17,8 +21,9 @@ subroutine TDDFT_ALDA_G_space(E,k)
  use xc_functionals,ONLY:V_xc,F_xc,magn,XC_potential_driver
  use global_XC,     ONLY:WF_xc_functional,WF_kind
  use interfaces,    ONLY:WF_load,WF_free,eval_G_minus_G
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::E
  type(bz_samp)::k
diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F
index 12b1300ca6..f7d50b954c 100644
--- a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F
+++ b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_L.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode)
  !
  ! Calculates the F_xc scattering 
@@ -16,7 +19,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
  !
  use pars,           ONLY:SP,DP,pi,cZERO
  use FFT_m,          ONLY:fft_size
- use wave_func,      ONLY:WF
+ use wave_func,      ONLY:rho_map_size,rho_map,WF
  use interfaces,     ONLY:WF_load,WF_free
  use R_lattice,      ONLY:qindx_X,bz_samp,minus_G
  use BS,             ONLY:l_BS_magnons,BS_T_grp,BS_bands
@@ -36,8 +39,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
  ! 
  ! Work Space
  !
- logical     :: l_load_WFs
- integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y
+ logical     :: l_use_rho_map,l_load_WFs
+ integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irho,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y
  integer     :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,&
 &               i_sp,i_T_grp_p,is(4),os(4),qs(3),NK(2)
  !
@@ -55,6 +58,9 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
  WF_symm2_p=cZERO
  rhotwr_p=  cZERO
  !
+ ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation
+ l_use_rho_map=allocated(rho_map)
+ !
 #if defined(__NOTNOW) && ! defined(_CUDA)
  !$omp parallel default(shared), private( K_EXCH_collision, &
  !$omp &        i_T_el,i_k_bz,i_k,i_s, i_p_bz,i_p,i_sp, i_v,i_c,i_sp_c,i_sp_v, &
@@ -85,19 +91,10 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
    i_sp_c=BS_T_grp(i_T_grp)%table(i_T_el,4)
    i_sp_v=BS_T_grp(i_T_grp)%table(i_T_el,5)
    !
-   is=(/0,i_k,i_s ,i_sp_c/)
-   os=(/0,i_p,i_sp,i_sp_v/)
+   is=(/i_c,i_k,i_s ,i_sp_c/)
+   os=(/i_v,i_p,i_sp,i_sp_v/)
    qs=(/minus_G(i_g_p),iq,1/)
    !
-   if (mode=="R".or.mode=="C".or.mode=="F") then
-     is(1)=i_c
-     os(1)=i_v
-   else if (mode=="A".or.mode=="Q") then
-     ! Same momenta and spin indexes, inverted bands
-     is(1)=i_v
-     os(1)=i_c
-   endif
-   !
    l_load_WFs= l_bs_tddft_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p))
    if (l_load_WFs) then
 #if defined(__NOTNOW) && ! defined(_CUDA)
@@ -119,7 +116,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
    call WF_apply_symm(os,WF_symm2_p)
    !
    if( n_spinor==1 ) then
-     rhotwr_p(:)=cmplx(WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
+     if(     l_use_rho_map) rhotwr_p(rho_map(:))=cmplx(WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
+     if(.not.l_use_rho_map) rhotwr_p(        : )=cmplx(WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
      call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_L(:,i_T_el,1,1))
    endif
    !
@@ -128,8 +126,10 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_L(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
      do i_spinor=1,n_spinor
        do j_spinor=1,n_spinor
          !
-	 do ifft=1,fft_size
-	   rhotwr_p(ifft)=cmplx(WF_symm1_p(ifft,i_spinor)*conjg(WF_symm2_p(ifft,j_spinor)),kind=DP)
+         do irho=1,rho_map_size
+           if(     l_use_rho_map) ifft=rho_map(irho)
+           if(.not.l_use_rho_map) ifft=        irho
+           rhotwr_p(ifft)=cmplx(WF_symm1_p(irho,i_spinor)*conjg(WF_symm2_p(irho,j_spinor)),kind=DP)
          enddo
          call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_L(:,i_T_el,i_spinor,j_spinor))
          !
diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F
index 0c815df0e5..38a93ecc85 100644
--- a/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F
+++ b/src/tddft/TDDFT_ALDA_eh_space_G_collisions_R.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_loop,tddft_wf,mode)
  !
  ! Calculates the F_xc scattering 
@@ -16,7 +19,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
  !
  use pars,           ONLY:SP,DP,pi,cZERO
  use FFT_m,          ONLY:fft_size
- use wave_func,      ONLY:WF
+ use wave_func,      ONLY:rho_map_size,rho_map,WF
  use xc_functionals, ONLY:F_xc_mat
  use interfaces,     ONLY:WF_load,WF_free
  use R_lattice,      ONLY:qindx_X,bz_samp,minus_G
@@ -37,8 +40,8 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
  ! 
  ! Work Space
  !
- logical     :: l_load_WFs
- integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y,&
+ logical     :: l_use_rho_map,l_load_WFs
+ integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irho,irhotw,is_yambo,os_yambo,i_spinor_y,j_spinor_y,&
 &               i_sp_loop,o_sp_loop
  integer     :: i_T_el,i_T_el_p,N_T_el_p,i_c,i_v,i_sp_c,i_sp_v,i_k_bz,i_k,i_s,i_g0,i_g1,i_g2,i_p_bz,i_g_p,i_p,&
 &               i_sp,i_T_grp_p,is(4),os(4),qs(3),NK(2)
@@ -64,6 +67,7 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
  rhotwr_p=  cZERO
  !
  ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation
+ l_use_rho_map=allocated(rho_map)
  !
 #if defined(__NOTNOW) && ! defined(_CUDA)
  !$omp parallel default(shared), private( K_EXCH_collision, &
@@ -99,15 +103,6 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
    os=(/i_v,i_p,i_sp,i_sp_v/)
    qs=(/minus_G(i_g_p),iq,1/)
    !
-   if (mode=="R".or.mode=="Q".or.mode=="F") then
-     is(1)=i_c
-     os(1)=i_v
-   else if (mode=="A".or.mode=="C") then
-     ! Same momenta and spin indexes, inverted bands
-     is(1)=i_v
-     os(1)=i_c
-   endif
-   !
    l_load_WFs= l_bs_tddft_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p))
    if (l_load_WFs) then
 #if defined(__NOTNOW) && ! defined(_CUDA)
@@ -130,7 +125,10 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
    !
    if( n_spin==1 ) then
      !
-     rhotwr_p(:)=cmplx(F_xc_mat(:,1,1,1,1)*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
+     if (     l_use_rho_map) rhotwr_p(rho_map(:))=cmplx(F_xc_mat(rho_map(:),1,1,1,1)*&
+     &                                             WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
+     if (.not.l_use_rho_map) rhotwr_p(        : )=cmplx(F_xc_mat(        : ,1,1,1,1)*&
+     &                                             WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
      call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_R(:,i_T_el,1,1))
      !
    endif
@@ -147,7 +145,9 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
          is_yambo=o_sp_loop
          os_yambo=i_sp_loop
          ! To check/fix the spinorial version 
-         rhotwr_p(:)=cmplx(F_xc_mat(:,is_yambo,os_yambo,is(4),os(4))* &
+         if (     l_use_rho_map) rhotwr_p(rho_map(:))=cmplx(F_xc_mat(rho_map(:),is_yambo,os_yambo,is(4),os(4))* &
+         &                                             WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
+         if (.not.l_use_rho_map) rhotwr_p(        : )=cmplx(F_xc_mat(        : ,is_yambo,os_yambo,is(4),os(4))* &
          &                                             WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1)),kind=DP)
          call perform_fft_3d(qs,rhotwr_p,BS_T_grp(i_T_grp)%O_tddft_R(:,i_T_el,i_sp_loop,o_sp_loop))
          !
@@ -164,10 +164,12 @@ subroutine TDDFT_ALDA_eh_space_G_collisions_R(iq,Xk,i_T_grp,NG,l_bs_tddft_wf_in_
          rhotwr_p=cZERO
          do ip_spinor=1,n_spinor
            do jp_spinor=1,n_spinor
-             do ifft=1,fft_size
+             do irho=1,rho_map_size
+               if(     l_use_rho_map) ifft=rho_map(irho)
+               if(.not.l_use_rho_map) ifft=        irho
                rhotwr_p(ifft)=rhotwr_p(ifft)+                        &
                & cmplx(F_xc_mat(ifft,i_spinor_y,j_spinor_y,ip_spinor,jp_spinor)* &
-	       & WF_symm1_p(ifft,ip_spinor)*conjg(WF_symm2_p(ifft,jp_spinor)),kind=DP)
+               & WF_symm1_p(irho,ip_spinor)*conjg(WF_symm2_p(irho,jp_spinor)),kind=DP)
              enddo
            enddo
          enddo
diff --git a/src/tddft/TDDFT_ALDA_eh_space_G_kernel.F b/src/tddft/TDDFT_ALDA_eh_space_G_kernel.F
index 94e0378420..c350401706 100644
--- a/src/tddft/TDDFT_ALDA_eh_space_G_kernel.F
+++ b/src/tddft/TDDFT_ALDA_eh_space_G_kernel.F
@@ -7,11 +7,7 @@
 !
 function TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc, BS_T_grp_ip, i_Tp, BS_T_grp_ik, i_Tk) result(H_x)
  !
- ! Calculates the F_xc scattering 
- !
- !  (ic(1),ik(1),is(1)) --<--:...:--<-- (ic(2),ik(2),is(2))
- !                              :Fxc:
- !  (iv(1),ik(3),is(3)) -->--:...:-->-- (iv(2),ik(4),is(4))
+ ! It calculates the F_xc matrix elements from the product O_L(k)*O_R(p)
  !
  use pars,           ONLY:SP,pi,cZERO
  use FFT_m,          ONLY:fft_size
@@ -52,3 +48,57 @@ function TDDFT_ALDA_eh_space_G_kernel(BS_n_g_fxc, BS_T_grp_ip, i_Tp, BS_T_grp_ik
  H_x=H_x/4._SP/pi
  !
 end function TDDFT_ALDA_eh_space_G_kernel
+!
+!
+!
+!
+function TDDFT_ALDA_eh_space_G_kernel_coupling(BS_n_g_fxc, BS_T_grp_ip, i_Tp, BS_T_grp_ik, i_Tk) result(H_x)
+ !
+ ! This is used only at q=0
+ !
+ ! It calculates the F_xc matrix elements from the product O_L(k)*Coupling(O_R(p))
+ !
+ ! The Coupling(O) operation uses the relation O_{cvk}(G)=O*_{vck}(-G)
+ !
+ use pars,           ONLY:SP,pi,cZERO
+ use FFT_m,          ONLY:fft_size
+ use R_lattice,      ONLY:minus_G
+ use BS,             ONLY:BS_T_group
+ use wrapper_omp,    ONLY:V_dot_V_omp
+ use electrons,      ONLY:n_spinor,spin_occ
+ !
+ implicit none
+ !
+ integer,                  intent(in) :: BS_n_g_fxc,i_Tp,i_Tk
+ type(BS_T_group), target, intent(in) :: BS_T_grp_ip,BS_T_grp_ik
+ !
+ complex(SP) :: H_x
+ !
+ integer     :: i_sp_xc,o_sp_xc,i_sp_c,i_sp_v
+ !
+ i_sp_c=BS_T_grp_ik%table(i_Tk,4)
+ i_sp_v=BS_T_grp_ik%table(i_Tk,5)
+ !
+ if (n_spinor==1) then
+   H_x=V_dot_V_omp(BS_n_g_fxc,BS_T_grp_ik%O_tddft_L(minus_G(:BS_n_g_fxc),i_Tk,  1   ,  1   ),&
+   &                              BS_T_grp_ip%O_tddft_R(:,i_Tp,i_sp_c,i_sp_v) )
+ else if (n_spinor==2) then
+   H_x=cZERO
+   do i_sp_xc=1,n_spinor
+     do o_sp_xc=1,n_spinor
+       H_x=H_x+V_dot_V_omp(BS_n_g_fxc,BS_T_grp_ik%O_tddft_L(minus_G(:BS_n_g_fxc),i_Tk,i_sp_xc,o_sp_xc),&
+       &                                  BS_T_grp_ip%O_tddft_R(:,i_Tp,i_sp_xc,o_sp_xc) )
+     enddo
+   enddo
+ endif
+ !
+ ! tddft_alda_r_space should be mutiplied by X, it is mutiplied by Co in K
+ ! X  =      spin_occ/DL_vol/Nq 
+ ! Co = 4*pi*spin_occ/DL_vol/Nq
+ ! --> X/Co = 1/4/pi
+ !
+ ! Compared to the implementation of the exchange, there is a conjg operation.
+ ! This is due to the use of ik "on the left", and ip "on the right", which gives a conjg
+ H_x=conjg(H_x)/4._SP/pi
+ !
+end function TDDFT_ALDA_eh_space_G_kernel_coupling
diff --git a/src/tddft/TDDFT_ALDA_eh_space_R_kernel.F b/src/tddft/TDDFT_ALDA_eh_space_R_kernel.F
index b4dd717333..993b945749 100644
--- a/src/tddft/TDDFT_ALDA_eh_space_R_kernel.F
+++ b/src/tddft/TDDFT_ALDA_eh_space_R_kernel.F
@@ -23,6 +23,7 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
  !
  use pars,           ONLY:SP,pi,cZERO
  use FFT_m,          ONLY:fft_size
+ use wave_func,      ONLY:rho_map_size,rho_map
  use xc_functionals, ONLY:F_xc_mat
  use BS,             ONLY:l_BS_magnons
  use wrapper_omp,    ONLY:V_dot_V_omp
@@ -41,7 +42,8 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
  ! 
  ! Work Space
  !
- integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irhotw,&
+ logical     :: l_use_rho_map
+ integer     :: i_spinor,j_spinor,ip_spinor,jp_spinor,ifft,irho,irhotw,&
  &              is_yambo,os_yambo,i_spinor_y,j_spinor_y,isp_loc(4),osp_loc(4)
  !
  complex(SP), pointer :: rhotwr1_p(:)
@@ -68,6 +70,7 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
  rhotwr2_p =cZERO
  !
  ! Use pointers both for CUDA and to avoid continuous allocation and de-allocation
+ l_use_rho_map=allocated(rho_map)
  !
  !
  call WF_apply_symm(is,WF_symm1_p)
@@ -78,9 +81,9 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
  if(n_spinor==2) then
    do i_spinor=1,n_spinor
      do j_spinor=1,n_spinor
-       do ifft=1,fft_size
-         irhotw=ifft+(i_spinor-1)*fft_size+(j_spinor-1)*n_spinor*fft_size
-         rhotwr1_p(irhotw)=conjg(WF_symm1_p(ifft,i_spinor))*WF_symm2_p(ifft,j_spinor)
+       do irho=1,rho_map_size
+         irhotw=irho+(i_spinor-1)*rho_map_size+(j_spinor-1)*n_spinor*rho_map_size
+         rhotwr1_p(irhotw)=conjg(WF_symm1_p(irho,i_spinor))*WF_symm2_p(irho,j_spinor)
        enddo
      enddo
    enddo
@@ -99,7 +102,8 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
  call WF_apply_symm(osp_loc,WF_symm2_p)
  !
  if( n_spin==1 ) then
-   rhotwr2_p(:)=F_xc_mat(:,1,1,1,1)*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
+   if (     l_use_rho_map) rhotwr2_p(:)=F_xc_mat(rho_map(:),1,1,1,1)*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
+   if (.not.l_use_rho_map) rhotwr2_p(:)=F_xc_mat(:,1,1,1,1)*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
  endif
  !
  if(n_sp_pol==2) then
@@ -111,7 +115,8 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
    is_yambo=os(4)
    os_yambo=is(4)
    ! To check/fix the spinorial version 
-   rhotwr2_p(:)=F_xc_mat(:,is_yambo,os_yambo,isp(4),osp(4))*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
+   if (     l_use_rho_map) rhotwr2_p(:)=F_xc_mat(rho_map(:),is_yambo,os_yambo,isp(4),osp(4))*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
+   if (.not.l_use_rho_map) rhotwr2_p(:)=F_xc_mat(:,is_yambo,os_yambo,isp(4),osp(4))*WF_symm1_p(:,1)*conjg(WF_symm2_p(:,1))
  endif
  !
  if(n_spinor==2) then
@@ -122,12 +127,22 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
        j_spinor_y=i_spinor
        do ip_spinor=1,n_spinor
          do jp_spinor=1,n_spinor
-           do ifft=1,fft_size
-             irhotw=ifft+(i_spinor-1)*fft_size+(j_spinor-1)*n_spinor*fft_size
-             rhotwr2_p(irhotw)=rhotwr2_p(irhotw)+                        &
-             & F_xc_mat(ifft,i_spinor_y,j_spinor_y,ip_spinor,jp_spinor)* &
-             & WF_symm1_p(ifft,ip_spinor)*conjg(WF_symm2_p(ifft,jp_spinor))
-           enddo
+           if(     l_use_rho_map) then
+             do irho=1,rho_map_size
+               ifft=rho_map(irho)
+               irhotw=irho+(i_spinor-1)*rho_map_size+(j_spinor-1)*n_spinor*rho_map_size
+               rhotwr2_p(irhotw)=rhotwr2_p(irhotw)+                        &
+               & F_xc_mat(ifft,i_spinor_y,j_spinor_y,ip_spinor,jp_spinor)* &
+               & WF_symm1_p(irho,ip_spinor)*conjg(WF_symm2_p(irho,jp_spinor))
+             enddo
+           else
+             do ifft=1,rho_map_size
+               irhotw=ifft+(i_spinor-1)*rho_map_size+(j_spinor-1)*n_spinor*rho_map_size
+               rhotwr2_p(irhotw)=rhotwr2_p(irhotw)+                        &
+               & F_xc_mat(ifft,i_spinor_y,j_spinor_y,ip_spinor,jp_spinor)* &
+               & WF_symm1_p(irho,ip_spinor)*conjg(WF_symm2_p(irho,jp_spinor))
+             enddo
+           endif
          enddo
        enddo
      enddo
@@ -136,7 +151,7 @@ function TDDFT_ALDA_eh_space_R_kernel(is,os,isp,osp,tddft_wf,mode) result(H_x)
  !
  ! SUM
  !=====
- H_x=V_dot_V_omp(fft_size*n_spinor*n_spinor,rhotwr1_p,rhotwr2_p)
+ H_x=V_dot_V_omp(rho_map_size*n_spinor*n_spinor,rhotwr1_p,rhotwr2_p)
  !
  ! tddft_alda_r_space should be mutiplied by X, it is mutiplied by Co in K
  ! X  = fft_size*spin_occ/DL_vol/Nq 
diff --git a/src/tddft/TDDFT_BSK_2_FXC.F b/src/tddft/TDDFT_BSK_2_FXC.F
index 62abc2fc44..9ad1d7c652 100644
--- a/src/tddft/TDDFT_BSK_2_FXC.F
+++ b/src/tddft/TDDFT_BSK_2_FXC.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine TDDFT_BSK_2_FXC(iq,ik1,ik2,O1x,O2x,O_x_dim,W,X_static)
  !
  use pars,          ONLY:SP
@@ -22,8 +26,9 @@ subroutine TDDFT_BSK_2_FXC(iq,ik1,ik2,O1x,O2x,O_x_dim,W,X_static)
  use IO_m,          ONLY:OP_RD_CL,OP_RD,RD_CL,&
 &                        NONE,OP_WR_CL,REP,VERIFY
  use frequency,     ONLY:w_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(w_samp) ::W
  integer      ::ik1,ik2,O_x_dim,iq
@@ -105,14 +110,22 @@ subroutine TDDFT_BSK_2_FXC(iq,ik1,ik2,O1x,O2x,O_x_dim,W,X_static)
  YAMBO_ALLOC(O2x_qpg,(BS_nT_at_k(ik2),FXC_n_g_corr))
  YAMBO_ALLOC(blk_Eo,(2,O_x_dim))
  !
- forall(i1=1:FXC_n_g_corr) conjg_O1x_qpg(i1,:BS_nT_at_k(ik1))=conjg(O1x(i1,:BS_nT_at_k(ik1)))*bare_qpg(iq,i1)
- forall(i2=1:FXC_n_g_corr) O2x_qpg(:BS_nT_at_k(ik2),i2)=O2x(i2,:BS_nT_at_k(ik2))*bare_qpg(iq,i2)
+ do i1=1,FXC_n_g_corr 
+   conjg_O1x_qpg(i1,:BS_nT_at_k(ik1))=conjg(O1x(i1,:BS_nT_at_k(ik1)))*bare_qpg(iq,i1)
+ enddo
+ do i2=1,FXC_n_g_corr 
+   O2x_qpg(:BS_nT_at_k(ik2),i2)=O2x(i2,:BS_nT_at_k(ik2))*bare_qpg(iq,i2)
+ enddo
  !
  ! Kernel diagonal and simmetrization
  !
  if (ik1==ik2) then
-   forall(i1=1:BS_nT_at_k(ik1)) FXC_K_diagonal(blk_pos(1)+i1)=real(BS_mat(i1,i1))
-   forall(i1=1:BS_nT_at_k(ik1)) BS_mat(i1,i1)=(0.,0.)
+   do i1=1,BS_nT_at_k(ik1) 
+     FXC_K_diagonal(blk_pos(1)+i1)=real(BS_mat(i1,i1))
+   enddo
+   do i1=1,BS_nT_at_k(ik1) 
+     BS_mat(i1,i1)=(0._SP,0._SP)
+   enddo
    do i1=1,BS_nT_at_k(ik1)
      do i2=i1+1,BS_nT_at_k(ik1)
        BS_mat(i2,i1)=conjg(BS_mat(i1,i2))
@@ -122,20 +135,31 @@ subroutine TDDFT_BSK_2_FXC(iq,ik1,ik2,O1x,O2x,O_x_dim,W,X_static)
  !
  ! BS diagonal + Non interacting energies and widths
  !
- forall(i1=1:BS_nT_at_k(ik1)) blk_Eo(1,i1)=BSS_eh_E(blk_pos(1)+i1)+&
+ do i1=1,BS_nT_at_k(ik1) 
+   blk_Eo(1,i1)=BSS_eh_E(blk_pos(1)+i1)+&
 &                                          FXC_K_diagonal(blk_pos(1)+i1)
- forall(i2=1:BS_nT_at_k(ik2)) blk_Eo(2,i2)=BSS_eh_E(blk_pos(2)+i2)+&
+ enddo
+ do i2=1,BS_nT_at_k(ik2)
+   blk_Eo(2,i2)=BSS_eh_E(blk_pos(2)+i2)+&
 &                                          FXC_K_diagonal(blk_pos(2)+i2)
+ enddo
  !
  if (allocated(BSS_eh_W)) then
-   forall(i1=1:BS_nT_at_k(ik1)) blk_Eo(1,i1)=blk_Eo(1,i1)+(0.,1.)*BSS_eh_W(blk_pos(1)+i1)
-   forall(i2=1:BS_nT_at_k(ik2)) blk_Eo(1,i2)=blk_Eo(1,i2)+(0.,1.)*BSS_eh_W(blk_pos(2)+i2)
+   do i1=1,BS_nT_at_k(ik1) 
+     blk_Eo(1,i1)=blk_Eo(1,i1)+(0.,1.)*BSS_eh_W(blk_pos(1)+i1)
+   enddo
+   do i2=1,BS_nT_at_k(ik2) 
+     blk_Eo(1,i2)=blk_Eo(1,i2)+(0.,1.)*BSS_eh_W(blk_pos(2)+i2)
+   enddo
  endif
  !
  ! Projectors
  !
- forall(i1=1:BS_nT_at_k(ik1),i2=1:BS_nT_at_k(ik2)) &
-&      BS_mat(i1,i2)=BS_mat(i1,i2)*Fb2x1(blk_Eo(1,i1),blk_Eo(2,i2),E_same_treshold)
+ do i1=1,BS_nT_at_k(ik1)
+   do i2=1,BS_nT_at_k(ik2) 
+     BS_mat(i1,i2)=BS_mat(i1,i2)*Fb2x1(blk_Eo(1,i1),blk_Eo(2,i2),E_same_treshold)
+   enddo
+ enddo
  !
  ! Degenerations:
  !
@@ -216,13 +240,21 @@ subroutine TDDFT_BSK_2_FXC(iq,ik1,ik2,O1x,O2x,O_x_dim,W,X_static)
    ! Upper diagonal block contribution
    !
    if (FXC_is_retarded) then 
-     forall(i1=1:BS_nT_at_k(ik1)) wk1(i1,:)=right_R(i1,:)*(&
+     do i1=1,BS_nT_at_k(ik1) 
+       wk1(i1,:)=right_R(i1,:)*(&
 &          1._SP/(W%p(iw)-blk_Eo(1,i1))+1._SP/(-W%p(iw)-blk_Eo(1,i1)) )
-     forall(i2=1:BS_nT_at_k(ik2)) wk2(:,i2)= left_R(:,i2)*(&
+     enddo
+     do i2=1,BS_nT_at_k(ik2) 
+       wk2(:,i2)= left_R(:,i2)*(&
 &          1._SP/(W%p(iw)-blk_Eo(2,i2))+1._SP/(-W%p(iw)-blk_Eo(2,i2)) )
+     enddo
    else
-     forall(i1=1:BS_nT_at_k(ik1)) wk1(i1,:)=right_R(i1,:)/(W%p(iw)-blk_Eo(1,i1))
-     forall(i2=1:BS_nT_at_k(ik2)) wk2(:,i2)= left_R(:,i2)/(W%p(iw)-blk_Eo(2,i2))
+     do i1=1,BS_nT_at_k(ik1) 
+       wk1(i1,:)=right_R(i1,:)/(W%p(iw)-blk_Eo(1,i1))
+     enddo
+     do i2=1,BS_nT_at_k(ik2) 
+       wk2(:,i2)= left_R(:,i2)/(W%p(iw)-blk_Eo(2,i2))
+     enddo
    endif
    !
    F_xc_gspace(:,:,iw)=F_xc_gspace(:,:,iw)+Co*(matmul(conjg_O1x_qpg,wk1)+matmul(wk2,O2x_qpg))
@@ -231,18 +263,30 @@ subroutine TDDFT_BSK_2_FXC(iq,ik1,ik2,O1x,O2x,O_x_dim,W,X_static)
    !
    if (ik1/=ik2) then
      if (FXC_is_retarded) then 
-       forall(i1=1:BS_nT_at_k(ik1)) wk1(i1,:)=right_R(i1,:)*(&
+       do i1=1,BS_nT_at_k(ik1) 
+         wk1(i1,:)=right_R(i1,:)*(&
 &          1._SP/(-conjg(W%p(iw))-blk_Eo(1,i1))+1._SP/(conjg(W%p(iw))-blk_Eo(1,i1)) )
-       forall(i2=1:BS_nT_at_k(ik2)) wk2(:,i2)= left_R(:,i2)*(&
+       enddo
+       do i2=1,BS_nT_at_k(ik2) 
+         wk2(:,i2)= left_R(:,i2)*(&
 &          1._SP/(-conjg(W%p(iw))-blk_Eo(2,i2))+1._SP/(conjg(W%p(iw))-blk_Eo(2,i2)) )
+       enddo
      else
-       forall(i1=1:BS_nT_at_k(ik1)) wk1(i1,:)=right_R(i1,:)/(conjg(W%p(iw))-blk_Eo(1,i1))
-       forall(i2=1:BS_nT_at_k(ik2)) wk2(:,i2)= left_R(:,i2)/(conjg(W%p(iw))-blk_Eo(2,i2))
+       do i1=1,BS_nT_at_k(ik1) 
+         wk1(i1,:)=right_R(i1,:)/(conjg(W%p(iw))-blk_Eo(1,i1))
+       enddo
+       do i2=1,BS_nT_at_k(ik2) 
+         wk2(:,i2)= left_R(:,i2)/(conjg(W%p(iw))-blk_Eo(2,i2))
+       enddo
      endif
      !
      FXC_temp=Co*(matmul(conjg_O1x_qpg,wk1)+matmul(wk2,O2x_qpg))
      !
-     forall(i1=1:FXC_n_g_corr,i2=1:FXC_n_g_corr) F_xc_gspace(i1,i2,iw)=F_xc_gspace(i1,i2,iw)+conjg(FXC_temp(i2,i1))
+     do i1=1,FXC_n_g_corr
+       do i2=1,FXC_n_g_corr 
+         F_xc_gspace(i1,i2,iw)=F_xc_gspace(i1,i2,iw)+conjg(FXC_temp(i2,i1))
+       enddo
+     enddo
    endif
    !
    call PP_redux_wait(F_xc_gspace(:,:,iw))
diff --git a/src/tddft/TDDFT_BSK_disk_2_FXC.F b/src/tddft/TDDFT_BSK_disk_2_FXC.F
index 86b9146506..ee745d4456 100644
--- a/src/tddft/TDDFT_BSK_disk_2_FXC.F
+++ b/src/tddft/TDDFT_BSK_disk_2_FXC.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine TDDFT_BSK_disk_2_FXC(iq,Xk,W,X_static)
  !
  use pars,          ONLY:SP,pi
@@ -24,7 +28,9 @@ subroutine TDDFT_BSK_disk_2_FXC(iq,Xk,W,X_static)
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD,RD,NONE,RD_CL,VERIFY,OP_RD_CL,REP
  use TDDFT,         ONLY:F_xc_gspace,FXC_n_g_corr,FXC_K_diagonal,io_BS_Fxc
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  type(bz_samp) ::Xk
  type(w_samp)  ::W
  integer       ::iq
@@ -100,7 +106,7 @@ subroutine TDDFT_BSK_disk_2_FXC(iq,Xk,W,X_static)
      !
      ACTION=RD
      if (ik1==1.and.ik2==nXkibz) ACTION=RD_CL
-     call io_control(ACTION=ACTION,COM=NONE,SEC=(/UP_matrix_index(ik1,ik2)/),ID=ID)
+     call io_control(ACTION=ACTION,COM=NONE,SEC=(/UP_matrix_index(ik1,ik2)+1/),ID=ID)
      io_BS_err=io_BS(iq,X_static,ID)
      !
      if (BS_res_K_exchange) then
diff --git a/src/tddft/TDDFT_PF_coefficient.F b/src/tddft/TDDFT_PF_coefficient.F
index 8c10f6290c..52d7b8a6f8 100644
--- a/src/tddft/TDDFT_PF_coefficient.F
+++ b/src/tddft/TDDFT_PF_coefficient.F
@@ -197,7 +197,7 @@ real function Alpha_LOC(density)
      !
      density_cmplx(:,1)=density(:)
      !
-     call eval_Gradient(density_cmplx,d_density,1,"density")
+     call eval_Gradient_rho_pot(density_cmplx,d_density)
      !
      alpha_ = 0._SP
      do ir = 1, fft_size
diff --git a/src/tddft/TDDFT_do_X_W_typs.F b/src/tddft/TDDFT_do_X_W_typs.F
index 28526fecde..35e1075265 100644
--- a/src/tddft/TDDFT_do_X_W_typs.F
+++ b/src/tddft/TDDFT_do_X_W_typs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine TDDFT_do_X_W_typs(iq,X,wv)
  !
  ! Prepare X and wv types for a TDDFT calculation
@@ -20,8 +24,9 @@ subroutine TDDFT_do_X_W_typs(iq,X,wv)
 &                        FXC_LRC_alpha,FXC_LRC_beta,io_BS_Fxc
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD_CL,VERIFY,REP
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(X_t)   :: X
  type(w_samp):: wv
diff --git a/src/tools/c_printing.c b/src/tools/c_printing.c
index f620d55644..7f62e72501 100644
--- a/src/tools/c_printing.c
+++ b/src/tools/c_printing.c
@@ -1,42 +1,65 @@
 /*
   License-Identifier: GPL
- 
+
   Copyright (C) 2006 The Yambo Team
- 
+
   Authors (see AUTHORS file for details): AM
 */
 
-#include <string.h>
 #include <stdio.h>
+#include <string.h>
 #include <sys/ioctl.h>
-#include <wrapper.h>
 #include <unistd.h>
+#include <wrapper.h>
 
 int guess_winsize()
 {
- int width;
- struct winsize ws;
- if (!isatty(2)) {width=-1;return width;}
- if( ioctl(STDERR_FILENO, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0 )
-  width = 79;
- else
-  width = ws.ws_col - 1;
- return width;
+    int width;
+    struct winsize ws;
+    if (!isatty(2))
+    {
+        width = -1;
+        return width;
+    }
+    if (ioctl(STDERR_FILENO, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0)
+    {
+        width = 79;
+    }
+    else
+    {
+        width = ws.ws_col - 1;
+    }
+    return width;
 };
 int C_FUNC(win_size, WIN_SIZE)(int *win_width)
 {
- *win_width = 0;
- *win_width = guess_winsize();
- return 0;
+    *win_width = 0;
+    *win_width = guess_winsize();
+    return 0;
 };
-int C_FUNC(c_fprintf, C_FPRINTF)(char *lfmt, char *msg,char *rfmt, char *sfmt)
+int C_FUNC(c_fprintf, C_FPRINTF)(char *lfmt, char *msg, char *rfmt, char *sfmt)
 {
- if (strcmp(lfmt,"r")==0) fprintf(stderr,"\r");
- if (strcmp(lfmt,"n")==0) fprintf(stderr,"\n");
- if (strcmp(lfmt,"nn")==0) fprintf(stderr,"\n\n");
- fprintf(stderr,sfmt,msg);
- if (strcmp(rfmt,"n")==0) fprintf(stderr,"\n");
- if (strcmp(rfmt,"nn")==0) fprintf(stderr,"\n\n");
- fflush(stderr);
- return 0;
+    if (strcmp(lfmt, "r") == 0)
+    {
+        fprintf(stderr, "\r");
+    }
+    if (strcmp(lfmt, "n") == 0)
+    {
+        fprintf(stderr, "\n");
+    }
+    if (strcmp(lfmt, "nn") == 0)
+    {
+        fprintf(stderr, "\n\n");
+    }
+    fprintf(stderr, sfmt, msg);
+    if (strcmp(rfmt, "n") == 0)
+    {
+        fprintf(stderr, "\n");
+    }
+    if (strcmp(rfmt, "nn") == 0)
+    {
+        fprintf(stderr, "\n\n");
+    }
+    fflush(stderr);
+    return 0;
 };
diff --git a/src/tools/ct_cclock.c b/src/tools/ct_cclock.c
index 1cd1d5176b..d1c8a83c2b 100644
--- a/src/tools/ct_cclock.c
+++ b/src/tools/ct_cclock.c
@@ -11,14 +11,14 @@
 #include <sys/time.h>
 
 #if defined _irix || defined _ultrix || defined _dec_alpha
-void cclock_ (cpu)
+void cclock_(cpu)
 #elif defined _T3E || defined _T3Efhi
-void CCLOCK (cpu)         /* Apparently MUST be uppercase for Cray */
+void CCLOCK(cpu) /* Apparently MUST be uppercase for Cray */
 #else
-void cclock (cpu)         /* OK for ibm, hp with sppuxOS, ... */
+void cclock(cpu) /* OK for ibm, hp with sppuxOS, ... */
 #endif
 
-double* cpu;
+    double* cpu;
 {
- *cpu = ((double) clock()) / CLOCKS_PER_SEC;
+    *cpu = ((double)clock()) / CLOCKS_PER_SEC;
 }
diff --git a/src/tools/ct_cptimer.c b/src/tools/ct_cptimer.c
index ff636cc782..b39f13ee78 100644
--- a/src/tools/ct_cptimer.c
+++ b/src/tools/ct_cptimer.c
@@ -8,42 +8,47 @@
 
 #include <wrapper.h>
 #if defined(_WIN32)
-#include <windows.h>
 #include <stdint.h>
+#include <windows.h>
 #endif
-#include <sys/time.h>
 #include <sys/resource.h>
+#include <sys/time.h>
 
-double C_FUNC(qe_cclock,QE_CCLOCK)()
+double C_FUNC(qe_cclock, QE_CCLOCK)()
 /*
  Return the second elapsed since Epoch (00:00:00 UTC, January 1, 1970)
 */
 {
- struct timeval tmp;
- double sec;
- gettimeofday( &tmp, (struct timezone *)0 );
- sec = tmp.tv_sec + ((double)tmp.tv_usec)/1000000.0;
- return sec;
+    struct timeval tmp;
+    double sec;
+    gettimeofday(&tmp, (struct timezone *)0);
+    sec = tmp.tv_sec + ((double)tmp.tv_usec) / 1000000.0;
+    return sec;
 }
-double C_FUNC(qe_scnds,QE_SCNDS) ( )
-/* 
- Return the cpu time associated to the current process 
+double C_FUNC(qe_scnds, QE_SCNDS)()
+/*
+ Return the cpu time associated to the current process
 */
 {
- double sec=0.0;
+    double sec = 0.0;
 #if defined(_WIN32)
- // from MSDN docs.
- FILETIME ct,et,kt,ut;
- union { FILETIME ft; uint64_t ui; } cpu;
- if (GetProcessTimes(GetCurrentProcess(),&ct,&et,&kt,&ut)) {
-  cpu.ft = ut;
-  sec = cpu.ui * 0.0000001;
- }
+    // from MSDN docs.
+    FILETIME ct, et, kt, ut;
+    union
+    {
+        FILETIME ft;
+        uint64_t ui;
+    } cpu;
+    if (GetProcessTimes(GetCurrentProcess(), &ct, &et, &kt, &ut))
+    {
+        cpu.ft = ut;
+        sec = cpu.ui * 0.0000001;
+    }
 #else
- static struct rusage T;
- getrusage(RUSAGE_SELF, &T);
- sec = ((double)T.ru_utime.tv_sec + ((double)T.ru_utime.tv_usec)/1000000.0);
+    static struct rusage T;
+    getrusage(RUSAGE_SELF, &T);
+    sec =
+        ((double)T.ru_utime.tv_sec + ((double)T.ru_utime.tv_usec) / 1000000.0);
 #endif
- return sec;
+    return sec;
 }
-
diff --git a/src/tools/ct_etime.c b/src/tools/ct_etime.c
index 68fa77a921..248924c152 100644
--- a/src/tools/ct_etime.c
+++ b/src/tools/ct_etime.c
@@ -5,18 +5,18 @@
    or http://www.gnu.org/copyleft/gpl.txt .
 */
 
-double F90_FUNC(etime,ETIME)(tt)
+double F90_FUNC(etime, ETIME)(tt)
 #if defined _DOUBLE
-double tt[2];
+    double tt[2];
 #else
-float tt[2];
+    float tt[2];
 #endif
 {
- int who;
- struct rusage used;
- who = 0;
- getrusage(who,&used);
- tt[0] = used.ru_utime.tv_sec+((used.ru_utime.tv_usec)/1000000.);
- tt[1] = used.ru_stime.tv_sec+((used.ru_stime.tv_usec)/1000000.);
- return(tt[0]+tt[1]);
+    int who;
+    struct rusage used;
+    who = 0;
+    getrusage(who, &used);
+    tt[0] = used.ru_utime.tv_sec + ((used.ru_utime.tv_usec) / 1000000.);
+    tt[1] = used.ru_stime.tv_sec + ((used.ru_stime.tv_usec) / 1000000.);
+    return (tt[0] + tt[1]);
 }
diff --git a/src/tools/io.c b/src/tools/io.c
index de56995e77..4e9e299e6c 100644
--- a/src/tools/io.c
+++ b/src/tools/io.c
@@ -16,83 +16,71 @@
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
 */
-#include <stdlib.h>
-#include <unistd.h>
 #include <dirent.h>
-#include <sys/stat.h>
-#include <string.h>
-#include <wrapper.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <wrapper.h>
 
-void C_FUNC(imkdir, IMKDIR)
-                 (char *name) 
+void C_FUNC(imkdir, IMKDIR)(char* name)
 {
-        struct stat buf;
-        if(!*name || stat(name, &buf) == 0) return;
-        mkdir(name, 0775);
-}
-                 
-void C_FUNC(ichdir, ICHDIR)
-                 (char *name)
-{
-        chdir(name);
-}
-                 
-void C_FUNC(irename, IRENAME)
-                 (char *namein, char *nameout)
-{       
-        rename(namein,nameout);
+    struct stat buf;
+    if (!*name || stat(name, &buf) == 0)
+    {
+        return;
+    }
+    mkdir(name, 0775);
 }
 
-void C_FUNC(iremove, IREMOVE)
-                 (char *name)
-{
-        remove(name);
-}
+void C_FUNC(ichdir, ICHDIR)(char* name) { chdir(name); }
 
-void C_FUNC(isystem, ISSYSTEM)
-                 (char *name, int* ierr)
+void C_FUNC(irename, IRENAME)(char* namein, char* nameout)
 {
-        *ierr=system(name);
+    rename(namein, nameout);
 }
 
-void C_FUNC(igetcwd, IGETCWD)
-                 (char* name, int* ln)
+void C_FUNC(iremove, IREMOVE)(char* name) { remove(name); }
+
+void C_FUNC(isystem, ISSYSTEM)(char* name, int* ierr) { *ierr = system(name); }
+
+void C_FUNC(igetcwd, IGETCWD)(char* name, int* ln)
 {
-  getcwd(name,256);
-  *ln=strlen(name);
+    getcwd(name, 256);
+    *ln = strlen(name);
 }
- 
-void C_FUNC(igethname, IGETHNAME)
-                 (char* name, int* ln)
+
+void C_FUNC(igethname, IGETHNAME)(char* name, int* ln)
 {
-  gethostname(name,256);
-  *ln=strlen(name);
+    gethostname(name, 256);
+    *ln = strlen(name);
 }
 
-void C_FUNC(ifolder_list, IFOLDER_LIST)
-                 (char* folder, char* list, int* ln)
+void C_FUNC(ifolder_list, IFOLDER_LIST)(char* folder, char* list, int* ln)
 {
- DIR *dir,*subdir;
- struct dirent *ent;
- char the_list[500000]={'\0'};
- char  PWD[256] = ".";
- *ln=0;
- getcwd(PWD,256);
- chdir(folder);
- dir = opendir (".");
- if (dir != NULL) {
-   while ((ent = readdir (dir)) != NULL) 
-    if ( (subdir = opendir (ent->d_name)) != NULL) {
-      strcat(the_list," ");
-      strcat(the_list,ent->d_name);
-      closedir (subdir);
+    DIR *dir, *subdir;
+    struct dirent* ent;
+    char the_list[500000] = {'\0'};
+    char PWD[256] = ".";
+    *ln = 0;
+    getcwd(PWD, 256);
+    chdir(folder);
+    dir = opendir(".");
+    if (dir != NULL)
+    {
+        while ((ent = readdir(dir)) != NULL)
+        {
+            if ((subdir = opendir(ent->d_name)) != NULL)
+            {
+                strcat(the_list, " ");
+                strcat(the_list, ent->d_name);
+                closedir(subdir);
+            }
+        }
+        closedir(dir);
+        *ln = strlen(the_list);
+        sprintf(list, "%s", the_list);
     }
-   closedir (dir);
-   *ln=strlen(the_list);
-   sprintf(list,"%s",the_list);
- }
- chdir(PWD);
+    chdir(PWD);
 }
-
diff --git a/src/tools/memstat.c b/src/tools/memstat.c
index 4cfae2ce93..ed90573afb 100644
--- a/src/tools/memstat.c
+++ b/src/tools/memstat.c
@@ -6,42 +6,41 @@
   or http://www.gnu.org/copyleft/gpl.txt .
 */
 
-#include <wrapper.h>
 #include <have_malloc.h>
+#include <wrapper.h>
 
-/* 
-  This function return the numer of kilobytes allocated by the calling process. 
+/*
+  This function return the numer of kilobytes allocated by the calling process.
   Author: Carlo Cavazzoni.
 */
 
-#if defined (__SVR4) && defined (__sun)
+#if defined(__SVR4) && defined(__sun)
 #define SUN_MALLINFO
 #endif
 
-#if defined(HAVE_MALLINFO) && !defined(__QK_USER__) && !defined(SUN__MALLINFO) 
+#if defined(HAVE_MALLINFO) && !defined(__QK_USER__) && !defined(SUN__MALLINFO)
 #include <malloc.h>
 
-void C_FUNC(memstat,MEMSTAT)(int *kilobytes)
+void C_FUNC(memstat, MEMSTAT)(int *kilobytes)
 {
-
-  struct mallinfo info;  
-  info = mallinfo();
+    struct mallinfo info;
+    info = mallinfo();
 
 #if defined(__AIX)
-  *kilobytes = (info.arena) / 1024 ;
+    *kilobytes = (info.arena) / 1024;
 #else
-/*
-    arena+hblkhd             total taken from the system
-    uordblks+usmblks+hblkhd  total in use by program
-    fordblks+fsmblks         total free within program
+    /*
+        arena+hblkhd             total taken from the system
+        uordblks+usmblks+hblkhd  total in use by program
+        fordblks+fsmblks         total free within program
 
-*/
-  *kilobytes = (info.arena + info.hblkhd) / 1024 ;
+    */
+    *kilobytes = (info.arena + info.hblkhd) / 1024;
 #endif
 
 #else
-void C_FUNC(memstat,MEMSTAT)(int *kilobytes)
+void C_FUNC(memstat, MEMSTAT)(int *kilobytes)
 {
-  *kilobytes = -1;
+    *kilobytes = -1;
 #endif
 }
diff --git a/src/tools/stack.c b/src/tools/stack.c
index b955905807..a71bee3db4 100644
--- a/src/tools/stack.c
+++ b/src/tools/stack.c
@@ -6,16 +6,16 @@
   or http://www.gnu.org/copyleft/gpl.txt .
 */
 
-#include <wrapper.h>
 #include <sys/resource.h>
+#include <wrapper.h>
 
-void C_FUNC(remove_stack_limit,REMOVE_STACK_LIMIT) (void) {
- 
-struct rlimit rlim = { RLIM_INFINITY, RLIM_INFINITY };
-
-if ( setrlimit(RLIMIT_STACK, &rlim) == -1 ) {
-  /* perror("Cannot set stack size to infinity");
-     exit(1); */
-}
+void C_FUNC(remove_stack_limit, REMOVE_STACK_LIMIT)(void)
+{
+    struct rlimit rlim = {RLIM_INFINITY, RLIM_INFINITY};
 
+    if (setrlimit(RLIMIT_STACK, &rlim) == -1)
+    {
+        /* perror("Cannot set stack size to infinity");
+           exit(1); */
+    }
 }
diff --git a/src/wf_and_fft/.objects b/src/wf_and_fft/.objects
index c48017aa5c..c6e7207146 100644
--- a/src/wf_and_fft/.objects
+++ b/src/wf_and_fft/.objects
@@ -29,10 +29,11 @@ FFT_gpu = fft_3d_cuda.o
 FFT_gpu = fft_3d_hip.o
 #endif
 objs = fft_setup.o fft_free.o $(FFT_object) $(FFT_more) $(FFT_gpu) scatter_Bamp_using_the_gradient.o \
-       fft_check_size.o WF_free.o WF_alloc.o WF_shift_kpoint.o WF_symm_kpoint_incl.o \
+       fft_check_size.o WF_free.o WF_alloc.o WF_shift_kpoint.o WF_symm_kpoint_g.o WF_symm_kpoint_incl.o \
        scatter_Bamp_spin.o scatter_Bamp_incl.o scatter_Gamp_incl.o WF_spatial_inversion.o \
-       WF_load.o WF_symm.o WF_apply_symm_incl.o $(WF_rot_object) \
+       WF_phase_matrices.o WF_load.o WF_apply_symm_incl.o $(WF_rot_object) \
        $(WF_der_objects) eval_R_minus_R.o scatter_Modscr.o load_cc.o \
        PP_uspp_init.o PP_addus_vloc_aug.o PP_vloc_augment.o PP_compute_becp.o \
-       PP_becprod_calc.o PP_scatterBamp_init.o PP_rotate_becp.o PP_addus_rhotw_aug.o
+       PP_becprod_calc.o PP_scatterBamp_init.o PP_rotate_becp.o PP_addus_rhotw_aug.o \
+       EXC_WF_symm_qpoint.o
 
diff --git a/src/wf_and_fft/DOUBLE_project.dep b/src/wf_and_fft/DOUBLE_project.dep
index 6ab9e156fb..02fc5c7d60 100644
--- a/src/wf_and_fft/DOUBLE_project.dep
+++ b/src/wf_and_fft/DOUBLE_project.dep
@@ -1,3 +1,4 @@
+ EXC_WF_symm_qpoint.o
  PP_addus_rhotw_aug.o
  PP_addus_vloc_aug.o
  PP_becprod_calc.o
@@ -12,11 +13,12 @@
  WF_derivative.o
  WF_free.o
  WF_load.o
+ WF_phase_matrices.o
  WF_rotate.o
  WF_shift_kpoint.o
  WF_spatial_inversion.o
- WF_symm.o
  WF_symm_kpoint.o
+ WF_symm_kpoint_g.o
  WF_symm_kpoint_incl.o
  eval_GradOperator.o
  eval_R_minus_R.o
diff --git a/src/wf_and_fft/EXC_WF_symm_qpoint.F b/src/wf_and_fft/EXC_WF_symm_qpoint.F
new file mode 100644
index 0000000000..b0efc668ff
--- /dev/null
+++ b/src/wf_and_fft/EXC_WF_symm_qpoint.F
@@ -0,0 +1,207 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2015 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!
+! headers
+!
+#include<y_memory.h>
+!
+subroutine EXC_WF_symm_qpoint(Xk,qpt,first_state,nstates,iq,isq,igq,&
+&                                        ID,S_index,EXC_WFs,l_min_mem)
+ !
+ use pars,          ONLY:SP,cZERO,cONE
+ use electrons,     ONLY:n_sp_pol,n_max_deg
+ use D_lattice,     ONLY:idt_index,i_time_rev,nsym,sop_inv,sop_tab
+ use R_lattice,     ONLY:bz_samp,qindx_X,qindx_C
+ use BS_solvers,    ONLY:BS_mat,BSS_n_eig,BSS_eh_table_m1
+ use BS,            ONLY:BS_bands,BS_H_dim
+ use wave_func,     ONLY:io_WF_phases,WF_phases_b_map,WF_phases
+ use IO_int,        ONLY:io_control
+ use IO_m,          ONLY:DUMP,REP,RD
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp), intent(in)   :: Xk,qpt
+ integer,       intent(in)   :: first_state,nstates
+ integer,       intent(in)   :: iq,isq,igq
+ integer,       intent(inout):: ID
+ complex(SP),   intent(out)  :: EXC_WFs(BS_H_dim,nstates)
+ integer,       intent(in)   :: S_index(BSS_n_eig)
+ logical,       intent(in)   :: l_min_mem
+ !
+ ! Work space
+ !
+ logical                 :: t_rev
+ integer :: iexc,il,ik,istark,isk,ikbz,ipbz,ipmqbz,ipmqg0,iv1,ic1,iv2,ic2,&
+ &          i_v1_bnd,i_v1_grp,i_c1_bnd,i_c1_grp,i_v2_bnd,i_v2_grp,i_c2_bnd,i_c2_grp,&
+ &          ikmqbz,ikmqg0,iqbz,i_sp_pol,ip,ipmq,istarp,istarpmq,i1,i2,&
+ &          neh1,neh2,io_err,nsz(5),V_bands(2),C_bands(2),ipmq_last
+ complex(SP) :: phase_vv,phase_cc
+ complex(SP), allocatable :: WF_phases_cc(:,:,:,:,:),WF_phases_vv(:,:,:,:,:)
+ !
+ if(isq==idt_index) then
+   do iexc=1,nstates
+     il=S_index(iexc+first_state-1)
+     EXC_WFs(:,iexc)=BS_mat(:,il)
+   enddo
+   return
+ endif
+ !
+ t_rev= (isq>nsym/(i_time_rev+1))
+ !
+ iqbz=qpt%k_table(iq,isq)
+ !
+ !V_bands=(/BS_bands(1),maxval(Ken%nbm)/)
+ !C_bands=(/minval(Ken%nbf)+1,BS_bands(2)/)
+ !
+ EXC_WFs=cZERO
+ !
+ ! First get A(l,R*q)= Ry * A(l,q)
+ !
+ do i_sp_pol=1,n_sp_pol
+   !
+   do ik=1,Xk%nibz
+     !
+     ! For the phases I need:
+     ! - R p_BZ = k_BZ
+     ! - the symmetry used to reach k_BZ, e.g. isk
+     !ik_ref=ikbz
+     !is_ref=isk
+     !
+     ipmq_last=0
+     ! 
+     if (l_min_mem) then
+       !
+       nsz(1:2)=maxval(WF_phases_b_map(:,ik,i_sp_pol,1))
+       nsz(3)  =maxval(WF_phases_b_map(:,ik,i_sp_pol,2))
+       nsz(4)  =nsym
+       nsz(5)  =Xk%nstar(ik)
+       !
+       YAMBO_ALLOC(WF_phases_cc,(nsz(1),nsz(2),nsz(3),nsz(4),nsz(5)))
+       !
+       call io_control(ACTION=RD,COM=REP,SEC=(/3/),MODE=DUMP,ID=ID)
+       io_err=io_WF_phases(BS_bands,ik,0,i_sp_pol,ID,nsz,WF_phases_cc)
+       !
+     endif
+     !
+     do istark=1,Xk%nstar(ik)
+       !
+       isk =Xk%star(ik,istark)
+       ikbz=Xk%k_table(ik,isk)
+       !
+       ! p_BZ = (R)^-1 k_BZ = (R)^-1 (Rk) k_IBZ
+       ipbz=Xk%k_table(ik,sop_tab(sop_inv(isq),isk))
+       !
+       ip    =Xk%sstar(ipbz,1)
+       istarp=Xk%sstar(ipbz,3)
+       !
+       if (ip/=ik) call error("WF_phases: rotation of ik got a different ip in the IBZ")
+       !
+       ! (R)^-1 k_BZ - q_IBZ = p_BZ - q_IBZ
+       ipmqbz=qindx_X(iq,ipbz,1)
+       ipmqg0=qindx_X(iq,ipbz,2)
+       !
+       ipmq    =Xk%sstar(ipmqbz,1)
+       istarpmq=Xk%sstar(ipmqbz,3)
+       !
+       ! For the phases I need R pmq_BZ = k_BZ - R q_IBZ = k_BZ - q_BZ
+       !ikmqbz=qindx_C(ikbz,iqbz,1)
+       !ikmqg0=qindx_C(ikbz,iqbz,2)
+       !
+       if (ipmq/=ipmq_last.and.l_min_mem) then
+         !
+         if (ipmq_last>0) then
+           YAMBO_FREE(WF_phases_vv)
+         endif
+         !
+         nsz(1:2)=maxval(WF_phases_b_map(:,ipmq,i_sp_pol,1))
+         nsz(3)  =maxval(WF_phases_b_map(:,ipmq,i_sp_pol,2))
+         nsz(4)  =nsym
+         nsz(5)  =Xk%nstar(ipmq)
+         !
+         YAMBO_ALLOC(WF_phases_vv,(nsz(1),nsz(2),nsz(3),nsz(4),nsz(5)))
+         !
+         if (ipmq/=ip) then
+           call io_control(ACTION=RD,COM=REP,SEC=(/3/),MODE=DUMP,ID=ID)
+           io_err=io_WF_phases(BS_bands,ipmq,0,i_sp_pol,ID,nsz,WF_phases_vv)
+           ipmq_last=ipmq
+         else
+           WF_phases_vv=WF_phases_cc
+           ipmq_last=ip
+         endif
+         !
+       endif
+       !
+       do iexc=1,nstates
+         il=S_index(iexc+first_state-1)
+         !
+         do iv1 = BS_bands(1),BS_bands(2) 
+           !
+           i_v1_bnd=WF_phases_b_map(iv1,ipmq,i_sp_pol,1)
+           i_v1_grp=WF_phases_b_map(iv1,ipmq,i_sp_pol,2)
+           !
+           do iv2 = BS_bands(1),BS_bands(2) 
+             !
+             i_v2_bnd=WF_phases_b_map(iv2,ipmq,i_sp_pol,1)
+             i_v2_grp=WF_phases_b_map(iv2,ipmq,i_sp_pol,2)
+             !
+             if (i_v1_grp/=i_v2_grp) cycle
+             !
+             do ic1 = BS_bands(1),BS_bands(2)
+               !
+               i_c1_bnd=WF_phases_b_map(ic1,ip,i_sp_pol,1)
+               i_c1_grp=WF_phases_b_map(ic1,ip,i_sp_pol,2)
+               !
+               neh1=BSS_eh_table_m1(ikbz,iv1,ic1,i_sp_pol,1)
+               !
+               if (neh1==0) cycle
+               !
+               do ic2 = BS_bands(1),BS_bands(2) 
+                 !
+                 i_c2_bnd=WF_phases_b_map(ic2,ip,i_sp_pol,1)
+                 i_c2_grp=WF_phases_b_map(ic2,ip,i_sp_pol,2)
+                 !
+                 if (i_c1_grp/=i_c2_grp) cycle
+                 !
+                 neh2=BSS_eh_table_m1(ipbz,iv2,ic2,i_sp_pol,1)
+                 !
+                 if (neh2==0) cycle
+                 !
+                 if (l_min_mem) then
+                   phase_vv=conjg(WF_phases_vv(i_v1_bnd,i_v2_bnd,i_v1_grp,isq,istarpmq))
+                   phase_cc=      WF_phases_cc(i_c1_bnd,i_c2_bnd,i_c1_grp,isq,istarp)
+                 else
+                   i1=1 ; i2=1
+                   if(ipmq>0) i1= sum(Xk%nstar(:ipmq-1))+istarpmq
+                   if(ip  >0) i2= sum(Xk%nstar(:ip  -1))+istarp
+                   phase_vv=conjg(WF_phases(i_v1_bnd,i_v2_bnd,i_v1_grp,isq,i1))
+                   phase_cc=      WF_phases(i_c1_bnd,i_c2_bnd,i_c1_grp,isq,i2)
+                 endif
+                 !
+                 if(.not.t_rev) EXC_WFs(neh1,iexc)=EXC_WFs(neh1,iexc)+      BS_mat(neh2,il) *phase_vv*phase_cc
+                 if(     t_rev) EXC_WFs(neh1,iexc)=EXC_WFs(neh1,iexc)+conjg(BS_mat(neh2,il))*phase_vv*phase_cc
+                 !
+               enddo
+             enddo
+             !
+           enddo
+         enddo
+         !
+       enddo
+       !
+     enddo
+     !
+     if (l_min_mem) then
+       YAMBO_FREE(WF_phases_cc)
+       YAMBO_FREE(WF_phases_vv)
+     endif
+     !
+   enddo
+ enddo
+ !
+end subroutine EXC_WF_symm_qpoint
diff --git a/src/wf_and_fft/PP_compute_becp.F b/src/wf_and_fft/PP_compute_becp.F
index c7b7cc13a8..bfeadc0c8e 100644
--- a/src/wf_and_fft/PP_compute_becp.F
+++ b/src/wf_and_fft/PP_compute_becp.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF IM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PP_compute_becp(becp, npwk, wf_nb, wf_c, wf_b_indx)
  !
  ! Compute becp = < beta_j | wfc_n > matrix elements
@@ -17,8 +21,9 @@ subroutine PP_compute_becp(becp, npwk, wf_nb, wf_c, wf_b_indx)
  use qe_pseudo_m,   ONLY:nkb,vkb,bec_type,is_allocated_bec_type,&
 &                        allocate_bec_type,calbec
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! vars
  !
diff --git a/src/wf_and_fft/PP_rotate_becp.F b/src/wf_and_fft/PP_rotate_becp.F
index 3a254fa7d3..fc4d9e1fef 100644
--- a/src/wf_and_fft/PP_rotate_becp.F
+++ b/src/wf_and_fft/PP_rotate_becp.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF IM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PP_rotate_becp(isc,ibec,obec)
  !
  ! isc(1), ibnd
@@ -25,8 +29,9 @@ subroutine PP_rotate_becp(isc,ibec,obec)
 &                        irt,atom_pos,n_atoms_species,n_atomic_species
  use R_lattice,     ONLY:k_pt,b
  use qe_pseudo_m,   ONLY:bec_type,beccopy,d1,d2,d3,nh,upf,indv_ijkb0
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! vars
  !
diff --git a/src/wf_and_fft/PP_scatterBamp_init.F b/src/wf_and_fft/PP_scatterBamp_init.F
index 7150557f60..c657c46165 100644
--- a/src/wf_and_fft/PP_scatterBamp_init.F
+++ b/src/wf_and_fft/PP_scatterBamp_init.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF IM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PP_scatterBamp_init(isc)
  !
  ! init USPP workspace in isc scatterBamp argument
@@ -15,8 +19,9 @@ subroutine PP_scatterBamp_init(isc)
  use R_lattice,     ONLY:q_pt,b,rl_sop
  use vec_operate,   ONLY:c2a
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! vars
  !
diff --git a/src/wf_and_fft/PP_uspp_init.F b/src/wf_and_fft/PP_uspp_init.F
index c3ab7e0ad3..5c01bba052 100644
--- a/src/wf_and_fft/PP_uspp_init.F
+++ b/src/wf_and_fft/PP_uspp_init.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF IM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PP_uspp_init()
  !
  ! checks and reads USPP DB, and, in case, initialize
@@ -25,8 +29,9 @@ subroutine PP_uspp_init()
  use gvect,         ONLY:qe_eigts1=>eigts1, qe_eigts2=>eigts2, qe_eigts3=>eigts3,&
 &                        qe_g=>g, qe_gg=>gg, qe_ngm=>ngm, qe_mill=>mill
  use timing_m,      ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space 
  !
diff --git a/src/wf_and_fft/PP_vloc_augment.F b/src/wf_and_fft/PP_vloc_augment.F
index 453940c0ae..a3b7067804 100644
--- a/src/wf_and_fft/PP_vloc_augment.F
+++ b/src/wf_and_fft/PP_vloc_augment.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF IM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PP_vloc_augment(QP_Vloc,Vloc,ik,ib,ibp)
  !
  ! Add the USPP augmentation to the matrix element of a local potential
@@ -18,8 +22,9 @@ subroutine PP_vloc_augment(QP_Vloc,Vloc,ik,ib,ibp)
  use FFT_m,         ONLY:fftw_plan
 #endif
  use qe_pseudo_m,   ONLY:qe_omega=>omega,qe_ngm=>ngm,becprod
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! vars
  !
diff --git a/src/wf_and_fft/WF_alloc.F b/src/wf_and_fft/WF_alloc.F
index e9b43dddaf..9dfc4bd364 100644
--- a/src/wf_and_fft/WF_alloc.F
+++ b/src/wf_and_fft/WF_alloc.F
@@ -5,18 +5,23 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+#include<yambo_wfs.h>
+!
 subroutine WF_alloc(WF,k_extrema_only)
  !
- use pars,          ONLY:cZERO
- use wave_func,     ONLY:WAVEs,states_to_load,wf_ng,wf_ncx
- use FFT_m,         ONLY:fft_size
+ use pars,          ONLY:cZERO,SP
+ use wave_func,     ONLY:WAVEs,wfZERO,states_to_load,wf_ng,wf_ncx,rho_map_size
  use electrons,     ONLY:n_spinor
  use parallel_int,  ONLY:PARALLEL_live_message
  use gpu_m,         ONLY:have_gpu
  use devxlib,       ONLY:devxlib_memset_d
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type(WAVEs)           :: WF
  logical, intent(in)   :: k_extrema_only
@@ -57,20 +62,29 @@ subroutine WF_alloc(WF,k_extrema_only)
  !
  call PARALLEL_live_message("Wave-Function states",LOADED=N_loaded,TOTAL=N_total)
  !
- if (WF%space=='R')  wf_grid_size=fft_size
+ if (WF%space=='R')  wf_grid_size=rho_map_size
  if (WF%space=='G')  wf_grid_size=wf_ng
  if (WF%space=='C')  wf_grid_size=wf_ncx
  if (WF%space=='B')  wf_grid_size=wf_ncx
  !
- YAMBO_ALLOC(WF%c,(wf_grid_size,n_spinor,WF%N))
- if (have_gpu) then
-   YAMBO_ALLOC_GPU(DEV_VAR(WF%c),(wf_grid_size,n_spinor,WF%N))
+ if (WF%space=='R')  then
+   YAMBO_ALLOC(WF%r,(wf_grid_size,n_spinor,WF%N))
+   if (have_gpu) then
+     YAMBO_ALLOC_GPU(DEV_VAR(WF%r),(wf_grid_size,n_spinor,WF%N))
+   endif
+   WF%r=wfcmplx(wfZERO,kind=SP)
+   if (have_gpu) call devxlib_memset_d(DEV_VAR(WF%r),wfZERO)
+ else
+   YAMBO_ALLOC(WF%c,(wf_grid_size,n_spinor,WF%N))
+   if (have_gpu) then
+     YAMBO_ALLOC_GPU(DEV_VAR(WF%c),(wf_grid_size,n_spinor,WF%N))
+   endif
+   WF%c=cZERO
+   if (have_gpu) call devxlib_memset_d(DEV_VAR(WF%c),cZERO)
  endif
  !
  YAMBO_ALLOC(WF%index,(WF%b(2),WF%k(2),WF%sp_pol(2)))
- !
- WF%c=cZERO
- if (have_gpu) call devxlib_memset_d(DEV_VAR(WF%c),cZERO)
+ !AMBO_ALLOC(WF%index,(WF%b(1):WF%b(2),WF%k(1):WF%k(2),WF%sp_pol(1):WF%sp_pol(2)))
  !
  WF%index=0
  !
diff --git a/src/wf_and_fft/WF_apply_symm.F b/src/wf_and_fft/WF_apply_symm.F
index 1ca5f3185d..ce0e182321 100644
--- a/src/wf_and_fft/WF_apply_symm.F
+++ b/src/wf_and_fft/WF_apply_symm.F
@@ -5,28 +5,32 @@
 !
 ! Authors (see AUTHORS file for details): DS AF
 !
+! headers
+!
+#include<yambo_wfs.h>
+!
 subroutine DEV_SUB(WF_apply_symm)(isc,WF_symm)
  !
  ! INCLUDED in: WF_apply_symm_incl.F 
  !
  use pars,          ONLY:SP
- use wave_func,     ONLY:WF
+ use wave_func,     ONLY:WF,rho_map_size,rho_map,rho_map_inv
  use FFT_m,         ONLY:DEV_VAR(fft_rot_r),fft_size
  use stderr,        ONLY:intc
  use parallel_m,    ONLY:myid
  use electrons,     ONLY:n_spinor
  use D_lattice,     ONLY:nsym,DEV_VAR(spin_sop),i_time_rev,idt_index
- use devxlib,       ONLY:devxlib_memcpy_d2d,devxlib_conjg_d
+ use devxlib,       ONLY:devxlib_conjg_d !,devxlib_memcpy_d2d
  !
  implicit none
  !
  integer,     intent(in)  :: isc(4)
- complex(SP), intent(out) DEV_ATTR :: WF_symm(fft_size,n_spinor)
+ complex(SP), intent(out) DEV_ATTR :: WF_symm(rho_map_size,n_spinor)
  !
- complex(SP), pointer     DEV_ATTR :: WF_p(:,:,:)
+ WF_RSPACE(SP), pointer     DEV_ATTR :: WF_p(:,:)
  integer  :: i_wf
- integer  :: ifft,isymm
-
+ integer  :: ifft,ifftp,isymm,i_spinor
+ !
  i_wf=WF%index(isc(1),isc(2),isc(4))
  !
 #if defined _MPI
@@ -35,53 +39,101 @@ subroutine DEV_SUB(WF_apply_symm)(isc,WF_symm)
 &                         ' ID'//trim(intc(myid)))
 #endif
  !
- WF_p => DEV_VAR(WF%c)
+ WF_p => DEV_VAR(WF%r)(:,:,i_wf)
  isymm = isc(3)
  !
  if(isc(3)==idt_index) then
    !
-   ! dev2dev copy
-#ifdef _GPU_LOC   
-   call devxlib_memcpy_d2d(WF_symm,DEV_VAR(WF%c)(:,:,i_wf))
-#else
-   WF_symm=WF%c(:,:,i_wf)
-#endif
-   return
+   ! 2025/11/04
+   ! In GAMMA ONLY mode WF_p is real and I cannot use the casting
+   ! inside the call to dexlib_memcpy_d2d.
+   ! To simplify the code (i.e. to avoid defined _GAMMA & _CUDA nested if)
+   ! I do an explicit loop. To check the efficiency compared to
+   ! call devxlib_memcpy_d2d(WF_symm,WF_p) 
    !
- endif
- !
- if(n_spinor==1) then
-   !
-   !DEV_ACC_DEBUG data present(WF_p,WF_symm,fft_rot_r)
+   !DEV_ACC_DEBUG data present(WF_p,WF_symm)
    !DEV_ACC parallel loop
-   !DEV_CUF kernel do(1) <<<*,*>>>
-   !DEV_OMPGPU target map(present,alloc:WF_p,WF_symm,fft_rot_r)
+   !DEV_CUF kernel do(2) <<<*,*>>>
+   !DEV_OMPGPU target map(present,alloc:WF_p,WF_symm)
    !DEV_OMPGPU teams loop
-   !DEV_OMP parallel do default(shared), private(ifft)
-   do ifft = 1, fft_size
-     WF_symm(ifft,1)=WF_p(DEV_VAR(fft_rot_r)(ifft,isymm),1,i_wf)
+   !DEV_OMP parallel do default(shared), private(ifft,i_spinor)
+   do i_spinor = 1, n_spinor
+     do ifft = 1, fft_size
+       WF_symm(ifft,i_spinor)=cmplx(WF_p(ifft,i_spinor),kind=SP)
+     enddo
    enddo
    !DEV_OMPGPU end target
    !DEV_ACC_DEBUG end data
    !
+   return
+   !
+ endif
+ !
+#ifdef _GPU_LOC
+ if (allocated(rho_map)) call error(" rho map procedure not implemented with symmetries and cuda")
+#endif
+ !
+ if(n_spinor==1) then
+   !
+#if !defined _GPU_LOC
+   if (allocated(rho_map)) then
+     !$omp parallel do default(shared), private(ifft,ifftp)
+     do ifft = 1, rho_map_size
+       ifftp=rho_map_inv(DEV_VAR(fft_rot_r)(rho_map(ifft),isymm))
+       WF_symm(ifft,1)=cmplx(WF_p(ifftp,1),kind=SP)
+     enddo
+   else
+#endif
+     !DEV_ACC_DEBUG data present(WF_p,WF_symm,fft_rot_r)
+     !DEV_ACC parallel loop
+     !DEV_CUF kernel do(1) <<<*,*>>>
+     !DEV_OMPGPU target map(present,alloc:WF_p,WF_symm,fft_rot_r)
+     !DEV_OMPGPU teams loop
+     !DEV_OMP parallel do default(shared), private(ifft,ifftp)
+     do ifft = 1, fft_size
+       ifftp=DEV_VAR(fft_rot_r)(ifft,isymm)
+       WF_symm(ifft,1)=cmplx(WF_p(ifftp,1),kind=SP)
+     enddo
+     !DEV_OMPGPU end target
+     !DEV_ACC_DEBUG end data
+#if !defined _GPU_LOC
+   endif
+#endif
+   !
  endif
  !
  if (n_spinor==2) then
    !
-   !DEV_ACC_DEBUG data present(WF_p,WF_symm,fft_rot_r,spin_sop)
-   !DEV_ACC parallel loop
-   !DEV_CUF kernel do(1) <<<*,*>>>
-   !DEV_OMPGPU target map(present,alloc:WF_p,WF_symm,fft_rot_r,spin_sop)
-   !DEV_OMPGPU teams loop
-   !DEV_OMP parallel do default(shared), private(ifft)
-   do ifft = 1, fft_size
-     WF_symm(ifft,1)=DEV_VAR(spin_sop)(1,1,isymm)*WF_p(DEV_VAR(fft_rot_r)(ifft,isymm),1,i_wf)+ &
-                     DEV_VAR(spin_sop)(1,2,isymm)*WF_p(DEV_VAR(fft_rot_r)(ifft,isymm),2,i_wf)
-     WF_symm(ifft,2)=DEV_VAR(spin_sop)(2,1,isymm)*WF_p(DEV_VAR(fft_rot_r)(ifft,isymm),1,i_wf)+ &
-                     DEV_VAR(spin_sop)(2,2,isymm)*WF_p(DEV_VAR(fft_rot_r)(ifft,isymm),2,i_wf)
-   enddo
-   !DEV_OMPGPU end target
-   !DEV_ACC_DEBUG end data
+#if !defined _GPU_LOC
+   if (allocated(rho_map)) then
+     !$omp parallel do default(shared), private(ifft,ifftp)
+     do ifft = 1, rho_map_size
+       ifftp=rho_map_inv(DEV_VAR(fft_rot_r)(rho_map(ifft),isymm))
+       WF_symm(ifft,1)=DEV_VAR(spin_sop)(1,1,isymm)*WF_p(ifftp,1)+ &
+                       DEV_VAR(spin_sop)(1,2,isymm)*WF_p(ifftp,2)
+       WF_symm(ifft,2)=DEV_VAR(spin_sop)(2,1,isymm)*WF_p(ifftp,1)+ &
+                       DEV_VAR(spin_sop)(2,2,isymm)*WF_p(ifftp,2)
+     enddo
+   else
+#endif
+     !DEV_ACC_DEBUG data present(WF_p,WF_symm,fft_rot_r,spin_sop)
+     !DEV_ACC parallel loop
+     !DEV_CUF kernel do(1) <<<*,*>>>
+     !DEV_OMPGPU target map(present,alloc:WF_p,WF_symm,fft_rot_r,spin_sop)
+     !DEV_OMPGPU teams loop
+     !DEV_OMP parallel do default(shared), private(ifft,ifftp)
+     do ifft = 1, fft_size
+       ifftp=DEV_VAR(fft_rot_r)(ifft,isymm)
+       WF_symm(ifft,1)=DEV_VAR(spin_sop)(1,1,isymm)*WF_p(ifftp,1)+ &
+                       DEV_VAR(spin_sop)(1,2,isymm)*WF_p(ifftp,2)
+       WF_symm(ifft,2)=DEV_VAR(spin_sop)(2,1,isymm)*WF_p(ifftp,1)+ &
+                       DEV_VAR(spin_sop)(2,2,isymm)*WF_p(ifftp,2)
+     enddo
+     !DEV_OMPGPU end target
+     !DEV_ACC_DEBUG end data
+#if !defined _GPU_LOC
+   endif
+#endif
    !
  endif
  !
diff --git a/src/wf_and_fft/WF_derivative.F b/src/wf_and_fft/WF_derivative.F
index 3a12d9f168..1e21e66bc4 100644
--- a/src/wf_and_fft/WF_derivative.F
+++ b/src/wf_and_fft/WF_derivative.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine WF_derivative(derivative_bands,derivative_kpt,derivative_sppol,do_derivative,use_live_timing)
  !
  !  Remember wf_t = -i \der_t wf   with t=x,y,z
@@ -24,8 +28,9 @@ subroutine WF_derivative(derivative_bands,derivative_kpt,derivative_sppol,do_der
 #if defined _SC
  use SC,             ONLY:SC_bands,found_SC_DB
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  ! Input variables
  integer,intent(in) :: derivative_bands(2)
  integer,intent(in) :: derivative_kpt(2)
diff --git a/src/wf_and_fft/WF_free.F b/src/wf_and_fft/WF_free.F
index dd02d97124..97bcb9b747 100644
--- a/src/wf_and_fft/WF_free.F
+++ b/src/wf_and_fft/WF_free.F
@@ -5,15 +5,20 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
 subroutine WF_free(WF,keep_fft,keep_states_to_load)
  !
  use wave_func,    ONLY:WAVEs
  use pseudo,       ONLY:pp_is_uspp,PP_uspp_free
  use gpu_m,        ONLY:have_gpu
  use devxlib,      ONLY:devxlib_mapped
+ use y_memory_alloc
  !
-#include<dev_defs.h>
-#include<y_memory.h>
+ implicit none
  !
  type(WAVEs)       :: WF
  logical, optional :: keep_fft 
@@ -35,13 +40,17 @@ subroutine WF_free(WF,keep_fft,keep_states_to_load)
  !
  if (have_gpu) then
    if (.not. ( allocated(WF%c) .eqv. devxlib_mapped(DEV_VAR(WF%c)) ) ) &
-&    call error("[WF] inconsistent alloc of GPU wfcs")
+&    call error("[WF] inconsistent alloc of GPU wfcs in G/C space")
+   if (.not. ( allocated(WF%r) .eqv. devxlib_mapped(DEV_VAR(WF%r)) ) ) &
+&    call error("[WF] inconsistent alloc of GPU wfcs in R space")
  endif
  !
- if (.not.allocated(WF%c)) return
+ if (.not.(allocated(WF%c).or.allocated(WF%r))) return
  !
  YAMBO_FREE_GPU(DEV_VAR(WF%c))
  YAMBO_FREE(WF%c)
+ YAMBO_FREE_GPU(DEV_VAR(WF%r))
+ YAMBO_FREE(WF%r)
  YAMBO_FREE(WF%state)
  YAMBO_FREE(WF%index)
  !
diff --git a/src/wf_and_fft/WF_load.F b/src/wf_and_fft/WF_load.F
index 42978a8396..20ec0aba27 100644
--- a/src/wf_and_fft/WF_load.F
+++ b/src/wf_and_fft/WF_load.F
@@ -5,8 +5,6 @@
 !
 ! Authors (see AUTHORS file for details): AM AF
 !
-#include<dev_defs.h>
-!
 !> @brief Load wave-function in different forms
 !!
 !! @param[in]       iG_in                       max-number of G-vectors
@@ -15,14 +13,20 @@
 !! @param[in]       kpts_to_load                k-points range 
 !! @param[in]       k_extrema_only              k-points, load only !first and last in range
 !! @param[in]       sp_pol_to_load              spin-polarization range 
-!! @param[in]       space                       R = real-space,  G = G-space, C = ?, B = ?
+!! @param[in]       space                       R=real-space,G=G-space,C=k-centred,B=buff.-mode
 !! @param[in]       title                       purpose of the wave-function loading
 !! @param[in]       impose_free_and_alloc       force deallocation of already loaded WF
 !! @param[in]       force_WFo                   do not rotate the WF (for yambo_sc)
 !! @param[in]       keep_states_to_load         keep (.TRUE.) or not (.FALSE.) states already loaded
 !! @param[in]       quiet                       turn off report and log for the WF_laod
 !!
-!! @param[out]      WF                  Wave-function object
+!! @param[out]      WF                          Wave-function object
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+#include<yambo_wfs.h>
 !
 subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load,k_extrema_only,&
 &                  space,title,impose_free_and_alloc,force_WFo,keep_states_to_load,quiet)
@@ -30,7 +34,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  ! Load and (eventually) FFTs the wavefunctions
  !
  use iso_fortran_env
- use pars,          ONLY:SP,DP,schlen,cZERO_DP
+ use pars,          ONLY:SP,DP,schlen,cZERO,cZERO_DP
  use com,           ONLY:msg,core_io_path,fat_log
  use vec_operate,   ONLY:c2a
  use stderr,        ONLY:intc
@@ -40,8 +44,8 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  use FFT_m
 ! use FFT_m,         ONLY:fft_dim_loaded,fft_dim,fft_norm,fft_g_table,fft_size,&
 !&                        fftw_plan,cufft_plan,hipfft_plan,mklgpu_desc,DEV_VAR(fft_g_table)
- use wave_func,     ONLY:wf_nc_k,wf_igk,DEV_VAR(wf_igk),WF_buffer,&
-&                        wf_ng,wf_norm_test,wf_ncx,io_WF,wf_b_indx,&
+ use wave_func,     ONLY:wf_nc_k,wf_igk,DEV_VAR(wf_igk),WF_buffer,rho_map,&
+&                        wf_ng,wf_norm_test,wf_ncx,io_WF,wf_b_indx,rho_map_size,&
 &                        wf_nb_io,wf_nb_to_load,wf_nb_io_groups,WAVEs,states_to_load
  use IO_int,        ONLY:io_control,IO_and_Messaging_switch
  use IO_m,          ONLY:OP_RD,NONE,VERIFY,RD,RD_CL,DUMP
@@ -56,8 +60,9 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  use gpu_m,         ONLY:have_gpu, gpu_devsync, gpu_getstat
  use devxlib,       ONLY:devxlib_memcpy_h2d,devxlib_memcpy_d2h,devxlib_memcpy_d2d,&
                          devxlib_allocated,devxlib_memset_d
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer               :: iG_in,iGo_max_in,bands_to_load(2),kpts_to_load(2)
  type(WAVEs), target   :: WF
@@ -86,7 +91,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
  !
  complex(DP), allocatable DEV_ATTR :: wf_DP_d(:)
  complex(SP), allocatable DEV_ATTR :: wf_disk_d(:,:,:)
- complex(SP), pointer     DEV_ATTR :: wfc_p(:,:,:)
+ WF_RSPACE(SP),   pointer DEV_ATTR :: wfr_p(:,:,:)
  integer :: WF_N
  !
  ! I/O
@@ -279,7 +284,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
    YAMBO_ALLOC_GPU(DEV_VAR(wf_DP),(wf_grid_size))
  endif
  !
- wfc_p => DEV_VAR(WF%c)
+ wfr_p => DEV_VAR(WF%r)
  WF_N = WF%N
  !
  !
@@ -425,17 +430,30 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
 #endif
               WF_N = WF%N
               !
-              !DEV_ACC_DEBUG data present(wfc_p,wf_DP)
+            !if(allocated(rho_map)) then
+            !  !DEV_ACC_DEBUG data present(wfr_p,wf_DP)
+            !  !DEV_ACC parallel loop
+            !  !DEV_CUF kernel do(1)
+            !  !DEV_OMPGPU target map(present,alloc:wfr_p,wf_DP)
+            !  !DEV_OMPGPU teams loop
+            !  do ic=1,rho_map_size
+            !    wfr_p(ic,i_spinor,WF_N)=cmplx(DEV_VAR(wf_DP)(rho_map(ic)),kind=SP)*fft_norm
+            !  enddo
+            !  !DEV_OMPGPU end target
+            !  !DEV_ACC_DEBUG end data
+            !else
+              !DEV_ACC_DEBUG data present(wfr_p,wf_DP)
               !DEV_ACC parallel loop
               !DEV_CUF kernel do(1)
-              !DEV_OMPGPU target map(present,alloc:wfc_p,wf_DP)
+              !DEV_OMPGPU target map(present,alloc:wfr_p,wf_DP)
               !DEV_OMPGPU teams loop
-              do ic=1,wf_grid_size
-                wfc_p(ic,i_spinor,WF_N)=cmplx(DEV_VAR(wf_DP)(ic),kind=SP)*fft_norm
+              do ic=1,fft_size
+                wfr_p(ic,i_spinor,WF_N)=cmplx(DEV_VAR(wf_DP)(ic),kind=SP)*fft_norm
               enddo
               !DEV_OMPGPU end target
               !DEV_ACC_DEBUG end data
-              !
+            !endif
+            !
             enddo  ! i_spinor
             !
          else
@@ -470,7 +488,10 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
               !
               call fft_3d(wf_DP,fft_dim,+2)
 #endif
-              WF%c(:,i_spinor,WF%N)=cmplx(wf_DP(:),kind=SP)*fft_norm
+#if !defined _GPU
+              if(     allocated(rho_map)) WF%r(:,i_spinor,WF%N)=wfcmplx(wf_DP(rho_map(:)),kind=SP)*fft_norm
+#endif
+              if(.not.allocated(rho_map)) WF%r(:,i_spinor,WF%N)=wfcmplx(wf_DP(:)         ,kind=SP)*fft_norm
               !
             enddo  ! i_spinor
 #ifdef _GPU
@@ -493,7 +514,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
    call msg('rns', '[WF] Copying WF data from GPU device')
    if(WF%space=='R') then
      ! dev2host
-     call devxlib_memcpy_d2h(WF%c, DEV_VAR(WF%c))
+     call devxlib_memcpy_d2h(WF%r, DEV_VAR(WF%r))
    else
      ! host2dev
      call devxlib_memcpy_h2d( DEV_VAR(WF%c), WF%c)
@@ -566,7 +587,7 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
    if (WF%space=='G') wf_grid_size=wf_ng
    do i_sp_pol=WF%sp_pol(1),WF%sp_pol(2)
      do ikibz=WF%k(1),WF%k(2)
-       call WF_rotate(ikibz,i_sp_pol,WF,wf_grid_size)
+       call WF_rotate(ikibz,i_sp_pol,WF,wf_grid_size,WF%space)
      enddo
    enddo
    !
@@ -590,8 +611,14 @@ subroutine WF_load(WF,iG_in,iGo_max_in,bands_to_load,kpts_to_load,sp_pol_to_load
      !
      if ( WF%index(ib1,1,1)==0 .or. WF%index(ib2,1,1)==0 ) cycle
      !
-     N=size(WF%c(:,:,WF%index(ib1,1,1)))
-     c=Vstar_dot_V_gpu( N, DEV_VAR(WF%c)(:,:,WF%index(ib1,1,1)), DEV_VAR(WF%c)(:,:,WF%index(ib2,1,1)) )
+     c=cZERO
+     if (WF%space=='R') then
+       N=size(WF%r(:,:,WF%index(ib1,1,1)))
+       c=Vstar_dot_V_gpu( N, DEV_VAR(WF%r)(:,:,WF%index(ib1,1,1)), DEV_VAR(WF%r)(:,:,WF%index(ib2,1,1)) )
+     else
+       N=size(WF%c(:,:,WF%index(ib1,1,1)))
+       c=Vstar_dot_V_gpu( N, DEV_VAR(WF%c)(:,:,WF%index(ib1,1,1)), DEV_VAR(WF%c)(:,:,WF%index(ib2,1,1)) )
+     endif
      !
      if (abs(c)>mxdp) mxdp=abs(c)
      if (abs(c)<mndp) mndp=abs(c)
diff --git a/src/wf_and_fft/WF_phase_matrices.F b/src/wf_and_fft/WF_phase_matrices.F
new file mode 100644
index 0000000000..cb8bbe1d87
--- /dev/null
+++ b/src/wf_and_fft/WF_phase_matrices.F
@@ -0,0 +1,306 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2015 The Yambo Team
+!
+! Authors (see AUTHORS file for details): DS
+!      
+!> @brief Calculate overlaps between wave-function
+!   <\psi_{Ra(kibz_i)} | \psi_{kbz_j} >
+!  with for Ra(kibz_i)=kbz_j
+!
+!! @param[out]      WF_phases_S            Phase matrices
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h>
+!
+subroutine WF_phase_matrices(En,Xk,blim,klim)
+ !    
+ use pars,              ONLY:SP,cZERO
+ use units,             ONLY:HA2EV
+ use LIVE_t,            ONLY:live_timing
+ use R_lattice,         ONLY:bz_samp,G_m_G,minus_G,g_vec,rl_sop
+ use D_lattice,         ONLY:nsym,i_time_rev,sop_tab,spin_sop
+ use electrons,         ONLY:levels,n_max_deg,deg_threshold
+ use wave_func,         ONLY:WAVEs,io_WF_phases,WF_phases_b_map
+ use wave_func,         ONLY:wf_ng_overlaps,wf_ng_1st_BZ,wf_ng,WF_copy,wf_nc_k
+ use stderr,            ONLY:intc
+ use electrons,         ONLY:n_spinor,n_sp_pol
+ use wrapper,           ONLY:Vstar_dot_V
+ use hamiltonian,       ONLY:B_mat_index
+ use matrix_operate,    ONLY:m3det!,UP_matrix_index
+ use vec_operate,       ONLY:degeneration_finder
+ use parallel_m,        ONLY:PAR_IND_WF_k,PAR_IND_WF_b,PAR_IND_B_mat
+ use parallel_int,      ONLY:PP_wait,PP_redux_wait,PARALLEL_WF_index,PARALLEL_WF_distribute
+ use interfaces,        ONLY:WF_symm_kpoint_g,WF_load,WF_free,eval_G_minus_G
+ use IO_int,            ONLY:io_control
+ use IO_m,              ONLY:VERIFY,DUMP,REP,OP_WR,WR,WR_CL,OP_RD_CL
+ use timing_m,          ONLY:timing
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ integer,       intent(in) :: blim(2),klim(2)
+ type(levels),  intent(in) :: En
+ type(bz_samp), intent(in) :: Xk 
+ !
+ ! Work Space
+ !
+ integer           :: first_el(blim(2)-blim(1)+1),n_of_el(blim(2)-blim(1)+1),&
+ &                    blim_n(2),n_deg_grp,i_grp,g0_idx(3,2),nsz(5),max_igk
+ real(SP)          :: g0_length(3)
+ logical           :: opt1,opt2
+ integer           :: ikbz,iRkbz,isk,isloop,isk_c,isk_y,iGk_y,iGk_c,iGk,iGloop,istark,&
+ &                    iby,ibc,iby_phase,ibc_phase,i_sp_pol,ik,&
+ &                    ifrag,s_count,i_wf_y,i_wf_c,iGp,ng_tmp,nb,NG_max
+ complex(SP)       :: det,charac(blim(2)-blim(1)+1,nsym)
+ complex(SP)       :: spin_sop_composed(2,2),s1(2,2),s2(2,2)
+#if defined _CUDA
+ complex(SP) DEV_ATTR     :: spin_sop_composed_d(2,2)
+#endif
+ complex(SP), allocatable :: WF_phases(:,:,:,:,:)
+ !
+ type(WAVEs) :: WF_k,WF_Rk_c,WF_Rk_y,WF_Rk
+ !
+ integer              :: io_err,ID
+ !
+ if (nsym==1) return
+ !
+ ! 2025/09/23
+ ! At present option 2 is buggy with n_spinor=2.
+ ! It produces exc-ph couplings which are not
+ ! fully symmetric. If fixed, this would be more elegant than option 1
+ ! An even better fix would be to correctly detect the double point group in the code
+ !
+ opt1=(n_spinor==2)
+ opt2=.not.opt1
+ !
+ nsz=0
+ call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
+ io_err=io_WF_phases(blim,0,0,0,ID,nsz)
+ !
+ if (io_err==0) return
+ !
+ call timing("WF_phases",OPR="start")
+ !
+ !call k_find_smallest_g0(g0_idx,g0_length)
+ !iGp=max(maxval(g0_idx(:,1)),maxval(g0_idx(:,2)))
+ iGp=2*maxval(Xk%g_table)
+ ng_tmp=eval_G_minus_G(wf_ng_1st_BZ,iGp)
+ !
+ ! Here I need to set wf_ng to a larger value, to allocate WFs
+ ! Of a size large enough to accomodate all possibile g-vectors
+ wf_ng=maxval(G_m_G)
+ !
+ nb=blim(2)-blim(1)+1
+ !
+ !call PARALLEL_WF_distribute(K_index=PAR_IND_WF_k,B_index=PAR_IND_WF_b,CLEAN_UP=.TRUE.)
+ call PARALLEL_WF_distribute(CLEAN_UP=.TRUE.)
+ call PARALLEL_WF_index( )
+ !
+ YAMBO_ALLOC(WF_phases_b_map,(blim(1):blim(2),klim(1):klim(2),n_sp_pol,2))
+ !
+ call io_control(ACTION=OP_WR,COM=REP,SEC=(/1/),MODE=DUMP,ID=ID)
+ io_err=io_WF_phases(blim,0,0,0,ID,nsz)
+ !
+ WF_phases_b_map=0
+ !
+ !call live_timing("WF_phases:",PAR_IND_WF_k%n_of_elements(PAR_IND_DIPk_bz_ID+1)*n_sp_pol)
+ call live_timing("WF_phases:",(sum(Xk%nstar(klim(1):klim(2))))*n_sp_pol)
+ !
+ ifrag=0
+ !
+ if (maxval(wf_nc_k)>ng_tmp) then
+  call error("MAX(wf_nc_k) ("//trim(intc(maxval(wf_nc_k)))//") > wf_ng ("//trim(intc(ng_tmp))&
+             &//"), it's not possible to compute WF phases.")
+ endif
+ !
+ spin_sop_composed=cZERO
+ !
+ do i_sp_pol=1,n_sp_pol
+   !
+   do ik=klim(1),klim(2)
+     !
+     ifrag=ifrag+1
+     !
+     call WF_load(WF_k,0,1,blim,(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
+     & space='G',title='-WF_phases',quiet=ifrag>1,keep_states_to_load=.true.)
+     !
+     call degeneration_finder(nb,first_el,n_of_el,n_deg_grp,Er=En%E(blim(1):blim(2),ik,i_sp_pol),&
+     &                deg_accuracy=deg_threshold,Include_single_values=.true.)
+     !
+     do i_grp=1,n_deg_grp
+       do iby_phase=1,n_of_el(i_grp)
+         iby=iby_phase+first_el(i_grp)-1+blim(1)-1
+         WF_phases_b_map(iby,ik,i_sp_pol,:)=(/iby_phase,i_grp/)
+       enddo
+     enddo
+     !
+     nsz=(/maxval(n_of_el),maxval(n_of_el),n_deg_grp,nsym,Xk%nstar(ik)/)
+     !
+     YAMBO_ALLOC(WF_phases,(nsz(1),nsz(2),nsz(3),nsz(4),nsz(5)))
+     WF_phases=cZERO
+     !
+     ! Here we have to apply to symmetries: 
+     ! (i)  expand from IBZ to BZ (s1)
+     ! (ii) rotate the expanded wfc to get WF_phase at each symmetry (s2, isloop)
+     !
+     ! We cannot use sop_tab to get the index of symmetry s2(s1) in the
+     ! spinorial case, because we are in SU(3) instead of SO(3)
+     !
+     ! Therefore, we need to explicity apply both s1 and s2: we use
+     ! OPTION 1 by DS. OPTION 2 is coded but not working at the moment
+     !
+     do istark=1,Xk%nstar(ik)
+       !
+       !if ( .not.PAR_IND_WF_k%element_1D(ik) ) cycle
+       !
+       isk =Xk%star(ik,istark)
+       ikbz=Xk%k_table(ik,isk)
+       !
+       iGk =minus_G(Xk%g_table(ik,isk))
+       !
+       ! OPTION 1: expand from IBZ (WF_k) to BZ (WF_Rk)
+       if (opt1) then
+         call WF_copy(WF_k,WF_Rk)
+         call WF_symm_kpoint_g(blim,ik,i_sp_pol,isk,iGk,WF_Rk)
+       endif
+       !
+       do isloop=1,nsym
+         !
+         ! symmetry composed
+         isk_c=sop_tab(isloop,isk)
+         iGk_c=minus_G(Xk%g_table(ik,isk_c))
+         !
+         iGloop=G_m_G(iGk_c,iGk)
+         !
+         ! spin_sop with time-reversal are not a space group
+         ! I need to build the composed spin symmetry operator
+         !
+         ! (disabled if n_spinor=2)
+         ! OPTION 2: directly compute composed spin symmetry
+         if(n_spinor==2.and.opt2) then
+           ! The following code is probably wrong
+           s1=spin_sop(:,:,isk)
+           s2=spin_sop(:,:,isloop)
+           !if (isloop>nsym/(1+i_time_rev)) s1=conjg(s1)
+           if (isk   >nsym/(1+i_time_rev)) s1=conjg(s1)
+           if (isloop>nsym/(1+i_time_rev)) then
+             s2=conjg(s2)
+             s1=conjg(s1)
+           endif
+           spin_sop_composed=matmul(s2,s1)
+           if (isk_c >nsym/(1+i_time_rev)) spin_sop_composed=conjg(spin_sop_composed)
+#if defined _CUDA
+           call dev_memcpy(spin_sop_composed_d,spin_sop_composed)
+#endif
+         endif
+         !
+         ! symmetry used by yambo
+         isk_y=Xk%s_table(ik,isk_c)
+         iGk_y =minus_G(Xk%g_table(ik,isk_y))
+         !
+         iRkbz =Xk%k_table(ik,isk_c)
+         !
+         ! OPTION 1: Apply second symmetry operation (WF_Rk_c from isk) 
+         ! to the WF already expanded in the BZ (WF_Rk)
+         if (opt1) then
+           call WF_copy(WF_Rk,WF_Rk_c)
+           call WF_symm_kpoint_g(blim,ik,i_sp_pol,isloop,iGloop,WF_Rk_c,isref=isk,iGref=iGk)
+         endif
+         !
+         ! (disabled if n_spinor=2)
+         ! OPTION 2: Use the composed symmetry, importing the composed spin soc
+         if (opt2) then
+           call WF_copy(WF_k,WF_Rk_c)
+           call WF_symm_kpoint_g(blim,ik,i_sp_pol,isk_c,iGk_c,WF_Rk_c,ssop=DEV_VAR(spin_sop_composed))
+         endif
+         !
+         call WF_copy(WF_k,WF_Rk_y)
+         call WF_symm_kpoint_g(blim,ik,i_sp_pol,isk_y,iGk_y,WF_Rk_y)
+         !
+         do i_grp=1,n_deg_grp
+           blim_n=(/1,n_of_el(i_grp)/)
+           blim_n=blim_n+first_el(i_grp)-1+blim(1)-1
+           !
+           do iby_phase=1,n_of_el(i_grp)
+             iby=iby_phase+first_el(i_grp)-1+blim(1)-1
+             i_wf_y=WF_k%index(iby,ik,i_sp_pol)
+             do ibc_phase=1,n_of_el(i_grp)
+               ibc=ibc_phase+first_el(i_grp)-1+blim(1)-1
+               i_wf_c=WF_k%index(ibc,ik,i_sp_pol)
+               !
+               WF_phases(iby_phase,ibc_phase,i_grp,isloop,istark)= &
+               & Vstar_dot_V(wf_ng*n_spinor,WF_Rk_y%c(:,:,i_wf_y),WF_Rk_c%c(:,:,i_wf_c))
+               !
+             enddo
+             charac(i_grp,isloop)=charac(i_grp,isloop)+WF_phases(iby_phase,iby_phase,i_grp,isloop,istark)
+           enddo
+           !
+         enddo
+         !
+         !do ibk=blim(1),blim(2)
+         !  i_wf_k=WF_k%index(ibk,ik,i_sp_pol)
+         !  do ibp=blim(1),blim(2)
+         !    i_wf_p=WF_p%index(ibp,ip,i_sp_pol)
+         !    !if (.not.PAR_IND_B_mat_ordered%element_1D( UP_matrix_index(ib,ibp)-1 )) cycle
+         !    !if (.not.PAR_IND_B_mat%element_1D( B_mat_index(ib,ibp,(/1,En%blim/))-1 )) cycle
+         !    WF_phases(ibk,ibp,s_count,k_count,i_sp_pol)= &
+         !    & Vstar_dot_V(wf_ng*n_spinor,WF_k%c(:,:,i_wf_k),WF_p_tmp%c(:,:,i_wf_p))
+         !  enddo ! ibp
+         !enddo ! ibk
+         !
+         call WF_free(WF_Rk_c)
+         call WF_free(WF_Rk_y)
+         !
+         ! DEBUG <
+         !call SERIAL_inversion(nb,WF_phases(:,:,s_count,k_count,i_sp_pol),det,.true.)
+         !!
+         !write(*,*) i_sp_pol,ikbz,isp,iGp,abs(det)
+         ! DEBUG >
+         !
+       enddo ! isp
+       !
+       call WF_free(WF_Rk)
+       !
+       call live_timing(steps=1)
+       !
+       !call PP_redux_wait(WF_phases(:,:,s_count,k_count,i_sp_pol))
+       !
+       ! DEBUG <
+       !write(*,*) "Character table at k= ",ikbz
+       !write(*,*) "Number of symmetry elements= ",s_count
+       !do i_grp=1,n_deg_grp
+       !  write(*,*) En%E(first_el(i_grp),ik,i_sp_pol)*HA2EV,n_of_el(i_grp),real(charac(i_grp,:s_count))
+       !enddo
+       !write(*,*) ""
+       ! DEBUG >
+       !
+     enddo ! istar
+     !
+     call io_control(ACTION=WR,COM=REP,SEC=(/3/),MODE=DUMP,ID=ID)
+     io_err=io_WF_phases(blim,ik,0,i_sp_pol,ID,nsz,WF_phases)
+     !
+     YAMBO_FREE(WF_phases)
+     !
+     call WF_free(WF_k)
+     !
+   enddo   ! ik
+   !
+ enddo ! i_sp_pol
+ !
+ call live_timing()
+ !
+ nsz=0
+ call io_control(ACTION=WR_CL,COM=REP,SEC=(/2/),MODE=DUMP,ID=ID)
+ io_err=io_WF_phases(blim,0,0,0,ID,nsz)
+ !
+ YAMBO_FREE(WF_phases_b_map)
+ !
+ call PP_wait()
+ !
+ call timing("WF_phases",OPR="stop")
+ !
+end subroutine WF_phase_matrices
diff --git a/src/wf_and_fft/WF_rotate.F b/src/wf_and_fft/WF_rotate.F
index 1f77975bdf..ae4a406583 100644
--- a/src/wf_and_fft/WF_rotate.F
+++ b/src/wf_and_fft/WF_rotate.F
@@ -5,9 +5,12 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
 #include<dev_defs.h>
+!
  !
-subroutine WF_rotate(ik,i_sp_pol,WFo,wf_size)
+subroutine WF_rotate(ik,i_sp_pol,WFo,wf_size,space)
  !
  ! <r|n'>= <m|n'><r|m>
  !
@@ -26,8 +29,9 @@ subroutine WF_rotate(ik,i_sp_pol,WFo,wf_size)
  !
  implicit none
  !
- integer     :: i_sp_pol,ik,wf_size
- type(WAVEs) :: WFo
+ integer,      intent(in)    :: i_sp_pol,ik,wf_size
+ type(WAVEs),  intent(inout) :: WFo
+ character(1), intent(in)    :: space
  !
  ! Work Space
  !
@@ -44,7 +48,8 @@ subroutine WF_rotate(ik,i_sp_pol,WFo,wf_size)
    do ibp = H_ref_bands(1),H_ref_bands(2)
      i_wfp = WFo%index(ibp,ik,i_sp_pol)
      if (i_wfp==0) cycle
-     WS_wf(:,:,ib)=WS_wf(:,:,ib)+H_rotation(ibp,ib,ik,i_sp_pol)*WFo%c(:,:,i_wfp)
+     if (space/="R") WS_wf(:,:,ib)=WS_wf(:,:,ib)+H_rotation(ibp,ib,ik,i_sp_pol)*WFo%c(:,:,i_wfp)
+     if (space=="R") WS_wf(:,:,ib)=WS_wf(:,:,ib)+H_rotation(ibp,ib,ik,i_sp_pol)*WFo%r(:,:,i_wfp)
    end do
    !
  enddo
@@ -54,15 +59,31 @@ subroutine WF_rotate(ik,i_sp_pol,WFo,wf_size)
    i_wf = WFo%index(ib,ik,i_sp_pol)
    if (i_wf==0) cycle
    !
-   !$omp parallel do default(shared), private(i_c,i_spinor)
-   do i_spinor=1,n_spinor
-     do i_c=1,wf_size
-       WF%c(i_c,i_spinor,i_wf)=WS_wf(i_c,i_spinor,ib)
+   if (space/="R") then
+     !
+     !$omp parallel do default(shared), private(i_c,i_spinor)
+     do i_spinor=1,n_spinor
+       do i_c=1,wf_size
+         WF%c(i_c,i_spinor,i_wf)=WS_wf(i_c,i_spinor,ib)
+       enddo
      enddo
-   enddo
-   !$omp end parallel do
-   !
-   if (have_gpu) call devxlib_memcpy_h2d(DEV_VAR(WF%c)(:,:,i_wf), WF%c(:,:,i_wf))
+     !$omp end parallel do
+     !
+     if (have_gpu) call devxlib_memcpy_h2d(DEV_VAR(WF%c)(:,:,i_wf), WF%c(:,:,i_wf))
+     !
+   else if(space=="R") then
+     !
+     !$omp parallel do default(shared), private(i_c,i_spinor)
+     do i_spinor=1,n_spinor
+       do i_c=1,wf_size
+         WF%r(i_c,i_spinor,i_wf)=WS_wf(i_c,i_spinor,ib)
+       enddo
+     enddo
+     !$omp end parallel do
+     !
+     if (have_gpu) call devxlib_memcpy_h2d(DEV_VAR(WF%r)(:,:,i_wf), WF%r(:,:,i_wf))
+     !
+   endif
    !
  enddo
  !
diff --git a/src/wf_and_fft/WF_shift_kpoint.F b/src/wf_and_fft/WF_shift_kpoint.F
index ca1077b0c5..1c16ca8934 100644
--- a/src/wf_and_fft/WF_shift_kpoint.F
+++ b/src/wf_and_fft/WF_shift_kpoint.F
@@ -3,21 +3,26 @@
 !
 ! Copyright (C) 2015 The Yambo Team
 !
-! Authors (see AUTHORS file for details): MG CA
+! Authors (see AUTHORS file for details): MG CA DS
+!
+! headers
 !
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine WF_shift_kpoint(b_to_shift,nb_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_out)
  !
  use pars,          ONLY: SP,cZERO
- use electrons,     ONLY: n_spinor,n_sp_pol
+ use electrons,     ONLY: n_spinor
  use R_lattice,     ONLY: k_map,bz_samp
- use R_lattice,     ONLY: DEV_VAR(G_m_G)
+ use R_lattice,     ONLY: DEV_VAR(G_m_G),G_m_G,minus_G
  use wave_func,     ONLY: wf_ng_1st_BZ,wf_ng_overlaps
  use devxlib,       ONLY: devxlib_memcpy_d2d, devxlib_memset_d
  use gpu_m,         ONLY: have_gpu
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in) :: wf_shift(3),ikbz,b_to_shift(2),nb_to_shift
  type(bz_samp), intent(in) :: Xk
@@ -26,8 +31,7 @@ subroutine WF_shift_kpoint(b_to_shift,nb_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_
  !
  ! Work space
  !
- integer :: id,ik,is
- complex(SP), allocatable DEV_ATTR :: WF_tmp(:,:,:)
+ integer :: id,id_loc,ik,is
  integer :: ig,igp,i_b1,i_b2
  integer :: g0_idx(3,2),g0_idx_val
  integer :: i_spinor,i_b
@@ -40,27 +44,31 @@ subroutine WF_shift_kpoint(b_to_shift,nb_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_
  !
  if (i_b2 /= nb_to_shift) call error("[WF_shift_kpoint] Unexpected error")
  !
- YAMBO_ALLOC_GPU(WF_tmp,(wf_ng_overlaps,n_spinor,nb_to_shift))
+ if (all(WF_shift==0)) then
+   call devxlib_memset_d(WF_k_out,cZERO)
+   call devxlib_memcpy_d2d(WF_k_out,WF_k_in, range1=(/1,wf_ng_1st_BZ/))
+   return
+ endif
+ !
+ g0_idx_val=1
+ do id=1,3
+   if(WF_shift(id)==0) cycle
+   ig=k_map%g0_idx(id,WF_shift(id))
+   igp=minus_G(g0_idx_val)
+   g0_idx_val=G_m_G(ig,igp)
+ enddo 
  !
  ! dev2dev
- call devxlib_memset_d(WF_tmp,cZERO)
- call devxlib_memcpy_d2d(WF_tmp,WF_k_in, range1=(/1,wf_ng_1st_BZ/))
+ call devxlib_memset_d(WF_k_out,cZERO)
 
  !
  ! main loop
  !
- do id=1,3
-   !
-   WF_k_out=cZERO
-   !
-   if(WF_shift(id)/=0) then
      !
-     g0_idx_val=k_map%g0_idx(id,WF_shift(id))
-     !
-     !DEV_ACC_DEBUG data present(WF_tmp,G_m_G,WF_k_out)
+     !DEV_ACC_DEBUG data present(WF_k_in,G_m_G,WF_k_out)
      !DEV_ACC parallel loop collapse(3)
      !DEV_CUF kernel do(3) <<<*,*>>>
-     !DEV_OMPGPU target map(present,alloc:WF_tmp,G_m_G,WF_k_out)
+     !DEV_OMPGPU target map(present,alloc:WF_k_in,G_m_G,WF_k_out)
      !DEV_OMPGPU teams loop collapse(3)
      !DEV_OMP parallel do default(shared), private(i_b,i_spinor,ig,igp),collapse(3)
      !
@@ -68,10 +76,10 @@ subroutine WF_shift_kpoint(b_to_shift,nb_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_
        do i_spinor=1,n_spinor
          do ig=1,wf_ng_1st_BZ
            !
-           if(WF_tmp(ig,1,i_b1)==cZERO) cycle
+           if(WF_k_in(ig,1,i_b1)==cZERO) cycle
            !
            igp=DEV_VAR(G_m_G)(ig,g0_idx_val)
-           WF_k_out(igp,i_spinor,i_b)=WF_tmp(ig,i_spinor,i_b)
+           WF_k_out(igp,i_spinor,i_b)=WF_k_in(ig,i_spinor,i_b)
            !
          enddo
        enddo
@@ -81,18 +89,6 @@ subroutine WF_shift_kpoint(b_to_shift,nb_to_shift,ikbz,wf_shift,Xk,WF_k_in,WF_k_
      !DEV_OMPGPU end target
      !DEV_ACC_DEBUG end data
      !
-   else
-     ! dev2dev
-     call devxlib_memcpy_d2d(WF_k_out,WF_tmp)
-   endif
-   ! dev2dev
-   call devxlib_memset_d(WF_tmp,cZERO)
-   call devxlib_memcpy_d2d(WF_tmp,WF_k_out)
-   !
- enddo
- !
- YAMBO_FREE_GPU(WF_tmp)
- YAMBO_FREE(WF_tmp)
  !
 end subroutine WF_shift_kpoint
 
diff --git a/src/wf_and_fft/WF_spatial_inversion.F b/src/wf_and_fft/WF_spatial_inversion.F
index 6128038a1d..7186c13ddf 100644
--- a/src/wf_and_fft/WF_spatial_inversion.F
+++ b/src/wf_and_fft/WF_spatial_inversion.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<yambo_wfs.h>
+!
 subroutine WF_spatial_inversion(en,Xk)
  !
  ! Check if the Inversion is a DL symmetry operation either
@@ -40,7 +44,7 @@ subroutine WF_spatial_inversion(en,Xk)
  !
  ! WF procedure
  !
- if (.not.allocated(WF%c).or.WF%space/='R') return
+ if (.not.allocated(WF%r).or.WF%space/='R') return
  !
  ! When using wf's the space inv is tested using the first
  ! nsym/(i_time_rev+1) and all the nsym. If there is not
@@ -61,7 +65,8 @@ subroutine WF_spatial_inversion(en,Xk)
    !
 #if !defined _OPENMP
    !
-   forall(i1=1:fft_size) cv(i1)=conjg(WF%c(i1,i_spinor,1))*WF%c(i1,i_spinor,1)
+   forall (i1=1:fft_size) &
+   & cv(i1)=wfconjg(cmplx(WF%r(i1,i_spinor,1),kind=SP))*WF%r(i1,i_spinor,1)
    mv(1)=sum(rho_nsi(:)*cv(:))
    mv(2)=sum(rho_si(:)*cv(:))
    !
@@ -70,7 +75,7 @@ subroutine WF_spatial_inversion(en,Xk)
    !$omp parallel do default(shared), private(i1,i_spinor), reduction(+:mv)
    do i_spinor=1,n_spinor
      do i1=1,fft_size
-       cv(i1)=conjg(WF%c(i1,i_spinor,1))*WF%c(i1,i_spinor,1)
+       cv(i1)=wfconjg(WF%r(i1,i_spinor,1))*WF%r(i1,i_spinor,1)
        mv(1)=mv(1) +rho_nsi(i1)*cv(i1)
        mv(2)=mv(2) +rho_si(i1) *cv(i1)
      enddo
diff --git a/src/wf_and_fft/WF_symm.F b/src/wf_and_fft/WF_symm.F
deleted file mode 100644
index 16249535dd..0000000000
--- a/src/wf_and_fft/WF_symm.F
+++ /dev/null
@@ -1,43 +0,0 @@
-!
-! License-Identifier: GPL
-!
-! Copyright (C) 2013 The Yambo Team
-!
-! Authors (see AUTHORS file for details): DS
-!
-function WF_symm(ifft,isc)
- !
- use pars,          ONLY:SP
- use wave_func,     ONLY:WF
- use FFT_m,         ONLY:fft_rot_r
- use electrons,     ONLY:n_spinor
- use D_lattice,     ONLY:nsym,spin_sop,i_time_rev,idt_index
- !
- implicit none
- !
- complex(SP), dimension(n_spinor) :: WF_symm
- !
- integer     :: isc(4)
- !
- integer     :: ifft
- integer     :: i_wf
- !
- i_wf=WF%index(isc(1),isc(2),isc(4))
- !
- if(isc(3)==idt_index) then
-   WF_symm(:)=WF%c(ifft,:,i_wf)
-   return
- endif
- !
- if(n_spinor==1) WF_symm(1)=WF%c(fft_rot_r(ifft,isc(3)),1,i_wf)
- !
- if (n_spinor==2) then
-   WF_symm(1)=spin_sop(1,1,isc(3))*WF%c(fft_rot_r(ifft,isc(3)),1,i_wf)+ &
-&                    spin_sop(1,2,isc(3))*WF%c(fft_rot_r(ifft,isc(3)),2,i_wf)
-   WF_symm(2)=spin_sop(2,1,isc(3))*WF%c(fft_rot_r(ifft,isc(3)),1,i_wf)+ &
-&                    spin_sop(2,2,isc(3))*WF%c(fft_rot_r(ifft,isc(3)),2,i_wf)
- endif
- !
- if(isc(3)>nsym/(i_time_rev+1)) WF_symm=conjg(WF_symm)
- !
-end function
diff --git a/src/wf_and_fft/WF_symm_kpoint.F b/src/wf_and_fft/WF_symm_kpoint.F
index 5b25cd5000..397c0bc2aa 100644
--- a/src/wf_and_fft/WF_symm_kpoint.F
+++ b/src/wf_and_fft/WF_symm_kpoint.F
@@ -47,9 +47,9 @@ subroutine DEV_SUB(WF_symm_kpoint)(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
        !
 #if defined _GPU_LOC
        ! dev2dev
-       call devxlib_memcpy_d2d(WF_k_out(:,:,ibl),DEV_VAR(WF%c)(:,:,iwf))
+       call devxlib_memcpy_d2d(WF_k_out(:wf_ng,:,ibl),DEV_VAR(WF%c)(:,:,iwf))
 #else
-       WF_k_out(:,:,ibl)=WF_p(:,:,iwf)
+       WF_k_out(:wf_ng,:,ibl)=WF_p(:,:,iwf)
 #endif
        !
      enddo
@@ -64,14 +64,12 @@ subroutine DEV_SUB(WF_symm_kpoint)(b_to_load,ikbz,i_sp_pol,Xk,WF_k_out)
        if (n_spinor==1) then
          !
          !DEV_ACC_DEBUG data present(WF_k_out,g_rot,WF_p)
-         !DEV_ACC parallel loop collapse(2)
-         !DEV_CUF kernel do(2) <<<*,*>>>
+         !DEV_ACC parallel loop
+         !DEV_CUF kernel do(1) <<<*,*>>>
          !DEV_OMPGPU target map(present,alloc:WF_k_out,g_rot,WF_p)
-         !DEV_OMPGPU teams loop collapse(2)
-         do i_spinor=1,n_spinor
+         !DEV_OMPGPU teams loop
          do i_g=1,wf_ng
-           WF_k_out(DEV_VAR(g_rot)(i_g,is),i_spinor,ibl)=WF_p(i_g,i_spinor,iwf)
-         enddo
+           WF_k_out(DEV_VAR(g_rot)(i_g,is),1,ibl)=WF_p(i_g,1,iwf)
          enddo
          !DEV_OMPGPU end target
          !DEV_ACC_DEBUG end data
diff --git a/src/wf_and_fft/WF_symm_kpoint_g.F b/src/wf_and_fft/WF_symm_kpoint_g.F
new file mode 100644
index 0000000000..82febc73df
--- /dev/null
+++ b/src/wf_and_fft/WF_symm_kpoint_g.F
@@ -0,0 +1,131 @@
+!
+! License-Identifier: GPL
+!
+! Copyright (C) 2015 The Yambo Team
+!
+! Authors (see AUTHORS file for details): MG CA DS AF
+!
+! headers
+!
+#include<dev_defs.h>                                                    
+!
+!
+subroutine WF_symm_kpoint_g(b_to_load,ik,i_sp_pol,isymm,ig0,WF,ssop,isref,igref)
+ !
+ use pars,          ONLY:SP,cZERO
+ use electrons,     ONLY:n_spinor
+ use wave_func,     ONLY:WAVEs,wf_nc_k,DEV_VAR(wf_igk)
+ use wave_func,     ONLY:wf_ng
+ use vec_operate,   ONLY:c2a
+ use R_lattice,     ONLY:DEV_VAR(g_rot),DEV_VAR(G_m_G),&
+ &                       k_pt,rl_sop,g_vec
+ use D_lattice,     ONLY:idt_index,DEV_VAR(spin_sop),sop_inv, &
+ &                       dl_sop,i_time_rev,nsym
+ !
+ implicit none
+ !
+ integer,       intent(in) :: b_to_load(2)
+ integer,       intent(in) :: ik,i_sp_pol,isymm,ig0
+ type (WAVEs), target, intent(inout) :: WF
+ complex(SP) DEV_ATTR, optional, intent(in) :: ssop(2,2)
+ integer,              optional, intent(in) :: isref,igref
+ !
+ ! Work space
+ !
+ complex(SP)              DEV_ATTR :: WF_k_tmp(wf_ng,n_spinor)
+ complex(SP), pointer     DEV_ATTR :: WF_p(:,:)
+ complex(SP)              DEV_ATTR :: ssop_loc(2,2)
+ integer                  DEV_ATTR :: wf_tab_loc(wf_ng,2)
+ !
+ real(SP)    :: krot(3),G_cc(3)
+ !
+ integer :: ib,i_wf,isymm_m1
+ integer :: ic,ig,igp,igr,igs,i_spinor
+ !
+#if defined _GPU
+ call error("subroutine WF_symm_kpoint_g is not gpu ported")
+#endif
+ !
+ if(isymm==idt_index) return
+ !
+ isymm_m1=sop_inv(isymm)
+ !  
+ if (n_spinor==2) then
+   if(present(ssop)) then
+     ssop_loc=ssop
+   else
+     ssop_loc=DEV_VAR(spin_sop)(:,:,isymm)
+   endif
+ endif
+ !
+ if (present(igref).or.present(isref)) then
+   if (.not.present(isref)) call error("WF_symm_kpoint_g: igref provided, but not isref")
+   if (.not.present(igref)) call error("WF_symm_kpoint_g: isref provided, but not igref")
+   do ic=1,wf_nc_k(ik)
+     ig=DEV_VAR(wf_igk)(ic,ik)
+     igp=DEV_VAR(g_rot)(ig,isref)
+     igs=DEV_VAR(G_m_G)(igp,igref)
+     igr=DEV_VAR(g_rot)(igs,isymm)
+     wf_tab_loc(ic,1)=igs
+     wf_tab_loc(ic,2)=DEV_VAR(G_m_G)(igr,ig0)
+   enddo
+ else
+   do ic=1,wf_nc_k(ik)
+     ig=DEV_VAR(wf_igk)(ic,ik)
+     igr=DEV_VAR(g_rot)(ig,isymm)
+     wf_tab_loc(ic,1)=ig
+     wf_tab_loc(ic,2)=DEV_VAR(G_m_G)(igr,ig0)
+   enddo
+ endif
+ !
+ do ib=b_to_load(1),b_to_load(2)
+   !
+   WF_k_tmp=cZERO
+   !
+   i_wf=WF%index(ib,ik,i_sp_pol)
+   WF_p => DEV_VAR(WF%c)(:,:,i_wf)
+   !
+   if (n_spinor==1) then
+     !
+     !DEV_ACC_DEBUG data present(WF_k_tmp,g_rot,WF_p)
+     !DEV_ACC parallel loop private(ig,igs)
+     !DEV_CUF kernel do(1) <<<*,*>>>
+     !DEV_OMPGPU target map(present,alloc:WF_k_tmp,g_rot,WF_p)
+     !DEV_OMPGPU teams loop
+     do ic=1,wf_nc_k(ik)
+       ig =wf_tab_loc(ic,1)
+       igs=wf_tab_loc(ic,2)
+       WF_k_tmp(igs,1)=WF_p(ig,1)
+     enddo
+     !DEV_OMPGPU end target
+     !DEV_ACC_DEBUG end data
+     !
+   else
+     !
+     !DEV_ACC_DEBUG data present(WF_k_tmp,g_rot,WF_p)
+     !DEV_ACC parallel loop private(ig,igs)
+     !DEV_CUF kernel do(1) <<<*,*>>>
+     !DEV_OMPGPU target map(present,alloc:WF_k_tmp,g_rot,WF_p)
+     !DEV_OMPGPU teams loop
+     do ic=1,wf_nc_k(ik)
+       ig =wf_tab_loc(ic,1)
+       igs=wf_tab_loc(ic,2)
+       WF_k_tmp(igs,1)=ssop_loc(1,1)*WF_p(ig,1)+ssop_loc(1,2)*WF_p(ig,2)
+       WF_k_tmp(igs,2)=ssop_loc(2,1)*WF_p(ig,1)+ssop_loc(2,2)*WF_p(ig,2)
+     enddo
+     !DEV_OMPGPU end target
+     !DEV_ACC_DEBUG end data
+   endif
+   !
+   ! take care of time reversal if needed
+   !if(isymm>nsym/(1+i_time_rev)) call dev_conjg(WF_k_tmp)
+   if(isymm>nsym/(1+i_time_rev)) WF_k_tmp=conjg(WF_k_tmp)
+   !
+   !call dev_memcpy(WF_p,WF_k_tmp)
+   WF_p=WF_k_tmp
+   !
+   nullify(WF_p)
+   !
+ enddo
+ !
+end subroutine WF_symm_kpoint_g
diff --git a/src/wf_and_fft/eval_GradOperator.F b/src/wf_and_fft/eval_GradOperator.F
index 594556b8ba..f4d3cb49df 100644
--- a/src/wf_and_fft/eval_GradOperator.F
+++ b/src/wf_and_fft/eval_GradOperator.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine eval_GradOperator()
  !
  ! On the fft grid the gradient of a periodic function f(r) acts as
@@ -23,7 +27,9 @@ subroutine eval_GradOperator()
  use FFT_m,     ONLY:fft_size,fft_dim,fftw_plan,fft_g_table
  use D_lattice, ONLY:R_m_R,Grad_R
  use R_lattice, ONLY:ng_vec,g_vec
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  ! Work space
  !
@@ -47,7 +53,7 @@ subroutine eval_GradOperator()
 #else
    call fft_3d(v,fft_dim,1)
 #endif
-   forall(i1=1:fft_size,i2=1:fft_size) Grad_R(i1,i2,ii) = v(R_m_R(i1,i2))/fft_size
+   forall (i1=1:fft_size,i2=1:fft_size) Grad_R(i1,i2,ii) = v(R_m_R(i1,i2))/fft_size
    v = (0._DP,0._DP)
  end do
  !
diff --git a/src/wf_and_fft/eval_R_minus_R.F b/src/wf_and_fft/eval_R_minus_R.F
index 8267d1ee72..cc0cd57f34 100644
--- a/src/wf_and_fft/eval_R_minus_R.F
+++ b/src/wf_and_fft/eval_R_minus_R.F
@@ -5,13 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine eval_R_minus_R()
  !
  ! Evaluate the R_i-R_j table
  ! 
  use FFT_m,     ONLY:fft_size,fft_dim,modx
  use D_lattice, ONLY:R_m_R
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  ! Work Space
  ! 
diff --git a/src/wf_and_fft/fft_3d_cuda.F b/src/wf_and_fft/fft_3d_cuda.F
index 757e21109a..150bdb3a62 100644
--- a/src/wf_and_fft/fft_3d_cuda.F
+++ b/src/wf_and_fft/fft_3d_cuda.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AF
 !
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h> 
+!
 !=====================================================================
 ! Driver to 3D FFT: FFTW, Goedecker
 !
@@ -27,7 +32,6 @@
 !
 !=====================================================================
 !
-#include<dev_defs.h>
 !
 subroutine fft_3d_cuda(c_d,n,fft_sign,cufft_plan)
  !
@@ -41,8 +45,9 @@ subroutine fft_3d_cuda(c_d,n,fft_sign,cufft_plan)
  use openacc
 #endif
  use gpu_m,  only: gpu_devsync
+ use y_memory_alloc
  !
-#include<y_memory.h> 
+ implicit none
  !
  integer   :: fft_sign,n(3)
  integer   :: cufft_plan
diff --git a/src/wf_and_fft/fft_3d_hip.F b/src/wf_and_fft/fft_3d_hip.F
index 8893f7b002..c3cf58c3b1 100644
--- a/src/wf_and_fft/fft_3d_hip.F
+++ b/src/wf_and_fft/fft_3d_hip.F
@@ -4,6 +4,11 @@
 ! Copyright (C) 2022 the YAMBO team
 !
 ! Authors (see AUTHORS file for details): AF
+!
+! headers
+!
+#include<dev_defs.h>
+#include<y_memory.h> 
 ! 
 !=====================================================================
 ! Driver to 3D FFT: FFTW, Goedecker
@@ -27,7 +32,6 @@
 !
 !=====================================================================
 !
-#include<dev_defs.h>
 !
 subroutine fft_3d_hip(c_d,n,fft_sign,hipfft_plan)
  !
@@ -36,8 +40,9 @@ subroutine fft_3d_hip(c_d,n,fft_sign,hipfft_plan)
  use gpu_m,                only: gpu_devsync
  use devxlib_environment
  use hipfft_m
+ use y_memory_alloc
  !
-#include<y_memory.h> 
+ implicit none
  !
  integer      :: fft_sign,n(3)
  type(c_ptr)  :: hipfft_plan
diff --git a/src/wf_and_fft/fft_free.F b/src/wf_and_fft/fft_free.F
index 4d7b8933bc..ab200c389e 100644
--- a/src/wf_and_fft/fft_free.F
+++ b/src/wf_and_fft/fft_free.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine fft_free()
  !
@@ -29,8 +33,9 @@ subroutine fft_free()
  use FFT_m,    ONLY:mklgpu_desc
 #  endif
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer :: ierr
  !
diff --git a/src/wf_and_fft/fft_setup.F b/src/wf_and_fft/fft_setup.F
index 6cfeadb894..f43ef453b5 100644
--- a/src/wf_and_fft/fft_setup.F
+++ b/src/wf_and_fft/fft_setup.F
@@ -5,13 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
-subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
+subroutine fft_setup(iG_max_in,iGo_max,ONLY_SIZE)
  !
  use pars,           ONLY:SP,pi
  use D_lattice,      ONLY:a,nsym,dl_sop,sop_inv,i_time_rev,alat
- use R_lattice,      ONLY:b,g_vec,ng_vec
+ use R_lattice,      ONLY:b,g_vec,ng_closed
  use matrix_operate, ONLY:m3inv
  use FFT_m,          ONLY:fft_dim,fft_size,fft_rot_r,fft_rot_r_inv,&
 &                         fft_norm,fft_g_table,fft_multiplier,modx,&
@@ -20,7 +24,7 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
 #if defined _FFTQE
  use fft_base,       ONLY:dffts
 #endif
- use wave_func,      ONLY:wf_ng
+ use wave_func,      ONLY:wf_ng,rho_map_size,rho_map
  use timing_m,       ONLY:timing
  use gpu_m,          ONLY:have_gpu,gpu_devsync
  use devxlib,        ONLY:devxlib_memcpy_h2d,devxlib_memcpy_d2h,devxlib_memset_d
@@ -29,18 +33,22 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
  use mkl_dfti_omp_offload
  use FFT_m,          ONLY:mklgpu_desc
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
- integer :: iG_max,iGo_max,ierr
+ integer :: iG_max_in,iGo_max,ierr
  logical :: ONLY_SIZE
+ !
+ ! External function
+ !
 #if defined _FFTQE
  integer, external :: fft_setmap
 #endif
  !
  ! Work Space
  !
- integer  :: i1,i2,i3,i4,iv(3),ln(3),is,space_inv(3,3)
+ integer  :: i1,i2,i3,i4,iG_max,iv(3),ln(3),is,space_inv(3,3),wf_ng_closed
  real(SP) :: v1(3),M1(3,3),M2(3,3),mat(3,3),scal(3)
  real(SP), allocatable :: g_vec_rot(:,:)
  real(SP), allocatable DEV_ATTR :: g_vec_rot_d(:,:)
@@ -65,6 +73,18 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
  YAMBO_ALLOC_MOLD(g_vec_rot,g_vec)
  g_vec_rot=transpose(matmul(mat,transpose(g_vec)))
  !
+ iG_max=iG_max_in
+ ! 2025/10730:
+ ! I set a lower bound to wf_ng_closed which is defined here
+ ! We should discuss why we have a lower bound, and if we really need it
+ wf_ng_closed=wf_ng
+ call PARSER_close_G(wf_ng_closed,'tRL')
+ if (iG_max<wf_ng_closed) then
+   !call warning("Imposing wf_ng_closed as cutoff for fft size")
+   iG_max=wf_ng_closed
+ endif
+ !
+ !
 #ifdef _GPU
  YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(g_vec_rot),g_vec_rot)
 #endif
@@ -74,11 +94,11 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
    YAMBO_FREE_GPU(DEV_VAR(fft_g_table))
    YAMBO_FREE(fft_g_table)
    !
-   YAMBO_ALLOC(fft_g_table,(max(iG_max,wf_ng),iGo_max))
+   YAMBO_ALLOC(fft_g_table,(iG_max,iGo_max))
    fft_g_table=0
    !
    if (have_gpu) then
-     YAMBO_ALLOC_GPU(DEV_VAR(fft_g_table),(max(iG_max,wf_ng),iGo_max))
+     YAMBO_ALLOC_GPU(DEV_VAR(fft_g_table),(iG_max,iGo_max))
      call devxlib_memset_d(DEV_VAR(fft_g_table),val=0)
    endif
    !
@@ -104,7 +124,7 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
    fft_dim_1 = fft_dim(1)
    fft_dim_2 = fft_dim(2)
    fft_dim_3 = fft_dim(3)
-   i1_max = min(max(iG_max,wf_ng),ng_vec)
+   i1_max = min(iG_max,ng_closed)
    !
    YAMBO_ALLOC(ivec_GmG,(iGo_max,i1_max,3))
    YAMBO_ALLOC_GPU(DEV_VAR(ivec_GmG),(iGo_max,i1_max,3))
@@ -123,7 +143,7 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
        iv_2 = nint(DEV_VAR(g_vec_rot)(i1,2) - DEV_VAR(g_vec_rot)(i2,2))
        iv_3 = nint(DEV_VAR(g_vec_rot)(i1,3) - DEV_VAR(g_vec_rot)(i2,3))
        !
-       if (i2==1.or.i1<=iG_max) then
+       if (i2==1.or.i1<=iG_max_in) then
          DEV_VAR(ivec_GmG)(i2,i1,1)=iv_1
          DEV_VAR(ivec_GmG)(i2,i1,2)=iv_2
          DEV_VAR(ivec_GmG)(i2,i1,3)=iv_3
@@ -167,13 +187,13 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
    YAMBO_FREE(ivec_GmG)
    !
 #else
-   do i1=1,min(max(iG_max,wf_ng),ng_vec)
+   do i1=1,min(iG_max,ng_closed)
      do i2=1,iGo_max
        !
        v1=(g_vec_rot(i1,:)-g_vec_rot(i2,:))
        iv=nint(v1)
        !
-       if (i2==1.or.i1<=iG_max) forall(i3=1:3) ln(i3)=max(ln(i3),iv(i3))
+       if (i2==1.or.i1<=iG_max_in) forall (i3=1:3) ln(i3)=max(ln(i3),iv(i3))
 #  if defined _FFTQE
        if (i4>0.and..not.ONLY_SIZE) fft_g_table(i1,i2)=fft_setmap(iv,dffts)
 #  else
@@ -207,6 +227,7 @@ subroutine fft_setup(iG_max,iGo_max,ONLY_SIZE)
      i4=1
      cycle
    endif
+   if(.not.allocated(rho_map)) rho_map_size=fft_size
    !
    YAMBO_FREE_GPU(DEV_VAR(fft_rot_r))
    YAMBO_FREE(fft_rot_r)
diff --git a/src/wf_and_fft/scatter_Bamp_using_the_gradient.F b/src/wf_and_fft/scatter_Bamp_using_the_gradient.F
index 0f74d7359a..1653b2e1fe 100644
--- a/src/wf_and_fft/scatter_Bamp_using_the_gradient.F
+++ b/src/wf_and_fft/scatter_Bamp_using_the_gradient.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 subroutine scatter_Bamp_using_the_gradient(isc,which)
  !
  ! It computs
@@ -50,10 +53,10 @@ subroutine scatter_Bamp_using_the_gradient(isc,which)
  call WF_apply_symm(isc%os,isc%WF_symm_o)
  !
  ! \grad <ib ik_is i_sp|
- if (which=="i") call eval_Gradient(isc%WF_symm_i,WF_der,n_spinor,"wave")
+ if (which=="i") call eval_Gradient_wf(isc%WF_symm_i,WF_der)
  !
  ! \grad | ob ok_os o_sp>   
- if (which=="o") call eval_Gradient(isc%WF_symm_o,WF_der,n_spinor,"wave")
+ if (which=="o") call eval_Gradient_wf(isc%WF_symm_o,WF_der)
  !
  ! Chartesian components of the gradient loop
  !
diff --git a/src/wf_and_fft/scatter_Gamp.F b/src/wf_and_fft/scatter_Gamp.F
index c057c73dcf..388a8d81a8 100644
--- a/src/wf_and_fft/scatter_Gamp.F
+++ b/src/wf_and_fft/scatter_Gamp.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine DEV_SUB(scatter_Gamp)(isc,mode)
  !
  ! INCLUDED in: scatter_Gamp_incl.F
@@ -25,8 +29,9 @@ subroutine DEV_SUB(scatter_Gamp)(isc,mode)
  use devxlib,       ONLY:devxlib_mapped
  use gpu_m
  use timing_m
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(elemental_collision), target::isc
  character(1) ::mode
diff --git a/src/wf_and_fft/scatter_Modscr.F b/src/wf_and_fft/scatter_Modscr.F
index 2f63cc0819..e1eaf860e3 100644
--- a/src/wf_and_fft/scatter_Modscr.F
+++ b/src/wf_and_fft/scatter_Modscr.F
@@ -5,8 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): MGDV
 !
+! headers
+!
 #include<dev_defs.h>
 !
+!
 subroutine scatter_ModScr(isc,mu)
  !
  ! multiply scatterGamp by exp((-|q+G|^2)/(4\mu^2))
diff --git a/src/xc_functionals/Build_F_xc_mat.F b/src/xc_functionals/Build_F_xc_mat.F
index f120e69cf1..1466ab2d89 100644
--- a/src/xc_functionals/Build_F_xc_mat.F
+++ b/src/xc_functionals/Build_F_xc_mat.F
@@ -11,6 +11,7 @@ subroutine Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
  use BS,             ONLY:l_BS_magnons
  use FFT_m,          ONLY:fft_size
  use electrons,      ONLY:n_spin,n_spinor,n_sp_pol
+ use D_lattice,      ONLY:DL_vol
  use xc_functionals, ONLY:magn
  use vec_operate,    ONLY:v_norm
  !
@@ -23,7 +24,7 @@ subroutine Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
  !  Work space
  !
  integer     :: ifft
- real(SP)    :: mod_mag,magn_versor(fft_size,3),magn_inv(fft_size),&
+ real(SP)    :: mod_mag,mod_mag_n,magn_versor(fft_size,3),magn_inv(fft_size),&
  &              Fxc_rho2(fft_size),Fxc_mag2(fft_size),Fxc_rhom(fft_size),Vxc_mag(fft_size)
  !
  F_xc_mat=cZERO
@@ -67,21 +68,28 @@ subroutine Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
      !
    else
      !
-     ! The following is in principle ok
-     ! In practice, due to numerical noise, it gives wrong results
-     ! 
+     ! The following is in principle ok, but slightly less precise
+     ! than the same operation done in XC_eval_LDA_kernel with DP quantities
+     ! There F_xc(:,1:2,1:2) is filled and directly used here
+     ! However, XC_eval_LDA_kernel works for LDA alone. The code below
+     ! would work also for GGA 
+     !
+     ! DEBUG<
      !Vxc_mag(:)  =(V_xc(:,1)-V_xc(:,2))/2._SP
      !!
      !do ifft=1,fft_size
      !  mod_mag=abs(magn(ifft,3))
-     !  if(mod_mag==0._SP) magn_inv(ifft)=0._SP
-     !  if(mod_mag/=0._SP) magn_inv(ifft)=1._SP/mod_mag
+     !  !The same factor is also used in XC_eval_LDA_kernel
+     !  mod_mag=mod_mag*real(fft_size,SP)/DL_vol
+     !  if(mod_mag< 1.E-6_SP) magn_inv(ifft)=0._SP
+     !  if(mod_mag>=1.E-6_SP) magn_inv(ifft)=1._SP/mod_mag
      !enddo
      !!
      !F_xc_mat(:,1,2,1,2)= cZERO
-     !F_xc_mat(:,1,2,2,1)= F_xc(:,1,2) !2*Vxc_mag*magn_inv
-     !F_xc_mat(:,2,1,1,2)= F_xc(:,2,1) !2*Vxc_mag*magn_inv
+     !F_xc_mat(:,1,2,2,1)= 2*Vxc_mag*magn_inv
+     !F_xc_mat(:,2,1,1,2)= 2*Vxc_mag*magn_inv
      !F_xc_mat(:,2,1,2,1)= cZERO
+     ! DEBUG>
      !
      F_xc_mat(:,1,2,1,2)= F_xc(:,1,1)
      F_xc_mat(:,1,2,2,1)= F_xc(:,1,2)
@@ -91,14 +99,20 @@ subroutine Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
    endif
    !
  else if (n_spinor==2) then
+   !
+   ! At variance with the case n_sp_pol=2, here F_xc(:,1:2,1:2 from XC_eval_LDA_kernel
+   ! here contains v2rho2, e.g. the second order derivative w.r.t. the density
+   ! Accordingly, the term proportional to the V_xc(:,1)-V_xc(:,2) is added here
    !
    magn_versor=0._SP
    do ifft=1,fft_size
-     mod_mag=v_norm(magn(ifft,:))
-     if(mod_mag==0._SP) magn_versor(ifft,:)=0._SP
-     if(mod_mag/=0._SP) magn_versor(ifft,:)=magn(ifft,:)/mod_mag
-     if(mod_mag==0._SP) magn_inv(ifft)     =0._SP
-     if(mod_mag/=0._SP) magn_inv(ifft)     =1._SP/mod_mag
+     mod_mag_n=v_norm(magn(ifft,:))
+     ! The same factor is also used in XC_eval_LDA_kernel
+     mod_mag=mod_mag_n*real(fft_size,SP)/DL_vol
+     if(mod_mag< 1.E-6_SP) magn_versor(ifft,:)=0._SP
+     if(mod_mag>=1.E-6_SP) magn_versor(ifft,:)=magn(ifft,:)/mod_mag_n
+     if(mod_mag< 1.E-6_SP) magn_inv(ifft)     =0._SP
+     if(mod_mag>=1.E-6_SP) magn_inv(ifft)     =1._SP/mod_mag
    enddo
    !
    Vxc_mag(:)  =(V_xc(:,1)-V_xc(:,2))/2._SP
@@ -116,6 +130,18 @@ subroutine Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
    F_xc_mat(:,1,2,2,1) = (Fxc_mag2+Vxc_mag*magn_inv)*(magn_versor(:,1)**2+magn_versor(:,2)**2)+2*Vxc_mag*magn_inv   !  (mag)
    F_xc_mat(:,2,1,1,2) = (Fxc_mag2+Vxc_mag*magn_inv)*(magn_versor(:,1)**2+magn_versor(:,2)**2)+2*Vxc_mag*magn_inv   !  (mag)
    F_xc_mat(:,2,1,2,1) = (Fxc_mag2+Vxc_mag*magn_inv)*(magn_versor(:,1)+cI*magn_versor(:,2))**2                      !  (mag)
+   ! 
+   ! DEBUG<
+   !F_xc_mat(:,1,1,1,1)= F_xc(:,1,1)          ! (opt)
+   !F_xc_mat(:,1,1,2,2)= F_xc(:,1,2)          ! (opt)
+   !F_xc_mat(:,2,2,1,1)= F_xc(:,2,1)          ! (opt)
+   !F_xc_mat(:,2,2,2,2)= F_xc(:,2,2)          ! (opt)
+   !!
+   !F_xc_mat(:,1,2,1,2)= cZERO                ! (mag)
+   !F_xc_mat(:,1,2,2,1)= 2*Vxc_mag*magn_inv   ! (mag)
+   !F_xc_mat(:,2,1,1,2)= 2*Vxc_mag*magn_inv   ! (mag)
+   !F_xc_mat(:,2,1,2,1)= cZERO                ! (mag)
+   ! DEBUG>
    !
    ! TO DO !!
    !
@@ -131,4 +157,18 @@ subroutine Build_F_xc_mat(V_xc,F_xc,F_xc_mat)
    !
  endif
  !
+ ! DEBUG<
+ !write(*,*) 2*Vxc_mag(1:10)*magn_inv(1:10)
+ !!
+ !write(*,*) real(F_xc_mat(1:10,1,1,1,1),SP)
+ !write(*,*) real(F_xc_mat(1:10,1,1,2,2),SP)
+ !write(*,*) real(F_xc_mat(1:10,2,2,1,1),SP)
+ !write(*,*) real(F_xc_mat(1:10,2,2,2,2),SP)
+ !!
+ !write(*,*) real(F_xc_mat(1:10,1,2,1,2),SP)
+ !write(*,*) real(F_xc_mat(1:10,1,2,2,1),SP)
+ !write(*,*) real(F_xc_mat(1:10,2,1,1,2),SP)
+ !write(*,*) real(F_xc_mat(1:10,2,1,2,1),SP)
+ ! DEBUG>
+ !
 end subroutine Build_F_xc_mat
diff --git a/src/xc_functionals/XC_eval_gga_potential.F b/src/xc_functionals/XC_eval_gga_potential.F
index b9aac272db..cdc37a0d6c 100644
--- a/src/xc_functionals/XC_eval_gga_potential.F
+++ b/src/xc_functionals/XC_eval_gga_potential.F
@@ -50,8 +50,9 @@ subroutine XC_eval_gga_potential(v1rho,v1sigma,drho,V_xc)
       ! thus I set fac to 2._SP to remove the extra 2
       if (is_xc/=2) fac=1._SP
       if (is_xc==2) fac=2._SP
-      forall(ic=1:3,ifft=1:fft_size) v_drho(ifft,ic) = v_drho(ifft,ic) + &
-&                                         2._SP/fac*real(v1sigma(is_xc,ifft),SP)*drho(ifft,is2,ic)
+      forall (ic=1:3,ifft=1:fft_size) &
+        & v_drho(ifft,ic) = v_drho(ifft,ic) + &
+        & 2._SP/fac*real(v1sigma(is_xc,ifft),SP)*drho(ifft,is2,ic)
     end do
     !
     ! 1. this is done by FFTing, by doing the scalar
diff --git a/src/xc_functionals/XC_eval_lda_kernel.F b/src/xc_functionals/XC_eval_lda_kernel.F
index 799dee880b..7b3d766ffa 100644
--- a/src/xc_functionals/XC_eval_lda_kernel.F
+++ b/src/xc_functionals/XC_eval_lda_kernel.F
@@ -33,7 +33,9 @@ subroutine XC_eval_lda_kernel(v1rho,v2rho2)
      do is1 = 1,n_sp_pol
        do is2 = 1,n_sp_pol
          is_xc = xc_spin_map1(is1,is2)
-         forall(ifft=1:fft_size) F_xc(ifft,is1,is2) = F_xc(ifft,is1,is2) + real(v2rho2(is_xc,ifft),SP)
+         do ifft=1,fft_size 
+           F_xc(ifft,is1,is2) = F_xc(ifft,is1,is2) + real(v2rho2(is_xc,ifft),SP)
+         enddo
        enddo
      enddo
    else
@@ -55,7 +57,9 @@ subroutine XC_eval_lda_kernel(v1rho,v2rho2)
    do is1 = 1,n_spinor
      do is2 = 1,n_spinor
        is_xc = xc_spin_map1(is1,is2)
-       forall(ifft=1:fft_size) F_xc(ifft,is1,is2) = F_xc(ifft,is1,is2) + real(v2rho2(is_xc,ifft),SP)
+       do ifft=1,fft_size
+         F_xc(ifft,is1,is2) = F_xc(ifft,is1,is2) + real(v2rho2(is_xc,ifft),SP)
+       enddo
      enddo
    enddo
  endif
diff --git a/src/xc_functionals/XC_libxc_driver.F b/src/xc_functionals/XC_libxc_driver.F
index 86f8579ef3..04d020fff0 100644
--- a/src/xc_functionals/XC_libxc_driver.F
+++ b/src/xc_functionals/XC_libxc_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG DS DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
  !
  ! This routine evaluates the xc functional in yambo 
@@ -22,7 +26,8 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
 #if defined _SC || defined _RT
  use hamiltonian,    ONLY:rho_n,magn_n
 #endif
- use electrons,      ONLY:levels,n_spin,n_spinor
+ use wave_func,      ONLY:rho_map_thresh
+ use electrons,      ONLY:levels,n_spin,n_spinor,n_sp_pol
  use FFT_m,          ONLY:fft_size
  use wrapper,        ONLY:V_dot_V
  use BS,             ONLY:l_BS_magnons
@@ -37,8 +42,9 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
 &                         xc_f03_gga_exc,xc_f03_gga_vxc,xc_f03_gga_fxc
  use interfaces,     ONLY:el_density_and_current,el_magnetization
  use pseudo,         ONLY:pp_rho_nlcc,pp_has_nlcc,PP_nlcc_free
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::en
  type(bz_samp)::Xk
@@ -68,7 +74,7 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
  real(DP),allocatable :: v1rho(:,:),v1sigma(:,:)
  real(DP),allocatable :: vx1(:,:),vx2(:,:)
  real(DP),allocatable :: v2rho2(:,:),v2rhosigma(:,:),v2sigma2(:,:)
- logical              :: EvaluateDGradient,l_Fxc_Libxc
+ logical              :: EvaluateDGradient
  !
  ! Functional and Functional infos
  !
@@ -78,9 +84,6 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
  ! Convert and Initialize
  !===============================
  !
- call parser('FxcLibxc',l_Fxc_Libxc)
- l_Fxc_from_Vxc=(n_spin==1).and..not.l_Fxc_Libxc
- !
  exsr=0._DP
  FUNCTIONALTMP=0
 !Set the functional as a PBE to get Exc, next calculate the GAU-PBE Exc as
@@ -199,8 +202,8 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
      YAMBO_ALLOC(spin_rho_SP_cmplx,(fft_size))
      do is1 = 1,n_spin
        spin_rho_SP_cmplx(:)=cmplx(spin_rho_SP(:,is1),0.0_SP,kind=SP)
-       call eval_Gradient(spin_rho_SP_cmplx,drho_cmplx,1,"density")
-       drho(:,is1,:)=real(drho_cmplx)
+       call eval_Gradient_rho_pot(spin_rho_SP_cmplx,drho_cmplx)
+       drho(:,is1,:)=real(drho_cmplx,kind=SP)
      end do
      YAMBO_FREE(drho_cmplx)
      YAMBO_FREE(spin_rho_SP_cmplx)
@@ -214,7 +217,11 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
      YAMBO_FREE(drho_tmp)
    endif
    !
-   forall (is1=1:n_spin,i1=1:fft_size) drho(i1,is1,:) = drho(i1,is1,:)*fft_size/DL_vol*2._SP*pi/alat(:)
+   do is1=1,n_spin
+     do i1=1,fft_size
+       drho(i1,is1,:) = drho(i1,is1,:)*fft_size/DL_vol*2._SP*pi/alat(:)
+     enddo
+   enddo
    !
    do i1 = 1,fft_size
      do is1 = 1,n_spin
@@ -238,7 +245,9 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
  if (FUNCTIONAL == XC_LDA_C_KP) then ! Special case, XC_LDA_C_KP not available from libxc
    select case(ORDER)
      case(0)
-       forall (i1=1:fft_size) rspts(i1)=(3._DP/4._DP/pi_DP/real(spin_rho_SP(i1,1)/DL_vol,DP))**(1._DP/3._DP)
+       do i1=1,fft_size 
+         rspts(i1)=(3._DP/4._DP/pi_DP/real(spin_rho_SP(i1,1)/DL_vol,DP))**(1._DP/3._DP)
+       enddo
        call xc_rpa_kp(rspts,v1rho)
        E_xc = real(tmp_exc,SP) 
        E_xc_val = V_dot_V(fft_size,rho,E_xc(:))
@@ -254,7 +263,7 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
  !
  ORDER_NOW=ORDER
  if (l_Fxc_from_Vxc.and.ORDER==2) then
-   call msg('s',' Evaluating Fxc as numerical functional derivative of Vxc')
+   call msg('s',' Evaluating Fxc as numerical derivative of Vxc')
    PERT_FAC=1.000001_DP
    ORDER_NOW=1
    YAMBO_ALLOC(V_xc_pert,(fft_size,n_spin))
@@ -269,25 +278,41 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
          E_xc = E_xc + real(tmp_exc,SP)    
        case(1)
          call xc_f03_lda_vxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), v1rho(1,1))
-         forall(i1=1:fft_size,i2=1:n_spin) V_xc(i1,i2) = V_xc(i1,i2) + real(v1rho(i2,i1),SP)
+         do i1=1,fft_size
+           do i2=1,n_spin 
+             V_xc(i1,i2) = V_xc(i1,i2) + real(v1rho(i2,i1),SP)
+           enddo
+         enddo
+         ! Needed to build F_xc from functional derivative for n_spin=1
          if (l_Fxc_from_Vxc.and.ORDER==2) then
            spin_rho_DP=spin_rho_DP*PERT_FAC
            call xc_f03_lda_vxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), v1rho(1,1))
-           forall(i1=1:fft_size,i2=1:n_spin) V_xc_pert(i1,i2) = V_xc_pert(i1,i2) + real(v1rho(i2,i1),SP)
+           do i1=1,fft_size
+             do i2=1,n_spin 
+               V_xc_pert(i1,i2) = V_xc_pert(i1,i2) + real(v1rho(i2,i1),SP)
+             enddo
+           enddo
            spin_rho_DP=spin_rho_DP/PERT_FAC
          endif
        case(2)
-         if( l_BS_magnons) then
+         if( l_BS_magnons .or. n_spinor==2 ) then
+           ! This is needed by XC_eval_lda_kernel to construct F_xc for magnons with n_sp_pol=2
            call xc_f03_lda_vxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), v1rho(1,1))
-           forall(i1=1:fft_size,i2=1:n_spin) V_xc(i1,i2) = V_xc(i1,i2) + real(v1rho(i2,i1),SP)
+           ! V_xc is needed for F_xc_mat in Build_Fxc_mat.F with n_spinor=2, 
+           ! In the magnons case, I could use it also for F_xc_mat with n_sp_pol==2,
+           ! avoiding the call to XC_eval_lda_kernel
+           ! Howerver, the latter is more precise, since it works with DP quantities
+           do i1=1,fft_size
+             do i2=1,n_spin 
+               V_xc(i1,i2) = V_xc(i1,i2) + real(v1rho(i2,i1),SP)
+             enddo
+           enddo
          endif
-         ! I could avoid this, however I need it due to numerical noise
-         ! See also comment in Build_F_xc_mat
-         !if( (.not.l_BS_magnons) .or. n_spinor==2) then
-           call xc_f03_lda_fxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), v2rho2(1,1))
-           ! Yambo internal for Kxc
-           call XC_eval_lda_kernel(v1rho,v2rho2)
-         !endif
+         ! Needed to build Fxc unless in magnons case with n_sp_pol==2
+         if( .not.( l_BS_magnons.and.n_sp_pol==2) ) &
+         & call xc_f03_lda_fxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), v2rho2(1,1))
+         ! Yambo internal for Kxc
+         call XC_eval_lda_kernel(v1rho,v2rho2)
      end select
    elseif (fnctl(ixc)%family==XC_FAMILY_GGA.or.fnctl(ixc)%family==XC_FAMILY_HYB_GGA) then
         !GAUPBE short range x-energy  and potentials part
@@ -316,18 +341,27 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
          endif
        case(2)
          if (FUNCTIONAL==XC_HYB_GGA_XC_GAUPBE*XC_FACTOR) call error(' Fxc not coded for GAU-PBEP')
-         if( l_BS_magnons) then
+         if( l_BS_magnons .or. n_spinor==2 ) then
+           ! This is in needed by XC_eval_gga_kernel to construct F_xc for magnons with n_sp_pol=2
+           ! For now only v1rho is used by XC_eval_lda_kernel  
            call xc_f03_gga_vxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), sigma(1,1), v1rho(1,1), v1sigma(1,1))
-           ! Yambo internal for V_xc
-           call XC_eval_gga_potential(v1rho,v1sigma,drho,V_xc)
+           ! V_xc is needed for F_xc_mat in Build_Fxc_mat.F with n_spinor=2, 
+           ! However the GGA potential does not give good results, so below I use the LDA expression 
+           ! In the magnons case, I could use it also for F_xc_mat with n_sp_pol==2,
+           ! avoiding the call to XC_eval_gga_kernel
+           ! Howerver but the latter is more precise, since it works with DP quantities
+           !call XC_eval_gga_potential(v1rho,v1sigma,drho,V_xc)
+           do i1=1,fft_size
+             do i2=1,n_spin 
+               V_xc(i1,i2) = V_xc(i1,i2) + real(v1rho(i2,i1),SP)
+             enddo
+           enddo
          endif
-         ! I could avoid this, however I need it due to numerical noise
-         ! See also comment in Build_F_xc_mat
-         !if( (.not.l_BS_magnons) .or. n_spinor==2) then
-           call xc_f03_gga_fxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), sigma(1,1), v2rho2(1,1), v2rhosigma(1,1), v2sigma2(1,1))
-           ! Yambo internal for Kxc
-           call XC_eval_gga_kernel(v1rho,v1sigma,v2rho2,v2rhosigma,v2sigma2)
-         !endif
+         ! Needed to build Fxc unless in magnons case with n_sp_pol==2
+         if( .not.(l_BS_magnons.and.n_sp_pol==2) ) &
+         & call xc_f03_gga_fxc(fnctl(ixc)%conf, size_IPL, spin_rho_DP(1,1), sigma(1,1), v2rho2(1,1), v2rhosigma(1,1), v2sigma2(1,1))
+         ! Yambo internal for Kxc (for now it is just a wrapperr to lda_kernel)
+         call XC_eval_gga_kernel(v1rho,v1sigma,v2rho2,v2rhosigma,v2sigma2)
      end select
    end if
    call xc_f03_func_end(fnctl(ixc)%conf)
@@ -338,11 +372,19 @@ subroutine XC_libxc_driver(en,Xk,FUNCTIONAL,ORDER,EXX_FRACTION,EXX_SCREENING)
    if (FUNCTIONAL==XC_HYB_GGA_XC_GAUPBE*XC_FACTOR)  E_xc_val= E_xc_val-EXX_FRACTION*real(exsr,SP)
  endif
  !
- if (l_Fxc_from_Vxc.and.ORDER==2) then
-   do ifft=1,fft_size
-     if (abs(spin_rho_DP(1,ifft))<1.E-7_DP) cycle
-     F_xc(ifft,1,1)=real( real(V_xc_pert(ifft,1)-V_xc(ifft,1),DP)/(spin_rho_DP(1,ifft)*(PERT_FAC-1._DP)),SP)
-   enddo
+ if (ORDER==2) then
+   if (l_Fxc_from_Vxc) then
+     ! Compute the functional derivative
+     do ifft=1,fft_size
+       if (abs(spin_rho_DP(1,ifft))<max(rho_map_thresh,1.E-8)) cycle
+       F_xc(ifft,1,1)=real( real(V_xc_pert(ifft,1)-V_xc(ifft,1),DP)/(spin_rho_DP(1,ifft)*(PERT_FAC-1._DP)),SP)
+     enddo
+   endif
+   ! Remove low density points
+   !do ifft=1,fft_size
+   !  if (abs(spin_rho_DP(1,ifft))>max(rho_map_thresh,1.E-8)) cycle
+   !  F_xc(ifft,1,1)=0._SP
+   !enddo
  endif
  !
  if (EvaluateDGradient) then
diff --git a/src/xc_functionals/XC_nlcc_setup.F b/src/xc_functionals/XC_nlcc_setup.F
index 16b41dc32c..fcf0ee5427 100644
--- a/src/xc_functionals/XC_nlcc_setup.F
+++ b/src/xc_functionals/XC_nlcc_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AF
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine XC_nlcc_setup()
  !
  ! reads the non-linear core-correction (NLCC) data
@@ -23,7 +27,9 @@ subroutine XC_nlcc_setup()
  use parser_m,      ONLY:parser
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_RD_CL,REP
-#include <y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  ! Work Space 
  !
diff --git a/src/xc_functionals/el_density_and_current.F b/src/xc_functionals/el_density_and_current.F
index 8d88dfa979..7951dad4fe 100644
--- a/src/xc_functionals/el_density_and_current.F
+++ b/src/xc_functionals/el_density_and_current.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
  !
  ! Electronic density (and current).
@@ -47,7 +51,7 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
 #if defined _FFTW
  use FFT_m,         ONLY:fftw_plan
 #endif
- use wave_func,     ONLY:WF
+ use wave_func,     ONLY:WF,rho_map,rho_map_size
  use parallel_m,    ONLY:PAR_IND_WF_linear,PAR_COM_density
  use parallel_int,  ONLY:PP_redux_wait
  use pseudo,        ONLY:pp_is_uspp,qe_pseudo_alloc,becp,pp_has_nlcc
@@ -60,8 +64,9 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
  use RT_occupations,ONLY:RT_el_occ
  use real_time,     ONLY:RTibz
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)::k
  type(levels) ::E       
@@ -76,7 +81,7 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
 #if defined _YPP_RT
  integer    :: ik_RT
 #endif
- integer    :: ib,ik,i_sp_pol,i_spinor,rho_syms,i_symm,i_wf,bands_range(2)
+ integer    :: ib,ik,i_sp_pol,i_spinor,rho_syms,i_symm,i_wf,bands_range(2),wf_rspace_size
  integer    :: i_r,ig,i_dir,ifft
  real(SP)   :: f_occ,raux,kw
  logical    :: warn_done,l_si,l_rho,l_drho,l_J
@@ -95,7 +100,7 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
  l_drho    =present(drho)
  l_J       =present(J)
  l_si      =.false.
- if (present(force_spatial_inversion)) l_si=force_spatial_inversion
+ if (present(force_spatial_inversion)) l_si     =force_spatial_inversion
  !
  !
  if (l_rho) then
@@ -107,14 +112,14 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
  if (l_drho) then
    if(pp_is_uspp)  call warning(" USPP corrections to rho gradient not implemented")
    if(pp_has_nlcc) call warning(" NLCC corrections to rho gradient not implemented")
-   YAMBO_ALLOC(WF_der,(fft_size,n_spinor,3))
-   YAMBO_ALLOC(drho_no_sym,(fft_size,3))
+   YAMBO_ALLOC(WF_der,(rho_map_size,n_spinor,3))
+   YAMBO_ALLOC(drho_no_sym,(rho_map_size,3))
    drho       =0._SP
    drho_no_sym=0._SP
  endif
  !
  if (l_J) then
-   YAMBO_ALLOC(WF_der,(fft_size,n_spinor,3))
+   YAMBO_ALLOC(WF_der,(rho_map_size,n_spinor,3))
    call error(" Calculation of current is implemented but not tested")
    J         =0._SP
    J_no_sym  =0._SP
@@ -132,11 +137,14 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
  !
  warn_done=.false.
  !
+ wf_rspace_size=size(WF%r(:,1,1))
  !
  do i_sp_pol=1,n_sp_pol
    bands_range=(/1,E%nbm(i_sp_pol)/)
    if(present(bands)) bands_range=bands
    do ik=1,k%nibz
+     !
+     kw=k%weights(ik)
      !
      do ib=bands_range(1),bands_range(2)
        !
@@ -156,7 +164,7 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
        if(present(bands)) then
          f_occ=0._SP
          do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
-           f_occ=f_occ+RT_el_occ%dN(ib,ik_RT)*RTibz%weights(ik_RT)
+           f_occ=f_occ+RT_el_occ%dN(ib,ik_RT,i_sp_pol)*RTibz%weights(ik_RT)
          enddo
        endif
 #endif
@@ -172,35 +180,46 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
        !
        if (l_rho) then
          !
-         kw=k%weights(ik)
-         !
-         !$omp parallel do default(shared), private(i_spinor,i_r), collapse(2), reduction(+:rho_no_sym)
-         do i_spinor=1,n_spinor
-           do i_r = 1, fft_size
-             rho_no_sym(i_r)=rho_no_sym(i_r)+f_occ*kw*abs(WF%c(i_r,i_spinor,i_wf))**2
+         if (wf_rspace_size<fft_size) then
+           !$omp parallel do default(shared), private(i_spinor,i_r), collapse(2), reduction(+:rho_no_sym)
+           do i_spinor=1,n_spinor
+             do i_r = 1, wf_rspace_size
+               rho_no_sym(rho_map(i_r))=rho_no_sym(rho_map(i_r))+f_occ*kw*abs(WF%r(i_r,i_spinor,i_wf))**2
+             enddo
            enddo
-         enddo
+         else
+           do i_spinor=1,n_spinor
+             do i_r = 1, fft_size
+               rho_no_sym(i_r)=rho_no_sym(i_r)+f_occ*kw*abs(WF%r(i_r,i_spinor,i_wf))**2
+             enddo
+           enddo
+         endif
          !
          ! uspp becsum
          !
          if (pp_is_uspp) then
-           !
-           call sum_bec(ik,i_sp_pol, ib,ib,1, (/f_occ*k%weights(ik)/), becp(ik,i_sp_pol) )
-           !
+           call sum_bec(ik,i_sp_pol, ib,ib,1, (/f_occ*kw/), becp(ik,i_sp_pol) )
          endif
          !
        endif
        !
        if (l_drho) then
          !
-         call eval_Gradient(WF%c(:,:,i_wf),WF_der,n_spinor,"wave")
+         call eval_Gradient_wf(cmplx(WF%r(:,:,i_wf),kind=SP),WF_der)
          !
          do i_dir=1,3
            do i_spinor=1,n_spinor
-             do ifft=1,fft_size
-               drho_no_sym(ifft,i_dir)=drho_no_sym(ifft,i_dir)+E%f(ib,ik,i_sp_pol)*&
-               &  k%weights(ik)*2._SP*real(WF%c(ifft,i_spinor,i_wf)*WF_der(ifft,i_spinor,i_dir))
-             enddo
+             if (wf_rspace_size<fft_size) then
+               do ifft=1,rho_map_size
+                 drho_no_sym(rho_map(ifft),i_dir)=drho_no_sym(rho_map(ifft),i_dir)+E%f(ib,ik,i_sp_pol)*&
+                 &  kw*2._SP*real(WF%r(ifft,i_spinor,i_wf)*WF_der(ifft,i_spinor,i_dir))
+               enddo
+             else
+               do ifft=1,fft_size
+                 drho_no_sym(ifft,i_dir)=drho_no_sym(ifft,i_dir)+E%f(ib,ik,i_sp_pol)*&
+                 &  kw*2._SP*real(WF%r(ifft,i_spinor,i_wf)*WF_der(ifft,i_spinor,i_dir))
+               enddo
+             endif
            enddo
          enddo
          !
@@ -208,14 +227,21 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
        !
        if (l_J) then
          !
-         call eval_Gradient(WF%c(:,:,i_wf),WF_der,n_spinor,"wave")
+         call eval_Gradient_wf(cmplx(WF%r(:,:,i_wf),kind=SP),WF_der)
          !
          do i_dir=1,3
            do i_spinor=1,n_spinor
-             do ifft=1,fft_size
-               J_no_sym(ifft,i_dir)=J_no_sym(ifft,i_dir)+E%f(ib,ik,i_sp_pol)*&
-               &   k%weights(ik)*aimag(WF%c(ifft,i_spinor,i_wf)*WF_der(ifft,i_spinor,i_dir))
-             enddo
+             if (wf_rspace_size<fft_size) then
+               do ifft=1,rho_map_size
+                 J_no_sym(rho_map(ifft),i_dir)=J_no_sym(rho_map(ifft),i_dir)+E%f(ib,ik,i_sp_pol)*&
+                 &   kw*aimag(WF%r(ifft,i_spinor,i_wf)*WF_der(ifft,i_spinor,i_dir))
+               enddo
+             else
+               do ifft=1,fft_size
+                 J_no_sym(ifft,i_dir)=J_no_sym(ifft,i_dir)+E%f(ib,ik,i_sp_pol)*&
+                 &   kw*aimag(WF%r(ifft,i_spinor,i_wf)*WF_der(ifft,i_spinor,i_dir))
+               enddo
+             endif
            enddo
          enddo
          !
@@ -321,3 +347,55 @@ subroutine el_density_and_current(E,k,rho,drho,J,bands,force_spatial_inversion)
 #endif
  ! 
 end subroutine
+
+
+subroutine compute_rho_map(rho)
+ !
+ ! This subroutine defines a grid of sub-points in the whole fft_size
+ ! where the density is bigger than a threshold rho_map_thresh.
+ ! rho_map_thresh has a default 1.e-7 and can be tuned from input.
+ ! The resulting grid has size rho_map_size << fft_size.
+ !
+ ! Doing a loop i1=1,rho_map_size the table rho_map(i1) gives the point
+ ! in the fft_size.
+ !
+ ! This can be useful for isolated systems, where there are region of space
+ ! which are empty. Accordingly, when performing real-space integrals involving
+ ! the wave-functions, only the points in the rho_map_size can be used, with
+ ! a significant speed-up. This was tested in TD-DFT calculations on isolated
+ ! molecules.
+ !
+ use pars,          ONLY:SP
+ use com,           ONLY:msg
+ use FFT_m,         ONLY:fft_size
+ use stderr,        ONLY:intc
+ use D_lattice,     ONLY:nsym,i_time_rev,mag_syms,dl_sop,DL_vol
+ use wave_func,     ONLY:rho_map,rho_map_inv,rho_map_thresh,rho_map_size
+ !
+ implicit none
+ !
+ real(SP), intent(in) :: rho(fft_size)
+ !
+ ! Workspace
+ !
+ integer :: ifft
+ integer, allocatable :: rho_map_tmp(:)
+ !
+ allocate(rho_map_tmp(fft_size))
+ allocate(rho_map_inv(fft_size))
+ rho_map_inv=-1
+ !
+ rho_map_size=0
+ do ifft=1,fft_size
+   if(rho(ifft)*real(fft_size,SP)/real(DL_vol,SP)<rho_map_thresh) cycle
+   rho_map_size=rho_map_size+1
+   rho_map_tmp(rho_map_size)=ifft
+   rho_map_inv(ifft)=rho_map_size
+ enddo
+ !
+ call msg('s','[FFT] Instead of full fft_size '//trim(intc(fft_size))//' using rho_map_size '//trim(intc(rho_map_size)))
+ allocate(rho_map(rho_map_size))
+ rho_map=rho_map_tmp(:rho_map_size)
+ deallocate(rho_map_tmp)
+ !
+end subroutine
diff --git a/src/xc_functionals/el_density_matrix.F b/src/xc_functionals/el_density_matrix.F
index 32a51d256f..380a492845 100644
--- a/src/xc_functionals/el_density_matrix.F
+++ b/src/xc_functionals/el_density_matrix.F
@@ -26,6 +26,8 @@ subroutine el_density_matrix(G_lesser,en,Xk,rho,lowest_band)
  !
  implicit none
  !
+#include<yambo_wfs.h>
+ !
  type(bz_samp), intent(in)  :: Xk
  complex(SP),   intent(in)  :: G_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  integer,       intent(in)  :: lowest_band
@@ -55,11 +57,11 @@ subroutine el_density_matrix(G_lesser,en,Xk,rho,lowest_band)
          !
          if (.not.PAR_IND_WF_linear%element_2D(ib1,ik)) cycle
          !
-         f_occ=en%f(ib1,ik,1)
+         f_occ=en%f(ib1,ik,i_sp_pol)
          !
          i_wf1=WF_tmp%index(ib1,ik,i_sp_pol)
          do i_spinor=1,n_spinor         
-           rho_no_sym(:)=rho_no_sym(:)+real(f_occ*Xk%weights(ik)*abs(WF_tmp%c(:,i_spinor,i_wf1))**2)
+           rho_no_sym(:)=rho_no_sym(:)+real(f_occ*Xk%weights(ik)*abs(WF_tmp%r(:,i_spinor,i_wf1))**2)
          enddo
          !
        enddo
@@ -85,7 +87,7 @@ subroutine el_density_matrix(G_lesser,en,Xk,rho,lowest_band)
        do ib2=ib1,RT_bands(2)
          !
 #if !defined _YPP_RT
-         if (.not.PAR_IND_B_mat_ordered%element_1D(  UP_matrix_index(ib1-RT_bands(1)+1,ib2-RT_bands(1)+1)-1 )) cycle
+         if (.not.PAR_IND_B_mat_ordered%element_1D(  UP_matrix_index(ib1-RT_bands(1)+1,ib2-RT_bands(1)+1) )) cycle
 #endif
          !
          f_occ=cZERO
@@ -98,7 +100,7 @@ subroutine el_density_matrix(G_lesser,en,Xk,rho,lowest_band)
          i_wf1=WF%index(ib1,ik,i_sp_pol)
          i_wf2=WF%index(ib2,ik,i_sp_pol)
          do i_spinor=1,n_spinor
-           rho_no_sym(:)=rho_no_sym(:)+real(f_occ*WF%c(:,i_spinor,i_wf1)*conjg(WF%c(:,i_spinor,i_wf2)),SP)
+           rho_no_sym(:)=rho_no_sym(:)+real(f_occ*WF%r(:,i_spinor,i_wf1)*wfconjg(WF%r(:,i_spinor,i_wf2)),SP)
          enddo
          !
        enddo
diff --git a/src/xc_functionals/el_magnetization.F b/src/xc_functionals/el_magnetization.F
index 6e243e3090..218715bfe3 100644
--- a/src/xc_functionals/el_magnetization.F
+++ b/src/xc_functionals/el_magnetization.F
@@ -19,7 +19,9 @@ subroutine el_magnetization(en,Xk,magn,bands)
  use electrons,      ONLY:levels,n_spin,n_spinor,n_sp_pol
  use D_lattice,      ONLY:nsym,dl_sop,i_time_rev
  use FFT_m,          ONLY:fft_size,fft_rot_r
- use wave_func,      ONLY:WF
+ use wave_func,      ONLY:WF,rho_map,rho_map_size
+ use interfaces,     ONLY:WF_load,WF_free
+ use hamiltonian,    ONLY:WF_G_max,WF_Go_indx
  use electrons,      ONLY:Spin_magn
  use matrix_operate, ONLY:m3det
  use parallel_m,     ONLY:PAR_IND_WF_linear,PAR_COM_density
@@ -31,6 +33,8 @@ subroutine el_magnetization(en,Xk,magn,bands)
  !
  implicit none
  !
+#include<yambo_wfs.h>
+ !
  type(levels), intent(in ) ::en
  type(bz_samp),intent(in ) ::Xk
  real(SP),     intent(out) ::magn(fft_size,3)
@@ -38,7 +42,8 @@ subroutine el_magnetization(en,Xk,magn,bands)
  !
  ! Work Space
  !
- integer :: isym,ifft,ib,ik,i_sp_pol,i_wf,bands_range(2)
+ logical :: l_WFs_on_the_fly
+ integer :: isym,ifft,ib,ik,i_sp_pol,i_wf,bands_range(2),wf_rspace_size
  real(SP):: cv(fft_size,3),tmp_sop(3,3),f_occ
  !
 #if defined _RT
@@ -52,11 +57,21 @@ subroutine el_magnetization(en,Xk,magn,bands)
  !
  if (n_spin==1) return
  !
+ wf_rspace_size=size(WF%r(:,1,1))
+ !
+ l_WFs_on_the_fly=.not.allocated(WF%r)
  !
  do i_sp_pol=1,n_sp_pol
    bands_range=(/1,en%nbm(i_sp_pol)/)
    if(present(bands)) bands_range=bands
    do ik=1,Xk%nibz
+     !
+     if (l_WFs_on_the_fly) then
+       call WF_load(WF,WF_G_max,WF_Go_indx,bands_range,(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
+&                   space='R',title='-mang',keep_states_to_load=.true.)
+       wf_rspace_size=size(WF%r(:,1,1))
+     endif
+     !
      do ib=bands_range(1),bands_range(2)
        !
        if (allocated(PAR_IND_WF_linear%element_2D)) then
@@ -80,32 +95,57 @@ subroutine el_magnetization(en,Xk,magn,bands)
          !
          ! mz
          !
-         cv(:,3)=cv(:,3)+Xk%weights(ik)*(-1)**(i_sp_pol-1)*f_occ*real(conjg(WF%c(:,1,i_wf))*WF%c(:,1,i_wf))
+         if (wf_rspace_size<fft_size) then
+           cv(rho_map(:),3)=cv(rho_map(:),3)+Xk%weights(ik)*(-1)**(i_sp_pol-1)*f_occ*real(wfconjg(WF%r(:,1,i_wf))*WF%r(:,1,i_wf))
+         else
+           cv(:,3)         =cv(:,3)         +Xk%weights(ik)*(-1)**(i_sp_pol-1)*f_occ*real(wfconjg(WF%r(:,1,i_wf))*WF%r(:,1,i_wf))
+         endif
          !
        else if(n_spinor==2) then
          !
          !
          ! mx
          !
-         cv(:,1)=cv(:,1)+Xk%weights(ik)*f_occ*(&
-&                   +real(conjg(WF%c(:,1,i_wf))*WF%c(:,2,i_wf))&
-&                   +real(conjg(WF%c(:,2,i_wf))*WF%c(:,1,i_wf)) )
+         if (wf_rspace_size<fft_size) then
+           cv(rho_map(:),1)=cv(rho_map(:),1)+Xk%weights(ik)*f_occ*(&
+           &                 +real(wfconjg(WF%r(:,1,i_wf))*WF%r(:,2,i_wf))&
+           &                 +real(wfconjg(WF%r(:,2,i_wf))*WF%r(:,1,i_wf)) )
+         else
+           cv(:,1)         =cv(:,1)         +Xk%weights(ik)*f_occ*(&
+           &                 +real(wfconjg(WF%r(:,1,i_wf))*WF%r(:,2,i_wf))&
+           &                 +real(wfconjg(WF%r(:,2,i_wf))*WF%r(:,1,i_wf)) )
+         endif
          !
          ! my
          !
-         cv(:,2)=cv(:,2)+Xk%weights(ik)*f_occ*(&
-&                   +aimag(conjg(WF%c(:,1,i_wf))*WF%c(:,2,i_wf))&
-&                   -aimag(conjg(WF%c(:,2,i_wf))*WF%c(:,1,i_wf)) )
+         if (wf_rspace_size<fft_size) then
+           cv(rho_map(:),2)=cv(rho_map(:),2)+Xk%weights(ik)*f_occ*(&
+&                            +wfaimag(wfconjg(WF%r(:,1,i_wf))*WF%r(:,2,i_wf))&
+&                            -wfaimag(wfconjg(WF%r(:,2,i_wf))*WF%r(:,1,i_wf)) )
+         else
+           cv(:,2)         =cv(:,2)          +Xk%weights(ik)*f_occ*(&
+&                            +wfaimag(wfconjg(WF%r(:,1,i_wf))*WF%r(:,2,i_wf))&
+&                            -wfaimag(wfconjg(WF%r(:,2,i_wf))*WF%r(:,1,i_wf)) )
+         endif
          !
          ! mz
          !
-         cv(:,3)=cv(:,3)+Xk%weights(ik)*f_occ*(&
-&                   +real(conjg(WF%c(:,1,i_wf))*WF%c(:,1,i_wf))&
-&                   -real(conjg(WF%c(:,2,i_wf))*WF%c(:,2,i_wf)) )
+         if (wf_rspace_size<fft_size) then
+           cv(rho_map(:),3)=cv(rho_map(:),3)+Xk%weights(ik)*f_occ*(&
+&                            +real(wfconjg(WF%r(:,1,i_wf))*WF%r(:,1,i_wf))&
+&                            -real(wfconjg(WF%r(:,2,i_wf))*WF%r(:,2,i_wf)) )
+         else
+           cv(:,3)         =cv(:,3)         +Xk%weights(ik)*f_occ*(&
+&                            +real(wfconjg(WF%r(:,1,i_wf))*WF%r(:,1,i_wf))&
+&                            -real(wfconjg(WF%r(:,2,i_wf))*WF%r(:,2,i_wf)) )
+         endif
          !
        endif
        !
      enddo
+     !
+     if (l_WFs_on_the_fly) call WF_free(WF,keep_fft=.true.)
+     !
    enddo
  enddo
  !
@@ -121,8 +161,9 @@ subroutine el_magnetization(en,Xk,magn,bands)
    if (n_spinor==2) then
      tmp_sop(:,:)=dl_sop(:,:,isym)*m3det(dl_sop(:,:,isym))
      if( isym> nsym/(1+i_time_rev) ) tmp_sop(:,:)=-tmp_sop(:,:)
-     forall(ifft=1:fft_size) magn(ifft,:)=magn(ifft,:)+ &
-&                            matmul(tmp_sop, cv(fft_rot_r(ifft,isym),:)/real(nsym,SP))
+     forall (ifft=1:fft_size) &
+       & magn(ifft,:)=magn(ifft,:)+ &
+       & matmul(tmp_sop, cv(fft_rot_r(ifft,isym),:)/real(nsym,SP))
    else
      magn(:,3)=magn(:,3)+cv(fft_rot_r(:,isym),3)/real(nsym,SP)
    endif
diff --git a/src/xc_functionals/el_magnetization_matrix.F b/src/xc_functionals/el_magnetization_matrix.F
index cc1690ae81..a036f7610a 100644
--- a/src/xc_functionals/el_magnetization_matrix.F
+++ b/src/xc_functionals/el_magnetization_matrix.F
@@ -31,6 +31,8 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
  !
  implicit none
  !
+#include<yambo_wfs.h>
+ !
  type(bz_samp), intent(in)  :: Xk
  complex(SP),   intent(in)  :: G_lesser(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),PAR_G_k_range(1):PAR_G_k_range(2),n_sp_pol)
  integer,       intent(in)  :: lowest_band
@@ -39,6 +41,7 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
  !
  ! Work Space
  !
+ logical :: l_WFs_on_the_fly
  integer :: ib1,ib2,i_sp_pol,ik,ik_RT,isym,ifft,i_wf1,i_wf2
  real(SP):: cv(fft_size,3),tmp_sop(3,3)
  complex(SP) :: f_occ
@@ -51,7 +54,7 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
  !
  if (n_spin==1) return
  !
- if (n_sp_pol==2) call error(' case n_sp_pol=2 not yet coded in yambo_rt')
+ l_WFs_on_the_fly=.not.allocated(WF%r)
  !
  if(lowest_band<RT_bands(1)) then
    !
@@ -74,27 +77,28 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
            !
            ! mz
            !
-           cv(:,3)=cv(:,3)+Xk%weights(ik)*(-1)**(i_sp_pol-1)*real(f_occ)*real(conjg(WF_tmp%c(:,1,i_wf1))*WF_tmp%c(:,1,i_wf1))
+           cv(:,3)=cv(:,3)+Xk%weights(ik)*(-1)**(i_sp_pol-1)*real(f_occ)*&
+&                      real(wfconjg(WF_tmp%r(:,1,i_wf1))*WF_tmp%r(:,1,i_wf1))
            !
          else if(n_spinor==2) then
            !
            ! mx
            !
            cv(:,1)=cv(:,1)+Xk%weights(ik)*real(f_occ)*(&
-&                     +real(conjg(WF_tmp%c(:,1,i_wf1))*WF_tmp%c(:,2,i_wf1))&
-&                     +real(conjg(WF_tmp%c(:,2,i_wf1))*WF_tmp%c(:,1,i_wf1)) )
+&                     +real(wfconjg(WF_tmp%r(:,1,i_wf1))*WF_tmp%r(:,2,i_wf1))&
+&                     +real(wfconjg(WF_tmp%r(:,2,i_wf1))*WF_tmp%r(:,1,i_wf1)) )
            !
            ! my
            !
            cv(:,2)=cv(:,2)+Xk%weights(ik)*real(f_occ)*(&
-&                     +aimag(conjg(WF_tmp%c(:,1,i_wf1))*WF_tmp%c(:,2,i_wf1))&
-&                     -aimag(conjg(WF_tmp%c(:,2,i_wf1))*WF_tmp%c(:,1,i_wf1)) )
+&                     +wfaimag(wfconjg(WF_tmp%r(:,1,i_wf1))*WF_tmp%r(:,2,i_wf1))&
+&                     -wfaimag(wfconjg(WF_tmp%r(:,2,i_wf1))*WF_tmp%r(:,1,i_wf1)) )
            !
            ! mz
            !
            cv(:,3)=cv(:,3)+Xk%weights(ik)*real(f_occ)*(&
-&                     +real(conjg(WF_tmp%c(:,1,i_wf1))*WF_tmp%c(:,1,i_wf1))&
-&                     -real(conjg(WF_tmp%c(:,2,i_wf1))*WF_tmp%c(:,2,i_wf1)) )
+&                     +real(wfconjg(WF_tmp%r(:,1,i_wf1))*WF_tmp%r(:,1,i_wf1))&
+&                     -real(wfconjg(WF_tmp%r(:,2,i_wf1))*WF_tmp%r(:,2,i_wf1)) )
            !
          endif
          !
@@ -105,11 +109,16 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
    !  
  endif
  !
+ do i_sp_pol=1,n_sp_pol
  do ik=1,Xk%nibz
    !
 #if !defined _YPP_RT
    if (.not.PAR_IND_WF_k%element_1D(ik)) cycle
 #endif
+   if (l_WFs_on_the_fly) then
+     call WF_load(WF,WF_G_max,WF_Go_indx,RT_bands,(/ik,ik/),(/i_sp_pol,i_sp_pol/),&
+&                   space='R',title='-RT',keep_states_to_load=.true.)
+   endif
    !      
    do ib1=RT_bands(1),RT_bands(2)
      !
@@ -120,43 +129,53 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
      do ib2=ib1,RT_bands(2)
        !
 #if !defined _YPP_RT
-       if (.not.PAR_IND_B_mat_ordered%element_1D(  UP_matrix_index(ib1-RT_bands(1)+1,ib2-RT_bands(1)+1)-1 )) cycle
+       if (.not.PAR_IND_B_mat_ordered%element_1D(  UP_matrix_index(ib1-RT_bands(1)+1,ib2-RT_bands(1)+1) )) cycle
 #endif
        !
        f_occ=cZERO
        do ik_RT=RTibz%k_range(ik,1),RTibz%k_range(ik,2)
-         if (ib1==ib2) f_occ=f_occ-cI*G_lesser(ib1,ib2,ik_RT,1)
-         if (ib1/=ib2) f_occ=f_occ-cI*G_lesser(ib1,ib2,ik_RT,1)*2._SP
+         if (ib1==ib2) f_occ=f_occ-cI*G_lesser(ib1,ib2,ik_RT,i_sp_pol)
+         if (ib1/=ib2) f_occ=f_occ-cI*G_lesser(ib1,ib2,ik_RT,i_sp_pol)*2._SP
          f_occ=f_occ*RTibz%weights(ik_RT)
        enddo
        !
-       i_wf1=WF%index(ib1,ik,1)
-       i_wf2=WF%index(ib2,ik,1)
+       i_wf1=WF%index(ib1,ik,i_sp_pol)
+       i_wf2=WF%index(ib2,ik,i_sp_pol)
        !
-       if (n_spinor==2) then
+       if (n_sp_pol==2) then
+         !
+         ! mz
+         !
+         cv(:,3)=cv(:,3)+(-1)**(i_sp_pol-1)*&
+         &       real(f_occ)*real(wfconjg(WF%r(:,1,i_wf1))*WF%r(:,1,i_wf2))
+         !
+       else if (n_spinor==2) then
          !
          ! mx
          !
          cv(:,1)=cv(:,1)+(&
-&                real(f_occ*conjg(WF%c(:,1,i_wf1))*WF%c(:,2,i_wf2))&
-&               +real(f_occ*conjg(WF%c(:,2,i_wf1))*WF%c(:,1,i_wf2)) )
+&                real(f_occ*wfconjg(WF%r(:,1,i_wf1))*WF%r(:,2,i_wf2))&
+&               +real(f_occ*wfconjg(WF%r(:,2,i_wf1))*WF%r(:,1,i_wf2)) )
          !
          ! my
          !
          cv(:,2)=cv(:,2)+(&
-&                aimag(f_occ*conjg(WF%c(:,1,i_wf1))*WF%c(:,2,i_wf2))&
-&               -aimag(f_occ*conjg(WF%c(:,2,i_wf1))*WF%c(:,1,i_wf2)) )
+&                wfaimag(f_occ*wfconjg(WF%r(:,1,i_wf1))*WF%r(:,2,i_wf2))&
+&               -wfaimag(f_occ*wfconjg(WF%r(:,2,i_wf1))*WF%r(:,1,i_wf2)) )
          !
          ! mz
          !
          cv(:,3)=cv(:,3)+(&
-&                real(f_occ*conjg(WF%c(:,1,i_wf1))*WF%c(:,1,i_wf2))&
-&               -real(f_occ*conjg(WF%c(:,2,i_wf1))*WF%c(:,2,i_wf2)) )
+&                real(f_occ*wfconjg(WF%r(:,1,i_wf1))*WF%r(:,1,i_wf2))&
+&               -real(f_occ*wfconjg(WF%r(:,2,i_wf1))*WF%r(:,2,i_wf2)) )
        endif
        !
      enddo
    enddo
    !
+   if (l_WFs_on_the_fly) call WF_free(WF,keep_fft=.true.)
+   !
+ enddo
  enddo
  !
  call PP_redux_wait(cv)
@@ -169,8 +188,8 @@ subroutine el_magnetization_matrix(G_lesser,en,Xk,magn,lowest_band)
    if (n_spinor==2) then
      tmp_sop(:,:)=dl_sop(:,:,isym)*m3det(dl_sop(:,:,isym))
      if( isym> nsym/(1+i_time_rev) ) tmp_sop(:,:)=-tmp_sop(:,:)
-     forall(ifft=1:fft_size) magn(ifft,:)=magn(ifft,:)+ &
-&                            matmul(tmp_sop, cv(fft_rot_r(ifft,isym),:)/real(nsym,SP) )
+     forall (ifft=1:fft_size) magn(ifft,:)=magn(ifft,:)+ &
+       & matmul(tmp_sop, cv(fft_rot_r(ifft,isym),:)/real(nsym,SP) )
    else
      magn(:,3)=magn(:,3)+cv(fft_rot_r(:,isym),3)/real(nsym,SP)
    endif
diff --git a/vscode_lumen.md b/vscode_lumen.md
new file mode 100644
index 0000000000..831e1f59c5
--- /dev/null
+++ b/vscode_lumen.md
@@ -0,0 +1,28 @@
+# Recommended Extensions
+`Modern Fortran`, `CodeLLDB`, `GDB`, `Better Comments`, `fprettify`.
+
+You have to compile fortls via `pipx` or `python pip`. Subsequently, you might have to specify in  `Modern Fortran`'s settings the `Fortran : fortls PATH` (run `which fortls`).
+
+## GDB-based Fortran debugging
+
+Make sure you have `gdb`.
+
+You must compile yambo with debug flags `CFLAGS="-g -O0" FFLAGS="-g -O0"`
+
+You must compile yambo with debug flags `CFLAGS="-g -O0" FFLAGS="-g -O0"`
+
+Enable debugging from VS code with:
+- Breakpoints
+- Step-by-step execution
+- Variable insepction
+- GDB support
+
+### How to Use Debugger in VS code
+1) Open the yambo folder in VS code
+2) Create a folder `debug_run` and copy inside a SAVE folder and the input files you want to test
+3) Edit the `.vsocde/launch.json_suggested` file accordingly and rename it `.vscode/launch.json`
+4) press F5 for `Run & Debug`
+5) `Debug Yambo` from the dropdown
+6) Open a given file in VS Code, for example `DIPOLE_driver.F` and hover your mouse on a line, then click left of line number to set a red dot (breakpoint). In this way you  set breakpoints by clicking the glutter (left of line numbers).
+7) On the debug menu choose your breakpoints.
+8) Click run
diff --git a/ypp/YPPmodules/YPP_SET_defaults.F b/ypp/YPPmodules/YPP_SET_defaults.F
index 9c00aad6cc..063c4ed68f 100644
--- a/ypp/YPPmodules/YPP_SET_defaults.F
+++ b/ypp/YPPmodules/YPP_SET_defaults.F
@@ -7,8 +7,8 @@
 !
 subroutine YPP_SET_defaults()
  !
- use YPPm,           ONLY:DOS_broadening,DOS_E_range,r_hole,deg_energy ,EXCITON_weight_treshold,&
-                          WF_multiplier,SOC_split,BANDS_bands,BANDS_steps,&
+ use YPPm,           ONLY:DOS_broadening,DOS_E_range,r_fixed,deg_energy ,EXCITON_weight_treshold,&
+                          center_kind,WF_multiplier,SOC_split,BANDS_bands,BANDS_steps,elec_spin,hole_spin,&
 &                         coo_in,coo_out,DOS_E_steps,INTERP_grid ,l_amplitude,l_bands,l_current,l_density  ,&
 &                         l_dos,l_exc_wf   ,l_mag,l_norm_to_one,l_sort,l_sp_wf,l_spin,l_wannier,mag_dir    ,&
 &                         N_path_pts,ncell ,output_fname,p_dir,p_format   ,perturbative_SOC_path,&
@@ -16,7 +16,7 @@ subroutine YPP_SET_defaults()
 &                         WF_ref,what_to_write,SP,HA2EV,E_field,Nel_fac,DOS_bands,AVE_exc,l_interp,  &
 &                         BSiq,l_PROJECT_atom,PDOS_atoms,PDOS_kinds,PDOS_wfcs,PDOS_l,PDOS_j,PDOS_m,&
 &                         EXCITON_Res_treshold,EXCITON_E_treshold,l_PROJECT_line,l_PROJECT_plane,K_grid,&
-&                         DIPs_C_bands,DIPs_V_bands,DIPs_E_range
+&                         MAGNON_Res_threshold,DIPs_C_bands,DIPs_V_bands,DIPs_E_range
  use YPP_symm,       ONLY:wf_ng_cut
  use units,          ONLY:FS2AUT
  use interpolate,    ONLY:INTERP_shell_factor,interpls,max_interpls
@@ -150,10 +150,14 @@ subroutine YPP_SET_defaults()
  l_amplitude=.false.
  l_interp=.false.
  ncell = (/1, 1, 1/) 
- r_hole= (/0._SP,0._SP,0._SP/)
+ r_fixed= (/0._SP,0._SP,0._SP/)
+ center_kind = "Hole"
+ hole_spin="average"
+ elec_spin="average"
  state_ctl = '0 - 0'
  deg_energy =0.01_SP/HA2EV
  EXCITON_Res_treshold =0.0_SP
+ MAGNON_Res_threshold =0.0_SP
  EXCITON_E_treshold =0.0_SP
  EXCITON_weight_treshold =0.05_SP
  WF_multiplier=1._SP
diff --git a/ypp/YPPmodules/mod_YPP.F b/ypp/YPPmodules/mod_YPP.F
index 405c4b8d69..7ad3345b7e 100644
--- a/ypp/YPPmodules/mod_YPP.F
+++ b/ypp/YPPmodules/mod_YPP.F
@@ -129,6 +129,8 @@ module YPPm
  character(1) ::p_format     
  character(1) ::mag_dir     
  character(3) ::p_dir        
+ character(7) ::elec_spin
+ character(7) ::hole_spin
  character(lchlen) ::output_fname 
  character(schlen) ::plot_title 
  character(schlen) ::V_value 
@@ -153,6 +155,7 @@ module YPPm
  ! Exc/SP Properties
  !
  logical      ::l_excitons
+ logical      ::l_magnons
  logical      ::l_electrons
  logical      ::l_sort
  logical      ::l_amplitude
@@ -164,12 +167,17 @@ module YPPm
  integer      ::ncell(3) 
  integer      ::BSiq    
  logical      ::BSiq_via_command_line=.FALSE.
- real(SP)     ::r_hole(3)    
+ real(SP)     ::r_fixed(3)
  real(SP)     ::deg_energy   
  real(SP)     ::EXCITON_E_treshold
  real(SP)     ::EXCITON_Res_treshold
+ real(SP)     ::MAGNON_Res_threshold
  real(SP)     ::EXCITON_weight_treshold   
  real(SP)     ::WF_multiplier   
+ character(7)         :: exc_kind
+ character(schlen)    :: center_kind
+ logical      ::l_exc_occ_interp=.FALSE.
+ real(SP)     ::EXC_min
  character(schlen)    :: AVE_exc
  character(schlen)    :: state_ctl 
  integer              :: EXCITONS_n_user_states=0
@@ -264,12 +272,20 @@ module YPPm
  !
  ! BSE data
  !
- complex(SP), pointer     ::BS_R_left(:)
- complex(SP), pointer     ::BS_R_right(:)
+ complex(SP),pointer     :: BS_R_left(:)         => null()
+ complex(SP),pointer     :: BS_R_right(:)        => null()
+ !
+ real(SP),   pointer     :: BS_R_PL(:,:)         => null()
+ !
+ complex(SP),pointer     :: BS_R_left_kerr(:)    => null()
+ complex(SP),pointer     :: BS_R_right_kerr(:)   => null()
+ complex(SP),pointer     :: BS_R_right_dich(:,:) => null()
+ !
+ complex(SP),pointer     :: BS_R_left_magn(:,:)  => null()
+ complex(SP),pointer     :: BS_R_right_magn(:,:) => null()
+ !
  complex(SP), allocatable ::BS_E(:)
  real(SP),    pointer     ::BS_E_SOC_corr(:,:) => null()
- real(SP),    pointer     ::BS_R_PL(:,:)       => null()
- complex(SP), pointer     ::BS_R_kerr(:)       => null()
  type(X_t)                ::Xbsk 
  !
  ! NL
diff --git a/ypp/YPPmodules/mod_YPP_ELPH.F b/ypp/YPPmodules/mod_YPP_ELPH.F
index c6b7dfb4b3..bfe26e98d6 100644
--- a/ypp/YPPmodules/mod_YPP_ELPH.F
+++ b/ypp/YPPmodules/mod_YPP_ELPH.F
@@ -5,19 +5,25 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 module YPP_ELPH
  !
  use pars,                ONLY:SP,schlen,DP
  use D_lattice,           ONLY:n_atoms
  use R_lattice,           ONLY:nkbz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  logical :: l_phonons
  logical :: l_eliashberg
  logical :: l_atomic_amplitude
  logical :: l_ph_ass_dos
  logical :: l_gkkp_expand
+ logical :: l_gkkp_expand_only_k
  integer :: elph_steps
  real(SP):: elph_gamma_broad
  real(SP):: ph_broad
@@ -29,6 +35,7 @@ module YPP_ELPH
  ! ...GKKP I/O
  logical :: l_gkkp
  logical :: l_gkkp_db
+ logical :: l_gkkp_sngl
  logical :: l_gkkp_dg
  logical :: l_gkkp_plot
  logical :: l_use_qindxB
@@ -104,7 +111,7 @@ subroutine ELPH_databases_IO_freqs(ph_file,n_q,q_pt,ph_freqs)
  !
  contains
    !
-   subroutine ELPH_DB_alloc()  
+   subroutine ELPH_DB_alloc()
      YAMBO_ALLOC(DB_Q_map,(DB_nq))
      DB_Q_map=0
      YAMBO_ALLOC(DB_K_map,(nkbz))
diff --git a/ypp/YPPmodules/mod_YPP_interfaces.F b/ypp/YPPmodules/mod_YPP_interfaces.F
index 203d73a5ae..54517bcd40 100644
--- a/ypp/YPPmodules/mod_YPP_interfaces.F
+++ b/ypp/YPPmodules/mod_YPP_interfaces.F
@@ -42,13 +42,14 @@ subroutine electrons_dos_elemental(k,E,bands,el_dos,dos_E,spin_fac,pdos_fac,USE_
      logical, optional,intent(in) :: USE_the_DbGd,QUIET,FORCE_occ
    end subroutine electrons_dos_elemental
    !
-   subroutine excitons_sort_and_report(iq,BS_R,BS_E,BS_E_SOC_corr,EXC_spin)
+   subroutine excitons_sort_and_report(iq,Res,BS_E,BS_E_SOC_corr,EXC_spin)
      use pars,          ONLY:SP
      use BS_solvers,    ONLY:BSS_n_eig
      use YPPm,          ONLY:EXCITON_spin
      implicit none
      integer                      :: iq
-     complex(SP)                  :: BS_R(BSS_n_eig),BS_E(BSS_n_eig)
+     real(SP)                     :: Res(BSS_n_eig)
+     complex(SP)                  :: BS_E(BSS_n_eig)
      real(SP), optional, pointer  :: BS_E_SOC_corr(:,:)
      type(EXCITON_spin),optional  :: EXC_spin
    end subroutine
diff --git a/ypp/YPPmodules/mod_YPP_real_time.F b/ypp/YPPmodules/mod_YPP_real_time.F
index b654048a83..3dbecc93a5 100644
--- a/ypp/YPPmodules/mod_YPP_real_time.F
+++ b/ypp/YPPmodules/mod_YPP_real_time.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 module YPP_real_time
  !
  use units,    ONLY:HA2EV
  use pars,     ONLY:schlen,SP,DP,rZERO
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! RT
  !
@@ -33,6 +38,7 @@ module YPP_real_time
  logical  :: l_NL_exc
  logical  :: l_RT_abs
  logical  :: l_RT_pol
+ logical  :: l_RT_fields
  !
  ! RT post-processing (kind)
  !
@@ -122,6 +128,10 @@ module YPP_real_time
  !
  character(schlen)        :: RT_pol_mode="slice"
  !
+ ! (RT) Fields
+ !
+ real(SP)                 :: ypp_chirp=0._SP
+ !
  type RT_ctl
    real(SP) :: delta_W             =0._SP   ! delta W
    real(SP) :: X_W_range(2)        =0._SP   ! Response energy range 
diff --git a/ypp/bits/WANNIER_driver.F b/ypp/bits/WANNIER_driver.F
index 28cc20ddc9..c90a39979f 100644
--- a/ypp/bits/WANNIER_driver.F
+++ b/ypp/bits/WANNIER_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM AR
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine WANNIER_driver(k,E)
  !
  ! Latest update: September 14th, 2016 by Antimo Marrazzo (antimo.marrazzo@epfl.ch)
@@ -22,7 +26,9 @@ subroutine WANNIER_driver(k,E)
  use QP_m,           ONLY:QP_t,QP_reset
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:OP_RD_CL,LOG,DUMP
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  type(bz_samp) ::k
  type(levels)  ::E
@@ -84,8 +90,9 @@ subroutine scan_file_to(keyword,found,iun_nnkp)
    !-----------------------------------------------------------------------
    !---Subroutine from Wannier90, GPL license------------------------------
    !-----------------------------------------------------------------------
-   !
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
    !
    integer:: iun_nnkp
    character(len=*), intent(in)  :: keyword
diff --git a/ypp/bits/WF_map_perturbative_SOC.F b/ypp/bits/WF_map_perturbative_SOC.F
index 690d02670a..7d8f884a70 100644
--- a/ypp/bits/WF_map_perturbative_SOC.F
+++ b/ypp/bits/WF_map_perturbative_SOC.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+#include<yambo_wfs.h>
+!
 subroutine WF_map_perturbative_SOC(kpt,En)
  !
  use YPPm,                ONLY:perturbative_SOC_path,SOC_split,SOC_bands_to_map,&
@@ -24,8 +29,9 @@ subroutine WF_map_perturbative_SOC(kpt,En)
  use electrons,           ONLY:levels,n_sp_pol,n_spinor,n_spin
  use LIVE_t,              ONLY:live_timing
  use interfaces,          ONLY:io_DB1_selective_scan
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)    :: kpt
  type(levels),  intent(inout) :: En
@@ -177,8 +183,8 @@ subroutine WF_map_perturbative_SOC(kpt,En)
    !
    call sort(E_sorted,indx=E_sort_map)
    !
-   call degeneration_finder(E_sorted,NB_to_map,first_el,n_of_el,n_deg_grp,&
-&                           SOC_split,Include_single_values=.true.)
+   call degeneration_finder(NB_to_map,first_el,n_of_el,n_deg_grp,&
+&                           Er=E_sorted,deg_accuracy=SOC_split,Include_single_values=.true.)
    !
    group_already_mapped=.false.
    !
@@ -192,10 +198,10 @@ subroutine WF_map_perturbative_SOC(kpt,En)
      if(i_symm>1) then
        do i_fft=1,fft_size
          i_fft_rot=fft_rot_r(i_fft,i_symm)
-         WF_SOC_rot(i_fft,:)=matmul( spin_sop(:,:,i_symm), WF_SOC%c(i_fft_rot,:,i_wf_SOC) )
+         WF_SOC_rot(i_fft,:)=matmul( spin_sop(:,:,i_symm), WF_SOC%r(i_fft_rot,:,i_wf_SOC) )
        enddo
      else
-       WF_SOC_rot=WF_SOC%c(:,:,i_wf_SOC)
+       WF_SOC_rot=WF_SOC%r(:,:,i_wf_SOC)
      endif
      !
      do i_fft=1,fft_size
@@ -245,7 +251,7 @@ subroutine WF_map_perturbative_SOC(kpt,En)
            !
            if (n_sp_pol==2 .and. i_spinor/=i_sp_pol) cycle
            !
-           sq2=sq2+abs(dot_product(WF_SOC_rot(:,i_spinor),WF%c(:,1,i_wf )))**2
+           sq2=sq2+abs(dot_product(WF_SOC_rot(:,i_spinor),WF%r(:,1,i_wf )))**2
            !
          enddo
          !
diff --git a/ypp/dipoles/DIPOLE_ypp_driver.F b/ypp/dipoles/DIPOLE_ypp_driver.F
index 2daa468a93..8cda2a531c 100644
--- a/ypp/dipoles/DIPOLE_ypp_driver.F
+++ b/ypp/dipoles/DIPOLE_ypp_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine DIPOLE_ypp_driver(k,E,q)
  !
  use pars,           ONLY:SP,rZERO,schlen
@@ -26,8 +30,9 @@ subroutine DIPOLE_ypp_driver(k,E,q)
  use YPP_interfaces, ONLY:PROJECTION_setup,PROJECTION_plot
  use parser_m,       ONLY:parser
  use interpolate,    ONLY:INTERPOLATION_driver_seed,INTERPOLATION_driver_do,GRID_k,INTERP_obj
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)  :: k,q
  type(levels)   :: E
diff --git a/ypp/el-ph/.objects b/ypp/el-ph/.objects
index 34dd17f077..f5fd849b7e 100644
--- a/ypp/el-ph/.objects
+++ b/ypp/el-ph/.objects
@@ -2,5 +2,6 @@
 objs = ELPH_databases.o ELPH_databases_IO_elemental.o ELPH_databases_IO_pol_and_freqs.o ELPH_databases_IO_grids_check.o \
        ELPH_databases_IO_transfer_and_write.o ELPH_databases_IO_gkkp_expand.o ELPH_databases_IO_freqs.o \
        ELPH_eliashberg_dos.o ELPH_general_gFsq.o ELPH_excitonic_gkkp.o ELPH_databases_IO_modes.o \
-       ELPH_atomic_amplitude.o ELPH_double_grid.o Eliashberg_Dos_Func.o ELPH_plot_gkkp.o 
+       ELPH_atomic_amplitude.o ELPH_double_grid.o Eliashberg_Dos_Func.o ELPH_plot_gkkp.o ELPH_sngl_db.o \
+       ELPH_load_indexes.o
 #endif
diff --git a/ypp/el-ph/DOUBLE_project.dep b/ypp/el-ph/DOUBLE_project.dep
index 77cfe8cedd..41555c423a 100644
--- a/ypp/el-ph/DOUBLE_project.dep
+++ b/ypp/el-ph/DOUBLE_project.dep
@@ -11,6 +11,8 @@
  ELPH_eliashberg_dos.o
  ELPH_excitonic_gkkp.o
  ELPH_general_gFsq.o
+ ELPH_load_indexes.o
  ELPH_plot_gkkp.o
+ ELPH_sngl_db.o
  Eliashberg_Dos_Func.o
 
diff --git a/ypp/el-ph/ELPH_atomic_amplitude.F b/ypp/el-ph/ELPH_atomic_amplitude.F
index 6757bad96e..64b4598a8f 100644
--- a/ypp/el-ph/ELPH_atomic_amplitude.F
+++ b/ypp/el-ph/ELPH_atomic_amplitude.F
@@ -5,12 +5,16 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_atomic_amplitude(q)
  !
  use pars,                ONLY:SP
  use units,               ONLY:BO2ANG,pi
  use com,                 ONLY:msg
- use R_lattice,           ONLY:bz_samp,RL_vol,RIM_n_rand_pts
+ use R_lattice,           ONLY:bz_samp,RL_vol,RIM_sphe_n_rand_pts
  use D_lattice,           ONLY:n_atoms
  use ELPH,                ONLY:ph_modes,PH_freqs_sq,elph_nQ,elph_use_q_grid,&
 &                              PH_pol_vector,PH_qpt
@@ -22,8 +26,9 @@ subroutine ELPH_atomic_amplitude(q)
  use ALLOC,               ONLY:ELPH_alloc
  use linear_algebra,      ONLY:DIAGO,USE_LK
  use stderr,              ONLY:intc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(bz_samp)::q
  !
  ! Work Space 
@@ -62,11 +67,11 @@ subroutine ELPH_atomic_amplitude(q)
  !
  YAMBO_ALLOC(q_weight,(nq_to_sum))
  !
- RIM_n_rand_pts=100000
+ RIM_sphe_n_rand_pts=100000
  if (elph_use_q_grid) then
    q_weight=q%weights
  else
-   if (RIM_n_rand_pts>0) call msg('s','Using RIM with '//trim(intc(RIM_n_rand_pts))//' point')
+   if (RIM_sphe_n_rand_pts>0) call msg('s','Using RIM with '//trim(intc(RIM_sphe_n_rand_pts))//' point')
    call rim_spherical(nq_to_sum,PH_qpt(1:nq_to_sum,1:3),q_weight,(3.*RL_vol/nq_to_sum/4./pi)**(1./3.),2,.TRUE.)
  endif
  !
diff --git a/ypp/el-ph/ELPH_databases.F b/ypp/el-ph/ELPH_databases.F
index 50f0585b76..1ac6f6af1c 100644
--- a/ypp/el-ph/ELPH_databases.F
+++ b/ypp/el-ph/ELPH_databases.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_databases(k,E,q)
  !
  use units,               ONLY:HA2EV
@@ -12,23 +16,26 @@ subroutine ELPH_databases(k,E,q)
  use stderr,              ONLY:intc
  use electrons,           ONLY:levels
  use ALLOC,               ONLY:ELPH_alloc
- use ELPH,                ONLY:elph_use_q_grid,ph_modes,elph_nb,elph_nQ,GKKP,GKKP_bare,PH_W_debye,&
-&                              PH_freqs_sq,PH_qpt,PH_pol_vector,GKKP_me,elph_nk_bz,PH_kpt_bz,l_GKKP_hosts_bare_dV
+ use ELPH,                ONLY:elph_use_q_grid,ph_modes,elph_bands,elph_nb,elph_nQ,GKKP,GKKP_bare,PH_W_debye,&
+&                              PH_freqs_sq,PH_qpt,PH_pol_vector,GKKP_me,elph_nk_bz,PH_kpt_bz,&
+&                              l_GKKP_debug,l_GKKP_hosts_bare_dV,l_GKKP_hosts_DW
  use com,                 ONLY:msg
  use D_lattice,           ONLY:nsym,sop_tab
- use R_lattice,           ONLY:bz_samp,qindx_B,nqibz,bse_scattering
+ use R_lattice,           ONLY:bz_samp,nqbz,nqibz
  use parser_m,            ONLY:parser
  use vec_operate,         ONLY:v_is_zero
  use zeros,               ONLY:k_iku_zero
  use YPP_ELPH,            ONLY:ELPH_DB_alloc,ELPH_DB_free,DB_kind,DB_nq,DB_ph_K_sym_table,DB_nb,&
 &                              ph_freqs_file,ph_modes_file,use_ext_ph_freqs,use_ext_ph_modes,DB_ph_modes,&
-&                              DB_PH_pol_vec,DB_Q_map,DB_ph_freq_sq,DB_PH_qpt,DB_nk,l_gkkp_expand,l_use_qindxB
+&                              DB_PH_pol_vec,DB_Q_map,DB_ph_freq_sq,DB_PH_qpt,DB_nk,l_gkkp_expand,l_use_qindxB, &
+&                              l_gkkp_expand_only_k
  use LIVE_t,              ONLY:live_timing 
  use IO_int,              ONLY:io_control
  use IO_m,                ONLY:OP_APP_CL,REP,OP_RD_CL
  use parallel_int,        ONLY:PARALLEL_global_indexes
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::k,q
  type(levels)  ::E
@@ -36,9 +43,11 @@ subroutine ELPH_databases(k,E,q)
  ! Work Space...
  !
  integer            ::io_err(3),iq,ik,is,idb,i_star,iq_bz,ID,qindx_ID,qindx_ID_frag
- character(schlen)  ::what
- integer, external  ::ELPH_databases_IO_elemental,io_ELPH,io_QINDX,qindx_B_init
+ logical            ::l_GKKP_skip_DW
+ character(schlen)  ::what,db_name
+ integer, external  ::ELPH_databases_IO_elemental,io_ELPH
  logical, external  ::file_exists
+ logical            ::l_no_store_dw
  real(SP)           ::v(3)
  type(GKKP_me)      ::GKKP_save,GKKP_bare_save
  !
@@ -48,44 +57,20 @@ subroutine ELPH_databases(k,E,q)
  !=================
  call parser('GkkpReadBare',l_GKKP_hosts_bare_dV)
  call parser('GkkpExpand',l_gkkp_expand)
+ call parser('GkkpDebug',l_GKKP_debug)
+ call parser('GkkpExpOnlyK',l_gkkp_expand_only_k)
+ call parser('GkkpSkipDW',l_GKKP_skip_DW)
  call parser('UseQindxB',l_use_qindxB)
  !
- if(l_gkkp_expand) then
-   !      
-   ! Setup parallelization to load qindx_B
-   ! ======================================
-   !
-   call PARALLEL_global_indexes(E,k,q,"BZ_Indexes")
-   call PARALLEL_SETUP_K_scheme("BZINDX")
-   !
-   if(l_use_qindxB) then
-     !
-     if(.not.bse_scattering) call error(' Please run setup with BSEscatt flag ')
-     !
-     ! here qindx_B is allocated
-     call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/5/),ID=ID)
-     io_err(1)=io_QINDX(k,q,ID)
-     if (io_err(1)/=0) then
-       call msg('s',"Missing k/q scattering database")
-       call error("Please run a setup with the BSEscatt flag activated")
-     endif
-     !
-     ! Initialize qindx_B IO
-     ! =============================
-     io_err(2)=qindx_B_init(qindx_ID,qindx_ID_frag) 
-     if(io_err(2)/=0) call error("Error reading qindx_B")
-     !
-   else
-     ! otherwise qindx_S is allocated
-     call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/4/),ID=ID)
-     io_err(1)=io_QINDX(k,q,ID)
-     if (io_err(1)/=0) then
-       call msg('s',"Missing k/q scattering database")
-       call error("Please (re)run the setup")
-     endif
-   endif
-   !
- endif
+ l_GKKP_hosts_DW=.not.l_GKKP_skip_DW
+ !
+ if(l_gkkp_expand.and.l_gkkp_expand_only_k) &
+&  call error(' Both flags "GkkpExpand" and "GkkpExpOnlyK" cannot be present at the same moment! ')
+ !
+ ! Load symmetry indexes if expantion is required
+ ! =================================================
+ ! 
+ if(l_gkkp_expand.or.l_gkkp_expand_only_k) call ELPH_load_indexes(k,E,q,qindx_ID,qindx_ID_frag)
  !
  ! Existence and dimensions 
  !==========================
@@ -186,14 +171,32 @@ subroutine ELPH_databases(k,E,q)
  DB_Q_map=0
  do idb=1,DB_nq
    do iq=1,nqibz
-     v=DB_PH_qpt(idb,:)
-     if (v_is_zero(v+q%pt(iq,:),zero_=k_iku_zero)) DB_Q_map(idb)=iq
+     v=DB_PH_qpt(idb,:)-q%pt(iq,:)
+     if (v_is_zero(v,zero_=k_iku_zero)) DB_Q_map(idb)=iq
    enddo
  enddo
  !
  elph_use_q_grid=.FALSE.
  if(DB_nq==nqibz.and.all(DB_Q_map/=0)) elph_use_q_grid=.TRUE.
  !
+ ! If l_gkkp_expand_only_k I suppose the q-grid is in the BZ
+ !
+ if(l_gkkp_expand_only_k) then
+   DB_Q_map=0
+   do idb=1,DB_nq
+     do iq=1,nqbz
+       v=DB_PH_qpt(idb,:)
+       if (v_is_zero(v+q%ptbz(iq,:),zero_=k_iku_zero)) DB_Q_map(idb)=iq
+     enddo
+   enddo
+   !
+   elph_use_q_grid=.FALSE.
+   if(DB_nq==nqbz.and.all(DB_Q_map/=0)) elph_use_q_grid=.TRUE.
+   !
+   if(.not.elph_use_q_grid) call error('Expand only-k works only with uniform q-grids in the BZ')
+   !
+ endif
+ !
  if (.not.elph_use_q_grid) then
    if (DB_Q_map(1)/=1) call error("First qpt must be Gamma.")
    do idb=1,DB_nq
@@ -208,7 +211,12 @@ subroutine ELPH_databases(k,E,q)
  ! ... Transfer and write ...
  !----------------------------
  ph_modes   =DB_ph_modes
- elph_nb    =DB_nb
+ if(elph_bands(1)>=1 .and. elph_bands(2)<DB_nb) then
+   elph_nb=elph_bands(2)-elph_bands(1)+1
+ else
+   elph_nb    =DB_nb
+   elph_bands=(/1,elph_nb/)
+ endif
  elph_nQ    =DB_nq
  elph_nk_bz =k%nbz
  if (l_gkkp_expand) elph_nQ=q%nbz
@@ -224,11 +232,15 @@ subroutine ELPH_databases(k,E,q)
  call msg('s',':: Q-points(read)        :',DB_nq)
  call msg('s',':: Q-points(written)     :',elph_nQ)
  call msg('s',':: K-points              :',DB_nk)
- call msg('s',':: Bands                 :',DB_nb)
+ call msg('s',':: Bands (all)           :',DB_nb)
+ call msg('s',':: Bands (loading)       :',elph_bands)
  call msg('s',':: Branches              :',DB_ph_modes)
  call msg('s',':: Uniform sampling      :',elph_use_q_grid)
+ call msg('s',':: Q-points in the BZ    :',l_gkkp_expand_only_k)
  call msg('s',':: Symmetry expanded     :',l_gkkp_expand)
- if(l_gkkp_expand) call msg('s',':: Use qindx_B to expand gkkp     :',l_use_qindxB)
+ call msg('s',':: El-Ph contains DW     :',l_GKKP_hosts_DW)
+ call msg('s',':: El-Ph has bare dV     :',l_GKKP_hosts_bare_dV)
+ call msg('s',':: Use qindx_B           :',l_use_qindxB)
  call msg('s',':: Debye Energy          :',PH_W_debye*HA2EV*1000.,"[meV]")
  !
  !...alloc...
@@ -264,8 +276,13 @@ subroutine ELPH_databases(k,E,q)
  enddo
  !
  !... GKKP
- call live_timing('ELPH databases: transfer and I/O',DB_nq)
  call section("*","== Q-points list in Yambo (iku units) ==")
+ !
+ db_name='gkkp'
+ if(l_gkkp_expand.or.l_gkkp_expand_only_k) db_name='gkkp_expanded'
+ !
+ call live_timing('ELPH databases: transfer and I/O',DB_nq)
+ !
  do idb=1,DB_nq
    !
    if(elph_use_q_grid) then
@@ -303,13 +320,13 @@ subroutine ELPH_databases(k,E,q)
        endif
        if (iq_bz==1) call io_control(ACTION=OP_APP_CL,SEC=(/1,2/),ID=ID)
        if (iq_bz> 1) call io_control(ACTION=OP_APP_CL,SEC=(/iq_bz+1/),ID=ID)
-       io_err(3)=io_ELPH(ID,'gkkp_expanded')
+       io_err(3)=io_ELPH(ID,db_name)
        call msg('r','',PH_qpt(iq_bz,:),USE_TABS=.TRUE.)
      enddo
    else
      if (iq==1) call io_control(ACTION=OP_APP_CL,SEC=(/1,2/),ID=ID)
      if (iq> 1) call io_control(ACTION=OP_APP_CL,SEC=(/iq+1/),ID=ID)
-     io_err(3)=io_ELPH(ID,'gkkp')
+     io_err(3)=io_ELPH(ID,db_name)
      call msg('r','',PH_qpt(iq,:),USE_TABS=.TRUE.)
    endif
    !
diff --git a/ypp/el-ph/ELPH_databases_IO_elemental.F b/ypp/el-ph/ELPH_databases_IO_elemental.F
index 314764825b..7957362801 100644
--- a/ypp/el-ph/ELPH_databases_IO_elemental.F
+++ b/ypp/el-ph/ELPH_databases_IO_elemental.F
@@ -119,8 +119,10 @@ integer function ELPH_databases_IO_elemental(what,IDB)
  !
  ! GKKP
  !
- if (IDB==1) DB_grad_at_gamma=cZERO
-                      DB_gkkp=cZERO
+ if (IDB==1) then
+   DB_grad_at_gamma=cZERO
+   DB_gkkp=cZERO
+ endif
  !
  do ik=1,DB_nk
    !
diff --git a/ypp/el-ph/ELPH_databases_IO_gkkp_expand.F b/ypp/el-ph/ELPH_databases_IO_gkkp_expand.F
index 94044290ee..c0499615a9 100644
--- a/ypp/el-ph/ELPH_databases_IO_gkkp_expand.F
+++ b/ypp/el-ph/ELPH_databases_IO_gkkp_expand.F
@@ -8,11 +8,12 @@
 subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag,GKKP)
  !
  use pars,                ONLY:SP
- use R_lattice,           ONLY:bz_samp,qindx_B,qindx_B_load,qindx_S
+ use R_lattice,           ONLY:bz_samp,qindx_B,qindx_B_load,qindx_C
  use ALLOC,               ONLY:ELPH_alloc
  use D_lattice,           ONLY:nsym,i_time_rev,sop_inv,sop_tab
- use ELPH,                ONLY:elph_nb,GKKP_me,PH_qpt
+ use ELPH,                ONLY:elph_bands,GKKP_me,PH_qpt,l_GKKP_debug
  use YPP_ELPH,            ONLY:DB_ph_K_sym_table,YAMBO_K_found,l_use_qindxB
+ use parser_m,            ONLY:parser
  !
  implicit none
  !
@@ -30,6 +31,31 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
  integer, external ::qindx_B_init,qindx_B_close 
  integer       :: qindx_tmp(2),iq_bz
  !
+ ! Debug: print kpt/qpt coordinates for plotting
+ integer :: iaux
+ if (l_GKKP_debug) then
+   open(1, file='K_grid_bz.dat')
+   open(2, file='Q_grid_bz.dat')
+   open(3, file='PH_Q_grid_bz.dat')
+   open(4, file='K_grid_Ibz.dat')
+   call k_ibz2bz(k,"i",.false.)
+   call k_ibz2bz(q,"i",.false.)
+   do iaux=1,k%nibz
+     write (4,*) k%pt(iaux,:)
+   enddo
+   do iaux=1,k%nbz
+     write (1,*) k%ptbz(iaux,:)
+   enddo
+   do iaux=1,q%nbz
+     write (2,*) q%ptbz(iaux,:)
+     write (3,*) PH_qpt(iaux,:)
+   enddo
+   close(1)
+   close(2)
+   close(3)
+   close(4)
+ endif
+ !
  ! "K" mode
  !==========
  !
@@ -47,6 +73,7 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
      do is=1,nsmall
        ik_bz=DB_ph_K_sym_table(ik,sop_inv(small(is)))
        if (local_K_found(ik_bz)) cycle
+       if (l_GKKP_debug) write (*,*) "Expanding k ",ik_bz," from ",ik
        GKKP%dVc(:,:,:,ik_bz,1)=GKKP%dVc(:,:,:,ik,1)
        if (allocated(GKKP%dVr)) GKKP%dVr(:,:,:,ik_bz,1)=GKKP%dVr(:,:,:,ik,1)
        GKKP%E_kpq(iq)%E(:,ik_bz,1)=GKKP%E_kpq(iq)%E(:,ik,1)
@@ -67,7 +94,7 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
  !
  call ELPH_alloc("LEVELS dV",GKKP=GKKP_expanded,Nq_mem=1)
  !
- if(l_use_qindxB) qindx_tmp=qindx_B_init(qindx_ID,qindx_ID_frag) 
+ if(l_use_qindxB) qindx_tmp=qindx_B_init(qindx_ID,qindx_ID_frag,"minus_q") 
  !
  do ik_bz=1,k%nbz
    !
@@ -81,6 +108,8 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
        qindx_tmp=qindx_B_load(ok_bz,ik_bz,qindx_ID_frag)
        iq_bz=qindx_tmp(1)
        !
+       ! q_bz = ki_bz - ko_bz
+       !
        if (iq_bz==iq) then
          ok=k%sstar(ok_bz,1)
          os=k%sstar(ok_bz,2)
@@ -90,9 +119,10 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
      !
    else
      !
-     ! Use qindx_S     
+     ! Use qindx_S:   q_bz = ki_ibz - ko_bz
+     ! Use qindx_C:   q_bz = ki_bz - ko_bz
      !
-     ok_bz=qindx_S(ik,iq,1)
+     ok_bz=qindx_C(ik_bz,iq,1)
      ok=k%sstar(ok_bz,1)
      os=k%sstar(ok_bz,2)
      !
@@ -126,16 +156,16 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
      !
    endif
    !
-   do ib=1,elph_nb
-     do ob=1,elph_nb
+   do ib=elph_bands(1),elph_bands(2)
+     do ob=elph_bands(1),elph_bands(2)
        if (l_bands_reverted) then
          GKKP_expanded%dVc(:,ob,ib,ik_bz,1)=conjg(GKKP%dVc(:,ib,ob,ik_bz_gkkp,1))
        else
          GKKP_expanded%dVc(:,ib,ob,ik_bz,1)=GKKP%dVc(:,ib,ob,ik_bz_gkkp,1)
        endif
-       !
      enddo
    enddo
+   !
  enddo
  !
  ! For the energies E(k+q)
@@ -164,9 +194,9 @@ subroutine ELPH_databases_IO_gkkp_expand(mode,iq,iq_s,k,q,qindx_ID,qindx_ID_frag
      !
    else
      !      
-     ! Use qindx_S
+     ! Use qindx_C
      !
-     ok_bz=qindx_S(ik,iq,1)
+     ok_bz=qindx_C(ik_bz,iq,1)
      ok=k%sstar(ok_bz,1)
      os=k%sstar(ok_bz,2)
      !
diff --git a/ypp/el-ph/ELPH_databases_IO_transfer_and_write.F b/ypp/el-ph/ELPH_databases_IO_transfer_and_write.F
index ba1b83ed86..83069a575c 100644
--- a/ypp/el-ph/ELPH_databases_IO_transfer_and_write.F
+++ b/ypp/el-ph/ELPH_databases_IO_transfer_and_write.F
@@ -10,7 +10,7 @@ subroutine ELPH_databases_IO_transfer_and_write(IDB,k,E,GKKP)
  use pars,                ONLY:SP,DP,rZERO,cZERO
  use D_lattice,           ONLY:n_atoms
  use R_lattice,           ONLY:bz_samp
- use ELPH,                ONLY:GKKP_me,elph_use_q_grid
+ use ELPH,                ONLY:GKKP_me,elph_use_q_grid,elph_bands
  use YPP_ELPH,            ONLY:DB_ph_pol_vec,DB_E_k,DB_kind,DB_K_map,DB_nk,DB_gkkp,&
 &                              DB_nb,DB_ph_modes,DB_grad_at_gamma,DB_E_k_plus_q,DB_Q_map
  use electrons,           ONLY:levels
@@ -32,7 +32,7 @@ subroutine ELPH_databases_IO_transfer_and_write(IDB,k,E,GKKP)
  if (elph_use_q_grid) iq=DB_Q_map(IDB)
  !
  GKKP%dVc=cZERO
- if (allocated(GKKP%dVr)) GKKP%dVr=rZERO
+ if (allocated(GKKP%dVr))  GKKP%dVr=rZERO
  !
  ! GKKP & Levels
  !===============
@@ -70,14 +70,14 @@ subroutine ELPH_databases_IO_transfer_and_write(IDB,k,E,GKKP)
      !
      E_shift=DB_E_k(1,ik_db)-(E%E(1,k%sstar(ik_bz,1),1)+E%E_Fermi)
      !
-     GKKP%E_kpq(iq)%E(:,ik_bz,1)=DB_E_k_plus_q(:,ik_db)-E_shift
+     GKKP%E_kpq(iq)%E(elph_bands(1):elph_bands(2),ik_bz,1)=DB_E_k_plus_q(elph_bands(1):elph_bands(2),ik_db)-E_shift
      !
    endif
    !
-   do ib1=1,DB_nb
-     do ib2=1,DB_nb
+   do ib1=elph_bands(1),elph_bands(2)
+     do ib2=elph_bands(1),elph_bands(2)
        !
-       GKKP%dVc(:,ib1,ib2,ik_bz,1)=DB_gkkp(ib1,ib2,:,ik_db)
+       GKKP%dVc(:,ib1,ib2,ik_bz,1)=cmplx(DB_gkkp(ib1,ib2,:,ik_db),kind=SP)
        !
        if (trim(GKKP%KIND)=="dV_bare") cycle
        !
diff --git a/ypp/el-ph/ELPH_double_grid.F b/ypp/el-ph/ELPH_double_grid.F
index 3053553ea4..53f1daf10a 100644
--- a/ypp/el-ph/ELPH_double_grid.F
+++ b/ypp/el-ph/ELPH_double_grid.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_double_grid(k,E,q)
  !
  use pars,                ONLY:SP,pi,schlen
@@ -12,15 +16,17 @@ subroutine ELPH_double_grid(k,E,q)
  use R_lattice,           ONLY:bz_samp,bz_samp_reset,BZ_FineGd_mode,nkibz,bz_samp_FineGd_reset
  use YPP_ELPH,            ONLY:ph_modes_file,ph_freqs_file
  use stderr,              ONLY:STRING_same,intc
- use ELPH,                ONLY:PH_freqs_sq,elph_use_q_grid,ph_modes,GKKP,elph_nb,PH_freqs,elph_nQ,EkplusQ_mode, &
+ use ELPH,                ONLY:PH_freqs_sq,elph_use_q_grid,ph_modes,GKKP,&
+&                              elph_bands,PH_freqs,elph_nQ,EkplusQ_mode, &
 &                              elph_grids_are_expanded         
  use com,                 ONLY:msg
  use parser_m,            ONLY:parser
  use IO_int,              ONLY:io_control
  use IO_m,                ONLY:OP_RD_CL,REP,DUMP,OP_WR_CL
  use parser_m,            ONLY:parser
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::k,q
  type(levels)  ::E
@@ -78,7 +84,8 @@ subroutine ELPH_double_grid(k,E,q)
  !
  if(.not.elph_use_q_grid) call error('Random q-grid not implemented yet ')
  !
- if(elph_nb/=E%nb) call warning('Nypp/el-ph/ELPH_double_grid.Fumber of bands in the GKKP different from the total number of bands!')
+ if(elph_bands(1)/=1 .or. elph_bands(2)/=E%nb) &
+& call warning('Number of bands in the GKKP different from the total number of bands')
  !
  call E_reset(PH_freqs)
  PH_freqs%nb=ph_modes
diff --git a/ypp/el-ph/ELPH_eliashberg_dos.F b/ypp/el-ph/ELPH_eliashberg_dos.F
index 44ca4be612..1e1c5c7851 100644
--- a/ypp/el-ph/ELPH_eliashberg_dos.F
+++ b/ypp/el-ph/ELPH_eliashberg_dos.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_eliashberg_dos(k,en,q)
  !
  use pars,                ONLY:SP,schlen,pi
@@ -15,7 +19,7 @@ subroutine ELPH_eliashberg_dos(k,en,q)
  use YPPm,                ONLY:l_dos
  use YPP_ELPH,            ONLY:elph_steps,elph_Ef,elph_gamma_broad,&
 &                              ph_broad,l_eliashberg
- use ELPH,                ONLY:ph_modes,elph_nb,GKKP,PH_freqs_sq,PH_freqs,&
+ use ELPH,                ONLY:ph_modes,elph_bands,GKKP,PH_freqs_sq,PH_freqs,&
 &                              PH_W_debye,elph_nQ,elph_use_q_grid,&
 &                              GKKP,setup_k_plus_q_levels,use_PH_DbGd
  use IO_int,              ONLY:io_control
@@ -25,8 +29,9 @@ subroutine ELPH_eliashberg_dos(k,en,q)
  use parallel_m,          ONLY:PP_indexes,myid
  use parallel_int,        ONLY:PP_redux_wait,PARALLEL_index 
  use functions,           ONLY:Fermi_fnc_derivative
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  type(levels) ::en
  type(bz_samp)::k,q
  !
@@ -210,7 +215,7 @@ subroutine ELPH_eliashberg_dos(k,en,q)
    call of_open_close(o_file_name,'ot')
    call msg('o eli','#',' Eliashberg Function & Gamma factors',INDENT=0)
    call msg('o eli','#','',INDENT=0)
-   call msg('o eli',         '#  Bands              ',elph_nb,INDENT=0)
+   call msg('o eli',         '#  Bands              ',elph_bands,INDENT=0)
    call msg('o eli','#','',INDENT=0)
    do iq=1,nq_todo
      write (ch,'(a,i6.6,a)') '# Gamma (',iq,') [GHz]'
@@ -282,8 +287,8 @@ subroutine elph_gamma(iq)
      iibz1=k%sstar(i1,1)
      if (elph_use_q_grid) iibz2=k%sstar(qindx_X(iq,i1,1),1)
      !
-     do ib1=1,elph_nb
-       do ib2=1,elph_nb
+     do ib1=elph_bands(1),elph_bands(2)
+       do ib2=elph_bands(1),elph_bands(2)
          !        
          Ek  =en%E(ib1,iibz1,1)+Ef_diff
          !
@@ -313,8 +318,8 @@ subroutine elph_gamma(iq)
    !
    ! Gamma factors symmetrization
    !
-   call degeneration_finder(abs(PH_freqs_sq(iq,:)),ph_modes,first_el,n_of_el,&
-&                           n_deg_grp,1.E-10_SP)
+   call degeneration_finder(ph_modes,first_el,n_of_el,n_deg_grp,&
+&                           Er=abs(PH_freqs_sq(iq,:)),deg_accuracy=1.E-10_SP)
    do i1=1,n_deg_grp
      !
      sym_gamma=0._SP
diff --git a/ypp/el-ph/ELPH_excitonic_gkkp.F b/ypp/el-ph/ELPH_excitonic_gkkp.F
index 8778bcd962..9e47cc8fb7 100644
--- a/ypp/el-ph/ELPH_excitonic_gkkp.F
+++ b/ypp/el-ph/ELPH_excitonic_gkkp.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_excitonic_gkkp(Xk)
  !
  use pars,                ONLY:SP,schlen,cZERO
@@ -23,8 +27,9 @@ subroutine ELPH_excitonic_gkkp(Xk)
  use com,                 ONLY:msg,of_open_close
  use stderr,              ONLY:intc
  use YPPm,                ONLY:EXCITONS_user_indexes,EXCITONS_n_user_states
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: Xk
  !
diff --git a/ypp/el-ph/ELPH_general_gFsq.F b/ypp/el-ph/ELPH_general_gFsq.F
index 810101933f..b540836b7c 100644
--- a/ypp/el-ph/ELPH_general_gFsq.F
+++ b/ypp/el-ph/ELPH_general_gFsq.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_general_gFsq(k,en,Xk,Xen,q,BS_E_degs)
  !
  use pars,                ONLY:SP,schlen
@@ -29,8 +33,9 @@ subroutine ELPH_general_gFsq(k,en,Xk,Xen,q,BS_E_degs)
  use parallel_m,          ONLY:PP_indexes,PP_indexes_reset,myid,PAR_nQ_bz
  use parallel_int,        ONLY:PP_wait,PP_redux_wait,PARALLEL_index 
  use functions,           ONLY:Fermi_fnc_derivative
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::en,Xen
  type(bz_samp)::k,Xk,q
diff --git a/ypp/el-ph/ELPH_load_indexes.F b/ypp/el-ph/ELPH_load_indexes.F
new file mode 100644
index 0000000000..771ecc8e9b
--- /dev/null
+++ b/ypp/el-ph/ELPH_load_indexes.F
@@ -0,0 +1,85 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM DS
+!
+! headers
+!
+#include<y_memory.h>
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine ELPH_load_indexes(k,E,q,qindx_ID,qindx_ID_frag)
+ !
+ use units,               ONLY:HA2EV
+ use pars,                ONLY:schlen,SP
+ use stderr,              ONLY:intc
+ use com,                 ONLY:msg
+ use YPP_ELPH,            ONLY:l_use_qindxB
+ use electrons,           ONLY:levels
+ use R_lattice,           ONLY:bz_samp,bse_scattering,coll_scattering
+ use parser_m,            ONLY:parser
+ use IO_int,              ONLY:io_control
+ use IO_m,                ONLY:OP_APP_CL,REP,OP_RD_CL
+ use parallel_int,        ONLY:PARALLEL_global_indexes
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp) ::k,q
+ type(levels)  ::E
+ integer       ::qindx_ID,qindx_ID_frag
+ !
+ ! Work Space...
+ !
+ integer            ::io_err(3),ID
+ integer, external  ::io_QINDX,qindx_B_init
+ !
+ if(l_use_qindxB) then
+   !
+   bse_scattering=.true.
+   !
+   ! here qindx_B is allocated
+   call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/5/),ID=ID)
+   io_err(1)=io_QINDX(k,q,ID,'minus_q')
+   if (io_err(1)/=0) then
+     call msg('s',"Missing k/q scattering database")
+     call error("Please run a setup with the BSEscatt flag activated")
+   endif
+   !
+   ! Initialize qindx_B IO
+   ! =============================
+   io_err(2)=qindx_B_init(qindx_ID,qindx_ID_frag)
+   if(io_err(2)/=0) call error("Error reading qindx_B")
+   !
+ else
+   ! otherwise qindx_C is allocated
+   !
+   coll_scattering=.true.
+   !
+   call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/6/),ID=ID)
+   io_err(1)=io_QINDX(k,q,ID,'minus_q')
+   if (io_err(1)/=0) then
+     call msg('s',"Missing k/q scattering database")
+     call error("Please (re)run the setup")
+   endif
+ endif
+ !
+end subroutine
diff --git a/ypp/el-ph/ELPH_plot_gkkp.F b/ypp/el-ph/ELPH_plot_gkkp.F
index a52ee328f8..3905a612ec 100644
--- a/ypp/el-ph/ELPH_plot_gkkp.F
+++ b/ypp/el-ph/ELPH_plot_gkkp.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine ELPH_plot_gkkp(k,E,q)
  !
  use pars,                ONLY:SP,pi,schlen
@@ -19,8 +23,9 @@ subroutine ELPH_plot_gkkp(k,E,q)
  use IO_m,                ONLY:REP,deliver_IO_error_message,DUMP,manage_action,RD_CL_IF_END,OP_RD
  use stderr,              ONLY:intc
  use LIVE_t,              ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::k,q
  type(levels)  ::E
diff --git a/ypp/el-ph/ELPH_sngl_db.F b/ypp/el-ph/ELPH_sngl_db.F
new file mode 100644
index 0000000000..ce13ef83da
--- /dev/null
+++ b/ypp/el-ph/ELPH_sngl_db.F
@@ -0,0 +1,123 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM
+!
+! headers
+!
+#include<y_memory.h>
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine ELPH_sngl_db(k,E,q)
+ !
+ use units,               ONLY:HA2EV
+ use pars,                ONLY:schlen,SP
+ use stderr,              ONLY:intc
+ use ALLOC,               ONLY:ELPH_alloc
+ use ELPH,                ONLY:GKKP,elph_nb,ph_modes,elph_nQ,elph_nk_bz
+ use YPP_ELPH,            ONLY:ELPH_DB_alloc,ELPH_DB_free,DB_PH_kpt,DB_nq,DB_q_map,DB_PH_qpt,&
+&                              DB_ph_modes,DB_nb,DB_nq         
+ use com,                 ONLY:msg
+ use electrons,           ONLY:levels
+ use YPP_ELPH,            ONLY:l_gkkp_expand_only_k,elph_dbs_path,DB_nk
+ use R_lattice,           ONLY:bz_samp
+ use parser_m,            ONLY:parser
+ use zeros,               ONLY:k_iku_zero
+ use vec_operate,         ONLY:v_is_zero
+ use LIVE_t,              ONLY:live_timing
+ use IO_m,                ONLY:REP,OP_WR_CL,VERIFY,OP_APP_CL,OP_RD_CL
+ use IO_int,              ONLY:io_control
+ !
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ type(bz_samp)      :: k,q
+ type(levels)       :: E
+ integer            :: qindx_ID,qindx_ID_frag,io_err,iqbz,idb
+ integer, external  :: ELPH_databases_IO_elemental,io_ELPH
+ character(schlen)  :: db_name 
+ integer            :: n_dbs_found,ID
+ real(SP)           :: v(3)
+ !
+ call parser('GkkpExpOnlyK',l_gkkp_expand_only_k)
+ !
+ if(l_gkkp_expand_only_k) call ELPH_load_indexes(k,E,q,qindx_ID,qindx_ID_frag)
+ !
+ call msg('s','Inspecting databases ...')
+ n_dbs_found=0
+ !
+ ! Withtout symmetries at maximum there will be q%nbz databases
+ do idb=1,q%nbz
+   io_err=ELPH_databases_IO_elemental("dV EXIST",idb)
+   if (io_err==0) n_dbs_found=n_dbs_found+1 
+ enddo
+ DB_nq=n_dbs_found
+ !
+ call msg('sr','Found '//intc(n_dbs_found)//' databases in: '//trim(elph_dbs_path))
+ if(n_dbs_found==0) return
+ !
+ call ELPH_DB_alloc( )
+ !
+ call k_ibz2bz(k,'i',.TRUE.)
+ call k_ibz2bz(q,'i',.TRUE.)
+ !
+ db_name='gkkp_expanded'
+ !
+ ph_modes   =DB_ph_modes
+ elph_nb    =DB_nb
+ elph_nQ    =q%nbz
+ elph_nk_bz =k%nbz
+ ! 
+ call ELPH_alloc("PHONONS")
+ call ELPH_alloc("LEVELS dV DW",GKKP=GKKP,Nq_mem=1)
+ !
+ call live_timing('ELPH databases: ',DB_nq) 
+ do idb=1,q%nbz
+   io_err=ELPH_databases_IO_elemental("dV EXIST",idb)
+   if(io_err==0) then
+     if(DB_nk/=k%nbz.and..not.l_gkkp_expand_only_k) call error('Wrong number of k-points ')
+     !
+     ! the q-point is read with KPT
+     !
+     io_err=ELPH_databases_IO_elemental("dV FREQS POL_VEC KPT",idb)
+     call ELPH_databases_IO_grids_check(k)
+     io_err=ELPH_databases_IO_elemental("dV KPT GKKP",idb)
+     call ELPH_databases_IO_transfer_and_write(idb,k,E,GKKP)
+     !
+     ! Search for the q-point
+     !
+     do iqbz=1,q%nbz
+       v=DB_PH_qpt(idb,:)
+       if (v_is_zero(v+q%ptbz(iqbz,:),zero_=k_iku_zero)) DB_Q_map(idb)=iqbz
+     enddo
+     !
+     if(DB_Q_map(idb)==0) call error('Q-point '//intc(idb)//' not found ! ')
+     !
+     if (DB_Q_map(idb)==1) call io_control(ACTION=OP_APP_CL,SEC=(/1,2/),ID=ID)
+     if (DB_Q_map(idb)> 1) call io_control(ACTION=OP_APP_CL,SEC=(/DB_Q_map(idb)+1/),ID=ID)
+     io_err=io_ELPH(ID,db_name)
+     call live_timing(steps=1)
+     !
+   endif
+ enddo
+ call live_timing( )
+ !
+end subroutine
diff --git a/ypp/el-ph/Eliashberg_Dos_Func.F b/ypp/el-ph/Eliashberg_Dos_Func.F
index 6cf3b7c7db..65dd34fff2 100644
--- a/ypp/el-ph/Eliashberg_Dos_Func.F
+++ b/ypp/el-ph/Eliashberg_Dos_Func.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 real(SP) function Eliashberg_Dos_Func(iq,im,ph_en, ph_broad, PH_E, q) 
  !
  use pars,                ONLY:SP
@@ -12,8 +16,9 @@ real(SP) function Eliashberg_Dos_Func(iq,im,ph_en, ph_broad, PH_E, q)
  use R_lattice,           ONLY:bz_samp
  use functions,           ONLY:Fermi_fnc_derivative
  use ELPH,                ONLY:PH_freqs_sq,use_PH_DbGd
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,      intent(in) :: iq,im
  real(SP),     intent(in) :: ph_en,ph_broad
diff --git a/ypp/el-ph/YPP_ELPH_project.dep b/ypp/el-ph/YPP_ELPH_project.dep
index 77cfe8cedd..41555c423a 100644
--- a/ypp/el-ph/YPP_ELPH_project.dep
+++ b/ypp/el-ph/YPP_ELPH_project.dep
@@ -11,6 +11,8 @@
  ELPH_eliashberg_dos.o
  ELPH_excitonic_gkkp.o
  ELPH_general_gFsq.o
+ ELPH_load_indexes.o
  ELPH_plot_gkkp.o
+ ELPH_sngl_db.o
  Eliashberg_Dos_Func.o
 
diff --git a/ypp/electrons/electrons_WFs.F b/ypp/electrons/electrons_WFs.F
index 9398557f78..1fa8e73853 100644
--- a/ypp/electrons/electrons_WFs.F
+++ b/ypp/electrons/electrons_WFs.F
@@ -5,6 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+#include<yambo_wfs.h>
+!
 subroutine electrons_WFs(Xen)
  !
  use pars,           ONLY:SP,lchlen
@@ -34,8 +39,9 @@ subroutine electrons_WFs(Xen)
  use com,            ONLY:jobstr
  use R_lattice,      ONLY:nkibz
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  ::Xen
  !
@@ -156,7 +162,7 @@ subroutine electrons_WFs(Xen)
            !
            i_wf=WF%index(ib,ik,1)
            !
-           forall(ir=1:fft_size) v2plot_c(ir)=v2plot_c(ir)+WF%c(ir,1,i_wf)
+           forall(ir=1:fft_size) v2plot_c(ir)=v2plot_c(ir)+WF%r(ir,1,i_wf)
            !
          else
            !
@@ -268,8 +274,8 @@ subroutine electrons_WFs(Xen)
 #if defined _YPP_SC
      if (.not.l_mean_potential) then 
 #endif
-       forall(ir=1:fft_size) v2plot(ir)=real( WF%c(ir,1,i_wf)*conjg( WF%c(ir,1,i_wf) ) )
-       if (n_spinor==2) forall(ir=1:fft_size) v2plot(ir)=v2plot(ir) + real( WF%c(ir,2,i_wf)*conjg( WF%c(ir,2,i_wf) ) )
+       forall(ir=1:fft_size) v2plot(ir)=real( WF%r(ir,1,i_wf)*wfconjg( WF%r(ir,1,i_wf) ), kind=SP )
+       if (n_spinor==2) forall(ir=1:fft_size) v2plot(ir)=v2plot(ir) + real( WF%r(ir,2,i_wf)*wfconjg( WF%r(ir,2,i_wf) ),kind=SP )
 #if defined _YPP_SC
      endif
 #endif
diff --git a/ypp/electrons/electrons_angular_momentum.F b/ypp/electrons/electrons_angular_momentum.F
index aa44623edd..e9e94c9742 100644
--- a/ypp/electrons/electrons_angular_momentum.F
+++ b/ypp/electrons/electrons_angular_momentum.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine electrons_angular_momentum(nkpt_angular,nband_angular)
  !
  ! Important:
@@ -28,8 +32,9 @@ subroutine electrons_angular_momentum(nkpt_angular,nband_angular)
  use LIVE_t,         ONLY:live_timing
  use units,          ONLY:SPEED_OF_LIGHT
  use com,            ONLY:msg,of_open_close
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Input variables
  integer                  :: nkpt_angular(2), nband_angular(2)
@@ -154,17 +159,17 @@ subroutine electrons_angular_momentum(nkpt_angular,nband_angular)
          ! Mi = [r x (p + 1/c*A)]_i = Li +1/c*(r x A)_i for i=x,y,z
          ! These are the mechanicals momenta and are gauge independent
          if(angular_dir(1)) M_tmp(1,:)=L_tmp(1,:)+1./SPEED_OF_LIGHT*(x_cc(:,2,1)*A_magn_z(:)- &
-&                             x_cc(:,3,1)*A_magn_y(:))*WF%c(:,1,ifft)
+&                             x_cc(:,3,1)*A_magn_y(:))*WF%r(:,1,ifft)
          if(angular_dir(2)) M_tmp(2,:)=L_tmp(2,:)+1./SPEED_OF_LIGHT*(x_cc(:,3,1)*A_magn_x(:)- &
-&                             x_cc(:,1,1)*A_magn_z(:))*WF%c(:,1,ifft)
+&                             x_cc(:,1,1)*A_magn_z(:))*WF%r(:,1,ifft)
          if(angular_dir(3)) M_tmp(3,:)=L_tmp(3,:)+1./SPEED_OF_LIGHT*(x_cc(:,1,1)*A_magn_y(:)- &
-&                             x_cc(:,2,1)*A_magn_x(:))*WF%c(:,1,ifft)
+&                             x_cc(:,2,1)*A_magn_x(:))*WF%r(:,1,ifft)
        endif
        !
        do i1=1,3
          if(.not.angular_dir(i1)) cycle
-         L(i1) = Vstar_dot_V(fft_size,WF%c(:,1,ifft),L_tmp(i1,:))
-         if(MAG_landau) M(i1) = Vstar_dot_V(fft_size,WF%c(:,1,ifft),M_tmp(i1,:))
+         L(i1) = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,ifft),kind=SP),L_tmp(i1,:))
+         if(MAG_landau) M(i1) = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,ifft),kind=SP),M_tmp(i1,:))
          L_tot(i1,ik)=L_tot(i1,ik)+L(i1)
          if(MAG_landau) M_tot(i1,ik)=M_tot(i1,ik)+M(i1)
        enddo
diff --git a/ypp/electrons/electrons_bands.F b/ypp/electrons/electrons_bands.F
index bd5cbb2744..da261b0dbc 100644
--- a/ypp/electrons/electrons_bands.F
+++ b/ypp/electrons/electrons_bands.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine electrons_bands(Xk,Xen)
  !
  ! Levels produced by this routine:
@@ -21,28 +25,31 @@ subroutine electrons_bands(Xk,Xen)
  use YPPm,           ONLY:BANDS_steps,INTERP_grid,&
 &                         BANDS_bands,CIRCUIT_E_DB_path,CIRCUIT_E_db,USER_k,CIRCUIT_k,&
 &                         BANDS_path,BANDS_from_db,l_PROJECT_line,l_PROJECT_plane
- use electrons,      ONLY:levels,n_sp_pol,n_spinor,E_reset,n_spin
+ use electrons,      ONLY:levels,n_sp_pol,n_spinor,E_reset,n_spin,nel_cond,E_duplicate,spin_occ
  use R_lattice,      ONLY:bz_samp
- use D_lattice,      ONLY:lattice
+ use functions,      ONLY:Fermi_fnc
+ use D_lattice,      ONLY:lattice,Tel
  use com,            ONLY:msg 
  use parser_m,       ONLY:parser
- use interpolate,    ONLY:electrons_bands_interpolate,GRID_k,INTERP_mode,INTERP_shell_factor
+ use interpolate,    ONLY:electrons_bands_interpolate,GRID_k,INTERP_mode,INTERP_shell_factor,INTERP_obj
  use QP_CTL_m,       ONLY:QP_apply
  use interfaces,     ONLY:OCCUPATIONS_Fermi
+ use interpolate,    ONLY:INTERPOLATION_driver_seed,INTERPOLATION_driver_do
 #if defined _YPP_SC
  use SC,             ONLY:SC_neq_kind
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(inout) :: Xk
  type(levels),  intent(inout) :: Xen
  !
  ! Work Space
  !
- type(levels)          :: GRID_E,CIRCUIT_E,CIRCUIT_dE
- integer               :: ID_spin,ID_magn,ID_bands(3),IDs(5),nfiles(5),nqnt,ib
- real(SP)              :: dEf
+ type(levels)          :: GRID_E,CIRCUIT_E,CIRCUIT_dE,E_QF
+ integer               :: ID_spin,ID_magn,ID_bands(3),IDs(5),nfiles(5),nqnt,ib,ID_occ,ik,prt_occ
+ real(SP)              :: dEf,E_fermi_e,E_fermi_h,E_h,E_e
  logical               :: BANDS_built_in,BANDS_interpolated,GRID_interpolate
 #if defined _YPP_SC
  integer               :: ID_proj
@@ -134,6 +141,13 @@ subroutine electrons_bands(Xk,Xen)
    !
  endif
  !
+ ! Find the Fermi energies of the Quasi-Fermi distribution
+ ! =========================
+ if(nel_cond/=rZERO.and.BANDS_steps> 0) then
+   call E_reset(E_QF)
+   call OCCUPATIONS_Quasi_Fermi(Xen,Xk,E_QF,E_fermi_h,E_fermi_e)        
+ endif
+ !
  ! Fermi Levels
  !==============
  !
@@ -174,15 +188,45 @@ subroutine electrons_bands(Xk,Xen)
    IDs(nqnt)=ID_proj
  endif
 #endif
+ !
+ ! Generate excited states occupations
+ if(nel_cond/=rZERO.and.BANDS_steps> 0) then
+   !
+   call INTERPOLATION_driver_seed("OCCinterp",ID_occ,BANDS_bands,SEED_k=Xk,SEED_E=Xen)
+   YAMBO_ALLOC(INTERP_obj(ID_occ)%INTERP,(BANDS_bands(1):BANDS_bands(2),CIRCUIT_k%nibz,INTERP_obj(ID_occ)%D))
+   INTERP_obj(ID_occ)%INTERP=rZERO
+   !
+   do ik=1,CIRCUIT_k%nibz
+     !
+     do ib=BANDS_bands(1),Xen%nbf(1)
+       E_h=INTERP_obj(ID_bands(2))%INTERP(ib,ik,1)-E_fermi_h
+       INTERP_obj(ID_occ)%INTERP(ib,ik,1)=spin_occ*Fermi_fnc(E_h,Tel)-spin_occ
+     enddo
+     !
+     do ib=Xen%nbf(1)+1,BANDS_bands(2)
+       E_h=INTERP_obj(ID_bands(2))%INTERP(ib,ik,1)-E_fermi_e
+       INTERP_obj(ID_occ)%INTERP(ib,ik,1)=spin_occ*Fermi_fnc(E_h,Tel)
+     enddo
+     !
+   enddo
+   !
+   nqnt=nqnt+1
+   nfiles(nqnt)=1
+   IDs(nqnt)=ID_occ
+   !
+ endif
  !
  if (BANDS_built_in.and.USER_k%nbz>0     )  call plot_interpolated_values &
 &   (Xk,Xen,dEf,USER_k,   IDs(1:nqnt),BANDS_bands,nqnt,nfiles(1:nqnt),"built_in",    "bands",0)
  !
  if (BANDS_interpolated.and.BANDS_from_db)  call plot_interpolated_values &
 &   (Xk,Xen,dEf,CIRCUIT_k,IDs(1:1),   BANDS_bands,1,   nfiles(1:1),   "from_DB",     "bands",0)
+ !
+ prt_occ=0
+ if(nel_cond/=rZERO) prt_occ=1
  !
  if(BANDS_interpolated                   )  call plot_interpolated_values &
-&   (Xk,Xen,dEf,CIRCUIT_k,IDs(1:nqnt),BANDS_bands,nqnt,nfiles(1:nqnt),"interpolated","bands",0)
+&   (Xk,Xen,dEf,CIRCUIT_k,IDs(1:nqnt),BANDS_bands,nqnt,nfiles(1:nqnt),"interpolated","bands",prt_occ)
  !
  if (l_PROJECT_plane) call PROJECTION_plot( "BANDS", 1, n_spin, BANDS_bands, OBS_B=GRID_E%E ) 
  !
diff --git a/ypp/electrons/electrons_dos.F b/ypp/electrons/electrons_dos.F
index ac6a259d68..2b982b2690 100644
--- a/ypp/electrons/electrons_dos.F
+++ b/ypp/electrons/electrons_dos.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine electrons_dos(Xk,Xen)
  !
  use units,          ONLY:HA2EV
@@ -21,8 +25,9 @@ subroutine electrons_dos(Xk,Xen)
  use QP_CTL_m,       ONLY:QP_apply
  use interpolate,    ONLY:INTERPOLATION_driver_seed,INTERPOLATION_driver_do,GRID_k,INTERP_obj
  use stderr,         ONLY:intc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::Xk
  type(levels)  ::Xen
diff --git a/ypp/electrons/electrons_driver.F b/ypp/electrons/electrons_driver.F
index 37192ecb51..b1a192705e 100644
--- a/ypp/electrons/electrons_driver.F
+++ b/ypp/electrons/electrons_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine electrons_driver(Xk,Xen,Xq)
  !
  Use stderr,         ONLY:intc,STRING_same
@@ -21,8 +25,9 @@ subroutine electrons_driver(Xk,Xen,Xq)
  use interfaces,     ONLY:WF_load,WF_free
  use parallel_int,   ONLY:PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
  use interpolate,    ONLY:INTERP_mode
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::Xk,Xq
  type(levels)  ::Xen
diff --git a/ypp/electrons/electrons_magn_factors.F b/ypp/electrons/electrons_magn_factors.F
index 51af39e85d..66141b827c 100644
--- a/ypp/electrons/electrons_magn_factors.F
+++ b/ypp/electrons/electrons_magn_factors.F
@@ -18,6 +18,8 @@ subroutine electrons_magn_factors(kpts,bands,magn_factors)
  !
  implicit none
  !
+#include<yambo_wfs.h>
+ !
  integer,       intent(in)  :: kpts(2),bands(2)
  real(SP),      intent(out) :: magn_factors(bands(1):bands(2),kpts(1):kpts(2),3)
  !
@@ -46,14 +48,14 @@ subroutine electrons_magn_factors(kpts,bands,magn_factors)
      if( .not. px%element_2D(ik-kpts(1)+1,ib-bands(1)+1) ) cycle
      i_wf=WF%index(ib,ik,1)
      ! x
-     magn_factors(ib,ik,1)=real(Vstar_dot_V_omp( fft_size, WF%c(:, 1,i_wf), WF%c(:,2,i_wf) ),SP) &
-&                         +real(Vstar_dot_V_omp( fft_size, WF%c(:, 2,i_wf), WF%c(:,1,i_wf) ),SP)
+     magn_factors(ib,ik,1)=real(Vstar_dot_V_omp( fft_size, WF%r(:,1,i_wf), WF%r(:,2,i_wf) ),SP) &
+&                         +real(Vstar_dot_V_omp( fft_size, WF%r(:,2,i_wf), WF%r(:,1,i_wf) ),SP)
      ! y
-     magn_factors(ib,ik,2)=aimag(Vstar_dot_V_omp( fft_size, WF%c(:,1,i_wf), WF%c(:,2,i_wf) )) &
-&                         -aimag(Vstar_dot_V_omp( fft_size, WF%c(:,2,i_wf), WF%c(:,1,i_wf) ))
+     magn_factors(ib,ik,2)=wfaimag(Vstar_dot_V_omp( fft_size, WF%r(:,1,i_wf), WF%r(:,2,i_wf) )) &
+&                         -wfaimag(Vstar_dot_V_omp( fft_size, WF%r(:,2,i_wf), WF%r(:,1,i_wf) ))
      ! z
-     magn_factors(ib,ik,3)=real(Vstar_dot_V_omp( fft_size, WF%c(:,1,i_wf), WF%c(:,1,i_wf) ),SP) &
-&                         -real(Vstar_dot_V_omp( fft_size, WF%c(:,2,i_wf), WF%c(:,2,i_wf) ),SP)
+     magn_factors(ib,ik,3)=real(Vstar_dot_V_omp( fft_size, WF%r(:,1,i_wf), WF%r(:,1,i_wf) ),SP) &
+&                         -real(Vstar_dot_V_omp( fft_size, WF%r(:,2,i_wf), WF%r(:,2,i_wf) ),SP)
      !
      call live_timing(steps=1)
    enddo
diff --git a/ypp/electrons/electrons_magnetization.F b/ypp/electrons/electrons_magnetization.F
index c67cd4e13e..788a226b04 100644
--- a/ypp/electrons/electrons_magnetization.F
+++ b/ypp/electrons/electrons_magnetization.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine electrons_magnetization(Xk,Xen)
  !
  use pars,           ONLY:SP,lchlen
@@ -17,8 +21,9 @@ subroutine electrons_magnetization(Xk,Xen)
 &                         use_gnuplot,use_cube,plot_title,l_density,mag_dir
  use com,            ONLY:msg,of_open_close
  use xc_functionals, ONLY:magn
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::Xk
  type(levels)  ::Xen
diff --git a/ypp/electrons/electrons_position.F b/ypp/electrons/electrons_position.F
index 4f6e5de39c..17f843f8d9 100644
--- a/ypp/electrons/electrons_position.F
+++ b/ypp/electrons/electrons_position.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
  !
  use pars,           ONLY:SP,schlen,pi
@@ -19,8 +23,9 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
  use wrapper,        ONLY:Vstar_dot_V
  use stderr,         ONLY:intc
  use interfaces,     ONLY:WF_load,WF_free
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Input variables
  integer                  :: nkpt_pos(2),nband_pos(2)
@@ -38,7 +43,7 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
  logical                  :: l_3d,l_2d
  !
  ! Dummyes
- integer                  :: ifft,ib,ik,i_spin
+ integer                  :: i_wf,ib,ik,i_spin
  character(schlen)        :: titles(6)
  real(SP)                 :: tmp_var,x_zero(3)
  complex(SP),allocatable  :: wf_tmp_2d(:,:,:),rperp_tmp_2d(:,:,:),rperp_med_2d(:,:,:)
@@ -98,7 +103,7 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
    !
    do i_spin=1,n_sp_pol
      do ib=nband_pos(1),nband_pos(2)
-       ifft=WF%index(ib,ik,i_spin)
+       i_wf=WF%index(ib,ik,i_spin)
        if(i_spin==2) cycle
        !
        ! *******************************
@@ -108,8 +113,8 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
          ! 1)  3D
          if(l_3d) then
            do i1=1,3
-             x_tmp(:,i1)=x_cc(:,i1,1)**power*WF%c(:,1,ifft)
-             x_med(i1) = Vstar_dot_V(fft_size,WF%c(:,1,ifft),x_tmp(:,i1))
+             x_tmp(:,i1)=x_cc(:,i1,1)**power*WF%r(:,1,i_wf)
+             x_med(i1) = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),x_tmp(:,i1))
              x_med(i1) = x_med(i1)**(1._SP/power)
            enddo
            !
@@ -127,14 +132,14 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
          if (l_3d) then
            do ir=1,fft_size
              tmp_var=sqrt((x_cc(ir,1,1)**2+x_cc(ir,2,1)**2))
-             rperp_tmp(ir)=tmp_var**power*WF%c(ir,1,ifft)
-             z_tmp(ir)=x_cc(ir,3,1)**power*WF%c(ir,1,ifft)
-             if ( x_cc(ir,1,1)> 0 ) phi_tmp(ir)=(0.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%c(ir,1,ifft)
-             if ( x_cc(ir,1,1)< 0 ) phi_tmp(ir)=(1.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%c(ir,1,ifft)
+             rperp_tmp(ir)=tmp_var**power*WF%r(ir,1,i_wf)
+             z_tmp(ir)=x_cc(ir,3,1)**power*WF%r(ir,1,i_wf)
+             if ( x_cc(ir,1,1)> 0 ) phi_tmp(ir)=(0.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%r(ir,1,i_wf)
+             if ( x_cc(ir,1,1)< 0 ) phi_tmp(ir)=(1.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%r(ir,1,i_wf)
            enddo
-           rperp_med = Vstar_dot_V(fft_size,WF%c(:,1,ifft),rperp_tmp(:))
-           z_med     = Vstar_dot_V(fft_size,WF%c(:,1,ifft),    z_tmp(:))
-           phi_med   = Vstar_dot_V(fft_size,WF%c(:,1,ifft),  phi_tmp(:))
+           rperp_med = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),rperp_tmp(:))
+           z_med     = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),    z_tmp(:))
+           phi_med   = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),  phi_tmp(:))
            rperp_med = rperp_med**(1./power)
            z_med     = z_med**(1./power)
            AB_dev    = (1.-6.41**2*rperp_med**(-2))  ! 5.92 (8,0)  6.41 (5,5)  10.36  (14,0)
@@ -144,8 +149,8 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
          endif
          ! 2)  2D
          if(l_2d) then
-           wf_tmp_2d(:,:,:)=reshape(WF%c(:,1,ifft),(/fft_dim(1),fft_dim(2),fft_dim(3)/))
-           rperp_tmp_2d(:,:,:)=reshape(sqrt((x_cc(:,1,1)**2+x_cc(:,2,1)**2))**power*WF%c(:,1,ifft),&
+           wf_tmp_2d(:,:,:)=reshape(WF%r(:,1,i_wf),(/fft_dim(1),fft_dim(2),fft_dim(3)/))
+           rperp_tmp_2d(:,:,:)=reshape(sqrt((x_cc(:,1,1)**2+x_cc(:,2,1)**2))**power*WF%r(:,1,i_wf),&
 &                                   (/fft_dim(1),fft_dim(2),fft_dim(3)/))
            rperp_med=(0.,0.)
            do ir3 = 0, fft_dim(3)-1         ! z 
@@ -165,15 +170,15 @@ subroutine electrons_position(nkpt_pos,nband_pos,power,what,format)
        if(l_spherical) then
          do ir=1,fft_size
            tmp_var=sqrt((x_cc(ir,1,1)**2+x_cc(ir,2,1)**2+x_cc(ir,3,1)**2))
-           r_tmp(ir)=tmp_var**power*WF%c(ir,1,ifft)
+           r_tmp(ir)=tmp_var**power*WF%r(ir,1,i_wf)
            tmp_var=sqrt(x_cc(ir,1,1)**2+x_cc(ir,2,1)**2)
-           theta_tmp(ir)=atan(x_cc(ir,3,1)/tmp_var)*WF%c(ir,1,ifft)
-           if ( x_cc(ir,1,1)> 0 ) psi_tmp(ir)=(0.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%c(ir,1,ifft)
-           if ( x_cc(ir,1,1)< 0 ) psi_tmp(ir)=(1.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%c(ir,1,ifft)
+           theta_tmp(ir)=atan(x_cc(ir,3,1)/tmp_var)*WF%r(ir,1,i_wf)
+           if ( x_cc(ir,1,1)> 0 ) psi_tmp(ir)=(0.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%r(ir,1,i_wf)
+           if ( x_cc(ir,1,1)< 0 ) psi_tmp(ir)=(1.5*pi+atan(x_cc(ir,2,1)/x_cc(ir,1,1)))*WF%r(ir,1,i_wf)
          enddo
-         r_med = Vstar_dot_V(fft_size,WF%c(:,1,ifft),rperp_tmp(:))
-         theta_med = Vstar_dot_V(fft_size,WF%c(:,1,ifft),theta_tmp(:))
-         psi_med   = Vstar_dot_V(fft_size,WF%c(:,1,ifft),  psi_tmp(:))
+         r_med     = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),rperp_tmp(:))
+         theta_med = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),theta_tmp(:))
+         psi_med   = Vstar_dot_V(fft_size,cmplx(WF%r(:,1,i_wf),kind=SP),  psi_tmp(:))
          r_med=r_med**(1./power)
        endif
        !
diff --git a/ypp/electrons/electrons_spin_factors.F b/ypp/electrons/electrons_spin_factors.F
index 7d8738113c..d731a84bb9 100644
--- a/ypp/electrons/electrons_spin_factors.F
+++ b/ypp/electrons/electrons_spin_factors.F
@@ -40,8 +40,8 @@ subroutine electrons_spin_factors(kpts,bands,spin_factors)
    call WF_load(WF,0,1,(/ib_ref,ib_ref/),(/ik_ref,ik_ref/),title='-Spinor Factor',impose_free_and_alloc=.TRUE.)
    !
    i_wf=WF%index(ib_ref,ik_ref,1)
-   v(1)=real(Vstar_dot_V_omp( fft_size, WF%c(:,1,i_wf ), WF%c(:,1,i_wf) ),SP)
-   v(2)=real(Vstar_dot_V_omp( fft_size, WF%c(:,2,i_wf ), WF%c(:,2,i_wf) ),SP)
+   v(1)=real(Vstar_dot_V_omp( fft_size, WF%r(:,1,i_wf ), WF%r(:,1,i_wf) ),SP)
+   v(2)=real(Vstar_dot_V_omp( fft_size, WF%r(:,2,i_wf ), WF%r(:,2,i_wf) ),SP)
    !
    call WF_free(WF)
    !
@@ -71,8 +71,8 @@ subroutine electrons_spin_factors(kpts,bands,spin_factors)
    do ib=bands(1),bands(2)
      if( .not. px%element_2D(ik-kpts(1)+1,ib-bands(1)+1) ) cycle
      i_wf=WF%index(ib,ik,1)
-     v(1)=real(Vstar_dot_V_omp( fft_size, WF%c(:,1,i_wf), WF%c(:,1,i_wf) ),SP)
-     v(2)=real(Vstar_dot_V_omp( fft_size, WF%c(:,2,i_wf), WF%c(:,2,i_wf) ),SP)
+     v(1)=real(Vstar_dot_V_omp( fft_size, WF%r(:,1,i_wf), WF%r(:,1,i_wf) ),SP)
+     v(2)=real(Vstar_dot_V_omp( fft_size, WF%r(:,2,i_wf), WF%r(:,2,i_wf) ),SP)
      spin_factors(ib,ik,1)=abs(dot_product(v,ref_dir(:,1)))
      spin_factors(ib,ik,2)=abs(dot_product(v,ref_dir(:,2)))
      call live_timing(steps=1)
diff --git a/ypp/excitons/.objects b/ypp/excitons/.objects
index 1476b77067..20b75329f7 100644
--- a/ypp/excitons/.objects
+++ b/ypp/excitons/.objects
@@ -1,6 +1,6 @@
 #if defined _YPP_ELPH
 ELPH_objs=excitons_ph_ass_dos.o
 #endif
-objs = excitons_driver.o excitons_sort_and_report.o excitons_find_degeneracies.o \
+objs = excitons_driver.o excitons_get_user_states.o excitons_sort_and_report.o excitons_find_degeneracies.o \
        excitons_spin.o excitons_read.o excitons_interpolate_setup.o excitons_bands.o $(ELPH_objs) \
        excitons_amplitudes.o excitons_WFs.o excitons_degeneracy_average.o excitons_kinematics.o
diff --git a/ypp/excitons/DOUBLE_project.dep b/ypp/excitons/DOUBLE_project.dep
index 8c341417f7..970c87bfee 100644
--- a/ypp/excitons/DOUBLE_project.dep
+++ b/ypp/excitons/DOUBLE_project.dep
@@ -4,6 +4,7 @@
  excitons_degeneracy_average.o
  excitons_driver.o
  excitons_find_degeneracies.o
+ excitons_get_user_states.o
  excitons_interpolate_setup.o
  excitons_kinematics.o
  excitons_ph_ass_dos.o
diff --git a/ypp/excitons/YPP_ELPH_project.dep b/ypp/excitons/YPP_ELPH_project.dep
index 73a76b2b29..76e2252e86 100644
--- a/ypp/excitons/YPP_ELPH_project.dep
+++ b/ypp/excitons/YPP_ELPH_project.dep
@@ -1,3 +1,5 @@
+ excitons_bands.o
  excitons_driver.o
+ excitons_interpolate_setup.o
  excitons_ph_ass_dos.o
 
diff --git a/ypp/excitons/excitons_WFs.F b/ypp/excitons/excitons_WFs.F
index 113472e187..35ac2122b8 100644
--- a/ypp/excitons/excitons_WFs.F
+++ b/ypp/excitons/excitons_WFs.F
@@ -5,32 +5,37 @@
 !
 ! Authors (see AUTHORS file for details): AM DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine exciton_WFs(Xk,BS_E_degs,iq)
  !
- use pars,          ONLY:SP,cZERO,pi
+ use pars,          ONLY:SP,cZERO,pi,schlen
  use R_lattice,     ONLY:bz_samp,qindx_X
  use units,         ONLY:BO2ANG
  use stderr,        ONLY:intc
  use D_lattice,     ONLY:a,alat
- use FFT_m,         ONLY:fft_dim
+ use FFT_m,         ONLY:fft_dim,fft_size
  use com,           ONLY:msg,of_open_close
- use electrons,     ONLY:spin,n_spinor
- use YPPm,          ONLY:v2plot,ncell,r_hole,l_free_hole,AVE_exc,&
-&                        nr,nr_tot,output_fname,l_average_eh,&
+ use electrons,     ONLY:n_spinor,n_sp_pol
+ use YPPm,          ONLY:v2plot,ncell,r_fixed,l_free_hole,AVE_exc,center_kind,&
+&                        nr,nr_tot,output_fname,l_average_eh,elec_spin,hole_spin,&
 &                        use_xcrysden,use_gnuplot,use_cube,plot_dim,&
 &                        EXCITONS_user_indexes,EXCITONS_n_user_states
- use BS_solvers,    ONLY:BSS_eh_table,BS_mat,BSS_desc,BSS_n_eig
+ use BS_solvers,    ONLY:BSS_eh_table,BSS_eh_table_m1,BS_mat,BSS_desc,BSS_n_eig
  use BS,            ONLY:BS_bands,BS_H_dim
  use interfaces,    ONLY:WF_load,WF_free,DESC_write
- use wave_func,     ONLY:WF,WF_symm,wf_ng
+ use wave_func,     ONLY:WF,wf_ng
  use parallel_m,    ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int,  ONLY:PP_wait,PP_redux_wait,PARALLEL_index
  use openmp,        ONLY:OPENMP_update,master_thread
  use LIVE_t,        ONLY:live_timing
  use wrapper,       ONLY:V_dot_V
  use vec_operate,   ONLY:c2a
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)     :: Xk
  integer           :: BS_E_degs(BSS_n_eig),iq
@@ -38,14 +43,18 @@ subroutine exciton_WFs(Xk,BS_E_degs,iq)
  ! Workspace
  !
  type(PP_indexes)  :: px
- integer           :: iv,ic,ikbz,ikibz,is,neh,i1,i2,i_l,i_lambda,j_lambda,i_sp_pol,ir,ir_hole,j1,&
-&                     n_lambda_deg,neh1,iv1,ic1,ikbz1,ikibz1,i_sp_pol1,is1
- integer           :: ikpbz,ikp,isp
- real(SP)          :: r_hole_rlu(3),kp_dot_r_h,k_dot_r_e
- complex(SP)       :: wf_,wf_vc(BS_H_dim),WF1_ir(n_spinor),WF2_ir(n_spinor),phase,B_coeff
+ logical           :: fixed_elec_plot,fixed_hole_plot
+ integer           :: iv,ic,ikbz,ikibz,is,neh,i1,i2,i_lambda,j_lambda,i_sp_c,i_sp_v,ir,ir_fixed,j1,&
+&                     n_lambda_deg,BS_E_deg_done,neh1,iv1,ic1,ikbz1,ikibz1,i_sp_c1,i_sp_v1,is1,&
+&                     i_spin_hole,i_spin_elec,n1_lambda,n2_lambda
+ integer           :: ikpbz,ikp,isp,i_spinor,j_spinor,rh_size,ir_fixed_rlu(3)
+ character(schlen) :: exc_index_msg
+ real(SP)          :: r_fixed_rlu(3),k1(3),k2(3)
+ complex(SP)       :: B_coeff
  !
  integer,  allocatable :: rindex(:)
- real(SP), allocatable :: r_cell_cc(:,:)
+ real(SP), allocatable :: r_cell_cc(:,:),kp_dot_r_h(:),k_dot_r_e(:),alpha(:)
+ complex(SP), allocatable :: wf_tmp(:,:,:),wf_tmp_uc(:,:,:),wf_vc(:,:,:),WF1(:,:),WF2(:,:),phase(:)
  !
  call section('+','Excitonic Wave Function')
  !
@@ -59,6 +68,11 @@ subroutine exciton_WFs(Xk,BS_E_degs,iq)
  !
  if (l_free_hole.or.l_average_eh) ncell=1
  !
+ fixed_hole_plot=trim(center_kind)=="Hole"
+ fixed_elec_plot=trim(center_kind)=="Electron"
+ !
+ if (.not.(fixed_hole_plot.or.fixed_elec_plot)) call error("Wrong CenterKind in input")
+ !
  call WF_load(WF,wf_ng,1,BS_bands,(/1,Xk%nibz/),space='R',title='-EXCWF')
  !
  call expand_grid()
@@ -72,211 +86,429 @@ subroutine exciton_WFs(Xk,BS_E_degs,iq)
    ! 
    ! [1] Bare position pushed in the smallest cell 
    ! 
-   call c2a(b_in=a,v_in=r_hole,v_out=r_hole_rlu,mode='kc2a')
+   call c2a(b_in=a,v_in=r_fixed,v_out=r_fixed_rlu,mode='kc2a')
    do j1=1,3
-     r_hole_rlu(j1)=r_hole_rlu(j1)-int(r_hole_rlu(j1))
+     r_fixed_rlu(j1)=r_fixed_rlu(j1)-int(r_fixed_rlu(j1))
    enddo
-   call c2a(b_in=a,v_in=r_hole_rlu,v_out=r_hole,mode='ka2c')
-   call msg('s',' Hole position in the DL cell      ',r_hole,"[c.c.]")
+   call c2a(b_in=a,v_in=r_fixed_rlu,v_out=r_fixed,mode='ka2c')
+   call msg('s',' '//center_kind(1:4)//' position in the DL cell      ',r_fixed,"[c.c.]")
    !
    ! [2] Bare position in the FFT grid
    !
-   call c2a(b_in=a,v_in=r_hole,v_out=r_hole_rlu,mode='kc2a')
-   r_hole_rlu(:)=nint(r_hole_rlu(:)*fft_dim(:))
-   ir_hole=1+r_hole_rlu(1)+ r_hole_rlu(2)*nr(1)+ r_hole_rlu(3)*nr(1)*nr(2)
-   if (ir_hole.lt.0) call error(' Hole coordinates in input must be positive') 
-   call msg('s','      position in the FFT grid     ',r_cell_cc(:,ir_hole),"[c.c.]")
+   call c2a(b_in=a,v_in=r_fixed,v_out=r_fixed_rlu,mode='kc2a')
+   ir_fixed_rlu(:)=nint(r_fixed_rlu(:)*fft_dim(:))
+   ir_fixed=1+r_fixed_rlu(1)+ r_fixed_rlu(2)*nr(1)+ r_fixed_rlu(3)*nr(1)*nr(2)
+   if (ir_fixed.lt.0) call error(' Fixed '//trim(center_kind)//' coordinates in input must be positive') 
+   call msg('s','      position in the FFT grid     ',r_cell_cc(ir_fixed,:),"[c.c.]")
    !
    ! [3] Translation in the center of the Big grid
    !
    do j1=1,3
      if (ncell(j1)==1) cycle
      !
-     ! (***) Daniele 15/7/07 rev Andrea 12/07:
-     ! 
      ! The number of cells is always odd so that the hole can
      ! be placed in the middle.
      !
-     r_hole_rlu(j1)=r_hole_rlu(j1)+ncell(j1)/2*fft_dim(j1)
+     ir_fixed_rlu(j1)=ir_fixed_rlu(j1)+ncell(j1)/2*fft_dim(j1)
      !
    enddo
-   ir_hole=1+r_hole_rlu(1)+ r_hole_rlu(2)*nr(1)+ r_hole_rlu(3)*nr(1)*nr(2)
-   r_hole=r_cell_cc(:,ir_hole)
+   ir_fixed=1+ir_fixed_rlu(1)+ ir_fixed_rlu(2)*nr(1)+ ir_fixed_rlu(3)*nr(1)*nr(2)
+   r_fixed=r_cell_cc(ir_fixed,:)
    !
-   call msg('s','      translated position          ',r_hole,"[c.c.]")
-   call msg('s','                                   ',r_hole*BO2ANG,"[A]")
+   call msg('s','      translated position          ',r_fixed,"[c.c.]")
+   call msg('s','                                   ',r_fixed*BO2ANG,"[A]")
    !
  endif
  !
+ i_spin_hole=-1
+ if(trim(hole_spin)=="up")      i_spin_hole=1
+ if(trim(hole_spin)=="dn")      i_spin_hole=2
+ if(trim(hole_spin)=="average") i_spin_hole=0
+ if(trim(hole_spin)=="diag")    i_spin_hole=-10
+ !
+ i_spin_elec=-1
+ if(trim(elec_spin)=="up")      i_spin_elec=1
+ if(trim(elec_spin)=="dn")      i_spin_elec=2
+ if(trim(elec_spin)=="average") i_spin_elec=0
+ if(trim(elec_spin)=="diag")    i_spin_elec=-10
+ !
+ if (i_spin_elec==-1) call error(" wrong electron spin in input")
+ if (i_spin_hole==-1) call error(" wrong hole spin in input")
+ !
+ ! DS note: to change this for magnons
+ if (n_sp_pol==2 .and. (i_spin_elec+i_spin_hole)==3) then
+   call error("Spin polarized case: electron and hole cannot have opposite spin")
+ endif
+ !
+ if (n_spinor==2 .and. (i_spin_elec+i_spin_hole)<0) then
+   call warning("Spinorial case: diagonal approximation in spin space")
+   i_spin_elec=-10
+   i_spin_hole=-10
+ endif
+ !
+ if (l_free_hole) then
+   call warning("Free hole case, imposing elec spin to be identical to hole spin")
+   i_spin_elec=i_spin_hole
+ endif
+ !
  ! Allocation
  !
  YAMBO_ALLOC(v2plot,(nr_tot))
  !
+ YAMBO_ALLOC(WF1,(fft_size,n_spinor))
+ YAMBO_ALLOC(WF2,(fft_size,n_spinor))
+ !
+ if(.not.l_average_eh) then
+   rh_size=1
+   if (l_free_hole) rh_size=nr_tot
+   YAMBO_ALLOC(k_dot_r_e,(nr_tot))
+   YAMBO_ALLOC(kp_dot_r_h,(rh_size))
+   !
+   i1=n_spinor
+   if(l_free_hole) i1=1
+   YAMBO_ALLOC(wf_vc,(fft_size,n_spinor,i1))
+   YAMBO_ALLOC(wf_tmp_uc,(fft_size,n_spinor,i1))
+   YAMBO_ALLOC(alpha,(nr_tot))
+   YAMBO_ALLOC(phase,(nr_tot))
+   YAMBO_ALLOC(wf_tmp,(nr_tot,n_spinor,i1))
+ endif
+ !
+ call build_inverse_BS_eh_table_from_BSS_table(Xk%nbz)
+ !
  ! Par Proc
  !
- call PP_indexes_reset(px)
- call PARALLEL_index(px,(/nr_tot/))
- call PP_wait()
+ if(l_average_eh.or.Xk%nbz==1) then
+   call PP_indexes_reset(px)
+   call PARALLEL_index(px,(/BS_H_dim/))
+   call PP_wait()
+ else
+   call PP_indexes_reset(px)
+   call PARALLEL_index(px,(/Xk%nbz/))
+   call PP_wait()
+ endif
  !
  ! Loop on exc states
  !===================
  !
  call msg('s','Processing '//trim(intc(EXCITONS_n_user_states))//' states')
  !
+ BS_E_deg_done=-1
+ !
  do i_lambda=1,EXCITONS_n_user_states
    !
    j_lambda=EXCITONS_user_indexes(i_lambda)
    !
-   n_lambda_deg=count(BS_E_degs==BS_E_degs(j_lambda))
+   if (BS_E_degs(j_lambda)/=BS_E_deg_done) i1=0
    !
-   if (n_lambda_deg>1) call msg('s',' State '//trim(intc(j_lambda))//' Merged with states '//&
-&                                   trim(intc(BS_E_degs(j_lambda)))//' -> '//&
-&                                   trim(intc(BS_E_degs(j_lambda)+n_lambda_deg-1)))
+   BS_E_deg_done=BS_E_degs(j_lambda)
    !
-   call live_timing('ExcWF@'//trim(intc(j_lambda)),px%n_of_elements(myid+1))  
+   i1=i1+1
    !
-   v2plot  =0._SP
+   n_lambda_deg=count(BS_E_degs==BS_E_degs(j_lambda))
+   !
+   n1_lambda=BS_E_degs(j_lambda)
+   n2_lambda=BS_E_degs(j_lambda)+n_lambda_deg-1
    !
    if(l_average_eh) then
      !
-     do ir=1,nr_tot
+     if(i1==1) v2plot  =0._SP
+     !
+     call live_timing('ExcWF@'//trim(intc(j_lambda)),px%n_of_elements(myid+1))  
+     !
+     do neh = 1,BS_H_dim
+       !
+       if (.not.px%element_1D(neh)) cycle
+       !
+       ikbz  = BSS_eh_table(neh,1)
+       iv    = BSS_eh_table(neh,2)
+       ic    = BSS_eh_table(neh,3)
+       i_sp_c= BSS_eh_table(neh,4)
+       i_sp_v= BSS_eh_table(neh,5)
        !
-       if (.not.px%element_1D(ir)) cycle
+       ikibz = Xk%sstar(ikbz,1)
+       is    = Xk%sstar(ikbz,2)
        !
-       do i_l=BS_E_degs(j_lambda),BS_E_degs(j_lambda)+n_lambda_deg-1
+       do neh1 = 1,BS_H_dim
+         !
+         ikbz1  = BSS_eh_table(neh1,1)
+         iv1    = BSS_eh_table(neh1,2)
+         ic1    = BSS_eh_table(neh1,3)
+         i_sp_c1= BSS_eh_table(neh1,4)
+         i_sp_v1= BSS_eh_table(neh1,5)
+         !
+         ikibz1 = Xk%sstar(ikbz1,1)
+         is1    = Xk%sstar(ikbz1,2)
          !
-         do neh = 1,BS_H_dim
+         if(ikbz/=ikbz1) cycle
+         !      
+         ! Build the B_{vvk} (or B_{cck} ) matrix        
+         !
+         if (ic1==ic.and.trim(AVE_exc)=='h') then
+           !
+           if (n_sp_pol==2 .and. i_spin_hole>0) then
+             if (i_sp_v /=i_spin_hole) cycle
+             if (i_sp_v1/=i_spin_hole) cycle
+           endif
            !
-           ikbz  = BSS_eh_table(neh,1)
-           iv    = BSS_eh_table(neh,2)
-           ic    = BSS_eh_table(neh,3)
-           i_sp_pol = spin(BSS_eh_table(neh,:))
+           call WF_apply_symm((/iv, ikibz, is, i_sp_v /),WF1)
+           call WF_apply_symm((/iv1,ikibz1,is1,i_sp_v1/),WF2)
            !
-           ikibz = Xk%sstar(ikbz,1)
-           is    = Xk%sstar(ikbz,2)
+           B_coeff = BS_mat(neh, j_lambda)* conjg(BS_mat(neh1, j_lambda))
            !
-           do neh1 = 1,BS_H_dim
-             !
-             ikbz1  = BSS_eh_table(neh1,1)
-             iv1    = BSS_eh_table(neh1,2)
-             ic1    = BSS_eh_table(neh1,3)
-             i_sp_pol1 = spin(BSS_eh_table(neh1,:))
-             !
-             ikibz1 = Xk%sstar(ikbz1,1)
-             is1    = Xk%sstar(ikbz1,2)
-             !
-             if(ikbz==ikbz1) then
-               !      
-               ! Build the B_{vvk} (or B_{cck} ) matrix        
-               !
-               if (ic1==ic.and.trim(AVE_exc)=='h') then
-                 !      
-                 B_coeff =  BS_mat(neh, i_l)* conjg(BS_mat(neh1, i_l))   
-                 !
-                 WF1_ir=WF_symm(rindex(ir),(/iv, ikibz,is,i_sp_pol/))
-                 WF2_ir=WF_symm(rindex(ir),(/iv1,ikibz1,is1,i_sp_pol1/))
-                 !
-                 v2plot(ir) = v2plot(ir)+B_coeff*WF1_ir(1)*conjg(WF2_ir(1))
-                 if(n_spinor==2) v2plot(ir) = v2plot(ir)+B_coeff*WF1_ir(2)*conjg(WF2_ir(2))
-                 !
-               elseif(iv1==iv.and.trim(AVE_exc)=='e') then
-                 !      
-                 B_coeff =  conjg(BS_mat(neh, i_l))*BS_mat(neh1, i_l)   
-                 !
-                 WF1_ir=WF_symm(rindex(ir),(/ic, ikibz,is,i_sp_pol/))
-                 WF2_ir=WF_symm(rindex(ir),(/ic1,ikibz1,is1,i_sp_pol1/))
-                 !
-                 v2plot(ir) = v2plot(ir)+B_coeff*conjg(WF1_ir(1))*WF2_ir(1)
-                 if(n_spinor==2) v2plot(ir) = v2plot(ir)+B_coeff*conjg(WF1_ir(2))*WF2_ir(2)
-                 !
+           do i_spinor=1,n_spinor
+             do j_spinor=1,n_spinor
+               if (n_spinor==2 .and. i_spin_hole>0) then
+                 if(i_spinor /= i_spin_hole) cycle
+                 if(j_spinor /= i_spin_hole) cycle
+                 if(i_spinor /= j_spinor .and. i_spin_hole==-10) cycle
                endif
-               !
-             endif
-             !
-           enddo ! loop on neh1
+               v2plot(:) = v2plot(:)+B_coeff*WF1(:,i_spinor)*conjg(WF2(:,i_spinor))
+             enddo
+           enddo
+           !
+         elseif(iv1==iv.and.trim(AVE_exc)=='e') then
+           !
+           if (n_sp_pol==2 .and. i_spin_elec>0) then
+             if (i_sp_c /=i_spin_elec) cycle
+             if (i_sp_c1/=i_spin_elec) cycle
+           endif
+           !
+           call WF_apply_symm((/ic, ikibz, is, i_sp_c /),WF1)
+           call WF_apply_symm((/ic1,ikibz1,is1,i_sp_c1/),WF2)
+           !
+           B_coeff = conjg(BS_mat(neh,j_lambda))* BS_mat(neh1,j_lambda)
            !
-         enddo ! loop on neh
+           do i_spinor=1,n_spinor
+             do j_spinor=1,n_spinor
+               if (n_spinor==2) then
+                 if(i_spinor /= i_spin_elec .and. i_spin_elec>0) cycle
+                 if(j_spinor /= i_spin_elec .and. i_spin_elec>0) cycle
+                 if(i_spinor /= j_spinor .and. i_spin_elec==-10) cycle
+               endif
+               v2plot(:) = v2plot(:)+B_coeff*conjg(WF1(:,i_spinor))*WF2(:,j_spinor)
+             enddo
+           enddo
+           !
+         endif
          !
-       enddo ! loop on degenerate excitons
+       enddo ! loop on neh1
        !
-       if (master_thread) call live_timing(steps=1)
+       call live_timing(steps=1)
        !
-     enddo ! loop on i_r 
+     enddo ! loop on neh
+     !
+     call live_timing()
+     !
+     call PP_redux_wait(v2plot)
      !
    else
      !   
-     !$omp parallel default(shared), firstprivate(ir_hole,r_hole), &
-     !$omp private(wf_vc, ir,neh, ikbz,iv,ic,i_sp_pol,ikibz,is, ikpbz,ikp,isp, &
-     !$omp &       k_dot_r_e,kp_dot_r_h, WF1_ir,WF2_ir,phase, i_l,wf_ )
+     !
+     call live_timing('ExcWF@'//trim(intc(j_lambda)),px%n_of_elements(myid+1))
+     !
+     wf_tmp= cZERO
+     !
+     !$omp parallel default(shared), &
+     !$omp private(k_dot_r_e,kp_dot_r_h,alpha,phase,ir,neh,&
+     !$omp         wf_tmp_uc,WF1,WF2,wf_vc,ikibz,is,ikpbz,ikp,isp,k1,k2,&
+     !$omp         ikbz,iv,ic,i_sp_c,i_sp_v,i_spinor,j_spinor), &
+     !$omp reduction(+:wf_tmp)
+     !
+     WF1 = cZERO
+     WF2 = cZERO
      !
      call OPENMP_update(master_thread)
      !
-     !$omp do schedule(dynamic)
-     do ir=1,nr_tot
+     do ikbz  = 1,Xk%nbz
+       !
+       if (Xk%nbz>1) then
+         if (.not.px%element_1D(ikbz)) cycle
+       endif
+       !
+       ikibz = Xk%sstar(ikbz,1)
+       is    = Xk%sstar(ikbz,2)
+       !
+       ikpbz = qindx_X(iq,ikbz,1)
+       ikp   = Xk%sstar(ikpbz,1)
+       isp   = Xk%sstar(ikpbz,2)
+       !
+       k1=Xk%ptbz(ikbz,:)/alat(:)*2._SP*pi
+       k2=Xk%ptbz(ikpbz,:)/alat(:)*2._SP*pi
+       !
+       wf_tmp_uc= cZERO
+       !
+     do iv    = BS_bands(1),BS_bands(2)
+     do ic    = BS_bands(1),BS_bands(2)
+     do i_sp_c= 1,n_sp_pol
+     do i_sp_v= 1,n_sp_pol
+       !
+       neh=BSS_eh_table_m1(ikbz,iv,ic,i_sp_c,i_sp_v)
+       if (neh==0) cycle
        !
-       if (.not.px%element_1D(ir)) cycle
-       wf_vc   =cZERO
+       if (Xk%nbz==1) then
+         if (.not.px%element_1D(neh)) cycle
+       endif
+       !
+       if (n_sp_pol==2 .and. i_spin_hole>0 .and. i_sp_v /=i_spin_hole) cycle
+       !
+       call WF_apply_symm((/iv,ikp,  isp,i_sp_v/),WF1)
+       call WF_apply_symm((/ic,ikibz,is, i_sp_c/),WF2)
        !
+       ! All the following operations are only up to fft_size
+       wf_vc=cZERO
        if (l_free_hole) then
-         ir_hole=ir
-         r_hole=r_cell_cc(:,ir)
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           wf_vc(:,i_spinor,1) = wf_vc(:,i_spinor,1) + conjg(WF1(:,i_spinor))*WF2(:,i_spinor)
+         enddo
+       else if (fixed_hole_plot) then
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           do j_spinor=1,n_spinor
+             if (n_spinor==2 .and. i_spin_elec>0 .and. j_spinor /= i_spin_elec) cycle
+             if (n_spinor==2 .and. i_spin_elec==-10 .and. i_spinor/= j_spinor) cycle
+             wf_vc(:,i_spinor,j_spinor) = wf_vc(:,i_spinor,j_spinor) + &
+             &      conjg(WF1(rindex(ir_fixed),i_spinor))*WF2(:,j_spinor)
+           enddo
+         enddo
+       else if (fixed_elec_plot) then
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           do j_spinor=1,n_spinor
+             if (n_spinor==2 .and. i_spin_elec>0 .and. j_spinor /= i_spin_elec) cycle
+             if (n_spinor==2 .and. i_spin_elec==-10 .and. i_spinor/= j_spinor) cycle
+             wf_vc(:,i_spinor,j_spinor) = wf_vc(:,i_spinor,j_spinor) + &
+             &      conjg(WF1(:,i_spinor))*WF2(rindex(ir_fixed),j_spinor)
+           enddo
+         enddo
        endif
        !
-       do neh = 1,BS_H_dim
-         !
-         ikbz  = BSS_eh_table(neh,1)
-         iv    = BSS_eh_table(neh,2)
-         ic    = BSS_eh_table(neh,3)
-         i_sp_pol = spin(BSS_eh_table(neh,:))
+       if (l_free_hole) then
          !
-         ikibz = Xk%sstar(ikbz,1)
-         is    = Xk%sstar(ikbz,2)
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           wf_tmp_uc(:,i_spinor,i_spinor) = wf_tmp_uc(:,i_spinor,i_spinor)+  &
+             &  BS_mat(neh,j_lambda)*wf_vc(:,i_spinor,1)
+         enddo
          !
-         ikpbz = qindx_X(iq,ikbz,1)
-         ikp   = Xk%sstar(ikpbz,1)
-         isp   = Xk%sstar(ikpbz,2)
+       else
          !
-         k_dot_r_e  = dot_product(r_cell_cc(:,ir),Xk%ptbz(ikbz,:)/alat(:))*2._SP*pi
-         kp_dot_r_h = dot_product(r_hole,Xk%ptbz(ikpbz,:)/alat(:))*2._SP*pi
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           do j_spinor=1,n_spinor
+             if (n_spinor==2 .and. i_spin_elec>0 .and. j_spinor /= i_spin_elec) cycle
+             if (n_spinor==2 .and. i_spin_elec==-10 .and. i_spinor/= j_spinor) cycle
+             wf_tmp_uc(:,i_spinor,j_spinor) = wf_tmp_uc(:,i_spinor,j_spinor)+&
+                &  BS_mat(neh,j_lambda)*wf_vc(:,i_spinor,j_spinor)
+           enddo
+         enddo
          !
-         WF1_ir=WF_symm(rindex(ir_hole),(/iv,ikp,isp,i_sp_pol/))
-         WF2_ir=WF_symm(rindex(ir     ),(/ic,ikibz,is,i_sp_pol/))
+       endif
+       !
+       if (master_thread.and.Xk%nbz==1) call live_timing(steps=1)
+       !
+     enddo
+     enddo
+     enddo
+     enddo
+       !
+       if (l_free_hole) then
          !
-         wf_vc(neh) = conjg(WF1_ir(1))*WF2_ir(1)
-         if(n_spinor==2) wf_vc(neh) = wf_vc(neh) + conjg(WF1_ir(2))*WF2_ir(2)
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           !$omp do
+           do ir=1,nr_tot
+             k_dot_r_e(ir) =r_cell_cc(ir,1)*k1(1)+r_cell_cc(ir,2)*k1(2)+r_cell_cc(ir,3)*k1(3)
+             kp_dot_r_h(ir)=r_cell_cc(ir,1)*k2(1)+r_cell_cc(ir,2)*k2(2)+r_cell_cc(ir,3)*k2(3)
+             alpha(ir)=k_dot_r_e(ir)-kp_dot_r_h(ir)
+             phase(ir)=cmplx(cos(alpha(ir)),sin(alpha(ir)),kind=SP)
+             wf_tmp(ir,i_spinor,i_spinor) = wf_tmp(ir,i_spinor,i_spinor)+  &
+             &  wf_tmp_uc(rindex(ir),i_spinor,1)*phase(ir)
+           enddo
+         enddo
          !
-         phase=cmplx(cos(k_dot_r_e-kp_dot_r_h),sin(k_dot_r_e-kp_dot_r_h),kind=SP)
-         wf_vc(neh) = wf_vc(neh)*phase
+       else if (fixed_hole_plot) then
          ! 
-       enddo  !Matrix elements
-       !
-       do i_l=BS_E_degs(j_lambda),BS_E_degs(j_lambda)+n_lambda_deg-1
+         kp_dot_r_h(1) = dot_product(r_cell_cc(ir_fixed,:),k2(:))
          !
-         wf_ = V_dot_V(BS_H_dim,BS_mat(:,i_l),wf_vc)
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           do j_spinor=1,n_spinor
+             if (n_spinor==2 .and. i_spin_elec>0 .and. j_spinor /= i_spin_elec) cycle
+             if (n_spinor==2 .and. i_spin_elec==-10 .and. i_spinor/= j_spinor) cycle
+             !$omp do
+             do ir=1,nr_tot
+               k_dot_r_e(ir) =r_cell_cc(ir,1)*k1(1)+r_cell_cc(ir,2)*k1(2)+r_cell_cc(ir,3)*k1(3)
+               alpha(ir)=k_dot_r_e(ir)-kp_dot_r_h(1)
+               phase(ir)=cmplx(cos(alpha(ir)),sin(alpha(ir)),kind=SP)
+               wf_tmp(ir,i_spinor,j_spinor) = wf_tmp(ir,i_spinor,j_spinor)+&
+               &  wf_tmp_uc(rindex(ir),i_spinor,j_spinor)*phase(ir)
+             enddo
+           enddo
+         enddo
          !
-         v2plot(ir) = v2plot(ir)+abs(wf_)**2
+       else if (fixed_elec_plot) then
+         ! 
+         k_dot_r_e(1) = dot_product(r_cell_cc(ir_fixed,:),k1(:))
          !
-       enddo
+         do i_spinor=1,n_spinor
+           if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+           do j_spinor=1,n_spinor
+             if (n_spinor==2 .and. i_spin_elec>0 .and. j_spinor /= i_spin_elec) cycle
+             if (n_spinor==2 .and. i_spin_elec==-10 .and. i_spinor/= j_spinor) cycle
+             !$omp do
+              do ir=1,nr_tot
+                kp_dot_r_h(ir) =r_cell_cc(ir,1)*k2(1)+r_cell_cc(ir,2)*k2(2)+r_cell_cc(ir,3)*k2(3)
+                alpha(ir)=k_dot_r_e(1)-kp_dot_r_h(ir)
+                phase(ir)=cmplx(cos(alpha(ir)),sin(alpha(ir)),kind=SP)
+                wf_tmp(ir,i_spinor,j_spinor) = wf_tmp(ir,i_spinor,j_spinor)+&
+                &  BS_mat(neh,j_lambda)*wf_vc(rindex(ir),i_spinor,j_spinor)*phase(ir)
+             enddo
+           enddo
+         enddo
+         !
+       endif
        !
-       if (master_thread) call live_timing(steps=1)
+       if (master_thread.and.Xk%nbz>1) call live_timing(steps=1)
        !
-     enddo   !grid points
-     !
-     !$omp end do
+     enddo
      !$omp end parallel
      !
+     call live_timing()
+     !
+     call PP_wait()
+     !
+     call PP_redux_wait(wf_tmp)
+     !
+     if (i1==1) v2plot= 0._SP
+     !
+     do i_spinor=1,n_spinor
+       if (n_spinor==2 .and. i_spin_hole>0 .and. i_spinor /= i_spin_hole) cycle
+       do j_spinor=1,n_spinor
+         if (n_spinor==2 .and. i_spin_elec>0 .and. j_spinor /= i_spin_elec) cycle
+         if (n_spinor==2 .and. i_spin_elec==-10 .and. i_spinor/= j_spinor) cycle
+         do ir=1,nr_tot
+           v2plot(ir) = v2plot(ir)+abs(wf_tmp(ir,i_spinor,j_spinor))**2
+         enddo
+       enddo
+     enddo
+     !
    endif
    !
-   call live_timing()
+   if (i1/=n_lambda_deg) cycle
    !
-   call PP_redux_wait(v2plot)
+   if (n_lambda_deg==1) exc_index_msg=trim(intc(j_lambda))
+   if (n_lambda_deg> 1) exc_index_msg=trim(intc(n1_lambda))//'-'//trim(intc(n2_lambda))
    !
+   call msg('s','Plotting state(s) '//trim(exc_index_msg))
    !
    ! PLOT
    !
-   if (use_cube)      output_fname='exc_qpt'//trim(intc(iq))//"_"//trim(intc(plot_dim))//'d_'//trim(intc(j_lambda))//'.cube'
-   if (use_xcrysden)  output_fname='exc_qpt'//trim(intc(iq))//"_"//trim(intc(plot_dim))//'d_'//trim(intc(j_lambda))//'.xsf'
-   if (use_gnuplot)   output_fname='exc_qpt'//trim(intc(iq))//"_"//trim(intc(plot_dim))//'d_'//trim(intc(j_lambda))
+   output_fname='exc_qpt'//trim(intc(iq))//"_"//trim(intc(plot_dim))//'d_'//trim(exc_index_msg)
+   !
+   if (use_cube)      output_fname=trim(output_fname)//'.cube'
+   if (use_xcrysden)  output_fname=trim(output_fname)//'.xsf'
+   if (use_gnuplot)   output_fname=trim(output_fname)
    !
    if (use_cube) then 
      call of_open_close(trim(output_fname),'o')
@@ -297,14 +529,23 @@ subroutine exciton_WFs(Xk,BS_E_degs,iq)
  YAMBO_FREE(r_cell_cc)
  YAMBO_FREE(rindex)
  !
+ YAMBO_FREE( k_dot_r_e)
+ YAMBO_FREE(kp_dot_r_h)
+ !
+ YAMBO_FREE(WF1)
+ YAMBO_FREE(WF2)
+ !
+ YAMBO_FREE(wf_vc)
+ YAMBO_FREE(wf_tmp)
+ YAMBO_FREE(wf_tmp_uc)
+ !
  call WF_free(WF)
  !
  contains
    !
    subroutine expand_grid()
      !
-     use FFT_m,       ONLY:fft_size
-#include<y_memory.h>
+     implicit none
      ! 
      ! Work Space
      !
@@ -327,7 +568,7 @@ subroutine expand_grid()
      enddo
      !
      nr_tot = fft_size*ncell(1)*ncell(2)*ncell(3)
-     YAMBO_ALLOC(r_cell_cc,(3,nr_tot))
+     YAMBO_ALLOC(r_cell_cc,(nr_tot,3))
      YAMBO_ALLOC(rindex,(nr_tot))
      !
      nr=(/ncell(1)*fft_dim(1),ncell(2)*fft_dim(2),ncell(3)*fft_dim(3)/)
@@ -345,7 +586,7 @@ subroutine expand_grid()
            j2=ir2/fft_dim(2)
            j3=ir3/fft_dim(3)
            rindex(ir)=i1+fft_dim(1)*i2+fft_dim(1)*fft_dim(2)*i3+1
-           r_cell_cc(:,ir) = ir1*a(1,:)/fft_dim(1) +&
+           r_cell_cc(ir,:) = ir1*a(1,:)/fft_dim(1) +&
 &                            ir2*a(2,:)/fft_dim(2) +&
 &                            ir3*a(3,:)/fft_dim(3)
          end do
diff --git a/ypp/excitons/excitons_amplitudes.F b/ypp/excitons/excitons_amplitudes.F
index 709186d26f..93bebb453f 100644
--- a/ypp/excitons/excitons_amplitudes.F
+++ b/ypp/excitons/excitons_amplitudes.F
@@ -29,7 +29,7 @@ subroutine excitons_amplitudes(Xk,Xen,q,BS_E_degs,iq)
  !
  type(bz_samp)    :: Xk,q
  type(levels)     :: Xen
- integer          :: BS_E_degs(BSS_n_eig),iq
+ integer          :: BS_E_degs(BSS_n_eig),iq,i_sp_v,i_sp_c
  !
  ! Workspace
  integer          :: j1,j2,i_l,i_l_grp,i_lambda,neh,i_spin,iv,ic,ikbz,ikibz,is,n_lambda_deg,S_indx(BS_H_dim),&
@@ -156,9 +156,10 @@ subroutine excitons_amplitudes(Xk,Xen,q,BS_E_degs,iq)
      ikbz = BSS_eh_table(S_indx(neh),1)
      iv = BSS_eh_table(S_indx(neh),2)
      ic = BSS_eh_table(S_indx(neh),3)
-     i_spin= spin(BSS_eh_table(S_indx(neh),:))
-     if (i_spin==1) j1= 1
-     if (i_spin==2) j1=-1
+     i_sp_c=BSS_eh_table(S_indx(neh),4)
+     i_sp_v=BSS_eh_table(S_indx(neh),5)
+     if (i_sp_c==1) j1= 1                                               
+     if (i_sp_v==2) j1=-1      
      !
      ikibz = Xk%sstar(ikbz,1)
      is = Xk%sstar(ikbz,2)
@@ -168,7 +169,7 @@ subroutine excitons_amplitudes(Xk,Xen,q,BS_E_degs,iq)
      isp = Xk%sstar(ikpbz,2)
      !
      amp_n_trans=amp_n_trans+1
-     amp_trans(amp_n_trans,1)=Xen%E(ic,ikibz,i_spin)-Xen%E(iv,ikp,i_spin)
+     amp_trans(amp_n_trans,1)=Xen%E(ic,ikibz,i_sp_c)-Xen%E(iv,ikp,i_sp_v)
      amp_trans(amp_n_trans,2)=A_weight(neh)
      !
      if (A_weight(neh)<EXCITON_weight_treshold) cycle
diff --git a/ypp/excitons/excitons_bands.F b/ypp/excitons/excitons_bands.F
index 68f6d82cee..b45702ab3f 100644
--- a/ypp/excitons/excitons_bands.F
+++ b/ypp/excitons/excitons_bands.F
@@ -5,38 +5,47 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h> 
+!
 subroutine excitons_bands(k,Xk,en,Xen,q)
  !
  use pars,          ONLY:SP,schlen,rZERO
  use LIVE_t,        ONLY:live_timing
  use R_lattice,     ONLY:bz_samp
+ use D_lattice,     ONLY:Boltz_Temp
  use interpolate,   ONLY:INTERPOLATION_BZ
- use electrons,     ONLY:levels
- use functions,     ONLY:Fermi_fnc_derivative
+ use electrons,     ONLY:levels,nel_cond
+ use functions,     ONLY:Fermi_fnc_derivative,boltzman_f
  use units,         ONLY:HA2EV
  use YPPm,          ONLY:EXCITONS_user_indexes,EXCITONS_n_user_states,coo_out, &
 &                        BANDS_steps,INTERP_grid,K_transform,CIRCUIT_k,DOS_E_step, &
-&                        DOS_E_steps,DOS_broadening,USER_k
+&                        DOS_E_steps,DOS_broadening,USER_k,l_exc_occ_interp,EXC_min,&
+&                        exc_kind
  use parser_m,      ONLY:parser
  use com,           ONLY:msg,of_open_close
  use stderr,        ONLY:intc
- use vec_operate,   ONLY:iku_v_norm
+ use vec_operate,   ONLY:iku_v_2D_norm,iku_v_norm
  use LIVE_t,        ONLY:live_timing
  use parallel_m,    ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int,  ONLY:PP_redux_wait,PARALLEL_index
-
+#if defined _YPP_ELPH
+ use EXCPH,         ONLY:alphaQ
+#endif
+ use y_memory_alloc
  !
-#include<y_memory.h> 
+ implicit none
  !
  type(bz_samp) ::Xk,k,q
  type(levels)  ::Xen,en
  !
  ! Work space
  !
- integer :: iq,i_c,i_l,ID_INTERP_EXC,i_E
- real(SP), allocatable    :: BSE_interp_E(:,:),values(:)
+ integer :: iq,i_c,i_l,ID_INTERP_EXC,ID_INTERP_OCC,i_E
+ real(SP), allocatable    :: BSE_interp_E(:,:),EXC_occ_interp(:,:),values(:)
  type(bz_samp)            :: GRID_q
- character(schlen)        :: file_name
+ character(schlen)        :: file_name,file_name2
  character(schlen), allocatable :: headings(:)
  logical                  :: BANDS_built_in,GRID_interpolate,l_prt_dos
  real(SP)                 :: tmp_q(3)
@@ -57,10 +66,21 @@ subroutine excitons_bands(k,Xk,en,Xen,q)
  YAMBO_ALLOC(values,(EXCITONS_n_user_states+4))
  allocate(headings(EXCITONS_n_user_states+4))
  !
- ! Read and interpolate excitons dispersion
+ ! Read and interpolate excitons/magnons dispersion
  !
  ID_INTERP_EXC=1
- call excitons_interpolate_setup(k,Xk,en,Xen,q,BS_all_E,ID_INTERP_EXC,.TRUE.)
+ ID_INTERP_OCC=2
+ call excitons_interpolate_setup(k,Xk,en,Xen,q,BS_all_E,ID_INTERP_EXC,ID_INTERP_OCC,.TRUE.)
+ !
+ ! Check exciton occupations
+ !
+ if(Boltz_Temp/=rZERO.and.EXC_min/=rZERO) then
+   call msg('rs','Boltzman occupation for the excitons')
+   if(nel_cond/=rZERO) call error('NelCond /= 0 not compatible with Boltzman occupation ')
+   l_exc_occ_interp=.true.
+ endif
+ !
+ if(nel_cond/=rZERO.and.Boltz_Temp/=rZERO) call warning('Excitonic Occupation, EXCTemp ignored, use ElecTemp!')
  !
  ! Interpolate and write on disk
  !
@@ -68,11 +88,36 @@ subroutine excitons_bands(k,Xk,en,Xen,q)
  !
  if(USER_k%nbz>0.and.BANDS_steps>0)  then
    !
-   call msg('s','Exciton bands')
+   call msg('s',exc_kind//' bands')
    !   
    YAMBO_ALLOC(BSE_interp_E,(EXCITONS_n_user_states,CIRCUIT_k%nibz))      
    call INTERPOLATION_BZ(K=CIRCUIT_k,NK=CIRCUIT_k%nbz,R1D=BSE_interp_E,ID=ID_INTERP_EXC)
    !
+#if defined _YPP_ELPH
+   if(alphaQ/=rZERO) then
+     call msg('rs','Excitonic 2D bands stretching :',alphaQ)
+      do iq=1,CIRCUIT_k%nibz
+        BSE_interp_E(:,iq)=BSE_interp_E(:,iq)+alphaQ*iku_v_2D_norm(CIRCUIT_k%pt(iq,:3))
+      enddo
+   endif
+#endif
+   !
+   if(l_exc_occ_interp) then
+     YAMBO_ALLOC(EXC_occ_interp,(EXCITONS_n_user_states,CIRCUIT_k%nibz))      
+     if(nel_cond/=rZERO) then
+        ! Excitonic occupations are interpolated     
+        call INTERPOLATION_BZ(K=CIRCUIT_k,NK=CIRCUIT_k%nbz,R1D=EXC_occ_interp,ID=ID_INTERP_OCC)
+     else
+        ! Boltzman occupations are directly calculated
+        do iq=1,CIRCUIT_k%nibz
+          do i_c=1,EXCITONS_n_user_states
+            EXC_occ_interp(i_c,iq)=boltzman_f(BSE_interp_E(i_c,iq)-EXC_min)
+          enddo
+        enddo
+        !
+     endif
+   endif
+   !
    call print_interpolated_BANDS()
    !
    if (BANDS_built_in) call print_built_in_BANDS()
@@ -83,7 +128,7 @@ subroutine excitons_bands(k,Xk,en,Xen,q)
  !
  if (GRID_interpolate) then
    !      
-   call msg('s','Exciton Grid')
+   call msg('s',exc_kind//' Grid')
    !
    GRID_q%description="INTERP"
    call INTERPOLATION_grid(GRID_q,'i',INTERP_grid)
@@ -100,7 +145,7 @@ subroutine excitons_bands(k,Xk,en,Xen,q)
  !
  if(l_prt_dos) then
    !      
-   call msg('s','Exciton Density of States')
+   call msg('s',exc_kind//' Density of States')
    !
    call electrons_dos_setup(BSE_interp_E, dos_E, (/1,EXCITONS_n_user_states/))
    !
@@ -137,7 +182,7 @@ subroutine excitons_bands(k,Xk,en,Xen,q)
    subroutine print_DOS()
      implicit none
      ! 
-     file_name="excitons_dos"
+     file_name=trim(exc_kind)//"s_dos"
      call of_open_close(trim(file_name),'ot')
      headings(1)=" E [eV] "
      headings(2)=" Dos    "
@@ -164,11 +209,18 @@ subroutine print_interpolated_BANDS()
      headings(EXCITONS_n_user_states+2:EXCITONS_n_user_states+4)=&
 &           (/"q_x ("//trim(coo_out)//")","q_y ("//trim(coo_out)//")","q_z ("//trim(coo_out)//")"/)
      !
-     file_name="excitons_interpolated"
+     file_name=trim(exc_kind)//"s_interpolated"
      call of_open_close(trim(file_name),'ot')
      call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
      call msg('o '//trim(file_name),"#")
      !
+     if(l_exc_occ_interp) then
+       file_name2="excitons_occupation"
+       call of_open_close(trim(file_name2),'ot')
+       call msg('o '//trim(file_name2),"#",headings,INDENT=0,USE_TABS=.true.)
+       call msg('o '//trim(file_name2),"#")
+     endif
+     !
      values=0._SP
      !
      do iq=1,CIRCUIT_k%nibz
@@ -181,8 +233,16 @@ subroutine print_interpolated_BANDS()
        values(1+1:EXCITONS_n_user_states+1)=BSE_interp_E(:,iq)*HA2EV
        values(EXCITONS_n_user_states+2:EXCITONS_n_user_states+4)=tmp_q
        call msg('o '//trim(file_name),' ',values,INDENT=0,USE_TABS=.true.)
+       !
+       if(l_exc_occ_interp) then
+         values(1+1:EXCITONS_n_user_states+1)=abs(EXC_occ_interp(:,iq))
+         values(EXCITONS_n_user_states+2:EXCITONS_n_user_states+4)=tmp_q
+         call msg('o '//trim(file_name2),' ',values,INDENT=0,USE_TABS=.true.)
+       endif
+       !
      enddo
      call of_open_close(trim(file_name))
+     if(l_exc_occ_interp) call of_open_close(trim(file_name2))
      !
    end subroutine print_interpolated_BANDS
    ! 
@@ -211,7 +271,7 @@ subroutine print_built_in_BANDS()
      headings(EXCITONS_n_user_states+2:EXCITONS_n_user_states+4)=&
 &           (/"q_x ("//trim(coo_out)//")","q_y ("//trim(coo_out)//")","q_z ("//trim(coo_out)//")"/)
      !
-     file_name="excitons_built_in"
+     file_name=trim(exc_kind)//"s_built_in"
      call of_open_close(trim(file_name),'ot')
      call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
      call msg('o '//trim(file_name),"#")
@@ -264,7 +324,7 @@ subroutine print_INTERP_GRID()
      headings(EXCITONS_n_user_states+2:EXCITONS_n_user_states+4)=&
 &           (/"q_x ("//trim(coo_out)//")","q_y ("//trim(coo_out)//")","q_z ("//trim(coo_out)//")"/)
      !
-     file_name="excitons_interpolated_IBZ"
+     file_name=trim(exc_kind)//"s_interpolated_IBZ"
      call of_open_close(trim(file_name),'ot')
      call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
      call msg('o '//trim(file_name),"#")
diff --git a/ypp/excitons/excitons_driver.F b/ypp/excitons/excitons_driver.F
index 2a9c181e6f..69ed4e38d5 100644
--- a/ypp/excitons/excitons_driver.F
+++ b/ypp/excitons/excitons_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DV DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine excitons_driver(k,Xk,en,Xen,q)
  !
  use pars,          ONLY:SP,pi,schlen
@@ -12,8 +16,10 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
  use stderr,        ONLY:intc
  use electrons,     ONLY:levels,spin
  use YPP_interfaces,ONLY:excitons_sort_and_report
- use YPPm,          ONLY:l_sort,l_exc_wf,l_spin,l_amplitude,EXCITONS_user_indexes,BSiq, &
-&                        BS_R_right,BS_E,BS_E_SOC_corr,l_interp,l_dipoles
+ use YPPm,          ONLY:l_magnons,l_sort,l_exc_wf,l_spin,l_amplitude,exc_kind,&
+ &                       EXCITONS_n_user_states,EXCITONS_user_indexes,BSiq, &
+ &                       BS_R_left,BS_R_right,BS_R_left_magn,BS_R_right_magn,&
+ &                       BS_E,BS_E_SOC_corr,l_interp,l_dipoles,MAGNON_Res_threshold
  use BS_solvers,    ONLY:BSS_n_eig
 #if defined _YPP_ELPH
  use YPP_ELPH,      ONLY:l_eliashberg,l_gkkp,l_ph_ass_dos
@@ -21,8 +27,9 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
 #if defined _YPP_RT
  use YPP_real_time, ONLY:l_RT_abs
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::Xk,k,q
  type(levels)  ::Xen,en
@@ -31,6 +38,7 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
  !
  logical              :: l_flag
  integer ,allocatable :: BS_E_degs(:)
+ real(SP),allocatable :: Res(:)
  !... I/0
  integer              :: iq,io_err
  !
@@ -44,10 +52,16 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
  !
  if(.not.l_flag) return
  !
- if(.not.l_interp) call section('*','Excitonic Properties @ Q-index #'//trim(intc(BSiq)))
- if(     l_interp) call section('*','Excitonic Properties')
+ exc_kind="exciton"
+ if (l_magnons) exc_kind="magnon "
  !
- iq = BSiq
+ if(.not.l_interp) then
+   call section('*',exc_kind//'ic Properties @ Q-index #'//trim(intc(BSiq)))
+   iq= BSiq
+ else if(l_interp) then
+   call section('*',exc_kind//'ic Properties')
+   iq = 1
+ endif
  !
  call                excitons_read(k,Xk,en,Xen,BSiq,"check",io_err)
  !
@@ -61,9 +75,13 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
    return
  endif
  !
+ YAMBO_ALLOC(Res,(BSS_n_eig))
+ if (l_magnons)       Res(:)=real(BS_R_left_magn(:,1)*BS_R_right_magn(:,1),SP)
+ if (.not.l_magnons)  Res(:)=real(BS_R_left(:)*BS_R_right(:),SP)
+ !
  ! Sort energies and write to output
  !===================================
- if (l_sort) call excitons_sort_and_report(iq,BS_R_right,BS_E,BS_E_SOC_corr=BS_E_SOC_corr)
+ if (l_sort) call excitons_sort_and_report(iq,Res,BS_E,BS_E_SOC_corr=BS_E_SOC_corr)
  !
 #if defined _YPP_RT
  if (.not.l_RT_abs) then
@@ -72,11 +90,11 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
    ! Sort energies to find degenerate states
    !========================================
    YAMBO_ALLOC(BS_E_degs,(BSS_n_eig))
-   call excitons_find_degeneracies(BS_E,BS_E_degs)
+   call excitons_find_degeneracies(BS_E,BS_E_degs,BSS_n_eig)
    !
    ! Define the USER set of excitonic states
    !=========================================
-   if(.not.l_sort) call get_user_excitonic_states( )
+   if(.not.l_sort) call excitons_get_user_states(BS_E_degs,Res,BSS_n_eig,.false.)
    !
 #if defined _YPP_RT
  endif
@@ -102,7 +120,7 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
  ! 
  ! Exciton's spin
  !=====================
- if (l_spin)       call excitons_spin(Xk,BS_R_right,BS_E,BS_E_degs)
+ if (l_spin)       call excitons_spin(Xk,Res,BS_E,BS_E_degs)
  !
  ! Exciton's Amplitude
  !=====================
@@ -136,95 +154,4 @@ subroutine excitons_driver(k,Xk,en,Xen,q)
  !
  YAMBO_FREE(EXCITONS_user_indexes)
  !
- contains
-   !
-   subroutine get_user_excitonic_states( )
-     !
-     use pars,   ONLY:schlen
-     use stderr, ONLY:STRING_split,intc
-     use YPPm,   ONLY:state_ctl,EXCITONS_user_indexes,EXCITONS_n_user_states,EXCITON_E_treshold,EXCITON_Res_treshold
-     use com,    ONLY:msg
-     !
-     implicit none
-     !
-     integer          ::i_start,i_end,i_str,i_st,TMP_index(BSS_n_eig),n_user
-     real(SP)         ::Residuals(BSS_n_eig)
-     character(schlen)::str_piece(50)
-     !
-     call STRING_split(state_ctl,str_piece)
-     !
-     EXCITONS_n_user_states=0
-     n_user    =0
-     TMP_index =0
-     !
-     ! 1. Using the "States" variable
-     !--------------------------------
-     i_str     =1
-     do while (i_str<50)
-       !
-       if (len_trim(str_piece(i_str))==0) exit
-       !
-       if (trim(str_piece(i_str+1))=="-") then
-         read(str_piece(i_str  ),*) i_start
-         read(str_piece(i_str+2),*) i_end
-         i_str=i_str+3
-       else
-         read(str_piece(i_str),*) i_start
-         i_end=i_start
-         i_str=i_str+1
-       endif
-       !
-       do i_st=i_start,i_end
-         !
-         n_user=n_user+1
-         !
-         call ADD_me_to_the_list(i_st,TMP_index)
-         !
-       enddo
-     enddo
-     !
-     ! 2. Using the "En_treshold" variable
-     !------------------------------------
-     if (EXCITON_E_treshold>0.) then
-       do i_st=1,BSS_n_eig
-         if (real(BS_E(i_st))<EXCITON_E_treshold) call ADD_me_to_the_list(i_st,TMP_index)
-       enddo
-     endif
-     !
-     ! 3. Using the "Res_treshold" variable
-     !--------------------------------------
-     if (EXCITON_Res_treshold>0.) then
-       Residuals(:) = BS_R_right(:)*conjg(BS_R_right(:))
-       do i_st=1,BSS_n_eig
-         if (real(Residuals(i_st))>EXCITON_Res_treshold*maxval(Residuals)) call ADD_me_to_the_list(i_st,TMP_index)
-       enddo
-     endif
-     !
-     if (EXCITONS_n_user_states==0) return
-     !
-     if (EXCITONS_n_user_states>n_user) call warning(" "//trim(intc(EXCITONS_n_user_states-n_user))//&
-&                                                    " excitonic states added to the USER selection to preserve degeneracy")
-     call msg("s"," "//trim(intc(EXCITONS_n_user_states))//" excitonic states selected")
-     !
-     YAMBO_ALLOC(EXCITONS_user_indexes,(EXCITONS_n_user_states))
-     EXCITONS_user_indexes=TMP_index(:EXCITONS_n_user_states)
-     !
-   end subroutine
-   !
-   subroutine ADD_me_to_the_list(i_state,TMP_index)
-     !
-     use YPPm,   ONLY:EXCITONS_n_user_states
-     !
-     integer i_st_p,i_state,TMP_index(BSS_n_eig)
-     !
-     do i_st_p=1,BSS_n_eig
-       if (BS_E_degs(i_st_p)==BS_E_degs(i_state)) then
-         if (.not.any(TMP_index==i_st_p)) then
-           EXCITONS_n_user_states=EXCITONS_n_user_states+1
-           TMP_index(EXCITONS_n_user_states)=i_st_p
-         endif
-       endif
-     enddo
-   end subroutine
-   !
 end subroutine
diff --git a/ypp/excitons/excitons_find_degeneracies.F b/ypp/excitons/excitons_find_degeneracies.F
index 420343116c..7fd5e84e0c 100644
--- a/ypp/excitons/excitons_find_degeneracies.F
+++ b/ypp/excitons/excitons_find_degeneracies.F
@@ -5,24 +5,24 @@
 !
 ! Authors (see AUTHORS file for details): AM DV
 !
-subroutine excitons_find_degeneracies(BS_E,BS_E_degs)
+subroutine excitons_find_degeneracies(BS_E,BS_E_degs,n_eig)
  !
  use pars,          ONLY:SP
  use com,           ONLY:msg
  use vec_operate,   ONLY:sort
- use BS_solvers,    ONLY:BSS_n_eig
  use YPPm,          ONLY:deg_energy
  !
  implicit none
  !
- complex(SP)   :: BS_E(BSS_n_eig)
- integer       :: BS_E_degs(BSS_n_eig)
+ integer, intent(in) :: n_eig
+ complex(SP)   :: BS_E(n_eig)
+ integer       :: BS_E_degs(n_eig)
  !
  ! Work space
  !
  integer :: j1,j2
- integer :: S_indx(BSS_n_eig)
- real(SP):: v2sort(BSS_n_eig)
+ integer :: S_indx(n_eig)
+ real(SP):: v2sort(n_eig)
  !
  call msg('s',' Sorting energies')
  !==================================
@@ -32,13 +32,13 @@ subroutine excitons_find_degeneracies(BS_E,BS_E_degs)
  !
  BS_E_degs=0
  !
- do j1=1,BSS_n_eig
+ do j1=1,n_eig
    !
    if (BS_E_degs(S_indx(j1))>0) cycle
    !
    BS_E_degs(S_indx(j1))=S_indx(j1)
    !
-   do j2=j1+1,BSS_n_eig
+   do j2=j1+1,n_eig
      if ( abs( real(BS_E(S_indx(j1)))-real(BS_E(S_indx(j2))) )>deg_energy) exit
      BS_E_degs(S_indx(j2))=S_indx(j1)
    enddo
diff --git a/ypp/excitons/excitons_get_user_states.F b/ypp/excitons/excitons_get_user_states.F
new file mode 100644
index 0000000000..255bb223bd
--- /dev/null
+++ b/ypp/excitons/excitons_get_user_states.F
@@ -0,0 +1,129 @@
+!
+!        Copyright (C) 2000-2021 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): AM DV DS
+!
+! This file is distributed under the terms of the GNU
+! General Public License. You can redistribute it and/or
+! modify it under the terms of the GNU General Public
+! License as published by the Free Software Foundation;
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will
+! be useful, but WITHOUT ANY WARRANTY; without even the
+! implied warranty of MERCHANTABILITY or FITNESS FOR A
+! PARTICULAR PURPOSE.  See the GNU General Public License
+! for more details.
+!
+! You should have received a copy of the GNU General Public
+! License along with this program; if not, write to the Free
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston,
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+subroutine excitons_get_user_states(BS_E_degs,Res,n_eig,recompute)
+ !
+ use pars,          ONLY:SP,schlen
+ use com,           ONLY:msg
+ use stderr,        ONLY:STRING_split,intc
+ use YPPm,          ONLY:EXCITONS_user_indexes,EXCITONS_n_user_states,EXCITON_E_treshold,&
+                         EXCITON_Res_treshold,MAGNON_Res_threshold,state_ctl,BS_E,exc_kind
+ !
+ implicit none
+ !
+ integer, intent(in)    :: n_eig
+ integer, intent(inout) :: BS_E_degs(n_eig)
+ real(SP),intent(in)    :: Res(n_eig)
+ logical, intent(in)    :: recompute
+ !
+ integer          ::i_start,i_end,i_str,i_st,TMP_index(n_eig),n_user
+ character(schlen)::str_piece(50)
+ !
+ ! Sort energies to find degenerate states
+ !========================================
+ if(recompute) call excitons_find_degeneracies(BS_E,BS_E_degs,n_eig)
+ !
+ call STRING_split(state_ctl,str_piece)
+ !
+ EXCITONS_n_user_states=0
+ n_user    =0
+ TMP_index =0
+ !
+ ! 1. Using the "States" variable
+ !--------------------------------
+ i_str     =1
+ do while (i_str<50)
+   !
+   if (len_trim(str_piece(i_str))==0) exit
+   !
+   if (trim(str_piece(i_str+1))=="-") then
+     read(str_piece(i_str  ),*) i_start
+     read(str_piece(i_str+2),*) i_end
+     i_str=i_str+3
+   else
+     read(str_piece(i_str),*) i_start
+     i_end=i_start
+     i_str=i_str+1
+   endif
+   !
+   do i_st=i_start,i_end
+     !
+     n_user=n_user+1
+     !
+     call ADD_me_to_the_list(i_st,TMP_index)
+     !
+   enddo
+ enddo
+ !
+ if (EXCITONS_n_user_states>n_user) &
+ & call warning(" "//trim(intc(EXCITONS_n_user_states-n_user))//&
+ & ""//exc_kind//"ic states added to the USER selection to preserve degeneracy")
+ !
+ ! 2. Using the "En_treshold" variable
+ !------------------------------------
+ if (EXCITON_E_treshold>0.) then
+   do i_st=1,n_eig
+     if (real(BS_E(i_st))<EXCITON_E_treshold) call ADD_me_to_the_list(i_st,TMP_index)
+   enddo
+ endif
+ !
+ ! 3. Using the "Res_treshold" variable
+ !--------------------------------------
+ if (EXCITON_Res_treshold>0.) then
+   do i_st=1,n_eig
+     if (real(Res(i_st))>EXCITON_Res_treshold*maxval(Res)) call ADD_me_to_the_list(i_st,TMP_index)
+   enddo
+ endif
+ !
+ if (MAGNON_Res_threshold>0.) then
+   do i_st=1,n_eig
+     if (real(Res(i_st))>MAGNON_Res_threshold*maxval(Res)) call ADD_me_to_the_list(i_st,TMP_index)
+   enddo
+ endif
+ !
+ if (EXCITONS_n_user_states==0) return
+ call msg("s"," "//trim(intc(EXCITONS_n_user_states))//" "//exc_kind//"ic states selected")
+ !
+ if(allocated(EXCITONS_user_indexes)) deallocate(EXCITONS_user_indexes)
+ allocate(EXCITONS_user_indexes(EXCITONS_n_user_states))
+ EXCITONS_user_indexes=TMP_index(:EXCITONS_n_user_states)
+ !
+ contains
+   !
+   subroutine ADD_me_to_the_list(i_state,TMP_index)
+     !
+     use YPPm,   ONLY:EXCITONS_n_user_states
+     !
+     integer i_st_p,i_state,TMP_index(n_eig)
+     !
+     do i_st_p=1,n_eig
+       if (BS_E_degs(i_st_p)==BS_E_degs(i_state)) then
+         if (.not.any(TMP_index==i_st_p)) then
+           EXCITONS_n_user_states=EXCITONS_n_user_states+1
+           TMP_index(EXCITONS_n_user_states)=i_st_p
+         endif
+       endif
+     enddo
+   end subroutine ADD_me_to_the_list
+   !
+end subroutine
diff --git a/ypp/excitons/excitons_interpolate_setup.F b/ypp/excitons/excitons_interpolate_setup.F
index c71c4a10c0..7a4b73d072 100644
--- a/ypp/excitons/excitons_interpolate_setup.F
+++ b/ypp/excitons/excitons_interpolate_setup.F
@@ -5,54 +5,119 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
-subroutine excitons_interpolate_setup(k,Xk,en,Xen,q,BS_all_E,ID_INTERP_EXC,report)
+! headers
+!
+#include<y_memory.h> 
+!
+subroutine excitons_interpolate_setup(k,Xk,en,Xen,q,BS_all_E,ID_INTERP_EXC,ID_INTERP_OCC,report)
  !
  ! This subroutine read and interpolate exciton 
  !
- use pars,          ONLY:SP,schlen
+ use pars,          ONLY:SP,schlen,rZERO
  use units,         ONLY:HA2EV
- use R_lattice,     ONLY:bz_samp
- use electrons,     ONLY:levels
+ use R_lattice,     ONLY:bz_samp,coll_scattering,qindx_C
+ use electrons,     ONLY:levels,nel_cond,E_reset
  use interpolate,   ONLY:INTERPOLATION_BZ,INTERP_obj,INTERPOLATION_coefficients
- use YPPm,          ONLY:BS_E,EXCITONS_user_indexes,EXCITONS_n_user_states,coo_out,K_transform
+ use YPPm,          ONLY:BS_E,EXCITONS_user_indexes,EXCITONS_n_user_states,coo_out,K_transform,&
+ &                       l_exc_occ_interp,EXCITONS_n_user_states,&
+ &                       l_magnons,BS_R_left,BS_R_right,BS_R_left_magn,BS_R_right_magn,exc_kind
+ use BS_solvers,    ONLY:BSS_n_eig
  use com,           ONLY:msg
+ use IO_int,              ONLY:io_control
+ use IO_m,                ONLY:OP_APP_CL,REP,OP_RD_CL
  use stderr,        ONLY:intc
+ use y_memory_alloc
  !
-#include<y_memory.h> 
+ implicit none
  !
  type(bz_samp) ::Xk,k,q
  type(levels)  ::Xen,en
  real(SP),intent(out)  :: BS_all_E(EXCITONS_n_user_states,q%nibz)
  logical, intent(in) :: report
- integer, intent(inout) :: ID_INTERP_EXC
+ integer, intent(inout) :: ID_INTERP_EXC,ID_INTERP_OCC
  !
  ! Work space
  !
- integer :: iq,i_c,i_l,io_err
+ type(levels)  ::E_QF
+ integer :: iq,i_c,i_l,io_err,ID,EXC_n_states_ref,BSS_n_eig_ref,n_eig
+ real(SP), allocatable    :: EXC_occ(:,:)
  real(SP), allocatable    :: values(:)
- real(SP) :: tmp_q(3)
+ real(SP) :: Res(BSS_n_eig),BS_E_degs(BSS_n_eig)
+ real(SP) :: tmp_q(3),E_fermi_h,E_fermi_e
  character(schlen), allocatable :: headings(:)
+ integer, external  ::io_QINDX
  !
- ! Allocate arrays for all eigenvalues/eigenvectors
+ EXC_n_states_ref=EXCITONS_n_user_states
+ BSS_n_eig_ref=BSS_n_eig
  ! 
- call section('*','Excitons Interpolation')
+ do i_c=1,EXC_n_states_ref
+   i_l=EXCITONS_user_indexes(i_c)
+   BS_all_E(i_c,1)=BS_E(i_l)
+ enddo
+ !
+ call section('*',exc_kind//'s Interpolation')
+ !
+ if(nel_cond/=rZERO) then
+   call msg('rs','Excitonic quasi-fermi distribution interpolation')
+   l_exc_occ_interp=.TRUE.
+   call E_reset(E_QF)
+   call OCCUPATIONS_Quasi_Fermi(Xen,Xk,E_QF,E_fermi_h,E_fermi_e)
+   if(.not.allocated(q%k_table))  call k_build_up_BZ_tables(q)
+   if(.not.allocated(qindx_C)) then
+     if(coll_scattering.eqv..FALSE.) call error('Please run again the setup with: K_grids= "X B C S" ')
+     call io_control(ACTION=OP_RD_CL,COM=REP,SEC=(/6/),ID=ID)
+     io_err=io_QINDX(k,q,ID,'minus_q')
+     if (io_err/=0) then
+       call msg('s',"Missing k/q scattering database")
+       call error('Please (re)run the setup K_grids="C"')
+     endif
+   endif
+#if defined _YPP_ELPH
+   YAMBO_ALLOC(EXC_occ,(EXCITONS_n_user_states,q%nibz))
+#else
+   call error("Excitonic occupations only available in ypp_ph")
+#endif
+ endif
  !
  call excitons_read(k,Xk,en,Xen,1,"clean",io_err)
+ if(io_err/=0) call error("Error reading exciton at q = "//intc(1))
  !
  ! Read all eigenvalues and eigenvectors
  !
- do iq=1,q%nibz
-   call msg('s','Reading excitons at @ Q-index #',iq)
+ n_eig=BSS_n_eig
+ do iq=2,q%nibz
+   call msg('s','Reading '//exc_kind//'s at @ Q-index #',iq)
    call excitons_read(k,Xk,en,Xen,iq,"check",io_err)
-   call excitons_read(k,Xk,en,Xen,iq,"eigenvalues",io_err)
-   do i_c=1,EXCITONS_n_user_states
+   if(io_err/=0) call error("Error reading exciton at q = "//intc(iq))
+   !
+   if(l_exc_occ_interp) then
+     call excitons_read(k,Xk,en,Xen,iq,"eigenvectors eigenvalues",io_err)
+#if defined _YPP_ELPH
+     call EXC_occupations(E_QF,Xk,q,iq,EXC_n_states_ref,EXCITONS_user_indexes,EXC_occ(:,iq))
+#endif
+   else
+     call excitons_read(k,Xk,en,Xen,iq,"eigenvalues",io_err)
+   endif
+   !
+   n_eig=min(n_eig,BSS_n_eig)
+   if (     l_magnons)  Res(:n_eig)=real(BS_R_left_magn(:n_eig,1)*BS_R_right_magn(:n_eig,1),SP)
+   if (.not.l_magnons)  Res(:n_eig)=real(BS_R_left(:n_eig)*BS_R_right(:n_eig),SP)
+   call excitons_get_user_states(BS_E_degs(:n_eig),Res(:n_eig),n_eig,.true.)
+   if(EXC_n_states_ref>size(EXCITONS_user_indexes)) then
+     call error("Degenerate excitons please increase the State range to : 1-"//intc(EXC_n_states_ref))
+   endif
+   do i_c=1,EXC_n_states_ref
      i_l=EXCITONS_user_indexes(i_c)
      BS_all_E(i_c,iq)=BS_E(i_l)
    enddo
+   !
    call excitons_read(k,Xk,en,Xen,iq,"clean",io_err)
    !
  enddo
  !
+ EXCITONS_n_user_states=EXC_n_states_ref
+ BSS_n_eig=BSS_n_eig_ref
+ !
  if(report.eqv..true.) then
    !
    ! Write in the report the exciton energies
@@ -60,7 +125,7 @@ subroutine excitons_interpolate_setup(k,Xk,en,Xen,q,BS_all_E,ID_INTERP_EXC,repor
    YAMBO_ALLOC(values,(EXCITONS_n_user_states+4))
    allocate(headings(EXCITONS_n_user_states+4))
    !
-   call msg('nr','Excitons Energies:')
+   call msg('nr',exc_kind//'s Energies:')
    headings(1:3)=(/"q_x ("//trim(coo_out)//")","q_y ("//trim(coo_out)//")","q_z ("//trim(coo_out)//")"/)
    !
    do i_c=1,EXCITONS_n_user_states
@@ -87,7 +152,12 @@ subroutine excitons_interpolate_setup(k,Xk,en,Xen,q,BS_all_E,ID_INTERP_EXC,repor
  ! Fourier interpolation of exciton dispersion
  !
  call INTERPOLATION_BZ_setup(q)
- INTERP_obj(ID_INTERP_EXC)%what="excitons"
+ INTERP_obj(ID_INTERP_EXC)%what=exc_kind//"s"
  call INTERPOLATION_coefficients(R1D=BS_all_E,k=q,NK=q%nibz,ID=ID_INTERP_EXC,ID_obj=ID_INTERP_EXC)
  !
+ if(l_exc_occ_interp) then
+   INTERP_obj(ID_INTERP_OCC)%what="occupations"
+   call INTERPOLATION_coefficients(R1D=EXC_occ,k=q,NK=q%nibz,ID=ID_INTERP_OCC,ID_obj=ID_INTERP_OCC)
+ endif
+ !
 end subroutine
diff --git a/ypp/excitons/excitons_ph_ass_dos.F b/ypp/excitons/excitons_ph_ass_dos.F
index 6727f50614..684ca93362 100644
--- a/ypp/excitons/excitons_ph_ass_dos.F
+++ b/ypp/excitons/excitons_ph_ass_dos.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA
 !
+! headers
+!
+#include<y_memory.h> 
+!
 subroutine excitons_ph_ass_dos(k,Xk,en,Xen,q_exc)
  !
  use pars,          ONLY:SP,pi,rZERO,schlen
@@ -24,21 +28,23 @@ subroutine excitons_ph_ass_dos(k,Xk,en,Xen,q_exc)
  use ELPH,          ONLY:FAN_deltaE_treshold,elph_branches
  use functions,     ONLY:Lorentzian_func,boltzman_f,bose_f
  use stderr,        ONLY:STRING_same,intc
+ use y_memory_alloc
  !
-#include<y_memory.h> 
+ implicit none
  !
  type(bz_samp) ::Xk,k,q_exc
  type(levels)  ::Xen,en
  !
  logical, external     :: file_exists
  integer               :: i_q,i_c,i_E,ph_modes,i_idx,i_l
- integer               :: ID_INTERP_EXC,n_full_q
+ integer               :: ID_INTERP_EXC,n_full_q,ID_INTERP_OCC
  type(bz_samp)         :: q_matdyn
  real(SP), allocatable :: ph_freqs(:,:)
  integer, allocatable  :: iq_indx(:)
  real(SP), allocatable :: BSE_interp_E(:,:)
  real(SP)              :: dos_E(DOS_E_steps),exc_ph_DOS(DOS_E_steps),exc_OCC,Boltz_F_exc,Bose_F_ph,min_E 
  real(SP)              :: occ_threshold=1E-5
+ real(SP)              :: BS_all_E(EXCITONS_n_user_states,q_exc%nibz)
  type(PP_indexes)      :: px
  complex(SP), allocatable  :: ph_pol(:,:,:,:)
  !
@@ -88,7 +94,8 @@ subroutine excitons_ph_ass_dos(k,Xk,en,Xen,q_exc)
  ! Read and interpolate excitons dispersion
  !
  ID_INTERP_EXC=1
- call excitons_interpolate_setup(k,Xk,en,Xen,q_exc,ID_INTERP_EXC,.TRUE.)
+ ID_INTERP_OCC=2
+ call excitons_interpolate_setup(k,Xk,en,Xen,q_exc,BS_all_E,ID_INTERP_EXC,ID_INTERP_OCC,.TRUE.)
  YAMBO_ALLOC(BSE_interp_E,(EXCITONS_n_user_states,q_matdyn%nibz))
  call INTERPOLATION_BZ(K=q_matdyn,NK=q_matdyn%nibz,R1D=BSE_interp_E,ID=ID_INTERP_EXC)
  !
diff --git a/ypp/excitons/excitons_read.F b/ypp/excitons/excitons_read.F
index ae57938011..82eb5447dd 100644
--- a/ypp/excitons/excitons_read.F
+++ b/ypp/excitons/excitons_read.F
@@ -5,9 +5,13 @@
 !
 ! Authors (see AUTHORS file for details): AM DV DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
  !
- use pars,          ONLY:pi
+ use pars,          ONLY:SP,pi
  use R_lattice,     ONLY:bz_samp
  use stderr,        ONLY:intc
  use X_m,           ONLY:X_reset
@@ -17,9 +21,13 @@ subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
  use IO_m,          ONLY:OP_RD_CL,DUMP,NONE,REP,VERIFY
  use BS_solvers,    ONLY:BSS_eh_table,BSS_write_eig_2_db,io_BSS_diago,BS_mat,BSS_n_eig
  use BS,            ONLY:BS_H_dim,BS_K_dim,BS_bands,BS_K_coupling,BSqpts
- use YPPm,          ONLY:BS_R_left,BS_R_right,BS_E,BS_E_SOC_corr,BS_R_PL,BS_R_kerr,Xbsk   
+ use YPPm,          ONLY:BS_R_left,BS_R_right,BS_R_left_kerr,BS_R_right_kerr,&
+ &                       BS_R_left_magn,BS_R_right_magn,BS_R_right_dich,&
+ &                       BS_E,BS_E_SOC_corr,BS_R_PL,Xbsk,MAGNON_Res_threshold,&
+ &                       l_magnons
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::Xk,k
  type(levels)  ::Xen,en
@@ -43,6 +51,8 @@ subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
  l_eigenvec=index(what,"eigenvectors") >0
  l_clean   =index(what,"clean")        >0
  !
+ io_err=0
+ !
  if(l_check) then
    !
    ! E SOC perturbative
@@ -84,13 +94,19 @@ subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
    YAMBO_ALLOC_P(BS_R_right,(BSS_n_eig))
    YAMBO_ALLOC_P(BS_R_left,(BSS_n_eig))
    YAMBO_ALLOC(BS_E,(BSS_n_eig))
+   if (l_magnons) then
+     YAMBO_ALLOC_P(BS_R_right_magn,(BSS_n_eig,2))
+     YAMBO_ALLOC_P(BS_R_left_magn,(BSS_n_eig,2))
+   endif
    if(en%nb_SOC/=0) then
      YAMBO_ALLOC_P(BS_E_SOC_corr,(2/n_sp_pol,BSS_n_eig))
    endif
    !
    do i_sec=1,2
      call io_control(ACTION=OP_RD_CL,COM=REP,MODE=VERIFY,SEC=(/i_sec/),ID=io_ID)
-     io_err=io_BSS_diago(iq,1,io_ID,Xbsk,BS_E,BS_R_left,BS_R_right,BsE_corr=BS_E_SOC_corr,bsR_kerr=BS_R_kerr,bsR_pl=BS_R_PL)
+     io_err=io_BSS_diago(iq,1,io_ID,Xbsk,bsE=BS_E,bsRl=BS_R_left,bsRr=BS_R_right,BsE_corr=BS_E_SOC_corr,&
+     &       bsL_kerr=BS_R_left_kerr,bsR_kerr=BS_R_right_kerr,bsR_dich=BS_R_right_dich,   &
+             bsL_magn=BS_R_left_magn,bsR_magn=BS_R_right_magn,bsR_pl=BS_R_PL)
      if (io_err/=0) then
        if(i_sec==1) then
          call warning('Error reading SEC1 in ndb.BSS_diago, this can be due to the missing ndb.dipoles or ndb.ems1')
@@ -114,12 +130,11 @@ subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
    !
    ! Loading tables and eigenvectors
    !
-   YAMBO_ALLOC(BSS_eh_table,(BS_H_dim,3+n_sp_pol-1))
+   YAMBO_ALLOC(BSS_eh_table,(BS_H_dim,5))
    YAMBO_ALLOC(BS_mat,(BS_H_dim,BSS_n_eig))
    !
    call io_control(ACTION=OP_RD_CL,COM=REP,MODE=VERIFY,SEC=(/3/),ID=io_ID)
-   io_err=io_BSS_diago(iq,1,io_ID,Xbsk,BS_E,BS_R_left,BS_R_right,&
-   &                   BsE_corr=BS_E_SOC_corr,bsR_kerr=BS_R_kerr,bsR_pl=BS_R_PL,BS_mat=BS_mat)
+   io_err=io_BSS_diago(iq,1,io_ID,Xbsk,BS_mat=BS_mat)
    if (io_err/=0) call error(' ndb.BSS_diago, SEC 3, not compatible with ypp input')
    !
    ! Create the anti-resonant part of the eh_table
@@ -145,6 +160,8 @@ subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
    !
    YAMBO_FREE_P(BS_R_left)
    YAMBO_FREE_P(BS_R_right)
+   YAMBO_FREE_P(BS_R_left_magn)
+   YAMBO_FREE_P(BS_R_right_magn)
    YAMBO_FREE(BS_E)
    YAMBO_FREE(BSS_eh_table)
    YAMBO_FREE(BS_mat)
@@ -153,6 +170,7 @@ subroutine excitons_read(k,Xk,en,Xen,iq,what,io_err)
      YAMBO_FREE_P(BS_E_SOC_corr)
    endif
    call X_reset(Xbsk)
+   io_err=0
    !
  endif
  !
diff --git a/ypp/excitons/excitons_sort_and_report.F b/ypp/excitons/excitons_sort_and_report.F
index 536923642d..ef3ab5f248 100644
--- a/ypp/excitons/excitons_sort_and_report.F
+++ b/ypp/excitons/excitons_sort_and_report.F
@@ -3,9 +3,9 @@
 !
 ! Copyright (C) 2015 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM DV
+! Authors (see AUTHORS file for details): AM DV DS
 !
-subroutine excitons_sort_and_report(iq,BS_R,BS_E,BS_E_SOC_corr,EXC_spin)
+subroutine excitons_sort_and_report(iq,Res,BS_E,BS_E_SOC_corr,EXC_spin)
  !
  use pars,          ONLY:SP,schlen,pi,zero_dfl
  use units,         ONLY:HA2EV
@@ -23,7 +23,8 @@ subroutine excitons_sort_and_report(iq,BS_R,BS_E,BS_E_SOC_corr,EXC_spin)
  implicit none 
  !
  integer                      :: iq
- complex(SP)                  :: BS_R(BSS_n_eig),BS_E(BSS_n_eig)
+ real(SP)                     :: Res(BSS_n_eig)
+ complex(SP)                  :: BS_E(BSS_n_eig)
  real(SP), optional, pointer  :: BS_E_SOC_corr(:,:)
  type(EXCITON_spin),optional  :: EXC_spin
  !
@@ -107,8 +108,8 @@ subroutine excitons_sort_and_report(iq,BS_R,BS_E,BS_E_SOC_corr,EXC_spin)
      titles(n_elements  )='dE(2)[meV]'
    endif
    !
-   Residuals(:) = real(BS_R(:)*conjg(BS_R(:)),SP)
-   if (write_widths)  Residuals(:) = abs(BS_R(:))
+   Residuals(:) = Res(:)
+   if (write_widths)  Residuals(:) = sqrt(Res(:))
    if(iq==1) q_norm=q0_def_norm**2
    if(iq> 1) q_norm=iku_v_norm(BSqpts(:,iq))**2
    Residuals(:) = Residuals(:)*real(spin_occ,SP)/(2._SP*pi)**3*d3k_factor*4._SP*pi/q_norm*HA2EV
@@ -174,7 +175,7 @@ subroutine excitons_sort_and_report(iq,BS_R,BS_E,BS_E_SOC_corr,EXC_spin)
        enddo
      endif
      if (write_soc_corr) then
-       if (.not.any((/EXCITONS_user_indexes==j2/))) cycle
+       !if (.not.any((/EXCITONS_user_indexes==j2/))) cycle
        n_elements=n_elements+2
        rv(n_elements-1:n_elements)=(/BS_E_SOC_corr(1,j2),BS_E_SOC_corr(2,j2)/)*HA2EV*1000._SP
      endif
diff --git a/ypp/excitons/excitons_spin.F b/ypp/excitons/excitons_spin.F
index 853e1b0f79..af616d10ed 100644
--- a/ypp/excitons/excitons_spin.F
+++ b/ypp/excitons/excitons_spin.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
-subroutine excitons_spin(Xk,BS_R_right,BS_E,BS_E_degs)
+! headers
+!
+#include<y_memory.h>
+!
+subroutine excitons_spin(Xk,Res,BS_E,BS_E_degs)
  !
  use pars,          ONLY:SP,cZERO,rZERO
  use BS,            ONLY:BS_bands
@@ -22,12 +26,14 @@ subroutine excitons_spin(Xk,BS_R_right,BS_E,BS_E_degs)
 &                        EXCITON_kin,excitons_degeneracy_average,EXCITON_spin,BSiq
  use parallel_m,    ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int,  ONLY:PP_wait,PP_redux_wait,PARALLEL_index
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: Xk
  integer       :: BS_E_degs(BSS_n_eig)
- complex(SP)   :: BS_R_right(BSS_n_eig),BS_E(BSS_n_eig)
+ real(SP)      :: Res(BSS_n_eig)
+ complex(SP)   :: BS_E(BSS_n_eig)
  ! 
  ! Work Space
  !
@@ -158,7 +164,7 @@ subroutine excitons_spin(Xk,BS_R_right,BS_E,BS_E_degs)
  ! Sorting & reporting'
  !=====================
  !
- call excitons_sort_and_report(BSiq,BS_R_right,BS_E,EXC_spin=EXC_spin)
+ call excitons_sort_and_report(BSiq,Res,BS_E,EXC_spin=EXC_spin)
  !
  do i_c=1,EXCITONS_n_user_states
    !
diff --git a/ypp/interface/INIT_load_ypp.F b/ypp/interface/INIT_load_ypp.F
index 79a84341f8..4ca306caf7 100644
--- a/ypp/interface/INIT_load_ypp.F
+++ b/ypp/interface/INIT_load_ypp.F
@@ -12,27 +12,28 @@ subroutine INIT_ypp_load(defs)
 &                        Time_unit,Bfield_unit,V_real_time,T_unit,V_nl_optics,V_io,V_sc
  use wave_func,     ONLY:wf_ng
  use LIVE_t,        ONLY:nhash
- use D_lattice,     ONLY:Tel,Bose_Temp
+ use D_lattice,     ONLY:Tel,Bose_Temp,Boltz_Temp
  use R_lattice,     ONLY:BZ_FineGd_mode
- use electrons,     ONLY:n_spinor
+ use electrons,     ONLY:n_spinor,nel_cond
  use interpolate,   ONLY:INTERP_shell_factor,INTERP_mode,NN_n_of_nearest
  use YPPm,          ONLY:E_field,alat_used_for_output,B_field,B_psi,B_theta,BANDS_bands,BANDS_path,&
 &                        BZ_FineGd_nkpts_to_use,BZ_random_nkpts,CIRCUIT_E_DB_path,coo_in,coo_out,current_dir,&
 &                        deg_energy,BANDS_steps,DOS_bands,DOS_broadening,PROJECT_path,QP_DB_to_expand,&
 &                        DOS_E_range,DOS_E_steps,EXCITON_E_treshold,EXCITON_Res_treshold,EXCITON_weight_treshold,&
-&                        INTERP_grid,mag_dir,N_path_pts,ncell,p_dir,p_format,PDOS_atoms,&
-&                        PDOS_j,PDOS_wfcs,PDOS_kinds,PDOS_l,PDOS_m,perturbative_SOC_path,PtsPath,r_hole,seed_name,&
+&                        INTERP_grid,mag_dir,N_path_pts,ncell,p_dir,p_format,PDOS_atoms,elec_spin,hole_spin,EXC_min,&
+&                        PDOS_j,PDOS_wfcs,PDOS_kinds,PDOS_l,PDOS_m,perturbative_SOC_path,PtsPath,r_fixed,seed_name,&
 &                        SOC_bands_buffer,SOC_bands_to_map,SOC_split,state_ctl,AVE_exc,BSiq_via_command_line,&
 &                        WF_multiplier,WF_ref,what_to_write,BSiq,PROJECT_mode,PROJECT_N_perp,PROJECT_steps,PROJECT_threshold,&
-&                        DIPs_direction,DIPs_E_range,DIPs_C_bands,DIPs_V_bands,DIPs_kind
+&                        DIPs_direction,DIPs_E_range,DIPs_C_bands,DIPs_V_bands,DIPs_kind,center_kind,MAGNON_Res_threshold
  use YPP_symm,      ONLY:wf_ng_cut
 #if defined _YPP_RT
  use YPPm,          ONLY:Nel_fac
- use YPP_real_time, ONLY:RT_conf,Eh_pumped_Nel,Eh_pumped_pair_deltaE,Rho_deph,&
+ use YPP_real_time, ONLY:RT_conf,Eh_pumped_Nel,Eh_pumped_pair_deltaE,Rho_deph,ypp_chirp,&
 &                        Eh_pumped_pair_width,Probe_Keyword,Pump_Keyword,X_order,X_kind,Eh_pumped_pair_energy,&
 &                        Eh_pumped_pair_BZ_width,Eh_mu,Eh_temp,h_mu_autotune_thr,TRabsWHAT,TRabsDIP_plane,&
 &                        TRabsMODE,TRabsDIP_dir,TRabs_Eeh_treshold,Pump_path,RT_pol_mode
  use real_time,     ONLY:RT_bands
+ use fields,        ONLY:n_ext_fields
 #endif
 #if defined _YPP_SC
  use YPPm,          ONLY:V_value
@@ -41,7 +42,8 @@ subroutine INIT_ypp_load(defs)
 #if defined _YPP_ELPH
  use YPP_ELPH,      ONLY:elph_dbs_path,elph_Ef,elph_gamma_broad,elph_residual_treshold,elph_steps,&
 &                        ph_broad,ph_freqs_file,ph_modes_file
- use ELPH,          ONLY:EkplusQ_mode,elph_branches
+ use ELPH,          ONLY:EkplusQ_mode,elph_bands,elph_branches
+ use EXCPH,         ONLY:alphaQ
 #endif
  !
  implicit none
@@ -75,6 +77,7 @@ subroutine INIT_ypp_load(defs)
 #if defined _YPP_ELPH
  call it('r',defs,'eliashberg', '[R] Eliashberg')
  call it('r',defs,'gkkp_db',    '[R] GKKP database')
+ call it('r',defs,'gkkp_sngl',  '[R] GKKP single q-database')
  call it('r',defs,'gkkp_dg',    '[R] GKKP double grid')
  call it('r',defs,'gkkp_plot',  '[R] GKKP plot')
  call it('r',defs,'ph_ass_dos', '[R] Phonon-assisted DOS')
@@ -103,6 +106,7 @@ subroutine INIT_ypp_load(defs)
  call it('r',defs,'RToccupations','[R] Analize time-dependent occupations')
  call it('r',defs,'RTlifetimes',  '[R] Analize time-dependent lifetimes')
  call it('r',defs,'RTpol',        '[R] Analize time-dependent polarization')
+ call it('r',defs,'RTfields',     '[R] Analize time-dependent fields')
  call it('r',defs,'RTdeltaRho',   '[R] Analize the density')
  call it('r',defs,'RTGtwotimes',  '[R] Construct G</G>/G(r)/G(a)/A(t,tp) from rho within GKBA')
  !
@@ -130,6 +134,15 @@ subroutine INIT_ypp_load(defs)
  !
  call it(defs,'ElecTemp','Electronic Temperature',Tel,T_unit,verb_level=V_general)
  call it(defs,'BoseTemp','Bosonic Temperature',Bose_Temp,T_unit,verb_level=V_general)
+ !
+ ! Electrons/Exciton Occupations
+ ! 
+ call it(defs,'NelCond' ,'Number of electrons in conduction (Quasi-Fermi distribution)',nel_cond,verb_level=V_general)
+ call it(defs,'ExcMin' ,'Minimum excitonic energy',EXC_min,unit=E_unit,verb_level=V_general)
+ call it(defs,'EXCTemp','Excitonic Temperature (for luminescence spectra)',Boltz_Temp,T_unit,verb_level=V_general)
+#if defined _YPP_ELPH
+ call it(defs,'AlphaQ',"Distortion of excitonic dispersion E'(Q)=E(Q)+\alpha*|Q| ",alphaQ,verb_level=V_general)
+#endif
  !
  ! DOS
  !
@@ -244,6 +257,8 @@ subroutine INIT_ypp_load(defs)
  !
  call it(defs,'Res_treshold', 'Select states above this optical strength treshold (max normalized to 1.)',&
 &              EXCITON_Res_treshold,verb_level=V_general)
+ call it(defs,'MRes_threshold', 'Select states above this X+- strength treshold (max normalized to 1.)',&
+&              MAGNON_Res_threshold,verb_level=V_general)
  call it(defs,'En_treshold', 'Select states below this energy treshold',&
 &              EXCITON_E_treshold,verb_level=V_general,unit=E_unit)
  call it(defs,'Weight_treshold', 'Print transitions above this weight treshold (max normalized to 1.)',&
@@ -251,7 +266,11 @@ subroutine INIT_ypp_load(defs)
  call it(defs,'States','Index of the BS state(s)',state_ctl)
  call it(defs,'EHdensity', 'Calculate (h)ole/(e)lectron density from BSE wave-function',AVE_exc,case="a")
  call it(defs,'Cells', 'Number of cell repetitions in each direction (odd or 1)',ncell)
- call it(defs,'Hole', '[cc] Hole position in unit cell (positive)',r_hole)
+ call it(defs,'Hole', '[cc] Hole/Elec position in unit cell (positive)',r_fixed)
+ !call it(defs,'PlotCenter', '[cc] Hole/Elec position in unit cell (positive)',r_fixed)
+ call it(defs,'CenterKind', 'Hole or Electron center is used ',center_kind)
+ call it(defs,'HoleSpin', '[cc] The spin of the hole can be "up" "dn" "average" "diag"',hole_spin)
+ call it(defs,'ElecSpin', '[cc] The spin of the elec can be "up" "dn" "average" "diag"',elec_spin)
  call it('f',defs,'PrtDOS', 'Print Exciton Density of States') 
  !
  ! WFs
@@ -296,9 +315,12 @@ subroutine INIT_ypp_load(defs)
  call it(defs,'PhBroad','Phonon broadening (Eliashberg & DOS)',ph_broad,E_unit)
  call it(defs,'EE_Fermi','External Fermi Level (Eliashberg)',elph_Ef,E_unit)
  call it(defs,'PhStps', 'Energy steps',elph_steps)
+ call it(defs,'GkkpBands', 'Bands to be used',elph_bands)
  call it('f',defs,'TestPHDGrid','Test double-grid: set all values of the fine grid equal to the couse ones')
  call it('f',defs,'GkkpReadBare','Read the bare gkkp')
  call it('f',defs,'GkkpExpand','Expand the gkkp in the whole BZ')
+ call it('f',defs,'GkkpExpOnlyK','Expand only k-points and not the q-points (that should be already in BZ)')
+ call it('f',defs,'GkkpSkipDW','Do not include the DW factor in the databases')
  call it('f',defs,'UseQindxB','Use qindx_B to expand gkkp (for testing purposes)')
  !
 #endif
@@ -364,7 +386,10 @@ subroutine INIT_ypp_load(defs)
  call it('f',defs,'SkipOBS_IO','Do not dump on file the RT observables (P(t),J(t),D(t)...)')
  call it('f',defs,'RmAsymLim','Remove Asymptotic limit from P or J',verb_level=V_real_time) 
  !
- call it('f',defs,'STD_style','Print occupations using the standard style',verb_level=V_real_time) 
+ call it('f',defs,'STD_style','Print occupations using the standard style',verb_level=V_real_time)
+ ! 
+ call it(defs,'ChirpFac','Prefactor for linear chirping in frequency space',ypp_chirp,unit=Time_unit(1))
+ call Afield(defs,1)
  !
 #endif
  !
diff --git a/ypp/interface/INIT_ypp.F b/ypp/interface/INIT_ypp.F
index 726a2f42f2..78b1e8c4d7 100644
--- a/ypp/interface/INIT_ypp.F
+++ b/ypp/interface/INIT_ypp.F
@@ -5,11 +5,15 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine INIT_ypp(E,instr,FINALIZE)
  !
  use units,          ONLY:FS2AUT
  use YPPm,           ONLY:l_plot,l_SOC_map,l_map_kpts,l_mag,l_k_grid,l_high_sym_pts,BSiq,&
-&                         l_free_hole,l_fix_syms,l_excitons,l_dipoles,l_exc_wf,DIPs_kind,&
+&                         l_free_hole,l_fix_syms,l_excitons,l_magnons,l_dipoles,l_exc_wf,DIPs_kind,&
 &                         l_amplitude,l_bands,l_bz_grids,l_current,BANDS_bands,BSiq_via_command_line,&
 &                         l_density,l_dos,l_electrons,l_q_grid,l_QP_DBs_create_and_modify,&
 &                         l_QP_DBs_manipulate,l_random_grid,l_shifted_grid,l_sort,l_sp_wf,l_spin,&
@@ -42,7 +46,8 @@ subroutine INIT_ypp(E,instr,FINALIZE)
  use YPP_real_time,  ONLY:l_RealTime,l_RT_dos,l_RT_abs,l_RT_bands,l_RT_DBs,l_RT_density,l_RT_G_two_times,&
 &                         l_RT_energy,l_RT_lifetimes,l_RT_occupations,l_RT_time,l_RT_X,l_RTpump_Fermi,&
 &                         l_RTpump_energy,Eh_pumped_pair_n_BZ_regions,Eh_pumped_pair_BZ_regions,&
-&                         l_NL_X,l_NL_exc,RT_conf,l_RT_pol
+&                         l_NL_X,l_NL_exc,RT_conf,l_RT_pol,l_RT_fields
+ use fields,         ONLY:n_ext_fields
  use real_time,      ONLY:RT_bands
 #endif
 #if defined _YPP_NL 
@@ -51,15 +56,16 @@ subroutine INIT_ypp(E,instr,FINALIZE)
 #endif
 #if defined _YPP_ELPH
  use YPP_ELPH,       ONLY:l_phonons,l_atomic_amplitude,l_eliashberg,l_gkkp,l_gkkp_db,l_gkkp_dg,l_gkkp_plot,&
-&                         l_ph_ass_dos,l_phonons
+&                         l_gkkp_sngl,l_ph_ass_dos,l_phonons
 #endif
 #if defined _YAML_OUTPUT
  use pars,           ONLY:logfile_index,repfile_index
  use com,            ONLY:of_yaml_IDs
  use yaml_output,    ONLY:yaml_close_stream
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)     ::E
  character(*)     ::instr
@@ -257,7 +263,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
        BSiq_via_command_line=.TRUE.
        if (l_sort) infile_editing=.false.
      endif
-     if ( trim(rstr_piece(i1)) == 'excitons') then
+     if ( trim(rstr_piece(i1)) == 'excitons' .or. trim(rstr_piece(i1)) == 'magnons') then
        l_wavefunction   =STRING_match(rstr_piece(i1+1),'w')
        l_sort           =STRING_match(rstr_piece(i1+1),'s').and..not.STRING_match(rstr_piece(i1+1),'sp')
        l_amplitude      =STRING_match(rstr_piece(i1+1),'a')
@@ -278,6 +284,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
        l_gkkp_db   =STRING_match(rstr_piece(i1+1),'g')
        l_gkkp_dg   =STRING_match(rstr_piece(i1+1),'d')
        l_gkkp_plot =STRING_match(rstr_piece(i1+1),'p')
+       l_gkkp_sngl =STRING_match(rstr_piece(i1+1),'s')
      endif
 #endif
      if ( trim(rstr_piece(i1)) == 'dipoles') then
@@ -321,6 +328,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
        l_RT_G_two_times =STRING_match(rstr_piece(i1+1),'g')
        l_RT_abs         =STRING_match(rstr_piece(i1+1),'a')
        l_RT_pol         =STRING_match(rstr_piece(i1+1),'p')
+       l_RT_fields      =STRING_match(rstr_piece(i1+1),'f')
        !
        l_rtplot=.TRUE.
        !
@@ -364,6 +372,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
  if (l_RT_energy)      call initactivate(1,'RTenergy')
  if (l_RT_dos)         call initactivate(1,'RTdos')
  if (l_RT_pol)         call initactivate(1,'RTpol')
+ if (l_RT_fields)      call initactivate(1,'RTfields')
  !
 #endif
  !
@@ -376,9 +385,10 @@ subroutine INIT_ypp(E,instr,FINALIZE)
 #if defined _YPP_ELPH
  !
  if (l_eliashberg)        call initactivate(1,'eliashberg')
- if (l_gkkp.or.l_gkkp_db) call initactivate(1,'gkkp')
+ if (l_gkkp.or.l_gkkp_db.or.l_gkkp_sngl) call initactivate(1,'gkkp')
  if (l_gkkp_db)           call initactivate(1,'gkkp_db')
  if (l_gkkp_dg)           call initactivate(1,'gkkp_dg')
+ if (l_gkkp_sngl)         call initactivate(1,'gkkp_sngl')
  if (l_gkkp_plot)         call initactivate(1,'gkkp_plot')
  if (l_ph_ass_dos)        call initactivate(1,'ph_ass_dos')
  !
@@ -436,9 +446,10 @@ subroutine INIT_ypp(E,instr,FINALIZE)
  l_phonons   =runlevel_is_on('phonons')
  l_gkkp      =runlevel_is_on('gkkp')
  l_gkkp_db   =runlevel_is_on('gkkp_db')
+ l_gkkp_sngl =runlevel_is_on('gkkp_sngl')
  l_gkkp_dg   =runlevel_is_on('gkkp_dg')
  l_gkkp_plot =runlevel_is_on('gkkp_plot')
- l_ph_ass_dos=runlevel_is_on('ph_ass_dos').and.runlevel_is_on('excitons')
+ l_ph_ass_dos=runlevel_is_on('ph_ass_dos').and.(runlevel_is_on('excitons').or.runlevel_is_on('magnons'))
 #endif
 #if defined _YPP_RT
  !
@@ -451,6 +462,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
  l_RTpump_Fermi     = runlevel_is_on('Select_Fermi')
  !
  l_RT_pol           = runlevel_is_on('RTpol')
+ l_RT_fields        = runlevel_is_on('RTfields')
  l_RT_abs           = runlevel_is_on('RTabs')
  l_RT_occupations   = runlevel_is_on('RToccupations')
  l_RT_lifetimes     = runlevel_is_on('RTlifetimes')
@@ -472,6 +484,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
 #endif
  l_dipoles =runlevel_is_on('dipoles')
  l_excitons =runlevel_is_on('excitons')
+ l_magnons =runlevel_is_on('magnons')
  l_electrons=runlevel_is_on('electrons')
  l_plot=any((/runlevel_is_on('wavefunction'),runlevel_is_on('magnetization'),&
 &             runlevel_is_on('density'),runlevel_is_on('current')/))
@@ -480,13 +493,13 @@ subroutine INIT_ypp(E,instr,FINALIZE)
 #endif
 #if defined _YPP_RT
  l_plot=l_plot .or. runlevel_is_on('RTdeltaRho')
-#endif
+#endif 
  l_free_hole =runlevel_is_on('freehole')
  l_average_eh=runlevel_is_on('avehole')
  l_amplitude=runlevel_is_on('amplitude')
- l_exc_wf   =runlevel_is_on('wavefunction').and.runlevel_is_on('excitons')
- l_interp    =runlevel_is_on('interpolate').and.runlevel_is_on('excitons')
- l_sp_wf    =runlevel_is_on('wavefunction').and.runlevel_is_on('electrons')
+ l_exc_wf   =runlevel_is_on('wavefunction').and.(runlevel_is_on('excitons').or.runlevel_is_on('magnons'))
+ l_interp    =runlevel_is_on('interpolate').and.(runlevel_is_on('excitons').or.runlevel_is_on('magnons'))
+ l_sp_wf    =runlevel_is_on('wavefunction').and. runlevel_is_on('electrons')
  l_density  =runlevel_is_on('density').and.runlevel_is_on('electrons')
  l_current  =runlevel_is_on('current').and.runlevel_is_on('electrons')
  l_mag      =runlevel_is_on('magnetization')
@@ -550,7 +563,8 @@ subroutine INIT_ypp(E,instr,FINALIZE)
 #endif
  !
  ! Interpolation
- l_flag=(l_dipoles.and.trim(DIPs_kind)=="IP").or.(l_electrons.and.l_dos).or.l_bands.or.(l_excitons.and.l_interp)
+ l_flag=(l_dipoles.and.trim(DIPs_kind)=="IP").or.(l_electrons.and.l_dos)&
+ &      .or.l_bands.or.(l_excitons.and.l_interp).or.(l_magnons.and.l_interp)
 #if defined _YPP_RT
  l_flag=l_flag.or.(l_RealTime.and.(l_RT_bands.or.l_RT_dos.or.l_RT_G_two_times))
 #endif
@@ -562,14 +576,14 @@ subroutine INIT_ypp(E,instr,FINALIZE)
 #endif
  if (l_flag) call initactivate(1,"INTERP_Grid")
  !
- if (l_excitons.and.l_interp) call initactivate(1,"BANDS_steps cooIn cooOut")
+ if ((l_excitons.or.l_magnons).and.l_interp) call initactivate(1,"BANDS_steps cooIn cooOut")
  l_flag=l_bands
 #if defined _YPP_RT
  l_flag=l_bands.or.l_RT_bands.or.l_RT_G_two_times
 #endif
  if (l_flag) then
    call initactivate(1,"OutputAlat cooIn cooOut NormN NelFac")
-   if (l_bands) call initactivate(1,"BANDS_bands")
+   if (l_bands) call initactivate(1,"NelCond BANDS_bands")
    call initactivate(1,"CIRCUIT_E_DB_path BANDS_path BANDS_steps BANDS_built_in")
  endif
  if (l_flag) then
@@ -622,7 +636,7 @@ subroutine INIT_ypp(E,instr,FINALIZE)
    !
    l_flag=l_RT_occupations.or.l_RT_abs.or.l_RT_pol
    if (l_flag)      call init_RT_ctl_switch('G')
-   l_flag=l_RT_occupations.or.l_RT_density.or.l_RT_lifetimes.or.l_RT_abs.or.l_RT_pol.or.l_RT_G_two_times
+   l_flag=l_RT_occupations.or.l_RT_density.or.l_RT_lifetimes.or.l_RT_abs.or.l_RT_pol.or.l_RT_G_two_times.or.l_RT_fields
    if (l_flag)      call initactivate(1,"TimeStep TimeRange")
    !
    if (l_RT_pol)    call initactivate(1,"PolMode EnRngeRt ETStpsRt DampFactor")
@@ -649,6 +663,11 @@ subroutine INIT_ypp(E,instr,FINALIZE)
      call initactivate(1,"BSQindex EnRngeRt ETStpsRt DampFactor")
      call initactivate(1,"TRabsOBS TRabsDIPdir TRabsDIPplane TRabsMODE TRabsEtresh")
    endif
+   if (l_RT_fields) then
+     call initactivate(1,"EnRngeRt ETStpsRt ChirpFac")
+     n_ext_fields=1
+     call Afield_activate(1)
+   endif
    !
  endif
 #endif
@@ -673,21 +692,25 @@ subroutine INIT_ypp(E,instr,FINALIZE)
    if (l_current) call initactivate(1,'CurrentDir')
  endif
  !
- if (l_spin.and..not.l_excitons) call initactivate(1,"FFTGvecs")
+ if (l_spin.and..not.(l_excitons.or.l_magnons)) call initactivate(1,"FFTGvecs")
  !
- if (l_excitons) then
+ if (l_excitons.or.l_magnons) then
    call initactivate(1,"States En_treshold Res_treshold BSQindex")
    if (l_amplitude) call INIT_QP_ctl_switch('G')
    if (l_exc_wf.or.l_amplitude) call initactivate(1,"Degen_Step Weight_treshold")
-   if (l_exc_wf.and.(.not.l_free_hole.and..not.l_average_eh)) call initactivate(1,"Cells Hole Dimension") 
+   if (l_exc_wf.and.(.not.l_free_hole.and..not.l_average_eh)) &
+   &  call initactivate(1,"Cells Hole CenterKind Dimension") 
+   !& call initactivate(1,"Cells PlotCenter CenterKind Dimension") 
    if (l_exc_wf.and.(l_free_hole.or.l_average_eh)) call initactivate(1,"WFMult") 
    if (l_exc_wf.and.l_average_eh) call initactivate(1,"EHdensity")
+   if (l_exc_wf.and.n_spin>1) call initactivate(1,"ElecSpin HoleSpin")
    if (l_spin) call initactivate(1,"Degen_Step")
 #if defined _YPP_ELPH
    if (l_eliashberg)  call initactivate(1,"Degen_Step") 
 #endif
    if (l_interp) then
      call initactivate(-1,"BSQindex")
+     call initactivate(1,"NelCond ExcMin EXCTemp AlphaQ")
      call initactivate(1,"INTERP_mode INTERP_Grid BANDS_steps cooIn cooOut")
      call initactivate(1,"PrtDOS DOSERange DOSESteps DOS_broad")
    endif
@@ -701,12 +724,17 @@ subroutine INIT_ypp(E,instr,FINALIZE)
  endif
  !
 #if defined _YPP_ELPH
- if (l_gkkp_db)                call initactivate(1,"GkkpReadBare DBsPATH PHfreqF PHmodeF GkkpExpand UseQindxB")
- if (l_gkkp_dg.or.l_gkkp_db)   call initactivate(1,"PHfreqF PHmodeF")
+ if (l_gkkp_db) then
+   call initactivate(1,"GkkpReadBare DBsPATH PHfreqF PHmodeF GkkpExpand GkkpExpOnlyK UseQindxB")
+   call initactivate(1,"GkkpBands GkkpSkipDW")
+ endif
+ if (l_gkkp_sngl)               call initactivate(1,"DBsPATH GkkpExpOnlyK UseQindxB")
+ if (l_gkkp_dg.or.l_gkkp_db)    call initactivate(1,"PHfreqF PHmodeF")
+ if (l_gkkp_dg.or.l_gkkp_db.or.l_gkkp_sngl)  call initactivate(-1,"ElecTemp")
  if (l_gkkp_dg)                call initactivate(1,"FineGd_mode SkipBorderPts EkplusQmode TestPHDGrid")
  if (l_gkkp_plot)              call initactivate(1,"Degen_Step cooOut")
  if (l_phonons.and.l_eliashberg)   call initactivate(1,"EE_Fermi EfGbroad")
- if ( ((l_excitons.or.l_electrons).and.l_eliashberg) .or. (l_phonons.and.l_dos) ) call initactivate(1,"PhBroad PhStps")
+ if ( ((l_excitons.or.l_magnons.or.l_electrons).and.l_eliashberg) .or. (l_phonons.and.l_dos) ) call initactivate(1,"PhBroad PhStps")
 #endif
  !
  ! Check for existing QP control fields to be proposed in the new input files
diff --git a/ypp/interface/QP_DBs_initialize.F b/ypp/interface/QP_DBs_initialize.F
index 00682ed415..4853fd1e35 100644
--- a/ypp/interface/QP_DBs_initialize.F
+++ b/ypp/interface/QP_DBs_initialize.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_DBs_initialize()
  !
  ! QPDB_states -> input file
@@ -17,8 +21,9 @@ subroutine QP_DBs_initialize()
  use units,     ONLY:HA2EV
  use YPPm,      ONLY:l_QP_DBs_create_and_modify,n_QP_dbs,&
 &                    QP_user_corrections,QP_db_input_control,l_QP_DBs_manipulate
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  ! 
  ! Work Space
  !
diff --git a/ypp/k-points/k_circuit_driver.F b/ypp/k-points/k_circuit_driver.F
index becfe511d9..a4e2e79d8e 100644
--- a/ypp/k-points/k_circuit_driver.F
+++ b/ypp/k-points/k_circuit_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS CA AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_circuit_driver(PATH,n_points)
  !
  use pars,           ONLY:SP
@@ -18,8 +22,9 @@ subroutine k_circuit_driver(PATH,n_points)
 &                         CIRCUIT_E_DB_path,CIRCUIT_E_db,USER_k,CIRCUIT_k,&
 &                         CIRCUIT_made_of_SPECIAL_k,BANDS_from_db,SPECIAL_k,&
 &                         BANDS_path,CIRCUIT_k_label,SPECIAL_k_label
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*) :: PATH
  integer      :: n_points
diff --git a/ypp/k-points/k_circuit_made_of_special_points.F b/ypp/k-points/k_circuit_made_of_special_points.F
index 1c24a1a1aa..272c60e959 100644
--- a/ypp/k-points/k_circuit_made_of_special_points.F
+++ b/ypp/k-points/k_circuit_made_of_special_points.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_circuit_made_of_special_points( PATH, n_points )
  !
  use pars,                ONLY:SP,schlen
@@ -16,8 +20,9 @@ subroutine k_circuit_made_of_special_points( PATH, n_points )
 &                              SPECIAL_k,n_SPECIAL_k_MAX,CIRCUIT_k,SPECIAL_k_label,&
 &                              n_SPECIAL_k_MAX,CIRCUIT_k_label,CIRCUIT_k_norm,CIRCUIT_made_of_SPECIAL_k
  use stderr,              ONLY:STRING_split,STRING_same,intc
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*) :: PATH
  integer      :: n_points
diff --git a/ypp/k-points/k_circuit_made_of_user_points.F b/ypp/k-points/k_circuit_made_of_user_points.F
index cc467bca46..e48787a7fc 100644
--- a/ypp/k-points/k_circuit_made_of_user_points.F
+++ b/ypp/k-points/k_circuit_made_of_user_points.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS CA AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_circuit_made_of_user_points( )
  !
  use pars,           ONLY:SP,schlen
@@ -12,8 +16,9 @@ subroutine k_circuit_made_of_user_points( )
  use vec_operate,    ONLY:c2a,v_norm
  use parser_m,       ONLY:PARSER_array
  use YPPm,           ONLY:BANDS_steps,USER_k,CIRCUIT_k,coo_in,coo_out,K_transform,PROJECT_mode
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work space
  !
diff --git a/ypp/k-points/k_find_border_and_merge.F b/ypp/k-points/k_find_border_and_merge.F
index c494f6096d..97050e7264 100644
--- a/ypp/k-points/k_find_border_and_merge.F
+++ b/ypp/k-points/k_find_border_and_merge.F
@@ -5,14 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_find_border_and_merge(FineGd_E,FineGd_k,n_FineGd_DBs,FineGd_io_E,FineGd_io_k,Xk,l_skip_border_pts)
  !
  use R_lattice, ONLY:bz_samp,k_the_nearest,bz_samp_reset
  use electrons, ONLY:levels,E_reset
  use stderr,    ONLY:intc
  use com,       ONLY:msg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)     :: n_FineGd_DBs 
  type(levels),  intent(inout)  :: FineGd_io_E(n_FineGd_DBs)
diff --git a/ypp/k-points/k_grids.F b/ypp/k-points/k_grids.F
index 8861ae7ef3..86548bb1fe 100644
--- a/ypp/k-points/k_grids.F
+++ b/ypp/k-points/k_grids.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_grids(en,k,Xk,q)
  !
  use pars,                ONLY:SP,schlen,zero_dfl
@@ -22,8 +26,9 @@ subroutine k_grids(en,k,Xk,q)
  use LIVE_t,              ONLY:live_timing_is_on
  use zeros,               ONLY:k_iku_zero
  use YPP_interfaces,      ONLY:k_special
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels) ::en       
  type(bz_samp)::k,Xk,q
diff --git a/ypp/k-points/k_map_fine_to_coarse.F b/ypp/k-points/k_map_fine_to_coarse.F
index e8eca347b4..fc6896dc9b 100644
--- a/ypp/k-points/k_map_fine_to_coarse.F
+++ b/ypp/k-points/k_map_fine_to_coarse.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_map_fine_to_coarse(zone,Xk,FG,FineGd_k,Xen)
  !
  use pars,                ONLY:SP
@@ -12,8 +16,9 @@ subroutine k_map_fine_to_coarse(zone,Xk,FG,FineGd_k,Xen)
  use electrons,           ONLY:levels
  use R_lattice,           ONLY:bz_samp,bz_fine_grid,bz_samp_FineGd_alloc
  use LIVE_t,              ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)      , intent(in)    :: zone
  type(bz_fine_grid), intent(inout) :: FG
diff --git a/ypp/k-points/k_map_fine_to_coarse_driver.F b/ypp/k-points/k_map_fine_to_coarse_driver.F
index 98e2eb3672..0f8f618fe1 100644
--- a/ypp/k-points/k_map_fine_to_coarse_driver.F
+++ b/ypp/k-points/k_map_fine_to_coarse_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_map_fine_to_coarse_driver(Xk,Xen)
  !
  use pars,                ONLY:SP
@@ -15,8 +19,9 @@ subroutine k_map_fine_to_coarse_driver(Xk,Xen)
  use electrons,           ONLY:levels,FineGd_E_components_reset
  use IO_int,              ONLY:io_control
  use IO_m,                ONLY:REP,OP_WR_CL
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: Xk
  type(levels)  :: Xen
diff --git a/ypp/k-points/k_map_fine_to_coarse_engine.F b/ypp/k-points/k_map_fine_to_coarse_engine.F
index 6ac0468c97..ab0d26679b 100644
--- a/ypp/k-points/k_map_fine_to_coarse_engine.F
+++ b/ypp/k-points/k_map_fine_to_coarse_engine.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_map_fine_to_coarse_engine(zone,pts_fg,N_pts_fg,pts_cg,N_pts_cg,MAP)
  !
  use pars,                ONLY:SP,rZERO,zero_dfl
@@ -14,8 +18,9 @@ subroutine k_map_fine_to_coarse_engine(zone,pts_fg,N_pts_fg,pts_cg,N_pts_cg,MAP)
  use parallel_int,        ONLY:PP_wait,PP_redux_wait,PARALLEL_index
  use LIVE_t,              ONLY:live_timing
  use com,                 ONLY:msg
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)      , intent(in)    :: zone
  integer           , intent(in)    :: N_pts_fg,N_pts_cg
diff --git a/ypp/k-points/k_map_fine_to_coarse_init.F b/ypp/k-points/k_map_fine_to_coarse_init.F
index 8e4071acbb..cffafd1b3e 100644
--- a/ypp/k-points/k_map_fine_to_coarse_init.F
+++ b/ypp/k-points/k_map_fine_to_coarse_init.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_map_fine_to_coarse_init(Xk,Xen,FineGd_k,FineGd_is_expanded,PH_grid)
  !
  use pars,                ONLY:SP,pi
@@ -25,8 +29,9 @@ subroutine k_map_fine_to_coarse_init(Xk,Xen,FineGd_k,FineGd_is_expanded,PH_grid)
  use stderr,              ONLY:STRING_same
  use vec_operate,         ONLY:c2a,v_is_zero
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in)     :: Xk
  type(levels),  intent(inout)  :: Xen
diff --git a/ypp/k-points/k_map_fine_to_coarse_reduce.F b/ypp/k-points/k_map_fine_to_coarse_reduce.F
index 921b1e2502..667834fe78 100644
--- a/ypp/k-points/k_map_fine_to_coarse_reduce.F
+++ b/ypp/k-points/k_map_fine_to_coarse_reduce.F
@@ -5,28 +5,41 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 logical function k_map_fine_to_coarse_reduce(FineGd_k)
  !
  use zeros,               ONLY:k_rlu_zero
  use pars,                ONLY:SP
  use vec_operate,         ONLY:c2a,rlu_v_is_zero
+ use parallel_m,          ONLY:PP_indexes,myid,PP_indexes_reset
+ use parallel_int,        ONLY:PP_wait,PP_redux_wait,PARALLEL_index
  use R_lattice,           ONLY:bz_samp
  use LIVE_t,              ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(inout)      :: FineGd_k
  !
  ! Work Space
  !
  integer :: ik,ip,unique(FineGd_k%nbz)
+ type(PP_indexes)     :: px
  real(SP):: v(3)
  !
  unique=1
  !
- call live_timing('Map Fine Grid to Reduced Coarse one',FineGd_k%nbz)
+ call PP_indexes_reset(px)
+ call PARALLEL_index(px,(/FineGd_k%nbz/))
+ call PP_wait()
+ !
+ call live_timing('Map Fine Grid to Reduced Coarse one',px%n_of_elements(myid+1))
  !
  do ik=1,FineGd_k%nbz
+   if (.not.px%element_1D(ik)) cycle
    call live_timing(steps=1)
    if (unique(ik)==0) cycle
    do ip=ik+1,FineGd_k%nbz
@@ -37,6 +50,8 @@ logical function k_map_fine_to_coarse_reduce(FineGd_k)
    enddo
  enddo
  ! 
+ call PP_redux_wait(unique,imode=2)
+ !
  call live_timing( )
  !
  k_map_fine_to_coarse_reduce=count(unique==1)==FineGd_k%nbz
diff --git a/ypp/k-points/k_random.F b/ypp/k-points/k_random.F
index c69a6a47b4..592b2301a9 100644
--- a/ypp/k-points/k_random.F
+++ b/ypp/k-points/k_random.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_random(Xk,No_Weight)
  !
  use pars,                ONLY:SP,DP,lchlen
@@ -17,8 +21,9 @@ subroutine k_random(Xk,No_Weight)
  use parallel_m,          ONLY:PP_indexes,myid,PP_indexes_reset
  use parallel_int,        ONLY:PP_wait,PP_redux_wait,PARALLEL_index 
  use parser_m,            ONLY:parser 
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp), intent(in) :: Xk
  logical, intent(in)       :: No_Weight
diff --git a/ypp/k-points/k_special.F b/ypp/k-points/k_special.F
index d5bcabd20f..1d354ed646 100644
--- a/ypp/k-points/k_special.F
+++ b/ypp/k-points/k_special.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine k_special(LIST_only)
  !
  ! Special k-points. Output in IKU.
@@ -26,8 +30,9 @@ subroutine k_special(LIST_only)
  use vec_operate,         ONLY:c2a
  use YPPm,                ONLY:K_transform,SPECIAL_k,n_SPECIAL_k_MAX,SPECIAL_k_label,coo_out
  use LIVE_t,              ONLY:live_timing_is_on
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Input 
  !
diff --git a/ypp/plotting/PROJECTION_plot.F b/ypp/plotting/PROJECTION_plot.F
index 81315d3127..295da90a7d 100644
--- a/ypp/plotting/PROJECTION_plot.F
+++ b/ypp/plotting/PROJECTION_plot.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine PROJECTION_plot( what, N, Ns, bands, W, TIME, OBS_K, OBS_B)
  !
  use pars,           ONLY:SP,schlen
@@ -19,8 +23,9 @@ subroutine PROJECTION_plot( what, N, Ns, bands, W, TIME, OBS_K, OBS_B)
 &                         l_PROJECT_plane,PROJECT_plane_X,PROJECT_plane_Y,PROJECT_steps
  use interpolate,    ONLY:GRID_k
  use stderr,         ONLY:intc,real2ch,STRING_remove
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)       :: what
  integer            :: N,Ns,bands(2)
diff --git a/ypp/plotting/plot_cube.F b/ypp/plotting/plot_cube.F
index b8eb9862be..85837fca94 100644
--- a/ypp/plotting/plot_cube.F
+++ b/ypp/plotting/plot_cube.F
@@ -11,7 +11,7 @@ subroutine plot_cube()
  use FFT_m,       ONLY:fft_dim 
  use C_driver,    ONLY:code_version
  use com,         ONLY:msg
- use YPPm,        ONLY:nr,v2plot,ncell,r_hole,l_free_hole,l_average_eh,&
+ use YPPm,        ONLY:nr,v2plot,ncell,r_fixed,l_free_hole,l_average_eh,&
 &                      l_norm_to_one,WF_multiplier,l_exc_wf,plot_title,output_string
  use D_lattice,   ONLY:n_atomic_species,n_atoms_species,a,atom_pos,Z_species,n_atoms
  use LIVE_t,      ONLY:live_timing
@@ -40,7 +40,7 @@ subroutine plot_cube()
  ! write the atoms position 
  !
  if ((.not.l_free_hole.and..not.l_average_eh).and.l_exc_wf) then
-   write(ch,'(2i5,3f10.5)') -1,-1,r_hole
+   write(ch,'(2i5,3f10.5)') -1,-1,r_fixed
    call msg(output_string,'',ch,INDENT=0,USE_TABS=.FALSE.)
  endif 
 
diff --git a/ypp/plotting/plot_gnuplot.F b/ypp/plotting/plot_gnuplot.F
index 194382e305..82dc84d447 100644
--- a/ypp/plotting/plot_gnuplot.F
+++ b/ypp/plotting/plot_gnuplot.F
@@ -5,17 +5,22 @@
 !
 ! Authors (see AUTHORS file for details): AM DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine plot_gnuplot(eval_only)
  !
  use pars,        ONLY:SP,schlen
  use units,       ONLY:BO2ANG
  use com,         ONLY:msg
- use YPPm,        ONLY:nr,v2plot,r_hole,v2plot2D,l_norm_to_one,WF_multiplier,plot_dim,&
+ use YPPm,        ONLY:nr,v2plot,r_fixed,v2plot2D,l_norm_to_one,WF_multiplier,plot_dim,&
 &                      plot_title,output_string,l_wavefunction,l_current,use_xcrysden,&
 &                      plot_is_1D,plot_is_2D,l_free_hole,l_average_eh
  use LIVE_t,      ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  logical    :: eval_only
  ! 
  ! Work Space...
@@ -45,7 +50,7 @@ subroutine plot_gnuplot(eval_only)
  ! SET CENTER OF THE PLOT
  !
  r_plot_center=0.
- if (.not.l_free_hole.and..not.l_average_eh) r_plot_center=r_hole
+ if (.not.l_free_hole.and..not.l_average_eh) r_plot_center=r_fixed
  !
  ! DIMENSIONs
  !
@@ -75,14 +80,6 @@ subroutine plot_gnuplot(eval_only)
      enddo
      v_max=maxval(v2plot2D(:,1))
      ! 
-     ! Daniele [15/7/2007]
-     ! Questo e' corretto, e' la normalizzazione. Io in una versione precedente
-     ! di questa subroutine, la parte 1d di gnuplot non la avevo normalizzata
-     ! a proposito. Siccome e' il plot piu' veloce, lo usavo per vedere il
-     ! valore assoluto. Quando si mette la hole, magari un po' casaccio, cosi'
-     ! vedi se stai pescando valori della wf, oppure no. Appunto nel caso che
-     ! uno mette la hole in un punto con poca densita'.
-     !
      if (l_norm_to_one) v2plot2D=v2plot2D/v_max  
      v2plot2D=v2plot2D*WF_multiplier
      !
diff --git a/ypp/plotting/plot_interpolated_values.F b/ypp/plotting/plot_interpolated_values.F
index 03d35e19cb..954fe5f505 100644
--- a/ypp/plotting/plot_interpolated_values.F
+++ b/ypp/plotting/plot_interpolated_values.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine plot_interpolated_values(Xk,Xen,Ef,LOCAL_k,IDs,dims,nquantities,nfiles,basename,int_kind,i_T)
  !
  use units,          ONLY:HA2EV
@@ -22,8 +26,9 @@ subroutine plot_interpolated_values(Xk,Xen,Ef,LOCAL_k,IDs,dims,nquantities,nfile
 #if defined _YPP_RT
  use YPP_real_time,  ONLY:GreenF_T_and_W
 #endif
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp),    intent(in)  :: Xk,LOCAL_k
  type(levels),     intent(in)  :: Xen
@@ -292,8 +297,9 @@ subroutine define_grid_map(Xk,Local_k,Grid_to_path_map,n_points_max)
  use pars,           ONLY:SP
  use R_lattice,      ONLY:bz_samp
  use vec_operate,    ONLY:v_norm,iku_v_norm
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp),  intent(in)  :: Xk,LOCAL_k
  integer,        intent(out) :: Grid_to_path_map(LOCAL_k%nbz,Xk%nbz)
diff --git a/ypp/plotting/plot_xcrysden.F b/ypp/plotting/plot_xcrysden.F
index 629db07ec3..ec30db2181 100644
--- a/ypp/plotting/plot_xcrysden.F
+++ b/ypp/plotting/plot_xcrysden.F
@@ -5,18 +5,23 @@
 !
 ! Authors (see AUTHORS file for details): AM DV
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine plot_xcrysden()
  !
  use pars,        ONLY:SP,schlen
  use units,       ONLY:BO2ANG
  use C_driver,    ONLY:code_version
  use com,         ONLY:msg
- use YPPm,        ONLY:nr,v2plot,ncell,v2plot2D,r_hole,plot_dim,l_free_hole,l_average_eh,&
+ use YPPm,        ONLY:nr,v2plot,ncell,v2plot2D,r_fixed,plot_dim,l_free_hole,l_average_eh,&
 &                      l_norm_to_one,WF_multiplier,l_exc_wf,plot_title,output_string,plot_is_2D
  use D_lattice,   ONLY:n_atomic_species,n_atoms_species,a,atom_pos,Z_species,n_atoms
  use LIVE_t,      ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space...
  !
@@ -44,7 +49,7 @@ subroutine plot_xcrysden()
    !
    ! write Hole position
    ! 
-   write(ch,'(i2,f10.5,f10.5,f10.5)') -1,r_hole*BO2ANG
+   write(ch,'(i2,f10.5,f10.5,f10.5)') -1,r_fixed*BO2ANG
    call msg(output_string,'',ch,INDENT=0,USE_TABS=.FALSE.)
  else
    call msg(output_string,'',(/n_atoms*ncell(1)*ncell(2)*ncell(3),1/))
diff --git a/ypp/qp/QP_DB_expand.F b/ypp/qp/QP_DB_expand.F
index 39ffe91c68..85a85ede33 100644
--- a/ypp/qp/QP_DB_expand.F
+++ b/ypp/qp/QP_DB_expand.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_DB_expand( k )
  !
  use pars,          ONLY:schlen
@@ -13,8 +17,9 @@ subroutine QP_DB_expand( k )
  use IO_int,        ONLY:io_control
  use IO_m,          ONLY:OP_WR_CL,DUMP,OP_RD_CL,LOG
  use QP_m,          ONLY:QP_t,QP_reset
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp)           :: k
  !
diff --git a/ypp/qp/QP_DBs_create_and_modify.F b/ypp/qp/QP_DBs_create_and_modify.F
index 764c31a6c8..683204d2cd 100644
--- a/ypp/qp/QP_DBs_create_and_modify.F
+++ b/ypp/qp/QP_DBs_create_and_modify.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_DBs_create_and_modify(en,k)
  !
  use pars,          ONLY:SP
@@ -15,8 +19,9 @@ subroutine QP_DBs_create_and_modify(en,k)
  use QP_m,          ONLY:QP_t,QP_reset,QP_state,QP_nb,QP_nk,QP_n_states,QP_alloc
  use R_lattice,     ONLY:bz_samp
  use YPPm,          ONLY:QP_user_corrections
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(in) :: en
  type(bz_samp),intent(in) :: k
diff --git a/ypp/qp/QP_DBs_manipulate.F b/ypp/qp/QP_DBs_manipulate.F
index 1bcb759bc3..0cbe963015 100644
--- a/ypp/qp/QP_DBs_manipulate.F
+++ b/ypp/qp/QP_DBs_manipulate.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine QP_DBs_manipulate( E )
  !
  use pars,          ONLY:schlen,msg_len
@@ -16,8 +20,9 @@ subroutine QP_DBs_manipulate( E )
  use QP_m,          ONLY:QP_t,QP_reset,QP_solver,QP_table,QP_n_states
  use interfaces,    ONLY:QP_DBs_merge,DESC_write
  use electrons,     ONLY:levels,n_sp_pol
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)     ::E
  !
diff --git a/ypp/real_time/.objects b/ypp/real_time/.objects
index 01a0a9c4bd..21daf3b4f7 100644
--- a/ypp/real_time/.objects
+++ b/ypp/real_time/.objects
@@ -1,7 +1,7 @@
 #if defined _YPP_RT
 RT_objects = RT_ypp_driver.o \
              RT_OBSERVABLES_IO_path.o RT_OBSERVABLES_IO.o \
-             RT_TRabs_driver.o RT_TRabs_residuals.o RT_TRabs_output.o \
+             RT_TRabs_driver.o RT_TRabs_residuals.o RT_TRabs_output.o RT_fields.o \
              RT_X_LRR_real_field.o RT_X_response.o RT_manual_excitation.o RT_DBs_carriers_setup.o \
              RT_OBSERVABLES_damp_and_write.o RT_1D_Fourier_setup.o RT_1D_Fourier_Transform.o \
              RT_occupations_driver.o RT_components_energy_plot.o RT_occ_time_plot.o RT_density.o \
diff --git a/ypp/real_time/DOUBLE_project.dep b/ypp/real_time/DOUBLE_project.dep
index e1f722a192..538a897ae5 100644
--- a/ypp/real_time/DOUBLE_project.dep
+++ b/ypp/real_time/DOUBLE_project.dep
@@ -26,6 +26,7 @@
  RT_damp_it.o
  RT_density.o
  RT_dos_time_plot.o
+ RT_fields.o
  RT_manual_excitation.o
  RT_occ_bands_interpolation.o
  RT_occ_time_plot.o
diff --git a/ypp/real_time/NL_exc_driver.F b/ypp/real_time/NL_exc_driver.F
index d61fde2721..f48d845c1d 100644
--- a/ypp/real_time/NL_exc_driver.F
+++ b/ypp/real_time/NL_exc_driver.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine NL_exc_driver(en,k)
  !
  use R_lattice,      ONLY:bz_samp
  use electrons,      ONLY:levels
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  type(bz_samp) :: k
diff --git a/ypp/real_time/NL_ypp_driver.F b/ypp/real_time/NL_ypp_driver.F
index 5e4834259e..c134bb65c7 100644
--- a/ypp/real_time/NL_ypp_driver.F
+++ b/ypp/real_time/NL_ypp_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): MG CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine NL_ypp_driver(en,k)
  !
  use YPP_real_time,  ONLY:X_order,RT_conf,RT_time,Pol,l_skip_OBS_IO,Pump_path
@@ -22,8 +26,9 @@ subroutine NL_ypp_driver(en,k)
  use stderr,         ONLY:intc,real2ch
  use RT_control,     ONLY:OBS_RT_IO_t
  use LIVE_t,         ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  type(bz_samp) :: k
@@ -119,7 +124,7 @@ subroutine NL_ypp_driver(en,k)
      RT_P_probe(:,i_f,i_d)=NL_P_t(:,i_d)
    enddo
    !
-   Probe_frequency(i_f)=Efield(i_Probe)%frequency(1)
+   Probe_frequency(i_f)=Efield(i_Probe)%frequency
    if(loop_on_angles) Probe_versors(:,i_f)=Efield(i_Probe)%versor(:)
    !
    if(io_Nonlinear_REF==0) then
@@ -215,13 +220,13 @@ subroutine NL_ypp_driver(en,k)
          !
          if(.not.runs_ok(i_f)) cycle
          !
-         if (i_order==1) then
-           Susceptibility(i_order,i_f,1)=4._SP*pi*dot_product( Efield(i_Probe)%versor(:),X_effective(i_order,i_f,:) )*&
-&                                        Divide_by_Field(Efield(i_Probe),i_order)
-	   Susceptibility(i_order,i_f,2:3)=cZERO
-         else 
+!         if (i_order==1) then
+!           Susceptibility(i_order,i_f,1)=4._SP*pi*dot_product( Efield(i_Probe)%versor(:),X_effective(i_order,i_f,:) )*&
+!&                                        Divide_by_Field(Efield(i_Probe),i_order)
+!	   Susceptibility(i_order,i_f,2:3)=cZERO
+!         else 
            Susceptibility(i_order,i_f,:)=X_effective(i_order,i_f,:)*Divide_by_Field(Efield(i_Probe),i_order)
-         end if
+!         end if
          !
        enddo
        ! 
@@ -400,6 +405,12 @@ subroutine plot_X_effective()
        !
        call DESC_write('o probe','#',NL_desc)
        !
+       if (X_order==1) then
+         call msg('o probe','#')
+         call msg('o probe','#',"xhi_ab(\omega)")
+         call msg('o probe','#',"multiply by 4 \pi and field versor to compare with epsilon")
+         call msg('o probe','#')
+       endif
        call msg('o probe','#')
        call msg('o probe','#',headings(1:n_headings,i_order),INDENT=0,USE_TABS=.TRUE.)
        call msg('o probe','#')
diff --git a/ypp/real_time/RT_1D_Fourier_Transform.F b/ypp/real_time/RT_1D_Fourier_Transform.F
index 5b2839e7bb..86160f9359 100644
--- a/ypp/real_time/RT_1D_Fourier_Transform.F
+++ b/ypp/real_time/RT_1D_Fourier_Transform.F
@@ -3,7 +3,7 @@
 !
 ! Copyright (C) 2016 The Yambo Team
 !
-! Authors (see AUTHORS file for details): AM CA
+! Authors (see AUTHORS file for details): AM CA DS
 !
 subroutine RT_1D_Fourier_Transform(mode,FT,RT,I_sign)
  !
@@ -44,7 +44,7 @@ subroutine RT_1D_Fourier_Transform(mode,FT,RT,I_sign)
  !===================
  if (mode=="T2W") then
    !
-   if (l_live_timing) call live_timing('FT@'//trim(FT%title)//'',RT%W_n_steps*FT%N)
+   if (l_live_timing) call live_timing('FT-'//mode//'@'//trim(FT%title)//'',RT%W_n_steps*FT%N)
    !
    FT%F_w=cZERO_DP
    !
@@ -90,7 +90,7 @@ subroutine RT_1D_Fourier_Transform(mode,FT,RT,I_sign)
    !
    FT%F_t=cZERO_DP
    !
-   if (l_live_timing) call live_timing('FT'//trim(FT%title)//'',RT%T_n_steps*FT%N)
+   if (l_live_timing) call live_timing('FT-'//mode//'@'//trim(FT%title)//'',RT%T_n_steps*FT%N)
    do i_c=1,FT%N
      do i_t = 1, RT%T_n_steps
        do i_w = 1, RT%W_n_steps
@@ -104,4 +104,46 @@ subroutine RT_1D_Fourier_Transform(mode,FT,RT,I_sign)
  endif
  !
 end subroutine RT_1D_Fourier_Transform
-
+!
+!
+subroutine RT_1D_Fourier_Chirp(FT,RT)
+ !
+ ! This subroutine adds a chirp in frequency space
+ !
+ use fields,         ONLY:Efield
+ use YPP_real_time,  ONLY:RT_time,Use_FFT,OBS_el,RT_ctl,ypp_chirp
+ use pars,           ONLY:DP,cI,cZERO_DP,pi
+ use LIVE_t,         ONLY:live_timing
+ !
+ implicit none
+ !
+ type(OBS_el),intent(inout)::FT
+ type(RT_ctl),intent(in)   ::RT
+ !
+ ! Work Space 
+ !
+ integer    :: i_w,i_t,i_c
+ logical    :: l_live_timing
+ complex(DP):: chirp_phase(RT%W_n_steps),dw
+ !
+ l_live_timing=len_trim(FT%title)>0
+ !
+ FT%F_t=cZERO_DP
+ dw=(FT%W(2)-FT%W(1))
+ do i_w = 1, RT%W_n_steps
+   chirp_phase(i_w)=exp(-cI*ypp_chirp**2*(FT%W(i_w)-Efield(1)%frequency)**2)
+ enddo
+ !
+ if (l_live_timing) call live_timing('FT-CHIRP@'//trim(FT%title)//'',RT%T_n_steps*FT%N)
+ do i_c=1,FT%N
+   do i_t = 1, RT%T_n_steps
+     do i_w = 1, RT%W_n_steps
+       FT%F_t(i_t,i_c) = FT%F_t(i_t,i_c)+FT%F_w(i_w,i_c)*chirp_phase(i_w)*exp(-cI*FT%W(i_w)*RT_time(i_t))
+     enddo
+     if (l_live_timing) call live_timing(steps=1)
+   enddo
+ enddo
+ FT%F_t=FT%F_t*dw/pi
+ if (l_live_timing) call live_timing()
+ !
+end subroutine RT_1D_Fourier_Chirp
diff --git a/ypp/real_time/RT_1D_Fourier_setup.F b/ypp/real_time/RT_1D_Fourier_setup.F
index 073f1504f0..2fffff7318 100644
--- a/ypp/real_time/RT_1D_Fourier_setup.F
+++ b/ypp/real_time/RT_1D_Fourier_setup.F
@@ -5,12 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_1D_Fourier_setup(FT,RT,N)
  !
  use YPP_real_time,  ONLY:OBS_el,Use_FFT,RT_ctl
  use pars,           ONLY:SP,cZERO_DP,cZERO
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(OBS_el) :: FT
  type(RT_ctl) :: RT
diff --git a/ypp/real_time/RT_G_two_times_build.F b/ypp/real_time/RT_G_two_times_build.F
index 2fe03ba163..3ff2d3101d 100644
--- a/ypp/real_time/RT_G_two_times_build.F
+++ b/ypp/real_time/RT_G_two_times_build.F
@@ -5,15 +5,19 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_G_two_times_build(en,kpt,qpt)
  !
  use pars,           ONLY:SP,DP,cZERO,cI,cONE,cZERO,schlen
  use units,          ONLY:FS2AUT,HA2EV
  use R_lattice,      ONLY:bz_samp
- use electrons,      ONLY:levels
+ use electrons,      ONLY:levels,n_sp_pol
  use parser_m,       ONLY:parser
  use matrix_operate, ONLY:hermitian
- use stderr,         ONLY:real2ch
+ use stderr,         ONLY:real2ch,intc
  use wrapper,        ONLY:M_by_M
  use com,            ONLY:msg,of_open_close
  use IO_int,         ONLY:io_control
@@ -26,8 +30,9 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
 &                         RT_dyn_step,G_MEM_steps,Ho_plus_Sigma,H_EQ,I1_matrix,RTibz
  use YPP_real_time,  ONLY:RT_time,RT_conf,GreenF_T_and_W,Rho_deph
  use LIVE_t,         ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),  intent(in)    :: en
  type(bz_samp), intent(in)    :: kpt,qpt
@@ -35,10 +40,7 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
  ! Workspace
  !
  real(SP)              :: T_c,REF_diff,TIME_diff,deph_factor
-#if !defined _PAR_IO
- integer               :: IO_ACT
-#endif
- integer               :: ID,ik,ik_RT,ib1,ib2,io_err,i_T(2),i_w,i_T1,i_T2,i_Tc,i_kind,i_t_tmp,Nt
+ integer               :: ID,i_sp,ik,ik_RT,ib1,ib2,io_err,i_T(2),i_w,i_T1,i_T2,i_Tc,i_kind,i_t_tmp,Nt
  character(2)          :: function_name
  logical               :: include_eq_occ,keep_cc,keep_vv,keep_cv,keep_vc,&
 &                         build_G_ret,build_G_adv,build_G_les,build_G_grt,build_Spect,&
@@ -48,8 +50,8 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
  !
  ! Allocs
  !
- complex(SP), allocatable :: rho_T(:,:,:),b_rho_T(:,:,:),H_rho_T(:,:,:,:),&
-&                            F_k_tmtp(:,:),G_ret(:,:,:),G_tmp(:,:),TMP_M(:,:,:),&
+ complex(SP), allocatable :: rho_T(:,:,:,:),b_rho_T(:,:,:,:),H_rho_T(:,:,:,:,:),&
+&                            F_k_tmtp(:,:,:),G_ret(:,:,:,:),G_tmp(:,:),TMP_M(:,:,:),&
 &                            G_w(:)
  !
  integer, external     :: io_RT_components
@@ -95,32 +97,32 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
  YAMBO_ALLOC(G_tmp,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2)))
  G_tmp=cZERO
  !
- YAMBO_ALLOC(  rho_T,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
- YAMBO_ALLOC(b_rho_T,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
- YAMBO_ALLOC(H_rho_T,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,4))
+ YAMBO_ALLOC(  rho_T,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
+ YAMBO_ALLOC(b_rho_T,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
+ YAMBO_ALLOC(H_rho_T,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol,4))
    rho_T=cZERO
  b_rho_T=cZERO
  H_rho_T=cZERO
  !
- YAMBO_ALLOC(Ho_plus_Sigma,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,1))
+ YAMBO_ALLOC(Ho_plus_Sigma,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
  Ho_plus_Sigma=cZERO
- YAMBO_ALLOC(H_EQ,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,1))
+ YAMBO_ALLOC(H_EQ,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
  H_EQ=cZERO
- YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,1))
+ YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol,1))
  dG_lesser=cZERO
  !
  if(include_eq_occ) then
-   YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
+   YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
    call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1,2/),ID=ID) 
    io_err=io_RT_components('REF',ID)
  endif
  !
- YAMBO_ALLOC(G_ret,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
+ YAMBO_ALLOC(G_ret,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
  G_ret=cZERO
  !
  ! I need to keep k resolution to interpolate on a path
  !
- YAMBO_ALLOC(F_k_tmtp,(RT_nk,RT_conf%T_n_steps)) ! F(k,t-t') for a given T
+ YAMBO_ALLOC(F_k_tmtp,(RT_nk,n_sp_pol,RT_conf%T_n_steps)) ! F(k,t-t') for a given T
  F_k_tmtp=cZERO
  !
  YAMBO_ALLOC(TMP_M,    (RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),2))
@@ -151,59 +153,51 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
  !
  ! Load rho and H[rho]
  !
-#if defined _PAR_IO
  call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/3/),ID=ID)
  io_err=io_RT_components('G_lesser_K_section',ID)
-#else
- do ik = 1, RT_nk
-   IO_ACT=manage_action(OP_IF_START_RD_CL_IF_END,ik,1,RT_nk)
-   call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/ik+1/),ID=ID)
-   io_err=io_RT_components('G_lesser_K_section',ID)
- enddo
-#endif
  !
  ! rho and 1-rho
  !
+ do i_sp=1,n_sp_pol
  do ik=1,RT_nk
    !
-   if(     include_eq_occ) TMP_M(:,:,1)=           -cI*G_lesser_reference(:,:,ik)
+   if(     include_eq_occ) TMP_M(:,:,1)=           -cI*G_lesser_reference(:,:,ik,i_sp)
    if(.not.include_eq_occ) TMP_M(:,:,1)=  cZERO
    !
-   if(     include_eq_occ) TMP_M(:,:,2)=( I1_matrix+cI*G_lesser_reference(:,:,ik) )
-   if(.not.include_eq_occ) TMP_M(:,:,2)=( I1_matrix                               )
+   if(     include_eq_occ) TMP_M(:,:,2)=  I1_matrix+cI*G_lesser_reference(:,:,ik,i_sp)
+   if(.not.include_eq_occ) TMP_M(:,:,2)=  I1_matrix
    !
-   b_rho_T(:,:,ik)= TMP_M(:,:,2)-rho_T(:,:,ik)
-     rho_T(:,:,ik)= TMP_M(:,:,1)+rho_T(:,:,ik)
+   b_rho_T(:,:,ik,i_sp)= TMP_M(:,:,2)-rho_T(:,:,ik,i_sp)
+     rho_T(:,:,ik,i_sp)= TMP_M(:,:,1)+rho_T(:,:,ik,i_sp)
+   !
+   ! Build G_ret
+   !
+   G_ret(:,:,ik,i_sp)=-cI*I1_matrix(:,:)
+   !
+   H_rho_T(:,:,ik,i_sp,3)=H_EQ(:,:,ik,i_sp)+Ho_plus_sigma(:,:,ik,i_sp)
+   H_rho_T(:,:,ik,i_sp,4)=H_EQ(:,:,ik,i_sp)+Ho_plus_sigma(:,:,ik,i_sp)
    !
- enddo
- !
- ! Build G_ret
- !
- do ik=1,RT_nk
-   G_ret(:,:,ik)=-cI*I1_matrix(:,:)
  enddo 
- !
- H_rho_T(:,:,:,3)=H_EQ(:,:,:,1)+Ho_plus_sigma(:,:,:,1)
- H_rho_T(:,:,:,4)=H_EQ(:,:,:,1)+Ho_plus_sigma(:,:,:,1)
- !
+ enddo
  !
  ! Build G_lesser / G_ret / Spectr
  !
+ do i_sp=1,n_sp_pol
  do ik=1,RT_nk
    !
    if(build_G_les) then
-     call M_by_M('n','n',RT_nbands,-G_ret(:,:,ik),rho_T(:,:,ik),cZERO,G_tmp)
-     call M_by_M('n','c',RT_nbands,+rho_T(:,:,ik),G_ret(:,:,ik),cONE ,G_tmp)
+     call M_by_M('n','n',RT_nbands,-G_ret(:,:,ik,i_sp),rho_T(:,:,ik,i_sp),cZERO,G_tmp)
+     call M_by_M('n','c',RT_nbands,+rho_T(:,:,ik,i_sp),G_ret(:,:,ik,i_sp),cONE ,G_tmp)
    endif
    !
    if(build_G_grt) then
-     call M_by_M('n','n',RT_nbands,+G_ret(:,:,ik),b_rho_T(:,:,ik),G_tmp)
-     call M_by_M('n','c',RT_nbands,-b_rho_T(:,:,ik),G_ret(:,:,ik),G_tmp)
+     call M_by_M('n','n',RT_nbands,+G_ret(:,:,ik,i_sp),b_rho_T(:,:,ik,i_sp),G_tmp)
+     call M_by_M('n','c',RT_nbands,-b_rho_T(:,:,ik,i_sp),G_ret(:,:,ik,i_sp),G_tmp)
    endif
    !
-   if(build_G_ret) G_tmp=G_ret(:,:,ik)
-   if(build_G_adv) G_tmp=              hermitian(G_ret(:,:,ik))
-   if(build_Spect) G_tmp=G_ret(:,:,ik)-hermitian(G_ret(:,:,ik))
+   if(build_G_ret) G_tmp=G_ret(:,:,ik,i_sp)
+   if(build_G_adv) G_tmp=                   hermitian(G_ret(:,:,ik,i_sp))
+   if(build_Spect) G_tmp=G_ret(:,:,ik,i_sp)-hermitian(G_ret(:,:,ik,i_sp))
    !
    do ib1=RT_bands(1),RT_bands(2)
      do ib2=RT_bands(1),RT_bands(2)
@@ -213,11 +207,12 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
        if((ib1<=en%nbf(1).and.ib2> en%nbf(1)).and.(.not.keep_vc) ) cycle
        !if ( build_G_ret .and. (ib1> en%nbf(1).and.ib2<=en%nbf(1)).and. keep_cv ) G_tmp(ib1,ib2)=G_tmp(ib1,ib2)*rho_T(ib2,ib1,ik)
        !if ( build_G_ret .and. (ib1<=en%nbf(1).and.ib2> en%nbf(1)).and. keep_vc ) G_tmp(ib1,ib2)=G_tmp(ib1,ib2)*rho_T(ib2,ib1,ik)
-       F_k_tmtp(ik,i_Tc)=F_k_tmtp(ik,i_Tc)+G_tmp(ib1,ib2)
+       F_k_tmtp(ik,i_sp,i_Tc)=F_k_tmtp(ik,i_sp,i_Tc)+G_tmp(ib1,ib2)
      enddo
    enddo
    !
  enddo
+ enddo
  !
  deph_factor=1._SP
  !
@@ -246,19 +241,12 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
      !
      ! Load rho and H[rho]
      !
-#if defined _PAR_IO
      call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/3/),ID=ID)
      io_err=io_RT_components('G_lesser_K_section',ID)
-#else
-     do ik = 1, RT_nk 
-       IO_ACT=manage_action(OP_IF_START_RD_CL_IF_END,ik,1,RT_nk)
-       call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/ik+2/),ID=ID)
-       io_err=io_RT_components('G_lesser_K_section',ID)
-     enddo
-#endif
+     !
      ! exp(-abs(RT_time(i_t)*damp_factor))
      !
-     if(.not.l_dephase_rho) H_rho_T(:,:,:,i_kind)=H_EQ(:,:,:,1)+Ho_plus_sigma(:,:,:,1)
+     if(.not.l_dephase_rho) H_rho_T(:,:,:,:,i_kind)=H_EQ+Ho_plus_sigma
      !
      if(l_dephase_rho) then
        !
@@ -267,7 +255,7 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
            deph_factor=1._SP
            if((ib1> en%nbf(1).and.ib2<=en%nbf(1))) deph_factor=exp(-Rho_deph*RT_time(i_T(i_kind)))
            if((ib1<=en%nbf(1).and.ib2> en%nbf(1))) deph_factor=exp(-Rho_deph*RT_time(i_T(i_kind)))
-           H_rho_T(ib1,ib2,:,i_kind)=H_EQ(ib1,ib2,:,1)+Ho_plus_sigma(ib1,ib2,:,1)*deph_factor
+           H_rho_T(ib1,ib2,:,:,i_kind)=H_EQ(ib1,ib2,:,:)+Ho_plus_sigma(ib1,ib2,:,:)*deph_factor
          enddo
        enddo
        !
@@ -275,7 +263,7 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
      !
      if(i_kind==2) then
        !
-       if(.not.l_dephase_rho) rho_T(:,:,:)=-cI*dG_lesser(:,:,:,1)
+       if(.not.l_dephase_rho) rho_T(:,:,:,:)=-cI*dG_lesser(:,:,:,:,1)
        !
        if(l_dephase_rho) then
          !
@@ -284,7 +272,7 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
              deph_factor=1._SP
              if((ib1> en%nbf(1).and.ib2<=en%nbf(1))) deph_factor=exp(-Rho_deph*RT_time(i_T(i_kind)))
              if((ib1<=en%nbf(1).and.ib2> en%nbf(1))) deph_factor=exp(-Rho_deph*RT_time(i_T(i_kind)))
-             rho_T(ib1,ib2,:)=-cI*dG_lesser(ib1,ib2,:,1)*deph_factor
+             rho_T(ib1,ib2,:,:)=-cI*dG_lesser(ib1,ib2,:,:,1)*deph_factor
            enddo
          enddo
          !
@@ -292,18 +280,20 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
        !
        ! rho and 1-rho
        !
+       do i_sp=1,n_sp_pol
        do ik=1,RT_nk
          !
-         if(     include_eq_occ) TMP_M(:,:,1)=           -cI*G_lesser_reference(:,:,ik)
+         if(     include_eq_occ) TMP_M(:,:,1)=           -cI*G_lesser_reference(:,:,ik,i_sp)
          if(.not.include_eq_occ) TMP_M(:,:,1)=  cZERO
          !
-         if(     include_eq_occ) TMP_M(:,:,2)=( I1_matrix+cI*G_lesser_reference(:,:,ik) )
-         if(.not.include_eq_occ) TMP_M(:,:,2)=( I1_matrix                               )
+         if(     include_eq_occ) TMP_M(:,:,2)=  I1_matrix+cI*G_lesser_reference(:,:,ik,i_sp)
+         if(.not.include_eq_occ) TMP_M(:,:,2)=  I1_matrix                               
          !
-         b_rho_T(:,:,ik)= TMP_M(:,:,2)-rho_T(:,:,ik)
-           rho_T(:,:,ik)= TMP_M(:,:,1)+rho_T(:,:,ik)
+         b_rho_T(:,:,ik,i_sp)= TMP_M(:,:,2)-rho_T(:,:,ik,i_sp)
+           rho_T(:,:,ik,i_sp)= TMP_M(:,:,1)+rho_T(:,:,ik,i_sp)
          !
        enddo
+       enddo
        !
      endif
      !
@@ -311,17 +301,20 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
    !
    ! G^{(r)}(t+dt,t'-dt) = e^{-ih_{HSEX}[rho(t+dt)]dt} G^{(r)}(t,t') e^{-ih_{HSEX}[rho(t'-dt)]dt} 
    !
+   do i_sp=1,n_sp_pol
    do ik=1,RT_nk
-     TMP_M(:,:,1)=(H_rho_T(:,:,ik,1)+H_rho_T(:,:,ik,3))/2._SP
-     TMP_M(:,:,2)=(H_rho_T(:,:,ik,2)+H_rho_T(:,:,ik,4))/2._SP
-     call RT_apply_Texp(TMP_M(:,:,1),G_ret(:,:,ik),TMP_M(:,:,2),RT_conf%delta_T,RT_conf%damp_factor,5)  
+     TMP_M(:,:,1)=(H_rho_T(:,:,ik,i_sp,1)+H_rho_T(:,:,ik,i_sp,3))/2._SP
+     TMP_M(:,:,2)=(H_rho_T(:,:,ik,i_sp,2)+H_rho_T(:,:,ik,i_sp,4))/2._SP
+     call RT_apply_Texp(TMP_M(:,:,1),G_ret(:,:,ik,i_sp),TMP_M(:,:,2),RT_conf%delta_T,RT_conf%damp_factor,5)  
+   enddo
    enddo
    !
    ! Store previous H_rho_T
    !
-   H_rho_T(:,:,:,3)=H_rho_T(:,:,:,1)
-   H_rho_T(:,:,:,4)=H_rho_T(:,:,:,2)
+   H_rho_T(:,:,:,:,3)=H_rho_T(:,:,:,:,1)
+   H_rho_T(:,:,:,:,4)=H_rho_T(:,:,:,:,2)
    !
+   do i_sp=1,n_sp_pol
    do ik=1,RT_nk
      !
      if( build_G_les ) then
@@ -329,7 +322,7 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
        ! Build G<(t,t') = -G(r)(t,t') rho(t') for t>t'
        ! PRA 92, 033419 (2015), Eq.(19a)
        !
-       call M_by_M('n','n',RT_nbands,-G_ret(:,:,ik),rho_T(:,:,ik),G_tmp)
+       call M_by_M('n','n',RT_nbands,-G_ret(:,:,ik,i_sp),rho_T(:,:,ik,i_sp),G_tmp)
        !
      endif
      !
@@ -338,13 +331,13 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
        ! Build G>(t,t') = +G(r)(t,t') (1-rho(t')) for t>t'
        ! PRA 92, 033419 (2015), Eq.(19b)
        !
-       call M_by_M('n','n',RT_nbands,+G_ret(:,:,ik),b_rho_T(:,:,ik),G_tmp)
+       call M_by_M('n','n',RT_nbands,+G_ret(:,:,ik,i_sp),b_rho_T(:,:,ik,i_sp),G_tmp)
        !
      endif
      !
-     if( build_G_ret ) G_tmp= G_ret(:,:,ik)
-     if( build_G_adv ) G_tmp=-G_ret(:,:,ik)
-     if( build_Spect ) G_tmp= G_ret(:,:,ik)
+     if( build_G_ret ) G_tmp= G_ret(:,:,ik,i_sp)
+     if( build_G_adv ) G_tmp=-G_ret(:,:,ik,i_sp)
+     if( build_Spect ) G_tmp= G_ret(:,:,ik,i_sp)
      !
      ! PRA 92, 033419 (2015), Below Eq.(19)
      !
@@ -364,13 +357,14 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
          !if ( build_G_adv .and. (ib1> en%nbf(1).and.ib2<=en%nbf(1)).and. keep_cv ) G_tmp(ib2,ib1)=rho_T(ib1,ib2,ik)*G_tmp(ib1,ib2)
          !if ( build_G_adv .and. (ib1<=en%nbf(1).and.ib2> en%nbf(1)).and. keep_vc ) G_tmp(ib2,ib1)=rho_T(ib1,ib2,ik)*G_tmp(ib1,ib2)
          !
-         if( .not.build_G_adv ) F_k_tmtp(ik,i_T(1))=F_k_tmtp(ik,i_T(1))+      G_tmp(ib1,ib2)  ! G_adv is zero for t>t'
-         if( .not.build_G_ret ) F_k_tmtp(ik,i_T(2))=F_k_tmtp(ik,i_T(2))-conjg(G_tmp(ib2,ib1)) ! G_ret is zero for t<t'
+         if( .not.build_G_adv ) F_k_tmtp(ik,i_sp,i_T(1))=F_k_tmtp(ik,i_sp,i_T(1))+      G_tmp(ib1,ib2)  ! G_adv is zero for t>t'
+         if( .not.build_G_ret ) F_k_tmtp(ik,i_sp,i_T(2))=F_k_tmtp(ik,i_sp,i_T(2))-conjg(G_tmp(ib2,ib1)) ! G_ret is zero for t<t'
          !
        enddo !ib1
      enddo   !ib2
      !
    enddo     !ik
+   enddo     !i_sp_pol
    !
    call live_timing(steps=1)
    !
@@ -401,8 +395,10 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
  !
  call RT_1D_Fourier_setup(GreenF_T_and_W,RT_conf,RT_nk) 
  !
+ do i_sp=1,n_sp_pol
  do ik=1,RT_nk
-   GreenF_T_and_W%F_T(:,ik)=cmplx(F_k_tmtp(ik,:),kind=DP)
+   GreenF_T_and_W%F_T(:,ik+RT_nk*(i_sp-1))=cmplx(F_k_tmtp(ik,i_sp,:),kind=DP)
+ enddo
  enddo
  !
  YAMBO_FREE(F_k_tmtp)
@@ -413,59 +409,71 @@ subroutine RT_G_two_times_build(en,kpt,qpt)
  !
  ! Print G(w) integrated over k
  !
- file_name="G_w_integrated"
- call of_open_close(trim(file_name),'ot')
- headings=(/" E[eV]  ","Im[G(w)]","Re[G(w)]"/)
- call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
- !
  YAMBO_ALLOC(G_w,(RT_conf%W_n_steps))
  !
- G_w=cZERO
- do ik_RT=1,RT_nk
-   G_w(:)=G_w(:)+GreenF_T_and_W%F_W(:,ik_RT)*RTibz%weights(ik_RT)
- enddo
- !
- do i_w=1,RT_conf%W_n_steps
-   odata=(/real(GreenF_T_and_W%W(i_w),SP)*HA2EV,real(-cI*G_w(i_w),SP),real(G_w(i_w),SP)/)
-   call msg('o '//trim(file_name),'',odata,INDENT=-2,USE_TABS=.true.)
+ do i_sp=1,n_sp_pol
+   !
+   file_name="G_w_integrated"
+   if (n_sp_pol>1) file_name=trim(file_name)//"_sp"//trim(intc(i_sp))
+   call of_open_close(trim(file_name),'ot')
+   headings=(/" E[eV]  ","Im[G(w)]","Re[G(w)]"/)
+   call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
+   !
+   G_w=cZERO
+   do ik_RT=1,RT_nk
+     G_w(:)=G_w(:)+GreenF_T_and_W%F_W(:,ik_RT+RT_nk*(i_sp-1))*RTibz%weights(ik_RT)
+   enddo
+   !
+   do i_w=1,RT_conf%W_n_steps
+     odata=(/real(GreenF_T_and_W%W(i_w),SP)*HA2EV,real(-cI*G_w(i_w),SP),real(G_w(i_w),SP)/)
+     call msg('o '//trim(file_name),'',odata,INDENT=-2,USE_TABS=.true.)
+   enddo
+   !
+   call of_open_close(trim(file_name))
+   !
  enddo
  !
- call of_open_close(trim(file_name))
- !
  YAMBO_FREE(G_w)
  !
  ! Print G(w,k) for specific k  (only gamma for now)
  !
- file_name="G_k1_w"
- call of_open_close(trim(file_name),'ot')
- headings=(/" E[eV]  ","Im[G(w)]","Re[G(w)]"/)
- call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
- !
- do i_w=1,RT_conf%W_n_steps
-   odata=(/real(GreenF_T_and_W%W(i_w),SP)*HA2EV,    &
-   &      -real(cI*GreenF_T_and_W%F_W(i_w,1),SP),   &
-   &       real(   GreenF_T_and_W%F_W(i_w,1),SP)     /)
-   call msg('o '//trim(file_name),'',odata,INDENT=-2,USE_TABS=.true.)
+ do i_sp=1,n_sp_pol
+  !
+   file_name="G_k1_w"
+   if (n_sp_pol>1) file_name="G_k1_sp"//trim(intc(i_sp))//"_w"
+   call of_open_close(trim(file_name),'ot')
+   headings=(/" E[eV]  ","Im[G(w)]","Re[G(w)]"/)
+   call msg('o '//trim(file_name),"#",headings,INDENT=0,USE_TABS=.true.)
+   !
+   do i_w=1,RT_conf%W_n_steps
+     odata=(/real(GreenF_T_and_W%W(i_w),SP)*HA2EV,    &
+     &      -real(cI*GreenF_T_and_W%F_W(i_w,1+RT_nk*(i_sp-1)),SP),   &
+     &       real(   GreenF_T_and_W%F_W(i_w,1+RT_nk*(i_sp-1)),SP)     /)
+     call msg('o '//trim(file_name),'',odata,INDENT=-2,USE_TABS=.true.)
+   enddo
+   !
+   call of_open_close(trim(file_name))
+   !
  enddo
  !
- call of_open_close(trim(file_name))
- !
  ! DEBUG <
+ !do i_sp=1,n_sp_pol
  !do ik=1,RT_nk
  !  !
  !  if(ik>99) cycle
  !  !
  !  do i_dT=1,RT_conf%T_n_steps
  !    write(100+ik,*) RT_time(i_dT)/FS2AUT,&
- !    &  real(GreenF_T_and_W%F_T(i_dT,ik),SP),real(-cI*GreenF_T_and_W%F_T(i_dT,ik),SP)
+ !    &  real(GreenF_T_and_W%F_T(i_dT,ik+RT_nk*(i_sp-1)),SP),real(-cI*GreenF_T_and_W%F_T(i_dT,ik+RT_nk*(i_sp-1)),SP)
  !  enddo
  !  !
  !  do i_w=1,RT_conf%W_n_steps
  !    write(200+ik,*) real(GreenF_T_and_W%W(i_w))*HA2EV,&
- !    & real(-cI*GreenF_T_and_W%F_W(i_w,ik),SP),real(GreenF_T_and_W%F_W(i_w,ik),SP)
+ !    & real(-cI*GreenF_T_and_W%F_W(i_w,ik+RT_nk*(i_sp-1)),SP),real(GreenF_T_and_W%F_W(i_w,ik+RT_nk*(i_sp-1)),SP)
  !  enddo
  !  !
  !enddo
+ !enddo
  ! DEBUG >
  !
 end subroutine RT_G_two_times_build
diff --git a/ypp/real_time/RT_G_two_times_interp_and_plot.F b/ypp/real_time/RT_G_two_times_interp_and_plot.F
index c8d3d3ba84..503ed91e9d 100644
--- a/ypp/real_time/RT_G_two_times_interp_and_plot.F
+++ b/ypp/real_time/RT_G_two_times_interp_and_plot.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_G_two_times_interp_and_plot(en,kpt,qpt)
  !
  use pars,           ONLY:SP
@@ -17,8 +21,9 @@ subroutine RT_G_two_times_interp_and_plot(en,kpt,qpt)
  use R_lattice,      ONLY:bz_samp
  use D_lattice,      ONLY:lattice
  use interpolate,    ONLY:INTERPOLATION_driver_seed,INTERPOLATION_driver_do
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  type(bz_samp) :: kpt,qpt,RT_k
diff --git a/ypp/real_time/RT_OBSERVABLES_IO.F b/ypp/real_time/RT_OBSERVABLES_IO.F
index b25c2a6604..ead5ceb4df 100644
--- a/ypp/real_time/RT_OBSERVABLES_IO.F
+++ b/ypp/real_time/RT_OBSERVABLES_IO.F
@@ -5,18 +5,24 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_OBSERVABLES_IO(what,Curr,Pol,IO_error,first_sec_only,G_lesser_only,COM_)
  !
  use YPP_real_time,  ONLY:RT_conf,OBS_el
  use pars,           ONLY:DP,cZERO_DP,cZERO
  use R_lattice,      ONLY:nkibz
+ use electrons,      ONLY:n_sp_pol
  use IO_int,         ONLY:io_control
  use IO_m,           ONLY:OP_RD,RD_CL,OP_RD_CL,manage_action,NONE,DUMP,LOG,OP_IF_START_RD_CL_IF_END
  use RT_control,     ONLY:RT_control_alloc,RT_control_free,J_cache,P_cache,CACHE_OBS_steps,CACHE_OBS_steps_now
  use real_time,      ONLY:NE_steps,NE_i_time,G_lesser,G_lesser_reference,RT_bands,RT_dyn_step,RT_step
  use fields,         ONLY:Efield,n_ext_fields,Efield_strength
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)           :: what
  type(OBS_el), optional :: Curr,Pol
@@ -37,9 +43,9 @@ subroutine RT_OBSERVABLES_IO(what,Curr,Pol,IO_error,first_sec_only,G_lesser_only
    !
    if (G_lesser_only) then
      !
-     YAMBO_ALLOC(G_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nkibz,1))
+     YAMBO_ALLOC(G_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nkibz,n_sp_pol,1))
      !
-     YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nkibz))
+     YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),nkibz,n_sp_pol))
      !
      call io_control(ACTION=OP_RD_CL,COM=COM_here,MODE=DUMP,SEC=(/1,2/),ID=ID) 
      io_G=io_RT_components('G_lesser_RESTART',ID)
@@ -47,11 +53,8 @@ subroutine RT_OBSERVABLES_IO(what,Curr,Pol,IO_error,first_sec_only,G_lesser_only
      if (present(IO_error)) IO_error=io_G/=0
      if (io_G/=0) return
      !
-     do ik=1,nkibz
-       IO_ACT=manage_action(OP_IF_START_RD_CL_IF_END,ik,1,nkibz)
-       call io_control(ACTION=IO_ACT,COM=COM_here,SEC=(/ik+2/),MODE=DUMP,ID=ID)
-       io_G=io_RT_components('G_lesser_RESTART_K_section',ID)
-     enddo
+     call io_control(ACTION=OP_RD_CL,COM=COM_here,SEC=(/3/),MODE=DUMP,ID=ID)
+     io_G=io_RT_components('G_lesser_RESTART_K_section',ID)
      !
    endif
    !
diff --git a/ypp/real_time/RT_OBSERVABLES_IO_path.F b/ypp/real_time/RT_OBSERVABLES_IO_path.F
index a0713c7aa9..5e9aad6e7d 100644
--- a/ypp/real_time/RT_OBSERVABLES_IO_path.F
+++ b/ypp/real_time/RT_OBSERVABLES_IO_path.F
@@ -9,7 +9,7 @@ subroutine RT_OBSERVABLES_IO_paths(what,en)
  !
  use YPP_interfaces, ONLY:RT_OBSERVABLES_IO
  use YPP_real_time,  ONLY:Probe_Keyword,Pump_Keyword,max_n_of_paths,Pump_path,Probe_path,&
-&                         N_pumps,N_probes,N_probe_frequencies
+&                         N_pumps,N_probes,N_probe_frequencies,X_order
  use RT_output_m,    ONLY:RT_desc
  use com,            ONLY:core_io_path,msg,jobstr
  use IO_m,           ONLY:NONE
@@ -57,12 +57,13 @@ subroutine RT_OBSERVABLES_IO_paths(what,en)
    ! Remove "." and ".."
    !
    i_f=1
-   do while (i_f<nf)
+   do while (i_f<=nf)
      if ( trim(Probe_path(i_f)) == "." .or. trim(Probe_path(i_f)) == ".." ) then
        forall (i_fp=i_f+1:nf) Probe_path(i_fp-1)=Probe_path(i_fp)
        nf=nf-1
+     else
+       i_f=i_f+1
      endif
-     i_f=i_f+1
    enddo
    !
    Pump_path="none"
@@ -81,7 +82,9 @@ subroutine RT_OBSERVABLES_IO_paths(what,en)
 &         (trim(Probe_Keyword)=="none".and.index(trim(Probe_path(i_f)),trim(jobstr))>0)) then
          N_probes =N_probes+1
          i_f=i_f+1
+         cycle
        else if ( index(trim(Probe_path(i_f)),trim(Pump_Keyword))>0 ) then
+         call msg('s',' Candidate pump found in ',trim(Probe_path(i_f)))
          Pump_path=Probe_path(i_f)
          N_pumps=1
        endif
@@ -110,11 +113,12 @@ subroutine RT_OBSERVABLES_IO_paths(what,en)
      call IO_temporary_jobdir_and_strings("JOBSTR",Pump_path)
      call RT_OBSERVABLES_IO(what=what,IO_error=IO_error,first_sec_only=.TRUE.,COM_=NONE)
      if (.not.IO_error) then
-       N_pump_frequencies= Efield(i_Pump)%n_frequencies
+       N_pump_frequencies=1
        call IO_desc_reset(Pump_RT_desc)
        call IO_desc_duplicate(RT_desc,Pump_RT_desc)
        if (N_pump_frequencies>1) call error('Pumps with multiple frequencies still not supported')
      else
+       call warning('Pump discarded due to IO_error in RT_OBSERVABLES') 
        N_pumps=0
      endif
    endif
@@ -125,20 +129,26 @@ subroutine RT_OBSERVABLES_IO_paths(what,en)
      if (.not.STRING_same(Probe_path(i_f),"default")) call IO_temporary_jobdir_and_strings("JOBSTR",Probe_path(i_f))
      if (     STRING_same(Probe_path(i_f),"default")) call IO_temporary_jobdir_and_strings("ALL","RESTORE")
      call RT_OBSERVABLES_IO(what=what,IO_error=IO_error,first_sec_only=.TRUE.,COM_=NONE)
-     N_probe_frequencies= Efield(i_Probe)%n_frequencies
+     N_probe_frequencies=1
    enddo
    !
    ! Pump & Probe comparison
    !
    if (N_pumps>0) then
-     call DESC_compare(RT_desc,Pump_RT_desc,i_desc_err,exclude="FIELDs")
-     if (any(i_desc_err/=0)) call error("Probe and Pump runs are not compatible")
+     !call DESC_compare(RT_desc,Pump_RT_desc,i_desc_err,exclude="FIELDs")
+     !if (any(i_desc_err/=0)) call warning("Probe and Pump runs differ")
      if (n_ext_fields/=2) call warning("Probe run has only one field. Pump ignored")
    endif
    !
    ! In case of several probe_frequencies only one probe is supported
    !
-   if (N_probes>1.and.N_probe_frequencies>1) N_probes=0
+   if (N_probes>1.and.N_probe_frequencies>1) then
+     if (X_order==1) call warning("Using multi-frequency probe")
+     if (X_order/=1) then
+       call warning("Multi-frequency probe not supported for NL optics")
+       N_probes=0
+     endif
+   endif
    !
    call IO_temporary_jobdir_and_strings("ALL","RESTORE")
    !
@@ -158,7 +168,6 @@ subroutine RT_OBSERVABLES_IO_paths(what,en)
    ! I can use ANTIRES/RES probes with multiple frequencies only by using
    ! ProbeKey in the input file. This is because of the procedures defined in X_effective/X_inversion.
    !
-   if (Efield(i_Probe)%n_frequencies>1) IO_error=.TRUE.
    if (.not.IO_error)                   N_probes=1
    !
    ! If no Probes using the Keyword try using jobstr
diff --git a/ypp/real_time/RT_OBSERVABLES_damp_and_write.F b/ypp/real_time/RT_OBSERVABLES_damp_and_write.F
index 4cf74d2530..de5ff8640b 100644
--- a/ypp/real_time/RT_OBSERVABLES_damp_and_write.F
+++ b/ypp/real_time/RT_OBSERVABLES_damp_and_write.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_OBSERVABLES_damp_and_write(OBS,RT,IO_skip)
  !
  use YPP_real_time,  ONLY:l_force_min_damp,RT_time,OBS_el,RT_ctl
@@ -12,9 +16,11 @@ subroutine RT_OBSERVABLES_damp_and_write(OBS,RT,IO_skip)
  use pars,           ONLY:pi,SP
  use units,          ONLY:AUT2FS
  use RT_control,     ONLY:RT_output
+ use fields,         ONLY:A_ext
  use real_time,      ONLY:RT_ind_J,RT_P
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(OBS_el) :: OBS
  type(RT_ctl) :: RT
@@ -59,6 +65,7 @@ subroutine RT_OBSERVABLES_damp_and_write(OBS,RT,IO_skip)
  do i_t=1,RT%T_n_steps
    if (STRING_match(OBS%title,"polarization")) RT_P=OBS%F_t(i_t,1:3)
    if (STRING_match(OBS%title,"current")     ) RT_ind_J=OBS%F_t(i_t,1:3)
+   if (STRING_match(OBS%title,"Field")       ) A_ext%vecpot_vel(1)=OBS%F_t(i_t,1)
    call RT_output(trim(OBS%title),TIME=RT_time(i_t)*AUT2FS)
  enddo
  call RT_output(what="close "//trim(OBS%title))
diff --git a/ypp/real_time/RT_Polarization.F b/ypp/real_time/RT_Polarization.F
index c2385c555d..cb4fa6c96b 100644
--- a/ypp/real_time/RT_Polarization.F
+++ b/ypp/real_time/RT_Polarization.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM 
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_Polarization(en,k,q)
  !
  use pars,           ONLY:cZERO,SP,schlen,rZERO
@@ -24,8 +28,9 @@ subroutine RT_Polarization(en,k,q)
  use parallel_int,   ONLY:PP_redux_wait
  use DIPOLES,        ONLY:DIP_iR
  use functions,      ONLY:Fermi_fnc_derivative
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  type(bz_samp) :: k,q
@@ -70,7 +75,7 @@ subroutine RT_Polarization(en,k,q)
  ! Alloc
  !-------
  if (STRING_same(RT_pol_mode,"slice")) then
-   YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,1))
+   YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol,1))
    dG_lesser=cZERO
    !
    ! Time conf
@@ -99,16 +104,8 @@ subroutine RT_Polarization(en,k,q)
      !
      call gimme_a_G_Time_Point(i_T)
      !
-#if defined _PAR_IO
      call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/3/),ID=ID)
      io_err=io_RT_components('G_lesser',ID)
-#else
-     do ik = 1, RT_nk 
-       IO_ACT=manage_action(OP_IF_START_RD_CL_IF_END,ik,1,RT_nk)
-       call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/ik+2/),ID=ID)
-       io_err=io_RT_components('G_lesser',ID)
-     enddo
-#endif
      !
      RT_P=cZERO
      !
@@ -143,7 +140,7 @@ subroutine RT_Polarization(en,k,q)
              enddo
            endif
            if (STRING_same(RT_pol_mode,"slice")) then
-             RT_P=RT_P+DIP_iR(ic,i_n,i_m,ik_mem,i_sp_pol)*dG_lesser(i_m,i_n,ik,1)/DL_vol*k%weights(ik)
+             RT_P=RT_P+DIP_iR(ic,i_n,i_m,ik_mem,i_sp_pol)*dG_lesser(i_m,i_n,ik,i_sp_pol,1)/DL_vol*k%weights(ik)
            endif
          enddo K_LOOP
        enddo
diff --git a/ypp/real_time/RT_TRabs_driver.F b/ypp/real_time/RT_TRabs_driver.F
index ca5de6ac7d..b0d1c3f5b9 100644
--- a/ypp/real_time/RT_TRabs_driver.F
+++ b/ypp/real_time/RT_TRabs_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_transient_absorption(Xen,Xk,Xq)
  !
  use drivers,         ONLY:l_rt_carriers_in_use
@@ -28,8 +32,9 @@ subroutine RT_transient_absorption(Xen,Xk,Xq)
  use stderr,          ONLY:STRING_match,STRING_same,STRING_remove
  use com,             ONLY:msg
  use interpolate,     ONLY:INTERPOLATE_is_quiet
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)      :: Xen
  type(bz_samp)     :: Xk,Xq
diff --git a/ypp/real_time/RT_TRabs_output.F b/ypp/real_time/RT_TRabs_output.F
index 884e845e6b..91538c8323 100644
--- a/ypp/real_time/RT_TRabs_output.F
+++ b/ypp/real_time/RT_TRabs_output.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_TRabs_output(what,how,ofile,N_files,W,TIME)
  !
  use pars,            ONLY:SP,schlen,pi,cI
@@ -18,8 +22,9 @@ subroutine RT_TRabs_output(what,how,ofile,N_files,W,TIME)
  use BS,              ONLY:BS_K_dim
  use BS_solvers,      ONLY:BSS_desc
  use stderr,          ONLY:STRING_match,set_real_printed_length
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)      :: what,how
  character(schlen) :: ofile(20)
diff --git a/ypp/real_time/RT_TRabs_residuals.F b/ypp/real_time/RT_TRabs_residuals.F
index d91a804473..ff1ff87a42 100644
--- a/ypp/real_time/RT_TRabs_residuals.F
+++ b/ypp/real_time/RT_TRabs_residuals.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_TRabs_residuals(what,N_trans,N_dirs,DIP_dir,Xen,Xk,nb,l_EQ)
  !
  use pars,            ONLY:SP,pi,cI
@@ -17,8 +21,9 @@ subroutine RT_TRabs_residuals(what,N_trans,N_dirs,DIP_dir,Xen,Xk,nb,l_EQ)
  use YPPm,            ONLY:BS_E
  use real_time,       ONLY:RT_bands
  use parallel_m,      ONLY:PAR_IND_DIPk_ibz,PAR_IND_VAL_BANDS_DIP,PAR_IND_CON_BANDS_DIP
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*)      :: what
  integer           :: N_trans,nb(2),N_dirs
diff --git a/ypp/real_time/RT_X_LRR_real_field.F b/ypp/real_time/RT_X_LRR_real_field.F
index 20429d3f18..64b83823bb 100644
--- a/ypp/real_time/RT_X_LRR_real_field.F
+++ b/ypp/real_time/RT_X_LRR_real_field.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 ! Partialy inspired from Octopus v. 3.1.0
 !
 ! Copyright (C) 2002 M. Marques, A. Castro, A. Rubio, G. Bertsch
@@ -12,28 +16,30 @@
 subroutine RT_X_LRR_real_field(en)
  !
  use YPP_real_time,  ONLY:RT_conf,Use_FFT,l_force_min_damp,RT_time,Pol,Curr,ElField,&
-&                         OBS_el_free,X_kind,N_pumps
+&                         OBS_el_free,X_kind,N_pumps,l_skip_OBS_IO
  use pars,           ONLY:SP,pi,cI,cZERO,schlen
  use units,          ONLY:HA2EV
  use electrons,      ONLY:levels
  use vec_operate,    ONLY:v_norm
  use RT_control,     ONLY:RT_output
  use real_time,      ONLY:l_RT_induced_field
- use fields,         ONLY:Efield,small_a_frequency,small_a
+ use fields,         ONLY:Efield
+ use fields_int,     ONLY:small_a_frequency,small_a
  use com,            ONLY:of_open_close,msg
  use functions,      ONLY:theta_function
  use parser_m,       ONLY:parser
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  !
  ! Work Space
  !
- integer      :: i_t,i_w,i_p_dir,iE_err,i_Probe,i_field,n_fields,NW
+ integer      :: i_t,i_w,i_p_dir,iE_err,i_Probe,i_field,n_fields,n_freqs,NW
  logical      :: l_rm_asympt_limit
  real(SP)     :: W_reached,Field_time,outdata(6),E_ver(3,3),Fac
- complex(SP)  :: OBS_proj,OBS_vec(3),E_w_SP(3),Field,E_of_t(2),A_of_t(2),theta,delta,Field_freq
+ complex(SP)  :: OBS_proj,OBS_vec(3),E_w_SP(3),Field,E_ver_cmplx(3,3),E_of_t(2),A_of_t(2),theta,delta,Field_freq
  complex(SP), allocatable :: EELS(:,:),EPS(:,:)
  !
 #if defined _YPP_NL
@@ -46,18 +52,10 @@ subroutine RT_X_LRR_real_field(en)
  call parser('RmAsymLim',l_rm_asympt_limit)
  !
  i_Probe=1
- if(trim(Efield(i_Probe)%ef_pol)=="linear"  ) n_fields=1
+ n_freqs=1
+ n_fields=1
  if(trim(Efield(i_Probe)%ef_pol)=="circular") n_fields=2
  !
- ! Pump removal (if any)
- !-----------------------
- if (N_pumps==1) then
-   do i_t=1,RT_conf%T_n_steps
-     Curr(i_Probe)%F_t(i_t,:)=Curr(i_Probe)%F_t(i_t,:)-Curr(i_Probe+1)%F_t(i_t,:)
-     Pol(i_Probe)%F_t(i_t,:)=Pol(i_Probe)%F_t(i_t,:)-Pol(i_Probe+1)%F_t(i_t,:)
-   enddo
- endif
- !
  ! 1D FFT setup
  !--------------
  call RT_1D_Fourier_setup(ElField,RT_conf,n_fields) 
@@ -88,10 +86,13 @@ subroutine RT_X_LRR_real_field(en)
  do i_t=1,RT_conf%T_n_steps
    !
    Field_time=RT_time(i_t)-Efield(i_Probe)%t_initial
-   A_of_t =small_a(Field_time,Efield(i_Probe),0)
-   E_of_t =small_a(Field_time,Efield(i_Probe),1)
-   theta  =theta_function(Field_time,RT_time(2)-RT_time(1),0)
-   delta  =theta_function(Field_time,RT_time(2)-RT_time(1),1)
+   !
+   if (index(Efield(i_Probe)%ef_name,"FROM_FILE")>0 .and. i_t>Efield(i_Probe)%t_final_indx+2) cycle
+   !
+   A_of_t =small_a(Field_time,RT_conf%delta_T,Efield(i_Probe),0)
+   E_of_t =small_a(Field_time,RT_conf%delta_T,Efield(i_Probe),1)
+   theta  =theta_function(Field_time,RT_conf%delta_T,0)
+   delta  =theta_function(Field_time,RT_conf%delta_T,1)
    !
    do i_field=1,n_fields
      Fac=Efield(i_Probe)%amplitude
@@ -104,12 +105,12 @@ subroutine RT_X_LRR_real_field(en)
  ! ... FT
  !--------
  !
- call RT_OBSERVABLES_damp_and_write(ElField,RT_conf,.TRUE.)
+ call RT_OBSERVABLES_damp_and_write(ElField,RT_conf,(l_skip_OBS_IO.or.N_pumps==0))
  !
  !... Analytic ...
  !
  iE_err=-1
- if (n_fields==1.and.trim(X_kind)=="rhorho") then
+ if (n_fields==1.and.n_freqs==1.and.trim(X_kind)=="rhorho") then
    do i_w = 1, RT_conf%W_n_steps
      !
      if(trim(RT_conf%damping) /= "NONE")                      Field_freq=ElField%W(i_w)-cI*RT_conf%damp_factor
@@ -222,13 +223,15 @@ subroutine RT_X_LRR_real_field(en)
      E_ver(:,1)=abs(E_w_SP/v_norm(abs(E_w_SP)))
    endif
    !
+   E_ver_cmplx=cmplx(E_ver,kind=SP)
+   !
    do i_p_dir=1,3
      !
      if(trim(X_kind)=="rhorho") OBS_vec=cmplx( Pol(1)%F_w(i_w,:),kind=SP)
      if(trim(X_kind)=="jj")     OBS_vec=cmplx(Curr(1)%F_w(i_w,:),kind=SP)/ElField%W(i_w)**2
      !
-     OBS_proj=dot_product(E_ver(:,i_p_dir),OBS_vec)
-     if(Efield(i_Probe)%ef_pol=="circular") Field=dot_product(E_ver(:,i_p_dir),E_w_SP )
+     OBS_proj=dot_product(E_ver_cmplx(:,i_p_dir),OBS_vec)
+     if(Efield(i_Probe)%ef_pol=="circular") Field=dot_product(E_ver_cmplx(:,i_p_dir),E_w_SP )
      !
      if(     l_RT_induced_field) EELS(i_w,i_p_dir) = 4._SP*pi*OBS_proj/Field
      if(.not.l_RT_induced_field) EPS( i_w,i_p_dir) = 4._SP*pi*OBS_proj/Field
diff --git a/ypp/real_time/RT_X_effective.F b/ypp/real_time/RT_X_effective.F
index 81d9a9a261..368da1bf94 100644
--- a/ypp/real_time/RT_X_effective.F
+++ b/ypp/real_time/RT_X_effective.F
@@ -5,9 +5,13 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_X_effective(en)
  !
- use pars,           ONLY:SP,lchlen,pi,schlen,cZERO
+ use pars,           ONLY:SP,lchlen,pi,schlen,cZERO,n_fields_defs_max
  use YPP_real_time,  ONLY:RT_P_t,Pump_path,N_probes,Probe_path,RT_IO_J_and_P,&
 &                         max_n_of_paths,Use_FFT,RT_conf,N_pumps,N_probe_frequencies,X_order,&
 &                         l_RT_X_inversion,l_skip_pol_and_curr_IO
@@ -22,13 +26,14 @@ subroutine RT_X_effective(en)
  use vec_operate,    ONLY:sort
  use LIVE_t,         ONLY:live_timing
  use stderr,         ONLY:intc,real2ch,STRING_split
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  !
  character(lchlen) :: jobstr_save
- character(schlen) :: X_file_name,field_defs(3)
+ character(schlen) :: X_file_name,field_defs(n_fields_defs_max)
  integer           :: i_f,i_fp,f_indx(max_n_of_paths),i_order,min_X_order,i_c,i_t,i_conv,&
 &                     i_p,N_periods,i_Pump,i_Probe
  real(SP)          :: Pump_Frequency(1),Unit_of_Measure
diff --git a/ypp/real_time/RT_X_inversion.F b/ypp/real_time/RT_X_inversion.F
index 6396dde12f..b2f1c84998 100644
--- a/ypp/real_time/RT_X_inversion.F
+++ b/ypp/real_time/RT_X_inversion.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_X_inversion(RT_P_Probe,Susceptibility,Harmonic_Frequency,X_effective)
  !
  use pars,           ONLY:SP,pi
@@ -15,7 +19,9 @@ subroutine RT_X_inversion(RT_P_Probe,Susceptibility,Harmonic_Frequency,X_effecti
  use units,          ONLY:AUT2FS
  use com,            ONLY:msg
  use LIVE_t,         ONLY:live_timing
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  real(SP),     intent(in)    :: RT_P_probe(RT_conf%T_n_steps,N_probes,3),Harmonic_Frequency(0:X_order,N_probe_frequencies)
  complex(SP),  intent(out)   :: Susceptibility(0:X_order,N_probe_frequencies),X_effective(0:X_order,N_probe_frequencies,3)
diff --git a/ypp/real_time/RT_X_qssin.F b/ypp/real_time/RT_X_qssin.F
index efa2f2de9d..53a15a1c35 100644
--- a/ypp/real_time/RT_X_qssin.F
+++ b/ypp/real_time/RT_X_qssin.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_X_qssin
  !
  use YPP_real_time,  ONLY:RT_P_t,RT_conf,Use_FFT,i_t_start,i_t_end
@@ -12,11 +16,13 @@ subroutine RT_X_qssin
  use units,          ONLY:HA2EV,SVCMm12VMm1,AU2VMm1
  use com,            ONLY:error
  use RT_control,     ONLY:RT_output
- use fields,         ONLY:Efield,small_a
+ use fields,         ONLY:Efield
+ use fields_int,     ONLY:small_a
  use functions,      ONLY:theta_function
  use LIVE_t,         ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  ! Work Space
  !
@@ -48,8 +54,8 @@ subroutine RT_X_qssin
  YAMBO_ALLOC(E_t,(RT_conf%T_n_steps))
  do i_t=1,RT_conf%T_n_steps
    Field_time=(i_t-1)*RT_conf%delta_T-Efield(i_Probe)%t_initial
-   A_of_t =small_a(Field_time,Efield(i_Probe),0)
-   E_of_t =small_a(Field_time,Efield(i_Probe),1)
+   A_of_t =small_a(Field_time,RT_conf%delta_T,Efield(i_Probe),0)
+   E_of_t =small_a(Field_time,RT_conf%delta_T,Efield(i_Probe),1)
    theta  =theta_function(Field_time,RT_conf%delta_T,0)
    delta  =theta_function(Field_time,RT_conf%delta_T,1)
    select case(Efield(i_Probe)%ef_pol)
diff --git a/ypp/real_time/RT_X_response.F b/ypp/real_time/RT_X_response.F
index 8d28ea74b7..fb9a78ed0a 100644
--- a/ypp/real_time/RT_X_response.F
+++ b/ypp/real_time/RT_X_response.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM CA
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_X_response(en,k,q)
  !
  use YPP_real_time,  ONLY:Use_FFT,Pol,Curr,N_probe_frequencies,&
@@ -12,14 +16,15 @@ subroutine RT_X_response(en,k,q)
 &                         l_RT_X_inversion,Probe_path,Pump_path,N_probes,RT_conf,l_skip_OBS_IO
  use YPP_interfaces, ONLY:RT_OBSERVABLES_IO
  use RT_control,     ONLY:OBS_RT_IO_t
- use pars,           ONLY:schlen
+ use pars,           ONLY:schlen,n_fields_defs_max
  use stderr,         ONLY:STRING_split,STRING_same
  use R_lattice,      ONLY:bz_samp
  use electrons,      ONLY:levels
  use fields,         ONLY:Efield
  use parser_m,       ONLY:parser
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  type(bz_samp) :: k,q
@@ -27,7 +32,7 @@ subroutine RT_X_response(en,k,q)
  ! Work Space
  !
  integer           :: Ndefs,i_o
- character(schlen) :: probe_defs(3)
+ character(schlen) :: probe_defs(n_fields_defs_max)
  logical           :: l_RT_LRR_real_field,l_RT_X_effective,l_RT_SHG_qssin,IO_error
  !
  call RT_OBSERVABLES_IO_paths('JP',en)
@@ -36,7 +41,12 @@ subroutine RT_X_response(en,k,q)
  !=======================================
  call STRING_split(Efield(1)%ef_name,probe_defs,n_non_empty_strings=Ndefs)
  !
- l_RT_LRR_real_field= all( (/ Ndefs==1 ,N_pumps<=1, N_probes==1, X_order==1/) )
+ if ( trim(probe_defs(1))=='FROM_FILE') then
+   call RT_load_field_from_file_init(probe_defs(2),probe_defs(3),1)
+   call RT_load_field_from_file(probe_defs(2),probe_defs(3),1)
+ endif
+ !
+ l_RT_LRR_real_field= all( (/ N_pumps<=1, N_probes==1, X_order==1/) )
  !
  l_RT_SHG_qssin     = all( (/ trim(probe_defs(1))=="QSSIN" , N_pumps==0, N_probes==1, X_order==2/) )
  !
@@ -58,38 +68,67 @@ subroutine RT_X_response(en,k,q)
    !--------------------------------------------------
    ! Database
    !
-   allocate(Pol(N_pumps+N_probes))
-   allocate(Curr(N_pumps+N_probes))
+   allocate(Pol(N_pumps+N_probes+1))
+   allocate(Curr(N_pumps+N_probes+1))
    !
    call IO_temporary_jobdir_and_strings("ALL","SAVE")
    !
-   if (N_pumps==0) then
-     Pol(1)%title ="polarization"
-     Curr(1)%title="current"
-   else
-     Pol(1)%title ="probe-polarization"
-     Curr(1)%title="probe-current"
+   if (N_pumps>0) then
+     !
+     ! Here the assumption is that
+     ! - run 1 contains both the pump(s) and the probe. Probe is Field1
+     ! - run 2 contains only the pump(s)
+     !
      Pol(2)%title ="pump-polarization"
      Curr(2)%title="pump-current"
      if (.not.STRING_same(Pump_path,"default")) call IO_temporary_jobdir_and_strings("JOBSTR",Pump_path)
      call RT_OBSERVABLES_IO( what='JP',J=Curr(2),P=Pol(2),IO_error=IO_error)
      if (.not.allocated(Curr(2)%F_t)) call error('Pump databases missing/corrupt')
+     !
+     call IO_temporary_jobdir_and_strings("ALL","RESTORE")
+     !
    endif
    !
-   if (     STRING_same(Probe_path(1),"default")) call IO_temporary_jobdir_and_strings("ALL","RESTORE")
+   ! NB: the full run needs to be loaded as last,
+   !     otherwise the wrong fields  are stored
+   !
+   Pol(1)%title ="polarization"
+   Curr(1)%title="current"
    if (.not.STRING_same(Probe_path(1),"default")) call IO_temporary_jobdir_and_strings("JOBSTR",Probe_path(1))
    call RT_OBSERVABLES_IO( what='JP',J=Curr(1),P=Pol(1),IO_error=IO_error)
    if (.not.allocated(Curr(1)%F_t)) call error('Probe databases missing/corrupt')
    !
    call IO_temporary_jobdir_and_strings("ALL","RESTORE")
    !
+   if (N_pumps>0) then
+     !
+     Pol(3)%title ="probe-polarization"
+     Curr(3)%title="probe-current"
+     Pol(3)%N=3
+     Curr(3)%N=3
+     YAMBO_ALLOC(Pol(3)%F_t,(RT_conf%T_n_steps,3))
+     YAMBO_ALLOC(Curr(3)%F_t,(RT_conf%T_n_steps,3))
+     Pol(3)%F_t =Pol(1)%F_t -Pol(2)%F_t
+     Curr(3)%F_t=Curr(1)%F_t-Curr(2)%F_t
+     !
+   endif
+   !
    !  Damping and output 
    !---------------------
    call RT_OBSERVABLES_damp_and_write(Pol(1),RT_conf,l_skip_OBS_IO)
    call RT_OBSERVABLES_damp_and_write(Curr(1),RT_conf,l_skip_OBS_IO)
+   !
    if (N_pumps>0) then
+     !
      call RT_OBSERVABLES_damp_and_write(Pol(2),RT_conf,l_skip_OBS_IO)
      call RT_OBSERVABLES_damp_and_write(Curr(2),RT_conf,l_skip_OBS_IO)
+     !
+     call RT_OBSERVABLES_damp_and_write(Pol(3),RT_conf,l_skip_OBS_IO)
+     call RT_OBSERVABLES_damp_and_write(Curr(3),RT_conf,l_skip_OBS_IO)
+     !
+     Pol(1)%F_t =Pol(1)%F_t -Pol(2)%F_t
+     Curr(1)%F_t=Curr(1)%F_t-Curr(2)%F_t
+     !
    endif
    !
    if ( l_RT_pol ) then
diff --git a/ypp/real_time/RT_coefficients_Fourier.F b/ypp/real_time/RT_coefficients_Fourier.F
index 8952f8e6f5..d2d28a7290 100644
--- a/ypp/real_time/RT_coefficients_Fourier.F
+++ b/ypp/real_time/RT_coefficients_Fourier.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_coefficients_Fourier(nt,P,nw,W,X,X_err,deltaW)
  !
  ! Here I calculate the coefficients of a given Fourier series
@@ -13,7 +17,9 @@ subroutine RT_coefficients_Fourier(nt,P,nw,W,X,X_err,deltaW)
  use YPP_real_time,  ONLY:i_t_start,RT_conf
  use fields,         ONLY:EtoT
  use real_time,      ONLY:RT_step
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer,     intent(in)   :: nw,nt
  real(SP),    intent(in)   :: P(nt),W(nw),deltaW
diff --git a/ypp/real_time/RT_coefficients_Inversion.F b/ypp/real_time/RT_coefficients_Inversion.F
index 8114a453e1..415e7a5d56 100644
--- a/ypp/real_time/RT_coefficients_Inversion.F
+++ b/ypp/real_time/RT_coefficients_Inversion.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM MG
 !
+! headers
+!
+#include<y_memory.h>
+!
 !> @brief Polarization coefficient inversion see Sec. III in PRB 88, 235113 (2013) 
 !!
 !! @param[in]       NW          order of the response functions 
@@ -22,7 +26,9 @@ subroutine RT_coefficients_Inversion(NW,NX,P,X,W,T_period)
  use fields,          ONLY:Efield
  use interfaces,      ONLY:LINEAR_ALGEBRA_driver
  use linear_algebra,  ONLY:INV
-#include<y_memory.h>
+ use y_memory_alloc
+ !
+ implicit none
  !
  integer,     intent(in)   :: NW,NX
  real(SP),    intent(in)   :: P(NE_steps),W(NW),T_period
diff --git a/ypp/real_time/RT_components_energy_plot.F b/ypp/real_time/RT_components_energy_plot.F
index ae30dac46d..662474bc6f 100644
--- a/ypp/real_time/RT_components_energy_plot.F
+++ b/ypp/real_time/RT_components_energy_plot.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include <y_memory.h>
+!
 subroutine RT_components_energy_plot(En,kpt,n_T_steps)
  ! 
  use units,          ONLY:HA2EV,HA2KEL,AUT2FS
@@ -24,8 +28,9 @@ subroutine RT_components_energy_plot(En,kpt,n_T_steps)
  use real_time,      ONLY:RT_carriers,RT_bands
  use QP_m,           ONLY:QP_table,QP_n_states
  use interfaces,     ONLY:DESC_write
+ use y_memory_alloc
  !
-#include <y_memory.h>
+ implicit none
  !
  type(bz_samp) :: kpt
  type(levels)  :: En
diff --git a/ypp/real_time/RT_damp_it.F b/ypp/real_time/RT_damp_it.F
index 87a55eb62f..0a5380a022 100644
--- a/ypp/real_time/RT_damp_it.F
+++ b/ypp/real_time/RT_damp_it.F
@@ -8,6 +8,7 @@
 subroutine RT_damp_it(damp_type,damp_factor,ft,lda)
  !
  use YPP_real_time,  ONLY:RT_time,RT_conf
+ use units,          ONLY:FS2AUT
  use pars,           ONLY:SP,DP
  use fields,         ONLY:Efield
  !
@@ -28,7 +29,7 @@ subroutine RT_damp_it(damp_type,damp_factor,ft,lda)
  !
  select case ( trim(damp_type) )
    case("LORENTZIAN")  
-     forall(i_t=1:RT_conf%T_n_steps) damp_function(i_t) = exp(-abs((RT_time(i_t)-T_ref)*damp_factor))
+     forall(i_t=1:RT_conf%T_n_steps) damp_function(i_t) = exp(-(RT_time(i_t)-T_ref)*damp_factor)
    case("GAUSSIAN") 
      forall(i_t=1:RT_conf%T_n_steps) damp_function(i_t) = exp(-(RT_time(i_t)-T_ref)**2*damp_factor**2)
    case("NONE") 
@@ -38,7 +39,10 @@ subroutine RT_damp_it(damp_type,damp_factor,ft,lda)
      return
  end select
  !
- forall(i_t=1:RT_conf%T_n_steps)  ft(i_t,:)=ft(i_t,:)*damp_function(i_t)
+ do i_t=1,RT_conf%T_n_steps
+  if( (RT_time(i_t)-T_ref)<0._SP) cycle
+  ft(i_t,:)=ft(i_t,:)*damp_function(i_t)
+ enddo
  !
 end subroutine RT_damp_it
 
diff --git a/ypp/real_time/RT_density.F b/ypp/real_time/RT_density.F
index 54704a457b..3d03d087ae 100644
--- a/ypp/real_time/RT_density.F
+++ b/ypp/real_time/RT_density.F
@@ -5,13 +5,17 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_density(en,kpt,qpt)
  !
  use pars,           ONLY:SP,schlen,cZERO
  use units,          ONLY:FS2AUT
  use R_lattice,      ONLY:bz_samp
  use FFT_m,          ONLY:fft_size,fft_dim
- use electrons,      ONLY:levels,n_spin
+ use electrons,      ONLY:levels,n_spin,n_sp_pol
  use parser_m,       ONLY:parser
  use stderr,         ONLY:intc,real2ch
  use com,            ONLY:msg,of_open_close
@@ -26,8 +30,9 @@ subroutine RT_density(en,kpt,qpt)
  use interfaces,     ONLY:WF_load,WF_free,el_density_and_current
  use wave_func,      ONLY:WF,wf_ng
  use parallel_int,   ONLY:PARALLEL_global_indexes,PARALLEL_WF_distribute,PARALLEL_WF_index
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels)  :: en
  type(bz_samp) :: kpt,qpt
@@ -41,7 +46,7 @@ subroutine RT_density(en,kpt,qpt)
  ! Allocs
  !
  real(SP), allocatable    :: drho(:),delta_magn(:,:)
- complex(SP), allocatable :: G_loc(:,:,:)
+ complex(SP), allocatable :: G_loc(:,:,:,:)
  !
  integer, external     :: io_RT_components
  !
@@ -65,12 +70,12 @@ subroutine RT_density(en,kpt,qpt)
  !
  if(load_G_history) then
    ch_ws(1)="Computing the density from the G lesser"
-   YAMBO_ALLOC(G_loc,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
+   YAMBO_ALLOC(G_loc,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
    G_loc=cZERO
-   YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,1))
+   YAMBO_ALLOC(dG_lesser,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol,1))
    dG_lesser=cZERO
    if(include_eq_occ) then
-     YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk))
+     YAMBO_ALLOC(G_lesser_reference,(RT_bands(1):RT_bands(2),RT_bands(1):RT_bands(2),RT_nk,n_sp_pol))
      G_lesser_reference=cZERO
      call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/2/),ID=ID) 
      io_err=io_RT_components('REFERENCE',ID)
@@ -117,19 +122,11 @@ subroutine RT_density(en,kpt,qpt)
      !
      call gimme_a_G_Time_Point(i_T)
      !
-#if defined _PAR_IO
      call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/3/),ID=ID)
      io_err=io_RT_components('G_lesser_K_section',ID)
-#else
-     do ik = 1, RT_nk 
-       IO_ACT=manage_action(OP_IF_START_RD_CL_IF_END,ik,1,RT_nk)
-       call io_control(ACTION=IO_ACT,COM=NONE,SEC=(/ik+2/),ID=ID)
-       io_err=io_RT_components('G_lesser_K_section',ID)
-     enddo
-#endif
      !
-     if(     include_eq_occ) G_loc=dG_lesser(:,:,:,1)+G_lesser_reference
-     if(.not.include_eq_occ) G_loc=dG_lesser(:,:,:,1)
+     if(     include_eq_occ) G_loc=dG_lesser(:,:,:,:,1)+G_lesser_reference
+     if(.not.include_eq_occ) G_loc=dG_lesser(:,:,:,:,1)
      !
      call el_density_matrix(G_loc,en,kpt,drho,RT_bands(1))
      !
diff --git a/ypp/real_time/RT_dos_time_plot.F b/ypp/real_time/RT_dos_time_plot.F
index d806adabba..20af189709 100644
--- a/ypp/real_time/RT_dos_time_plot.F
+++ b/ypp/real_time/RT_dos_time_plot.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_dos_time_plot(En,k,n_T_steps)
  !
  use units,          ONLY:HA2EV,AUT2FS
@@ -23,8 +27,9 @@ subroutine RT_dos_time_plot(En,k,n_T_steps)
  use RT_control,     ONLY:RT_output,RT_apply,RT_carriers_to_RT_k,RT_carriers_to_RT_E
  use real_time,      ONLY:RT_carriers,RT_bands
  use interpolate,    ONLY:GRID_k,INTERPOLATION_driver_seed,INTERPOLATION_driver_do
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) ::k
  type(levels)  ::En
diff --git a/ypp/real_time/RT_fields.F b/ypp/real_time/RT_fields.F
new file mode 100644
index 0000000000..e4abf2f4c0
--- /dev/null
+++ b/ypp/real_time/RT_fields.F
@@ -0,0 +1,225 @@
+!
+!        Copyright (C) 2000-2022 the YAMBO team
+!              http://www.yambo-code.org
+!
+! Authors (see AUTHORS file for details): DS
+! 
+! This file is distributed under the terms of the GNU 
+! General Public License. You can redistribute it and/or 
+! modify it under the terms of the GNU General Public 
+! License as published by the Free Software Foundation; 
+! either version 2, or (at your option) any later version.
+!
+! This program is distributed in the hope that it will 
+! be useful, but WITHOUT ANY WARRANTY; without even the 
+! implied warranty of MERCHANTABILITY or FITNESS FOR A 
+! PARTICULAR PURPOSE.  See the GNU General Public License 
+! for more details.
+!
+! You should have received a copy of the GNU General Public 
+! License along with this program; if not, write to the Free 
+! Software Foundation, Inc., 59 Temple Place - Suite 330,Boston, 
+! MA 02111-1307, USA or visit http://www.gnu.org/copyleft/gpl.txt.
+!
+! headers
+!
+#include<y_memory.h>
+!
+subroutine RT_fields()
+ !
+ use units,          ONLY:AUT2FS
+ use stderr,         ONLY:intc
+ use com,            ONLY:msg
+ use pars,           ONLY:SP,schlen,pi,n_fields_defs_max
+ use com,            ONLY:com_path
+ use YPP_real_time,  ONLY:RT_conf,ElField,RT_time,ypp_chirp
+ use fields,         ONLY:Efield
+ use stderr,         ONLY:STRING_split
+ use y_memory_alloc
+ !
+ implicit none
+ !
+ ! Work Space
+ !
+ integer           :: i_field,i_t
+ logical           :: l_load_from_file,l_from_time_fft 
+ character(schlen) :: field_defs(n_fields_defs_max),filename
+ real(SP)          :: field_data(7),Field_time
+ !
+ RT_conf%T_n_steps=nint( (RT_conf%T_range(2)-RT_conf%T_range(1))/RT_conf%delta_T )+1
+ !
+ ! Time Initialization from input
+ !call RT_time_configuration_setup('FIELDS',OBS_RT_IO_t)
+ YAMBO_ALLOC(RT_time,(RT_conf%T_n_steps))
+ do i_t=1,RT_conf%T_n_steps
+   RT_time(i_t)=(i_t-1)*RT_conf%delta_T+RT_conf%T_range(1)
+ enddo
+ !
+ call msg('s',' Time range         :',(/minval(RT_time),maxval(RT_time)/)*AUT2FS,"[fs]")
+ call msg('s',' Time step          :',RT_conf%delta_T*AUT2FS,"[fs]")
+ call msg('s',' Time steps         :',RT_conf%T_n_steps)
+ !
+ ! Analize the input variables for the external field
+ call STRING_split(Efield(1)%ef_name,field_defs)
+ !
+ i_field=1
+ !
+ ! Field in frequency space from external file
+ if ( field_defs(1)=='FROM_W_FILE') call RT_Field_w_load(i_field)
+ ! 
+ ! Field in frequency space from FT of real time expression
+ if ( field_defs(1)/='FROM_W_FILE') call RT_Field_t_to_w(i_field)
+ !
+ if (.not. ypp_chirp>0._SP) return
+ !
+ ! Chirp the field and convert back into real time
+ call RT_1D_Fourier_Chirp(ElField,RT_conf)
+ !
+ call RT_print_Elfield(1,"time-chirped",Elfield)
+ !
+ call RT_1D_Fourier_Transform('T2W',ElField,RT_conf,1)
+ !
+ call RT_print_Elfield(1,"freq-chirped",Elfield)
+ !
+end subroutine RT_fields
+!
+!
+subroutine RT_Field_w_load(i_field)
+ !
+ use YPP_real_time,  ONLY:RT_conf,ElField
+ !
+ ! TODO
+ !
+ implicit none
+ !
+ integer, intent(in) :: i_field
+ !
+ call error(' Loading field frequency from file not yet coded')
+ !
+ call RT_print_Elfield(1,"freq",Elfield)
+ !
+end subroutine RT_Field_w_load
+!
+!
+subroutine RT_Field_t_to_w(i_field)
+ !
+ use units,          ONLY:FS2AUT
+ use pars,           ONLY:SP,schlen
+ use fields_int,     ONLY:small_a
+ use fields,         ONLY:Efield
+ use YPP_real_time,  ONLY:RT_conf,ElField,RT_time
+ !
+ implicit none
+ !
+ integer, intent(in) :: i_field
+ !
+ integer   :: i_t
+ real(SP)  :: Field_time,dT
+ !
+ ElField%title="Field"
+ ElField%N    =6
+ call RT_1D_Fourier_setup(ElField,RT_conf,6)
+ !
+ dT=RT_conf%delta_T
+ !
+ do i_t=1,RT_conf%T_n_steps
+   Field_time=RT_time(i_t)-Efield(i_field)%t_initial
+   ElField%F_t(i_t,1:2) =small_a(Field_time,dT,Efield(i_field),0)
+   ElField%F_t(i_t,3:4) =small_a(Field_time,dT,Efield(i_field),1)
+   ElField%F_t(i_t,5:6) =small_a(Field_time,dT,Efield(i_field),2)
+ enddo
+ !
+ call RT_print_Elfield(1,"time",Elfield)
+ !
+ call RT_1D_Fourier_Transform('T2W',ElField,RT_conf,1)
+ !
+ call RT_print_Elfield(1,"freq",Elfield)
+ !
+end subroutine RT_Field_t_to_w
+!
+!
+subroutine RT_print_Elfield(i_field,mode,Elfield)
+ !
+ use units,          ONLY:FS2AUT
+ use YPP_real_time,  ONLY:RT_conf,OBS_el
+ use pars,           ONLY:SP,schlen
+ use stderr,         ONLY:intc
+ use com,            ONLY:com_path
+ !
+ implicit none
+ !
+ integer,      intent(in) :: i_field
+ character(*), intent(in) :: mode
+ type(OBS_el), intent(in) :: ElField
+ !
+ integer           :: i_t,i_w,idata,i_re_data,i_im_data,i_pol_data
+ character(schlen) :: filename1,filename2,basename
+ real(SP)          :: field_data(7),Field_t,Field_w
+ !
+ basename="YPP_EXTFIELD"//trim(intc(i_field))
+ !
+ if (mode=="time")         filename1=trim(basename)//"_P1.time"
+ if (mode=="time-chirped") filename1=trim(basename)//"_P1_LINCHIRP.time"
+ if (mode=="freq")         filename1=trim(basename)//"_P1.freq"
+ if (mode=="freq-chirped") filename1=trim(basename)//"_P1_LINCHIRP.freq"
+ !
+ if (mode=="time")         filename2=trim(basename)//"_P2.time"
+ if (mode=="time-chirped") filename2=trim(basename)//"_P2_LINCHIRP.time"
+ if (mode=="freq")         filename2=trim(basename)//"_P2.freq"
+ if (mode=="freq-chirped") filename2=trim(basename)//"_P2_LINCHIRP.freq"
+ !
+ open(unit=100+i_field,file=trim(com_path)//"/"//trim(filename1))
+ open(unit=200+i_field,file=trim(com_path)//"/"//trim(filename2))
+ !
+ if (index(mode,"time")>0) then
+   !
+   write(100+i_field,*) RT_conf%T_n_steps,RT_conf%delta_t
+   write(200+i_field,*) RT_conf%T_n_steps,RT_conf%delta_t
+   !
+   do i_t=1,RT_conf%T_n_steps
+     Field_t=RT_conf%T_range(1)+(i_t-1)*RT_conf%delta_t   !-Efield%t_initial
+     field_data(1)=Field_t/FS2AUT
+     do idata=1,3
+       i_pol_data=2*idata-1
+       field_data(idata+1)=real(ElField%F_t(i_t,i_pol_data))
+     enddo
+     write(100+i_field,*) field_data(1:4)
+     do idata=1,3
+       i_pol_data=2*idata
+       field_data(idata+1)=real(ElField%F_t(i_t,i_pol_data))
+     enddo
+     write(200+i_field,*) field_data(1:4)
+   enddo
+   !
+ else if (index(mode,"freq")>0) then
+   !
+   write(100+i_field,*) RT_conf%W_n_steps,RT_conf%delta_W
+   write(200+i_field,*) RT_conf%W_n_steps,RT_conf%delta_W
+   !
+   do i_w=1,RT_conf%W_n_steps
+     Field_w=RT_conf%X_w_range(1)+(i_w-1)*RT_conf%delta_W
+     field_data(1)=Field_w !/FS2AUT
+     do idata=1,3
+       i_pol_data=2*idata-1
+       i_re_data=2*idata-1
+       i_im_data=2*idata
+       field_data(i_re_data+1)=real(ElField%F_w(i_w,i_pol_data))
+       field_data(i_im_data+1)=aimag(ElField%F_w(i_w,i_pol_data))
+     enddo
+     write(100+i_field,*) field_data
+     do idata=1,3
+       i_pol_data=2*idata
+       i_re_data=2*idata-1
+       i_im_data=2*idata
+       field_data(i_re_data+1)=real(ElField%F_w(i_w,i_pol_data))
+       field_data(i_im_data+1)=aimag(ElField%F_w(i_w,i_pol_data))
+     enddo
+     write(200+i_field,*) field_data
+   enddo
+   !
+ endif
+ !
+ close(100+i_field)
+ close(200+i_field)
+ !
+end subroutine RT_print_Elfield
diff --git a/ypp/real_time/RT_occ_bands_interpolation.F b/ypp/real_time/RT_occ_bands_interpolation.F
index 4360b83175..88306899f2 100644
--- a/ypp/real_time/RT_occ_bands_interpolation.F
+++ b/ypp/real_time/RT_occ_bands_interpolation.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_occ_bands_interpolation(en,k,q,n_T_steps)
  !
  use pars,           ONLY:SP,schlen,rZERO
@@ -23,8 +27,9 @@ subroutine RT_occ_bands_interpolation(en,k,q,n_T_steps)
  use interpolate,    ONLY:INTERPOLATION_driver_seed,INTERPOLATION_driver_do,electrons_bands_interpolate,&
 &                         INTERP_obj,INTERP_mode,GRID_k
  use LIVE_t,         ONLY:live_timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,       intent(in)    :: n_T_steps
  type(bz_samp), intent(in)    :: k,q
diff --git a/ypp/real_time/RT_occ_time_plot.F b/ypp/real_time/RT_occ_time_plot.F
index ded3da7ca2..b889767f27 100644
--- a/ypp/real_time/RT_occ_time_plot.F
+++ b/ypp/real_time/RT_occ_time_plot.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_occ_time_plot(En,kpt,n_T_steps)
  !
  use drivers,        ONLY:l_elel_scatt,l_elph_scatt
@@ -27,8 +31,9 @@ subroutine RT_occ_time_plot(En,kpt,n_T_steps)
  use OUTPUT,         ONLY:OUTPUT_driver
  use interfaces,     ONLY:DESC_write
  use timing_m,       ONLY:timing
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: kpt
  type(levels)  :: En
diff --git a/ypp/real_time/RT_occupations_driver.F b/ypp/real_time/RT_occupations_driver.F
index 2a9611328a..ac614e2731 100644
--- a/ypp/real_time/RT_occupations_driver.F
+++ b/ypp/real_time/RT_occupations_driver.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_occupations_driver(k,q,en)
  !
  use drivers,        ONLY:l_rt_carriers_in_use
@@ -17,8 +21,9 @@ subroutine RT_occupations_driver(k,q,en)
  use real_time,      ONLY:RT_bands
  use YPPm,           ONLY:DIPs_mask,l_dipoles,DIPs,DIPs_V_bands
  use YPP_real_time,  ONLY:l_RT_time,l_RT_energy,l_RT_bands,l_RT_dos,RT_conf
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp) :: k,q
  type(levels)  :: en
diff --git a/ypp/real_time/RT_split_Polarization.F b/ypp/real_time/RT_split_Polarization.F
index de796ffd1c..1a1dfb435e 100644
--- a/ypp/real_time/RT_split_Polarization.F
+++ b/ypp/real_time/RT_split_Polarization.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_split_Polarization(nh,nt,W,X,P,what)
  !
  use pars,           ONLY:SP,cI,schlen,DP
@@ -16,8 +20,9 @@ subroutine RT_split_Polarization(nh,nt,W,X,P,what)
  use LIVE_t,         ONLY:live_timing
  !
  ! Fourier
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer,     intent(in)   :: nt,nh
  real(SP),    intent(inout):: P(nt,3),W(nh)
diff --git a/ypp/real_time/RT_time_configuration_setup.F b/ypp/real_time/RT_time_configuration_setup.F
index db3c324e70..be4e72be4f 100644
--- a/ypp/real_time/RT_time_configuration_setup.F
+++ b/ypp/real_time/RT_time_configuration_setup.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): AM DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine RT_time_configuration_setup(what,GENERAL_RT_IO_t)
  !
  use units,          ONLY:AUT2FS,pi,HA2EV
@@ -13,8 +17,9 @@ subroutine RT_time_configuration_setup(what,GENERAL_RT_IO_t)
  use com,            ONLY:msg
  use real_time,      ONLY:RT_step
  use RT_control,     ONLY:TIME_adjust,RT_IO_t
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  character(*),  intent(in) :: what
  type(RT_IO_t), intent(in) :: GENERAL_RT_IO_t
@@ -142,14 +147,14 @@ subroutine RT_time_configuration_setup(what,GENERAL_RT_IO_t)
      !
    else
      RT_conf%T_n_steps=1
-     YAMBO_ALLOC(RT_time,(RT_conf%T_n_steps))
+     YAMBO_ALLOC(RT_time,(1))
      RT_time=0._SP
      RT_conf%delta_T=0._SP
    endif
  else
-   RT_conf%T_n_steps = GENERAL_RT_IO_t%N
+   RT_conf%T_n_steps = RT_conf%T_n_range(2)-RT_conf%T_n_range(1)+1
    YAMBO_ALLOC(RT_time,(RT_conf%T_n_steps))
-   RT_time=GENERAL_RT_IO_t%Time
+   RT_time=GENERAL_RT_IO_t%Time(RT_conf%T_n_range(1):RT_conf%T_n_range(2))
    RT_conf%delta_T   = STEP
  endif
  !
diff --git a/ypp/real_time/RT_ypp_driver.F b/ypp/real_time/RT_ypp_driver.F
index 97b6f604d5..a2413a479c 100644
--- a/ypp/real_time/RT_ypp_driver.F
+++ b/ypp/real_time/RT_ypp_driver.F
@@ -10,7 +10,7 @@ subroutine RT_ypp_driver(en,k,q)
  use R_lattice,      ONLY:bz_samp
  use electrons,      ONLY:levels
  use YPP_real_time,  ONLY:l_RT_X,l_RT_occupations,l_RT_lifetimes,l_RT_density,&
-&                         l_RT_G_two_times,l_skip_OBS_IO,l_force_min_damp,l_RT_pol
+&                         l_RT_G_two_times,l_skip_OBS_IO,l_force_min_damp,l_RT_pol,l_RT_fields
  use YPPm,           ONLY:PROJECT_mode,l_PROJECT_atom,l_PROJECT_line,l_PROJECT_plane,&
 &                         l_add_EQ_occ,l_separate_eh,l_skip_occ
  use interpolate,    ONLY:l_integral_respect
@@ -60,4 +60,8 @@ subroutine RT_ypp_driver(en,k,q)
  !====================
  if ( l_RT_X .or. l_RT_pol )               call RT_X_response(en,k,q)
  !
+ ! Response Functions
+ !====================
+ if ( l_RT_fields )                        call RT_fields()
+ !
 end subroutine
diff --git a/ypp/real_time/YPP_RT_project.dep b/ypp/real_time/YPP_RT_project.dep
index b1463c24d6..2ec8a939e6 100644
--- a/ypp/real_time/YPP_RT_project.dep
+++ b/ypp/real_time/YPP_RT_project.dep
@@ -19,6 +19,7 @@
  RT_damp_it.o
  RT_density.o
  RT_dos_time_plot.o
+ RT_fields.o
  RT_manual_excitation.o
  RT_occ_bands_interpolation.o
  RT_occ_time_plot.o
diff --git a/ypp/symmetries/ELPH_project.dep b/ypp/symmetries/ELPH_project.dep
new file mode 100644
index 0000000000..23bff92dca
--- /dev/null
+++ b/ypp/symmetries/ELPH_project.dep
@@ -0,0 +1,2 @@
+  symmetries_driver.o
+
diff --git a/ypp/symmetries/YPP_ELPH_project.dep b/ypp/symmetries/YPP_ELPH_project.dep
new file mode 100644
index 0000000000..645782fb3e
--- /dev/null
+++ b/ypp/symmetries/YPP_ELPH_project.dep
@@ -0,0 +1,2 @@
+ symmetries_driver.o
+
diff --git a/ypp/symmetries/fix_ATOMPROJs.F b/ypp/symmetries/fix_ATOMPROJs.F
index 445ca86b60..5e445019ce 100644
--- a/ypp/symmetries/fix_ATOMPROJs.F
+++ b/ypp/symmetries/fix_ATOMPROJs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine fix_ATOMPROJs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_contains_TR)
  !
  use pars,                ONLY:LP,SP,lchlen,cZERO
@@ -18,8 +22,9 @@ subroutine fix_ATOMPROJs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,
  use R_lattice,           ONLY:bz_samp
  use electrons,           ONLY:levels
  use atom_proj
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),intent(in)  :: E
  !
diff --git a/ypp/symmetries/fix_PPs.F b/ypp/symmetries/fix_PPs.F
index 0799d4b55b..89ff134104 100644
--- a/ypp/symmetries/fix_PPs.F
+++ b/ypp/symmetries/fix_PPs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_contains_TR)
  !
  use pars,                ONLY:LP,SP,lchlen
@@ -14,7 +18,7 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
 &                              OP_APP,cp_file,OP_RD_CL,RD_CL,RD,DUMP
  use electrons,           ONLY:n_sp_pol
  use pseudo,              ONLY:pp_kb,pp_kbd,pp_kbs,n_atomic_species,PP_alloc_pwscf,&
-&                              pp_n_l_times_proj_max,PP_free,PP_alloc_abinit,Vnl
+&                              pp_n_l_times_proj_max,PP_free,PP_alloc_abinit
  use LIVE_t,              ONLY:live_timing
  use DIPOLES,             ONLY:DIPOLE_t
  use stderr,              ONLY:intc
@@ -25,8 +29,9 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
  use YPP_symm,            ONLY:wf_nc_k_save,wf_igk_save,wf_nc_k_reduced,wf_igk_reduced,&
 &                              wf_nc_k_new,wf_igk_new,&
 &                              wf_nc_k_new,wf_ncx_new,wf_ncx_save
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),intent(in)  :: E
  !
@@ -44,12 +49,11 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
  type(DIPOLE_t)        :: Dip
  character(lchlen)     :: core_io_path_save,fragment_name,fragment_name_new
  integer               :: n_steps,ID
- integer               :: ierr,io_KB_abinit_err,io_KB_pwscf_err,io_Vnl_err
+ integer               :: ierr,io_KB_abinit_err,io_KB_pwscf_err
  !
  real(SP),allocatable     :: pp_kb_store(:,:,:,:,:)
  real(SP),allocatable     :: pp_kbd_store(:,:,:,:,:)
  real(SP),allocatable     :: pp_kbs_store(:,:)
- complex(SP), allocatable :: Vnl_store(:,:,:,:,:)
  !
  ! Dummies
  !
@@ -61,13 +65,11 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
  !
  integer, external :: io_KB_abinit
  integer, external :: io_KB_pwscf
- integer, external :: io_Vnl
  !
  ! Check the presence of PPs DBs
  !
  n_steps=k%nibz-k_save%nibz
  !
- io_Vnl_err     =-1
  io_KB_abinit_err=-1
  io_KB_pwscf_err =-1
  !
@@ -76,9 +78,6 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
  Dip%ng=1                ! to overcome check for io
  nkibz=k_save%nibz
  !
- call io_control(ACTION=OP_RD_CL,COM=NONE,SEC=(/1/),MODE=VERIFY,ID=ID)
- io_Vnl_err=io_Vnl(Dip,E,ID)
- !
  ! Check for the KB pwscf DB 
  if(.not.io_KB_abinit_err==0) then
    call io_control(ACTION=OP_RD,COM=NONE,SEC=(/1/),MODE=VERIFY,ID=ID)
@@ -91,11 +90,10 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
    io_KB_abinit_err=io_KB_abinit(ID)
  endif
  !
- if(io_KB_abinit_err/=0.and.io_KB_pwscf_err/=0.and.io_Vnl_err/=0) return
+ if(io_KB_abinit_err/=0.and.io_KB_pwscf_err/=0) return
  !
  if(io_KB_abinit_err==0)   call section('=',"PseudoPotentials KB (Abinit)")
  if(io_KB_pwscf_err ==0)   call section('=',"PseudoPotentials KB (PWscf) ")
- if(io_Vnl_err==0)         call section('=',"PseudoPotentials old (PWscf)")
  !
  ! Read PP DBs
  !
@@ -155,19 +153,6 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
    !
  endif
  !
- if(io_Vnl_err==0) then
-   YAMBO_ALLOC(Vnl_store,(3,E%nb,maxval(E%nbm),k_save%nibz,n_sp_pol))
-   Vnl_store=(0._SP,0._SP)
-   !
-   call io_control(ACTION=OP_RD_CL,SEC=(/1,2/),ID=ID)
-   io_Vnl_err=io_Vnl(Dip,E,ID)
-   !
-   Vnl_store=Vnl
-   !
-   YAMBO_FREE(Vnl)
-   !   
- endif
- !
  core_io_path_save=core_io_path
  core_io_path=more_io_path
  !
@@ -379,41 +364,6 @@ subroutine fix_PPs(E,k,k_save,kpoints_map,old_nsym,old_dl_sop,old_sop_inv,S_cont
      !
    endif
    !
-   if(io_Vnl_err==0) then 
-     !
-     YAMBO_ALLOC(Vnl,(3,E%nb,maxval(E%nbm),k%nibz,n_sp_pol))
-     !
-     Vnl(1:3, 1:E%nb, 1:maxval(E%nbm), 1:k_save%nibz, 1:n_sp_pol)=&
-  &       Vnl_store(1:3, 1:E%nb, 1:maxval(E%nbm), 1:k_save%nibz, 1:n_sp_pol)
-     !
-     if(n_steps>0) call live_timing('PPs rotation',n_steps)
-     !
-     do ik=k_save%nibz+1,k%nibz
-       !
-       ik_save=kpoints_map(1,ik)
-       is=kpoints_map(2,ik)
-       !
-       forall(ib=1:E%nb,ibm=1:maxval(E%nbm),i_spin=1:n_sp_pol) &
-  &          Vnl(:,ib,ibm,ik,i_spin) = matmul( old_dl_sop(:,:,is), Vnl_store(:,ib,ibm,ik_save,i_spin) ) 
-       ! Vnl is invariant under T-rev as iR and p ??
-       if ( S_contains_TR(is) ==1 ) Vnl(:,:,:,ik,:)=conjg( Vnl(:,:,:,ik,:) )
-       !
-       call live_timing(steps=1)
-       !
-     enddo
-     !
-     Dip%ng=wf_ng
-     !
-     call io_control(ACTION=OP_WR_CL,COM=NONE,SEC=(/1,2/),ID=ID)      
-     io_Vnl_err=io_Vnl(Dip,E,ID)
-     !
-     YAMBO_FREE(Vnl)
-     YAMBO_FREE(Vnl_store)
-     !
-     if(n_steps>0) call live_timing()
-     !
-   endif
-   !
  !
  YAMBO_FREE(wf_nc_k_reduced)
  YAMBO_FREE(wf_igk_reduced)
diff --git a/ypp/symmetries/fix_QP_DBs.F b/ypp/symmetries/fix_QP_DBs.F
index fb1890a72d..222adbf355 100644
--- a/ypp/symmetries/fix_QP_DBs.F
+++ b/ypp/symmetries/fix_QP_DBs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine fix_QP_DBs(k,k_save,kpoints_map)
  !
  use pars,                ONLY:lchlen
@@ -17,8 +21,9 @@ subroutine fix_QP_DBs(k,k_save,kpoints_map)
  use descriptors,         ONLY:IO_desc_duplicate
  use electrons,           ONLY:n_sp_pol
  use R_lattice,           ONLY:bz_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp),intent(in) :: k
  type(bz_samp),intent(in) :: k_save
diff --git a/ypp/symmetries/fix_SC_DBs.F b/ypp/symmetries/fix_SC_DBs.F
index c82015248a..6d806047d8 100644
--- a/ypp/symmetries/fix_SC_DBs.F
+++ b/ypp/symmetries/fix_SC_DBs.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine fix_SC_DBs(k,k_save,kpoints_map)
  !
  use pars,                ONLY:SP,lchlen
@@ -17,8 +21,9 @@ subroutine fix_SC_DBs(k,k_save,kpoints_map)
  use hamiltonian,         ONLY:H_rotation,H_nl_sc,l_sc_V_is_local,H_potential
  use electrons,           ONLY:levels,E_reset,n_sp_pol
  use R_lattice,           ONLY:bz_samp,nkibz
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp),intent(in) :: k
  type(bz_samp),intent(in) :: k_save
diff --git a/ypp/symmetries/fix_WFs_Gshells.F b/ypp/symmetries/fix_WFs_Gshells.F
index 8b7102aa4c..fadab5983d 100644
--- a/ypp/symmetries/fix_WFs_Gshells.F
+++ b/ypp/symmetries/fix_WFs_Gshells.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine fix_WFs_Gshells(k,k_save,kpoints_map,old_nsym)
  !
@@ -19,8 +23,9 @@ subroutine fix_WFs_Gshells(k,k_save,kpoints_map,old_nsym)
  use YPP_symm,            ONLY:wf_nc_k_save,wf_igk_save,wf_nc_k_reduced,wf_igk_reduced,&
 &                              wf_nc_k_new,l_wf_nc_k_red,ng_vec_new,ng_vec_save,wf_igk_new,&
 &                              wf_nc_k_new,wf_ncx_new,wf_ncx_save,wf_ng_new,wf_ng_save,wf_ng_cut
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(bz_samp),intent(inout) :: k
  type(bz_samp),intent(in)    :: k_save
diff --git a/ypp/symmetries/fix_WFs_and_E.F b/ypp/symmetries/fix_WFs_and_E.F
index 0669c7aca9..e11cf6fbfb 100644
--- a/ypp/symmetries/fix_WFs_and_E.F
+++ b/ypp/symmetries/fix_WFs_and_E.F
@@ -5,7 +5,11 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
+! headers
+!
 #include<dev_defs.h>
+#include<y_memory.h>
+!
 !
 subroutine fix_WFs_and_E(E,k,k_save,kpoints_map,old_nsym,S_contains_TR)
  !
@@ -24,8 +28,9 @@ subroutine fix_WFs_and_E(E,k,k_save,kpoints_map,old_nsym,S_contains_TR)
  use YPP_symm,            ONLY:wf_nc_k_save,wf_igk_save,wf_nc_k_reduced,wf_igk_reduced,&
 &                              wf_nc_k_new,wf_igk_new,&
 &                              wf_nc_k_new,wf_ncx_new,wf_ncx_save,wf_ng_new,wf_ng_save
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),intent(in)  :: E
  !
@@ -281,8 +286,9 @@ subroutine map_energies(E,E_new,k,kpoints_map)
  !
  use electrons,           ONLY:levels,E_reset,n_sp_pol
  use R_lattice,           ONLY:bz_samp
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels), intent(in)     :: E
  type(levels), intent(out)    :: E_new
diff --git a/ypp/symmetries/fix_symmetries.F b/ypp/symmetries/fix_symmetries.F
index 881897784d..f093106da1 100644
--- a/ypp/symmetries/fix_symmetries.F
+++ b/ypp/symmetries/fix_symmetries.F
@@ -5,6 +5,10 @@
 !
 ! Authors (see AUTHORS file for details): CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine fix_symmetries(old_nsym,old_dl_sop,old_sop_inv,S_contains_TR)
  !
  use pars,                ONLY:LP,SP
@@ -18,8 +22,9 @@ subroutine fix_symmetries(old_nsym,old_dl_sop,old_sop_inv,S_contains_TR)
  use vec_operate,         ONLY:v_norm
  use D_lattice,           ONLY:nsym,dl_sop,i_time_rev,alat,mag_syms,&
 &                              i_space_inv,sop_inv,inv_index,symmetry_group_table
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  integer, intent(out)   :: old_nsym
  real(SP),intent(out)   :: old_dl_sop(3,3,nsym)
diff --git a/ypp/symmetries/k_build_map.F b/ypp/symmetries/k_build_map.F
index 98b777a97d..b2ff3bc1ce 100644
--- a/ypp/symmetries/k_build_map.F
+++ b/ypp/symmetries/k_build_map.F
@@ -8,8 +8,8 @@
 subroutine k_build_map(k,k_save,kpoints_map)
  !
  use pars,                ONLY:SP
- use zeros,               ONLY:k_rlu_zero
- use vec_operate,         ONLY:rlu_v_is_zero,c2a
+ use zeros,               ONLY:k_iku_zero    !,k_rlu_zero
+ use vec_operate,         ONLY:c2a,v_is_zero !,rlu_v_is_zero
  use R_lattice,           ONLY:bz_samp
  !
  implicit none
@@ -35,20 +35,30 @@ subroutine k_build_map(k,k_save,kpoints_map)
  nk=k_save%nbz
  !
  kpoints_map=0
+ !
  do ik=1,k%nibz 
-   do ik_save=1,k_save%nbz
+   do ik_save=1,nk
      v1=k_save%ptbz(ik_save,:)
      !
      call c2a(v_in=v1,mode='ki2a')
      call c2a(v_in=k%pt(ik,:),v_out=v2,mode='ki2a')
      !
-     if (rlu_v_is_zero(v2-v1,zero_=k_rlu_zero)) then
-       kpoints_map(:, ik ) = k_save%sstar(ik_save,:)
+     ! 2022/05/25 DS: If we have problems with the following procedure,
+     !                It means that we need to use the procedure in between DEBUG
+     !                commands and track the Go shifts
+     !
+     ! DEBUG <
+     !if (rlu_v_is_zero(v2-v1,zero_=k_rlu_zero)) then
+     ! DEBUG >
+     if ( v_is_zero(v1-v2,k_iku_zero) ) then
+       kpoints_map(:, ik ) = k_save%sstar(ik_save,:2)
        exit 
      endif
    enddo
  enddo
  !
+ if ( any(kpoints_map==0) ) call error(" kpoints map is not complete")
+ !
  ! swap ik and i1 to sort
  ! first all the points in the ibz
  ! 
diff --git a/ypp/symmetries/symmetries_driver.F b/ypp/symmetries/symmetries_driver.F
index 4f7461ae67..c192ccab17 100644
--- a/ypp/symmetries/symmetries_driver.F
+++ b/ypp/symmetries/symmetries_driver.F
@@ -5,25 +5,32 @@
 !
 ! Authors (see AUTHORS file for details): AM CA DS
 !
+! headers
+!
+#include<y_memory.h>
+!
 subroutine symmetries_driver(E,Xk)
  !
- use pars,                ONLY:SP
+ use pars,                ONLY:SP,lchlen
  use xc_functionals,      ONLY:magn
  use electrons,           ONLY:levels,Spin_magn,n_spinor
  use FFT_m,               ONLY:fft_size
- use IO_m,                ONLY:rm_file
- use com,                 ONLY:msg,core_io_path,more_io_path
- use R_lattice,           ONLY:bz_samp,ng_closed,ng_vec,bz_samp_reset,bz_samp_duplicate
+ use IO_m,                ONLY:rm_file,cp_file,OP_RD_CL,NONE,DUMP
+ use IO_int,              ONLY:ver_is_gt_or_eq,io_control
+ use com,                 ONLY:msg,core_io_path,more_io_path,jobstr
+ use R_lattice,           ONLY:bz_samp,ng_closed,ng_vec,bz_samp_reset,bz_samp_duplicate,nqbz
  use D_lattice,           ONLY:nsym,atom_mass,atoms_map,n_atoms_species_max,n_atomic_species
  use wave_func,           ONLY:wf_ng,WF
  use stderr,              ONLY:intc
+ use LIVE_t,              ONLY:live_timing
  use interfaces,          ONLY:WF_load,WF_free,el_magnetization
  use IO_int,              ONLY:io_control
  use IO_m,                ONLY:REP,OP_WR_CL,VERIFY
  use parser_m,            ONLY:parser
  use D_lattice,           ONLY:kpoints_map,old_nsym,old_dl_sop,old_S_contains_TR
+ use y_memory_alloc
  !
-#include<y_memory.h>
+ implicit none
  !
  type(levels),intent(in)     :: E
  type(bz_samp),intent(inout) :: Xk
@@ -36,8 +43,15 @@ subroutine symmetries_driver(E,Xk)
  logical               :: l_keep_k_grid
  !
  integer               :: old_nkpt,ID,io_old_SYMMs
+ character(lchlen)     :: core_io_path_save
  integer, external     :: io_full_SYMMs
  !
+#if defined _YPP_ELPH
+ character(lchlen)  :: filename,infile,outfile,in_path
+ integer, external  :: io_ELPH
+ integer            :: iq,ID_gkkp,io_err
+#endif
+ !
  YAMBO_ALLOC(old_dl_sop,(3,3,nsym))
  YAMBO_ALLOC(old_S_contains_TR,(nsym))
  !     
@@ -70,8 +84,6 @@ subroutine symmetries_driver(E,Xk)
  !
  ! Expansion of default k-points
  call k_ibz2bz(Xk,'i',.false.)
- YAMBO_FREE(Xk%pt)
- call k_reduce(Xk,.false.)
  !
  call fix_symmetries(old_nsym,old_dl_sop,old_sop_inv,old_S_contains_TR)
  !
@@ -127,8 +139,55 @@ subroutine symmetries_driver(E,Xk)
  !
  ! Savel old symmetries on file
  ! ============================
+ core_io_path_save=core_io_path
+ core_io_path=more_io_path
  call io_control(ACTION=OP_WR_CL,COM=REP,SEC=(/1/),MODE=VERIFY,ID=ID)
  io_old_SYMMs=io_full_SYMMs(Xk,ID)
+ core_io_path=core_io_path_save
+ !
+#if defined _YPP_ELPH
+ !
+ ! Copy gkkp_expanded if presents
+ !
+ call section('=',"GKKP-databases")
+ !
+ call io_control(ACTION=OP_RD_CL,COM=NONE,MODE=DUMP,SEC=(/1/),ID=ID_gkkp)
+ io_err=io_ELPH(ID_gkkp,'gkkp_expanded') 
+ !
+ if(io_err==0) then
+   !      
+    if (trim(jobstr)/='') then
+      in_path =trim(jobstr)//"/"
+   else
+      in_path='SAVE/'
+   endif
+   !
+   call live_timing('Copying gkkp files',nqbz+1)
+   !
+   do iq=0,nqbz
+     !
+     if(iq==0) then
+       filename ='ndb.elph_gkkp_expanded'
+     else
+       filename ='ndb.elph_gkkp_expanded_fragment_'//trim(intc(iq))
+     endif
+     infile=trim(in_path)//trim(filename)
+     !
+     outfile="./"//trim(more_io_path)//"/SAVE/"//trim(filename)
+     call cp_file(trim(infile),trim(outfile),io_err)
+     if(io_err/=0) call error(' Error copying the gkkp_expanded files')
+     !
+     call live_timing(steps=1)
+     !
+   enddo
+   !
+   call live_timing()
+   !
+ else
+   call msg('s','Electron-phonon DBs not present')
+ endif
+ !
+#endif
  !
  ! CLEAN
  !=======