From b1acd469bdbe1d4c10e2fa49c7e51ba2fe25bd79 Mon Sep 17 00:00:00 2001
From: Mikhail Svechnikov <m.svechnikov@fz-juelich.de>
Date: Mon, 5 Feb 2024 16:43:28 +0100
Subject: [PATCH] optimize exponents summation

---
 Base/Math/Numeric.cpp                   |  6 +++
 Base/Math/Numeric.h                     |  2 +
 Resample/Particle/ReMesocrystal.cpp     | 58 ++++++++++++++++++---
 Resample/Particle/ReMesocrystal.h       |  3 +-
 Resample/Processed/Slicer.cpp           |  5 +-
 Sample/Particle/Mesocrystal.cpp         | 68 ++++++++++++++++++-------
 Sample/Particle/Mesocrystal.h           |  2 +-
 auto/Wrap/libBornAgainBase_wrap.cpp     |  2 +-
 auto/Wrap/libBornAgainDevice_wrap.cpp   |  2 +-
 auto/Wrap/libBornAgainFit_wrap.cpp      |  2 +-
 auto/Wrap/libBornAgainParam_wrap.cpp    |  2 +-
 auto/Wrap/libBornAgainResample_wrap.cpp |  2 +-
 auto/Wrap/libBornAgainSample_wrap.cpp   |  2 +-
 auto/Wrap/libBornAgainSim_wrap.cpp      |  2 +-
 14 files changed, 122 insertions(+), 36 deletions(-)

diff --git a/Base/Math/Numeric.cpp b/Base/Math/Numeric.cpp
index 479fba8733b..195c5a6224a 100644
--- a/Base/Math/Numeric.cpp
+++ b/Base/Math/Numeric.cpp
@@ -41,6 +41,12 @@ bool Numeric::almostEqual(double a, double b, int ulp)
     return std::abs(a - b) <= eps * ulp * (std::abs(a) + std::abs(b)) / 2;
 }
 
+bool Numeric::almostEqual(complex_t a, complex_t b, int ulp)
+{
+    constexpr double eps = std::numeric_limits<double>::epsilon();
+    return std::abs(a - b) <= eps * ulp * (std::abs(a) + std::abs(b)) / 2;
+}
+
 bool Numeric::almostEqual(const R3& a, const R3& b, int ulp)
 {
     return almostEqual(a.x(), b.x(), ulp) && almostEqual(a.y(), b.y(), ulp)
diff --git a/Base/Math/Numeric.h b/Base/Math/Numeric.h
index 2d06abb3cfc..e5568ca0d6b 100644
--- a/Base/Math/Numeric.h
+++ b/Base/Math/Numeric.h
@@ -15,6 +15,7 @@
 #ifndef BORNAGAIN_BASE_MATH_NUMERIC_H
 #define BORNAGAIN_BASE_MATH_NUMERIC_H
 
+#include <heinz/Complex.h>
 #include <heinz/Vectors3D.h>
 
 void check_scalar(double a, double b, int ulp);
@@ -30,6 +31,7 @@ double relativeDifference(double a, double b);
 
 //! Returns true if two doubles agree within machine epsilon times ulp (units in the last place).
 bool almostEqual(double a, double b, int ulp);
+bool almostEqual(complex_t a, complex_t b, int ulp);
 bool almostEqual(const R3& a, const R3& b, int ulp);
 
 double ignoreDenormalized(double value);
diff --git a/Resample/Particle/ReMesocrystal.cpp b/Resample/Particle/ReMesocrystal.cpp
index 7f5a239ac14..06d7fc262e4 100644
--- a/Resample/Particle/ReMesocrystal.cpp
+++ b/Resample/Particle/ReMesocrystal.cpp
@@ -13,6 +13,7 @@
 //  ************************************************************************************************
 
 #include "Resample/Particle/ReMesocrystal.h"
+#include "Base/Math/Numeric.h"
 #include "Base/Spin/SpinMatrix.h"
 #include "Base/Types/Span.h"
 #include "Base/Vector/WavevectorInfo.h"
@@ -80,22 +81,63 @@ complex_t ReMesocrystal::structureFactor(const WavevectorInfo& wavevectors) cons
     complex_t exp_c = exp_I(m_lattice.basisVectorC().dot(q));
 
     I3 range = m_max_index - m_min_index;
-    std::vector<complex_t> exponents_a(range.x() + 1);
+
     std::vector<complex_t> exponents_b(range.y() + 1);
     std::vector<complex_t> exponents_c(range.z() + 1);
 
-    for (int i = 0; i <= range.x(); i++)
-        exponents_a[i] = pow(exp_a, i + m_min_index.x());
     for (int j = 0; j <= range.y(); j++)
         exponents_b[j] = pow(exp_b, j + m_min_index.y());
     for (int k = 0; k <= range.z(); k++)
         exponents_c[k] = pow(exp_c, k + m_min_index.z());
 
     complex_t field_factor(0.0, 0.0);
-    for (I3 index : m_basis_indexes) {
-        index -= m_min_index;
-        field_factor += exponents_a[index.x()] * exponents_b[index.y()] * exponents_c[index.z()];
+
+    //---------------------------------------------------------------------------------
+
+//    std::vector<complex_t> exponents_a(range.x() + 1);
+//    for (int i = 0; i <= range.x(); i++)
+//        exponents_a[i] = pow(exp_a, i + m_min_index.x());
+
+//    for (I3 index : m_basis_indexes) {
+//        index -= m_min_index;
+//        field_factor += exponents_a[index.x()] * exponents_b[index.y()] * exponents_c[index.z()];
+//    }
+
+    //------------------------------------------------------------------
+
+    complex_t denom_factor = Numeric::almostEqual(exp_c, 1., 1) ? 1. : 1./(exp_c -1.);
+    std::vector<complex_t> exponents_c_r(range.z() + 2);
+    for (int k = 0; k <= range.z(); k++)
+        exponents_c_r[k] = (pow(exp_c, k+1) - 1.) * denom_factor;
+
+    if(Numeric::almostEqual(exp_c, 1., 1)) {
+        for(int i = 0; i<=range.x(); i++) {
+            const complex_t e_a = pow(exp_a, i + m_min_index.x());
+
+            for(int j = 0; j<=range.y(); j++) {
+                const complex_t e_ab = e_a * exponents_b[j];
+
+                for(const auto& p : m_pairs[i][j])
+                    field_factor += e_ab * complex_t(p.second-p.first+1);
+            }
+        }
+    } else {
+        for(int i = 0; i<=range.x(); i++) {
+            const complex_t e_a = pow(exp_a, i + m_min_index.x());
+
+            for(int j = 0; j<=range.y(); j++) {
+                const complex_t e_ab = e_a * exponents_b[j];
+
+                for(const auto& p : m_pairs[i][j]) {
+                    int N = p.second-p.first;
+                    field_factor += e_ab * exponents_c[p.first-m_min_index.z()] * exponents_c_r[N];
+                }
+            }
+        }
     }
+
+    //------------------------------------------------------------------
+
     return field_factor * debyeWallerFactor(q.real());
 }
 
@@ -173,9 +215,11 @@ SpinMatrix ReMesocrystal::thePolFF(const WavevectorInfo& wavevectors) const
     return m_compute_FF_pol(wavevectors);
 }
 
-void ReMesocrystal::setBasisIndexes(const std::vector<I3>& indexes, const I3& min, const I3& max)
+void ReMesocrystal::setBasisIndexes(const std::vector<I3>& indexes, const I3& min, const I3& max,
+                                    const std::vector< std::vector< std::vector< std::pair<int, int> > > >& pairs)
 {
     m_basis_indexes = indexes;
+    m_pairs = pairs;
     m_min_index = min;
     m_max_index = max;
 }
diff --git a/Resample/Particle/ReMesocrystal.h b/Resample/Particle/ReMesocrystal.h
index ba34b94eb1e..78fb03a84b2 100644
--- a/Resample/Particle/ReMesocrystal.h
+++ b/Resample/Particle/ReMesocrystal.h
@@ -48,7 +48,7 @@ public:
     complex_t theFF(const WavevectorInfo& wavevectors) const override;
     SpinMatrix thePolFF(const WavevectorInfo& wavevectors) const override;
 
-    void setBasisIndexes(const std::vector<I3>& indexes, const I3& min, const I3& max);
+    void setBasisIndexes(const std::vector<I3>& indexes, const I3& min, const I3& max, const std::vector<std::vector<std::vector<std::pair<int, int> > > > &pairs);
 
 private:
     complex_t structureFactor(const WavevectorInfo& wavevectors) const;
@@ -72,6 +72,7 @@ private:
     MesoOptions m_meso_options;
     double m_max_rec_length;
     std::vector<I3> m_basis_indexes; //!< The lattice points inside the outer shape
+    std::vector< std::vector< std::vector< std::pair<int, int> > > > m_pairs;
     I3 m_min_index;
     I3 m_max_index;
 };
diff --git a/Resample/Processed/Slicer.cpp b/Resample/Processed/Slicer.cpp
index c3804f24520..e6468c7fbca 100644
--- a/Resample/Processed/Slicer.cpp
+++ b/Resample/Processed/Slicer.cpp
@@ -294,8 +294,9 @@ OwningVector<IReParticle> Compute::Slicing::particlesInSlice(const IParticle* pa
         if (!meso_options.use_reciprocal_sum) {
             I3 min_index, max_index;
             std::vector<I3> basisIndexes;
-            p->calcBasisIndexes(basisIndexes, min_index, max_index);
-            mc->setBasisIndexes(basisIndexes, min_index, max_index);
+            std::vector< std::vector< std::vector< std::pair<int, int> > > > pairs;
+            p->calcBasisIndexes(basisIndexes, min_index, max_index, pairs);
+            mc->setBasisIndexes(basisIndexes, min_index, max_index, pairs);
         }
 
         std::vector<double> weights;
diff --git a/Sample/Particle/Mesocrystal.cpp b/Sample/Particle/Mesocrystal.cpp
index 0cee5939cc4..fc8ff3293fc 100644
--- a/Sample/Particle/Mesocrystal.cpp
+++ b/Sample/Particle/Mesocrystal.cpp
@@ -20,12 +20,13 @@
 #include "Sample/Particle/IFormFactor.h"
 #include "Sample/Particle/Particle.h"
 #include "Sample/Scattering/Rotations.h"
+#include <algorithm>
 
 namespace {
 
 int sgn(int val)
 {
-    if (val >= 0)
+    if (val > 0)
         return 1;
     return -1;
 }
@@ -75,7 +76,8 @@ std::vector<R3> Mesocrystal::calcBasisPositions() const
 {
     I3 unused_min, unused_max;
     std::vector<I3> basisIndexes;
-    calcBasisIndexes(basisIndexes, unused_min, unused_max);
+    std::vector< std::vector< std::vector< std::pair<int, int> > > > pairs;
+    calcBasisIndexes(basisIndexes, unused_min, unused_max, pairs);
 
     std::vector<R3> result(basisIndexes.size());
     size_t counter = 0;
@@ -86,19 +88,23 @@ std::vector<R3> Mesocrystal::calcBasisPositions() const
     return result;
 }
 
-void Mesocrystal::calcBasisIndexes(std::vector<I3>& indexes, I3& min_index, I3& max_index) const
+void Mesocrystal::calcBasisIndexes(std::vector<I3>& indexes, I3& min_index, I3& max_index,
+                                   std::vector<std::vector<std::vector<std::pair<int, int>>>> &pairs) const
 {
     indexes.clear();
     const Lattice3D& lattice = *m_crystal->lattice();
-    const IParticle* basis = m_crystal->basis();
+    R3 shift = m_crystal->basis()->particlePosition();
 
-    auto check_and_add_particle = [&](int i, int& i_lim, int j, int& j_lim, int k, int& k_lim) {
+    auto contains = [&](int i, int j, int k) -> bool {
         R3 position =
             i * lattice.basisVectorA() + j * lattice.basisVectorB() + k * lattice.basisVectorC();
 
-        R3 shift = basis->particlePosition();
+        return m_meso_formfactor->contains(position + shift);
+    };
 
-        if (m_meso_formfactor->contains(position + shift)) {
+    // On the first pass we find min and max indices along each axis
+    auto check_index = [&](int i, int& i_lim, int j, int& j_lim, int k, int& k_lim) {
+        if (contains(i,j,k)) {
             indexes.push_back(I3(i, j, k));
 
             // if boundary particle belongs to the outer shape, extend the boundaries
@@ -112,26 +118,26 @@ void Mesocrystal::calcBasisIndexes(std::vector<I3>& indexes, I3& min_index, I3&
     };
 
     // positive and negative boundaries are changeable
-    int k_min = -1, k_max = 0;
-    int j_min = -1, j_max = 0;
-    int i_min = -1, i_max = 0;
+    int k_min = 0, k_max = 1;
+    int j_min = 0, j_max = 1;
+    int i_min = 0, i_max = 1;
 
     auto iterate_i = [&](int j, int& j_lim, int k, int& k_lim) {
-        for (int i = 0; i <= i_max; i++)
-            check_and_add_particle(i, i_max, j, j_lim, k, k_lim);
-        for (int i = -1; i >= i_min; i--)
-            check_and_add_particle(i, i_min, j, j_lim, k, k_lim);
+        for (int i = 1; i <= i_max; i++)
+            check_index(i, i_max, j, j_lim, k, k_lim);
+        for (int i = 0; i >= i_min; i--)
+            check_index(i, i_min, j, j_lim, k, k_lim);
     };
     auto iterate_j = [&](int k, int& k_lim) {
-        for (int j = 0; j <= j_max; j++)
+        for (int j = 1; j <= j_max; j++)
             iterate_i(j, j_max, k, k_lim);
-        for (int j = -1; j >= j_min; j--)
+        for (int j = 0; j >= j_min; j--)
             iterate_i(j, j_min, k, k_lim);
     };
     auto iterate_k = [&]() {
-        for (int k = 0; k <= k_max; k++)
+        for (int k = 1; k <= k_max; k++)
             iterate_j(k, k_max);
-        for (int k = -1; k >= k_min; k--)
+        for (int k = 0; k >= k_min; k--)
             iterate_j(k, k_min);
     };
     iterate_k();
@@ -139,4 +145,30 @@ void Mesocrystal::calcBasisIndexes(std::vector<I3>& indexes, I3& min_index, I3&
     // save min and max indexes belonging to the outer shape
     min_index = I3(i_min + 1, j_min + 1, k_min + 1);
     max_index = I3(i_max - 1, j_max - 1, k_max - 1);
+
+    //----------------------------------------------------------------------------
+    I3 range = max_index - min_index;
+
+    auto contains_shifted = [&](int i, int j, int k) -> bool {
+        return contains(i+min_index.x(), j+min_index.y(), k+min_index.z());
+    };
+
+    pairs.clear();
+    pairs.resize(range.x() + 1, std::vector<std::vector<std::pair<int, int>>>(range.y()+1));
+    for(int i = 0; i<=range.x(); i++) {
+        for(int j = 0; j<=range.y(); j++) {
+            for(int k = 0; k<=range.z(); k++)
+            {
+                bool has_k = contains_shifted(i,j,k);
+
+                // open pair
+                if(has_k && (k == 0 || !contains_shifted(i,j,k-1)))
+                    pairs[i][j].push_back(std::make_pair(k,k));
+
+                // close pair
+                if(has_k && (k == range.z() || !contains_shifted(i,j,k+1)))
+                    pairs[i][j].back().second = k;
+            }
+        }
+    }
 }
diff --git a/Sample/Particle/Mesocrystal.h b/Sample/Particle/Mesocrystal.h
index e88bd349edc..d085342be1d 100644
--- a/Sample/Particle/Mesocrystal.h
+++ b/Sample/Particle/Mesocrystal.h
@@ -47,7 +47,7 @@ public:
     Span zSpan() const override;
 
     std::vector<R3> calcBasisPositions() const;
-    void calcBasisIndexes(std::vector<I3>& indexes, I3& min_index, I3& max_index) const;
+    void calcBasisIndexes(std::vector<I3>& indexes, I3& min_index, I3& max_index, std::vector<std::vector<std::vector<std::pair<int, int> > > > &pairs) const;
 
 private:
     Mesocrystal(Crystal* crystal, IFormFactor* formfactor);
diff --git a/auto/Wrap/libBornAgainBase_wrap.cpp b/auto/Wrap/libBornAgainBase_wrap.cpp
index edd4ff893c2..94ee9ea3215 100644
--- a/auto/Wrap/libBornAgainBase_wrap.cpp
+++ b/auto/Wrap/libBornAgainBase_wrap.cpp
@@ -6055,7 +6055,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
diff --git a/auto/Wrap/libBornAgainDevice_wrap.cpp b/auto/Wrap/libBornAgainDevice_wrap.cpp
index 7168a4e465f..60d08096af5 100644
--- a/auto/Wrap/libBornAgainDevice_wrap.cpp
+++ b/auto/Wrap/libBornAgainDevice_wrap.cpp
@@ -6086,7 +6086,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
diff --git a/auto/Wrap/libBornAgainFit_wrap.cpp b/auto/Wrap/libBornAgainFit_wrap.cpp
index def0cd09202..e1ec0b15efb 100644
--- a/auto/Wrap/libBornAgainFit_wrap.cpp
+++ b/auto/Wrap/libBornAgainFit_wrap.cpp
@@ -6060,7 +6060,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
diff --git a/auto/Wrap/libBornAgainParam_wrap.cpp b/auto/Wrap/libBornAgainParam_wrap.cpp
index 810a3acec89..319da1257a0 100644
--- a/auto/Wrap/libBornAgainParam_wrap.cpp
+++ b/auto/Wrap/libBornAgainParam_wrap.cpp
@@ -6059,7 +6059,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
diff --git a/auto/Wrap/libBornAgainResample_wrap.cpp b/auto/Wrap/libBornAgainResample_wrap.cpp
index 1114ee28e2d..eb488c28248 100644
--- a/auto/Wrap/libBornAgainResample_wrap.cpp
+++ b/auto/Wrap/libBornAgainResample_wrap.cpp
@@ -6052,7 +6052,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
diff --git a/auto/Wrap/libBornAgainSample_wrap.cpp b/auto/Wrap/libBornAgainSample_wrap.cpp
index c2a7c3c4499..9c61f4a6a75 100644
--- a/auto/Wrap/libBornAgainSample_wrap.cpp
+++ b/auto/Wrap/libBornAgainSample_wrap.cpp
@@ -6159,7 +6159,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
diff --git a/auto/Wrap/libBornAgainSim_wrap.cpp b/auto/Wrap/libBornAgainSim_wrap.cpp
index 3a72518aeb1..74fb0618099 100644
--- a/auto/Wrap/libBornAgainSim_wrap.cpp
+++ b/auto/Wrap/libBornAgainSim_wrap.cpp
@@ -6091,7 +6091,7 @@ SWIG_AsVal_std_complex_Sl_double_Sg_  (PyObject *o, std::complex<double>* val)
 
 
 SWIGINTERNINLINE PyObject*
-SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/usr/local/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
+SWIG_From_std_complex_Sl_double_Sg_  (/*@SWIG:/home/mikhail/Projects/swig-4.2.0/installed/share/swig/4.2.0/typemaps/swigmacros.swg,104,%ifcplusplus@*/
 
 const std::complex<double>&
 
-- 
GitLab