diff --git a/advisor.cpp b/advisor.cpp index 43ea01b..7346efb 100644 --- a/advisor.cpp +++ b/advisor.cpp @@ -37,7 +37,7 @@ std::vector *Advisor::recommend( SizeOptimizer optimizer(CudaVersion::V_8, tr, allowTransposition); std::vector *result = - optimizer.optimize(howMany, maxSignalInc, maxMemory, squareOnly, crop); + optimizer.optimize(howMany, maxSignalInc, maxMemory, squareOnly, crop, tr.rank); /* tr.rank tells optimize() which size search to run */ resetDevice(); return result; } diff --git a/generalTransform.cpp b/generalTransform.cpp index ee47051..eba67f3 100644 --- a/generalTransform.cpp +++ b/generalTransform.cpp @@ -17,7 +17,9 @@ GeneralTransform::GeneralTransform(int device, int X, int Y, int Z, int N, isFloat(isFloat), isForward(isForward), isInPlace(isInPlace), - isReal(isReal) {} + isReal(isReal) { + setRankInfo(); /* derive rank from the Y and Z extents */ + } GeneralTransform::GeneralTransform(int X, int Y, int Z, const GeneralTransform &tr) @@ -30,7 +32,9 @@ GeneralTransform::GeneralTransform(int X, int Y, int Z, isFloat(tr.isFloat), isForward(tr.isForward), isInPlace(tr.isInPlace), - isReal(tr.isReal) {} + isReal(tr.isReal) { + setRankInfo(); /* derive rank from the Y and Z extents */ + } GeneralTransform::GeneralTransform(const GeneralTransform &tr) { *this = tr; } @@ -46,8 +50,20 @@ GeneralTransform &GeneralTransform::operator=(const GeneralTransform &tr) { this->isForward = tr.isForward; this->isInPlace = tr.isInPlace; this->isReal = tr.isReal; + setRankInfo(); /* rank is recomputed here rather than copied from tr */ } return *this; } +void GeneralTransform::setRankInfo() { /* RANK_1D when Z and Y are both 1, RANK_2D when only Z is 1, RANK_3D otherwise */ + rank = RANK_3D; + if (1 == Z) { + if (1 == Y) { + rank = RANK_1D; + } else { + rank = RANK_2D; + } + } +} + } // namespace cuFFTAdvisor diff --git a/generalTransform.h b/generalTransform.h index 9e5b913..415271d 100644 --- a/generalTransform.h +++ b/generalTransform.h @@ -7,6 +7,9 @@ namespace cuFFTAdvisor { class GeneralTransform { public: + + enum Rank { RANK_1D = 1, RANK_2D = 2, RANK_3D = 3 }; /* enumerator values equal the number of dimensions */ + GeneralTransform(int device, int X, int Y, int Z, int N, Tristate::Tristate isBatched, Tristate::Tristate isFloat, Tristate::Tristate isForward, Tristate::Tristate 
isInPlace, @@ -30,6 +33,14 @@ class GeneralTransform { Tristate::Tristate isForward; // otherwise inverse Tristate::Tristate isInPlace; // otherwise out-of-place Tristate::Tristate isReal; // otherwise C2C + + Rank rank; /* derived from the Y and Z extents by setRankInfo() */ + +private: + /** + * Sets fields that describe rank (dimensionality) + */ + void setRankInfo(); }; } // namespace cuFFTAdvisor diff --git a/sizeOptimizer.cpp b/sizeOptimizer.cpp index 0670a96..47585fa 100644 --- a/sizeOptimizer.cpp +++ b/sizeOptimizer.cpp @@ -13,7 +13,7 @@ SizeOptimizer::SizeOptimizer(CudaVersion::CudaVersion version, log_5(1.0 / std::log(5)), log_7(1.0 / std::log(7)) { if (Tristate::BOTH == tr.isFloat) { - // if user is not sure if he/she needs double, then he/she doesn't need it + // if user is not sure if they need double, then they don't need it tr.isFloat = Tristate::TRUE; } @@ -37,11 +37,15 @@ std::vector *SizeOptimizer::optimize(size_t nBest, int maxPercIncrease, int maxMemMB, bool squareOnly, - bool crop) { + bool crop, int rank) { std::vector preoptimized; for (auto in : input) { - std::vector *tmp = - optimizeXYZ(in, nBest, maxPercIncrease, squareOnly, crop); + std::vector *tmp; + if(rank == GeneralTransform::RANK_3D){ /* 3D input gets the dedicated search; every other rank uses the 1D and 2D search */ + tmp = optimizeXYZ_3D(in, nBest, maxPercIncrease, squareOnly, crop); + }else{ + tmp = optimizeXYZ_1D_2D(in, nBest, maxPercIncrease, squareOnly, crop); + } preoptimized.insert(preoptimized.end(), tmp->begin(), tmp->end()); delete tmp; } @@ -161,11 +165,83 @@ size_t SizeOptimizer::getMinSize(GeneralTransform &tr, int maxPercDecrease, bool return std::max(0.f, afterPercInc); } -std::vector *SizeOptimizer::optimizeXYZ(GeneralTransform &tr, +void SizeOptimizer::cutter(std::vector* polys, GeneralTransform &tr, bool crop, size_t nBest){ + // Sorts the polynoms by value (descending when cropping, ascending otherwise) and keeps at most the first nBest of them; tr is not used. The vector is only ever shrunk, because resize() beyond the current size would append value-initialized polynoms. + if(crop){ + std::sort (polys->begin(), polys->end(), std::greater()); + }else{ + std::sort (polys->begin(), polys->end(), std::less()); + } + if (polys->size() > nBest) polys->resize(nBest); +} + +std::vector 
*SizeOptimizer::optimizeXYZ_3D(GeneralTransform &tr, + size_t nBest, + int maxPercIncrease, + bool squareOnly, + bool crop) { /* Combines per-axis size candidates for tr and returns a newly allocated vector holding up to nBest transforms whose X*Y*Z product lies within [minSize, maxSize]. */ + + std::vector *polysX = generatePolys(tr.X, tr.isFloat, crop); + cutter(polysX, tr, crop, nBest); /* sort the candidates and cut the list to nBest entries */ + std::vector *polysY; + std::vector *polysZ; + + if ((tr.X == tr.Y) || squareOnly) { + polysY = polysX; /* Y shares the X candidate list */ + } else { + polysY = generatePolys(tr.Y, tr.isFloat, crop); + cutter(polysY, tr, crop, nBest); + } + + if ((tr.X == tr.Z) || squareOnly) { + polysZ = polysX; /* Z shares the X candidate list */ + } else if (tr.Y == tr.Z) { + polysZ = polysY; /* Z shares the Y candidate list */ + } else { + polysZ = generatePolys(tr.Z, tr.isFloat, crop); + cutter(polysZ, tr, crop, nBest); + } + + size_t minSize = getMinSize(tr, maxPercIncrease, crop); + size_t maxSize = getMaxSize(tr, maxPercIncrease, squareOnly, crop); + + std::vector *result = new std::vector; + size_t found = 0; + for (auto& x : *polysX) { + for (auto& y : *polysY) { + if (squareOnly && (x.value != y.value)) continue; + size_t xy = x.value * y.value; + if (xy > maxSize) + break; // polynoms are sorted by size, we're already above the limit + for (auto& z : *polysZ) { + if (squareOnly && (x.value != z.value)) continue; + size_t xyz = xy * z.value; + if ((found < nBest) && (xyz >= minSize) && (xyz <= maxSize)) { + // we can take nbest only, as others very probably won't be faster + found++; + GeneralTransform t((int)x.value, (int)y.value, (int)z.value, tr); + result->push_back(t); + } + } + } + } + + if ((polysZ != polysY) && (polysZ != polysX)) { /* lists may be shared between axes, so each one is deleted only once */ + delete polysZ; + } + if (polysY != polysX) { + delete polysY; + } + delete polysX; + return result; +} + +std::vector *SizeOptimizer::optimizeXYZ_1D_2D(GeneralTransform &tr, size_t nBest, int maxPercIncrease, bool squareOnly, bool crop) { + std::vector *polysX = generatePolys(tr.X, tr.isFloat, crop); std::vector *polysY; std::vector *polysZ; @@ -198,27 +274,27 @@ std::vector *SizeOptimizer::optimizeXYZ(GeneralTransform &tr, size_t maxSize = getMaxSize(tr, maxPercIncrease, squareOnly, crop); std::vector *result = 
new std::vector; - size_t found = 0; - for (auto& x : *recPolysX) { - for (auto& y : *recPolysY) { - if (squareOnly && (x.value != y.value) && (y.value != 1)) continue; - size_t xy = x.value * y.value; - if (xy > maxSize) - break; // polynoms are sorted by size, we're already above the limit - for (auto& z : *recPolysZ) { - if (squareOnly && (x.value != z.value) && (z.value != 1)) continue; - size_t xyz = xy * z.value; - if ((found < nBest) && (xyz >= minSize) && (xyz <= maxSize)) { - // we can take nbest only, as others very probably won't be faster - found++; - GeneralTransform t((int)x.value, (int)y.value, (int)z.value, tr); - result->push_back(t); + size_t found = 0; + for (auto& x : *recPolysX) { + for (auto& y : *recPolysY) { + if (squareOnly && (x.value != y.value) && (y.value != 1)) continue; /* in square-only mode a Y extent of 1 is still accepted */ + size_t xy = x.value * y.value; + if (xy > maxSize) + break; // polynoms are sorted by size, we're already above the limit + for (auto& z : *recPolysZ) { + if (squareOnly && (x.value != z.value) && (z.value != 1)) continue; /* in square-only mode a Z extent of 1 is still accepted */ + size_t xyz = xy * z.value; + if ((found < nBest) && (xyz >= minSize) && (xyz <= maxSize)) { + // we can take nbest only, as others very probably won't be faster + found++; + GeneralTransform t((int)x.value, (int)y.value, (int)z.value, tr); + result->push_back(t); + } } } } - } - if (polysZ != polysY) { + if ((polysZ != polysY) && (polysZ != polysX)) { /* the deletes are skipped when polysZ is the same pointer as polysY or polysX */ delete polysZ; delete recPolysZ; } diff --git a/sizeOptimizer.h b/sizeOptimizer.h index 2cdc536..6ecd492 100644 --- a/sizeOptimizer.h +++ b/sizeOptimizer.h @@ -22,6 +22,16 @@ class SizeOptimizer { size_t exponent3; size_t exponent5; size_t exponent7; + + bool operator < (const Polynom& cmp) const /* orders Polynoms by value only */ + { + return (value < cmp.value); + } + + bool operator > (const Polynom& cmp) const /* orders Polynoms by value only */ + { + return (value > cmp.value); + } }; struct valueComparator { @@ -46,7 +56,7 @@ class SizeOptimizer { bool allowTrans); std::vector *optimize(size_t nBest, int maxPercIncrease, int maxMemMB, bool squareOnly, - bool 
crop); + bool crop, int rank); /* rank is compared against GeneralTransform::RANK_3D to pick optimizeXYZ_3D, otherwise optimizeXYZ_1D_2D is used */ private: int getNoOfPrimes(Polynom &poly); @@ -59,7 +69,11 @@ class SizeOptimizer { std::set *filterOptimal( std::vector *input, bool crop); std::vector *generatePolys(size_t num, bool isFloat, bool crop); - std::vector *optimizeXYZ(GeneralTransform &tr, size_t nBest, + void cutter(std::vector* polys, GeneralTransform &tr, bool crop, size_t nBest); /* sorts polys (descending when crop, ascending otherwise) and cuts the list to nBest entries */ + std::vector *optimizeXYZ_3D(GeneralTransform &tr, size_t nBest, + int maxPercIncrease, bool squareOnly, + bool crop); /* size search that optimize() runs for RANK_3D */ + std::vector *optimizeXYZ_1D_2D(GeneralTransform &tr, size_t nBest, /* size search that optimize() runs for every other rank */ int maxPercIncrease, bool squareOnly, bool crop); std::vector *optimizeN( diff --git a/transform.h b/transform.h index 4497f1d..01962ad 100644 --- a/transform.h +++ b/transform.h @@ -5,6 +5,7 @@ #include #include #include "utils.h" +#include "generalTransform.h" namespace cuFFTAdvisor { @@ -13,8 +14,6 @@ class Transform { void print(FILE *stream = stdout) const; static void printHeader(FILE *stream = stdout); - enum Rank { RANK_1D = 1, RANK_2D = 2, RANK_3D = 3 }; - Transform(int device, int X, int Y, int Z, int N, bool isBatched, bool isFloat, bool isForward, bool isInPlace, bool isReal) : device(device), @@ -48,7 +47,7 @@ class Transform { bool isForward; // otherwise inverse // derived - Rank rank; + GeneralTransform::Rank rank; /* the Rank enum is now declared in GeneralTransform */ size_t elems; // of the transform // FIXME remove size_t inTypeSize; size_t outTypeSize;