IncompleteLUT.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. // This file is part of Eigen, a lightweight C++ template library
  2. // for linear algebra.
  3. //
  4. // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
  5. // Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
  6. //
  7. // This Source Code Form is subject to the terms of the Mozilla
  8. // Public License v. 2.0. If a copy of the MPL was not distributed
  9. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
  10. #ifndef EIGEN_INCOMPLETE_LUT_H
  11. #define EIGEN_INCOMPLETE_LUT_H
  12. namespace Eigen {
  13. namespace internal {
  14. /** \internal
  15. * Compute a quick-sort split of a vector
  16. * On output, the vector row is permuted such that its elements satisfy
  17. * abs(row(i)) >= abs(row(ncut)) if i<ncut
  18. * abs(row(i)) <= abs(row(ncut)) if i>ncut
  19. * \param row The vector of values
  20. * \param ind The array of index for the elements in @p row
  21. * \param ncut The number of largest elements to keep
  22. **/
  23. template <typename VectorV, typename VectorI>
  24. Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
  25. {
  26. typedef typename VectorV::RealScalar RealScalar;
  27. using std::swap;
  28. using std::abs;
  29. Index mid;
  30. Index n = row.size(); /* length of the vector */
  31. Index first, last ;
  32. ncut--; /* to fit the zero-based indices */
  33. first = 0;
  34. last = n-1;
  35. if (ncut < first || ncut > last ) return 0;
  36. do {
  37. mid = first;
  38. RealScalar abskey = abs(row(mid));
  39. for (Index j = first + 1; j <= last; j++) {
  40. if ( abs(row(j)) > abskey) {
  41. ++mid;
  42. swap(row(mid), row(j));
  43. swap(ind(mid), ind(j));
  44. }
  45. }
  46. /* Interchange for the pivot element */
  47. swap(row(mid), row(first));
  48. swap(ind(mid), ind(first));
  49. if (mid > ncut) last = mid - 1;
  50. else if (mid < ncut ) first = mid + 1;
  51. } while (mid != ncut );
  52. return 0; /* mid is equal to ncut */
  53. }
  54. }// end namespace internal
  55. /** \ingroup IterativeLinearSolvers_Module
  56. * \class IncompleteLUT
  57. * \brief Incomplete LU factorization with dual-threshold strategy
  58. *
  59. * \implsparsesolverconcept
  60. *
  61. * During the numerical factorization, two dropping rules are used :
  62. * 1) any element whose magnitude is less than some tolerance is dropped.
  63. * This tolerance is obtained by multiplying the input tolerance @p droptol
  64. * by the average magnitude of all the original elements in the current row.
  65. * 2) After the elimination of the row, only the @p fill largest elements in
  66. * the L part and the @p fill largest elements in the U part are kept
  67. * (in addition to the diagonal element ). Note that @p fill is computed from
  68. * the input parameter @p fillfactor which is used the ratio to control the fill_in
  69. * relatively to the initial number of nonzero elements.
  70. *
  71. * The two extreme cases are when @p droptol=0 (to keep all the @p fill*2 largest elements)
  72. * and when @p fill=n/2 with @p droptol being different to zero.
  73. *
  74. * References : Yousef Saad, ILUT: A dual threshold incomplete LU factorization,
  75. * Numerical Linear Algebra with Applications, 1(4), pp 387-402, 1994.
  76. *
  77. * NOTE : The following implementation is derived from the ILUT implementation
  78. * in the SPARSKIT package, Copyright (C) 2005, the Regents of the University of Minnesota
  79. * released under the terms of the GNU LGPL:
  80. * http://www-users.cs.umn.edu/~saad/software/SPARSKIT/README
  81. * However, Yousef Saad gave us permission to relicense his ILUT code to MPL2.
  82. * See the Eigen mailing list archive, thread: ILUT, date: July 8, 2012:
  83. * http://listengine.tuxfamily.org/lists.tuxfamily.org/eigen/2012/07/msg00064.html
  84. * alternatively, on GMANE:
  85. * http://comments.gmane.org/gmane.comp.lib.eigen/3302
  86. */
  87. template <typename _Scalar, typename _StorageIndex = int>
  88. class IncompleteLUT : public SparseSolverBase<IncompleteLUT<_Scalar, _StorageIndex> >
  89. {
  90. protected:
  91. typedef SparseSolverBase<IncompleteLUT> Base;
  92. using Base::m_isInitialized;
  93. public:
  94. typedef _Scalar Scalar;
  95. typedef _StorageIndex StorageIndex;
  96. typedef typename NumTraits<Scalar>::Real RealScalar;
  97. typedef Matrix<Scalar,Dynamic,1> Vector;
  98. typedef Matrix<StorageIndex,Dynamic,1> VectorI;
  99. typedef SparseMatrix<Scalar,RowMajor,StorageIndex> FactorType;
  100. enum {
  101. ColsAtCompileTime = Dynamic,
  102. MaxColsAtCompileTime = Dynamic
  103. };
  104. public:
  105. IncompleteLUT()
  106. : m_droptol(NumTraits<Scalar>::dummy_precision()), m_fillfactor(10),
  107. m_analysisIsOk(false), m_factorizationIsOk(false)
  108. {}
  109. template<typename MatrixType>
  110. explicit IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
  111. : m_droptol(droptol),m_fillfactor(fillfactor),
  112. m_analysisIsOk(false),m_factorizationIsOk(false)
  113. {
  114. eigen_assert(fillfactor != 0);
  115. compute(mat);
  116. }
  117. Index rows() const { return m_lu.rows(); }
  118. Index cols() const { return m_lu.cols(); }
  119. /** \brief Reports whether previous computation was successful.
  120. *
  121. * \returns \c Success if computation was succesful,
  122. * \c NumericalIssue if the matrix.appears to be negative.
  123. */
  124. ComputationInfo info() const
  125. {
  126. eigen_assert(m_isInitialized && "IncompleteLUT is not initialized.");
  127. return m_info;
  128. }
  129. template<typename MatrixType>
  130. void analyzePattern(const MatrixType& amat);
  131. template<typename MatrixType>
  132. void factorize(const MatrixType& amat);
  133. /**
  134. * Compute an incomplete LU factorization with dual threshold on the matrix mat
  135. * No pivoting is done in this version
  136. *
  137. **/
  138. template<typename MatrixType>
  139. IncompleteLUT& compute(const MatrixType& amat)
  140. {
  141. analyzePattern(amat);
  142. factorize(amat);
  143. return *this;
  144. }
  145. void setDroptol(const RealScalar& droptol);
  146. void setFillfactor(int fillfactor);
  147. template<typename Rhs, typename Dest>
  148. void _solve_impl(const Rhs& b, Dest& x) const
  149. {
  150. x = m_Pinv * b;
  151. x = m_lu.template triangularView<UnitLower>().solve(x);
  152. x = m_lu.template triangularView<Upper>().solve(x);
  153. x = m_P * x;
  154. }
  155. protected:
  156. /** keeps off-diagonal entries; drops diagonal entries */
  157. struct keep_diag {
  158. inline bool operator() (const Index& row, const Index& col, const Scalar&) const
  159. {
  160. return row!=col;
  161. }
  162. };
  163. protected:
  164. FactorType m_lu;
  165. RealScalar m_droptol;
  166. int m_fillfactor;
  167. bool m_analysisIsOk;
  168. bool m_factorizationIsOk;
  169. ComputationInfo m_info;
  170. PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_P; // Fill-reducing permutation
  171. PermutationMatrix<Dynamic,Dynamic,StorageIndex> m_Pinv; // Inverse permutation
  172. };
  173. /**
  174. * Set control parameter droptol
  175. * \param droptol Drop any element whose magnitude is less than this tolerance
  176. **/
  177. template<typename Scalar, typename StorageIndex>
  178. void IncompleteLUT<Scalar,StorageIndex>::setDroptol(const RealScalar& droptol)
  179. {
  180. this->m_droptol = droptol;
  181. }
  182. /**
  183. * Set control parameter fillfactor
  184. * \param fillfactor This is used to compute the number @p fill_in of largest elements to keep on each row.
  185. **/
  186. template<typename Scalar, typename StorageIndex>
  187. void IncompleteLUT<Scalar,StorageIndex>::setFillfactor(int fillfactor)
  188. {
  189. this->m_fillfactor = fillfactor;
  190. }
  191. template <typename Scalar, typename StorageIndex>
  192. template<typename _MatrixType>
  193. void IncompleteLUT<Scalar,StorageIndex>::analyzePattern(const _MatrixType& amat)
  194. {
  195. // Compute the Fill-reducing permutation
  196. // Since ILUT does not perform any numerical pivoting,
  197. // it is highly preferable to keep the diagonal through symmetric permutations.
  198. #ifndef EIGEN_MPL2_ONLY
  199. // To this end, let's symmetrize the pattern and perform AMD on it.
  200. SparseMatrix<Scalar,ColMajor, StorageIndex> mat1 = amat;
  201. SparseMatrix<Scalar,ColMajor, StorageIndex> mat2 = amat.transpose();
  202. // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice.
  203. // on the other hand for a really non-symmetric pattern, mat2*mat1 should be prefered...
  204. SparseMatrix<Scalar,ColMajor, StorageIndex> AtA = mat2 + mat1;
  205. AMDOrdering<StorageIndex> ordering;
  206. ordering(AtA,m_P);
  207. m_Pinv = m_P.inverse(); // cache the inverse permutation
  208. #else
  209. // If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine.
  210. SparseMatrix<Scalar,ColMajor, StorageIndex> mat1 = amat;
  211. COLAMDOrdering<StorageIndex> ordering;
  212. ordering(mat1,m_Pinv);
  213. m_P = m_Pinv.inverse();
  214. #endif
  215. m_analysisIsOk = true;
  216. m_factorizationIsOk = false;
  217. m_isInitialized = true;
  218. }
  219. template <typename Scalar, typename StorageIndex>
  220. template<typename _MatrixType>
  221. void IncompleteLUT<Scalar,StorageIndex>::factorize(const _MatrixType& amat)
  222. {
  223. using std::sqrt;
  224. using std::swap;
  225. using std::abs;
  226. using internal::convert_index;
  227. eigen_assert((amat.rows() == amat.cols()) && "The factorization should be done on a square matrix");
  228. Index n = amat.cols(); // Size of the matrix
  229. m_lu.resize(n,n);
  230. // Declare Working vectors and variables
  231. Vector u(n) ; // real values of the row -- maximum size is n --
  232. VectorI ju(n); // column position of the values in u -- maximum size is n
  233. VectorI jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1
  234. // Apply the fill-reducing permutation
  235. eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
  236. SparseMatrix<Scalar,RowMajor, StorageIndex> mat;
  237. mat = amat.twistedBy(m_Pinv);
  238. // Initialization
  239. jr.fill(-1);
  240. ju.fill(0);
  241. u.fill(0);
  242. // number of largest elements to keep in each row:
  243. Index fill_in = (amat.nonZeros()*m_fillfactor)/n + 1;
  244. if (fill_in > n) fill_in = n;
  245. // number of largest nonzero elements to keep in the L and the U part of the current row:
  246. Index nnzL = fill_in/2;
  247. Index nnzU = nnzL;
  248. m_lu.reserve(n * (nnzL + nnzU + 1));
  249. // global loop over the rows of the sparse matrix
  250. for (Index ii = 0; ii < n; ii++)
  251. {
  252. // 1 - copy the lower and the upper part of the row i of mat in the working vector u
  253. Index sizeu = 1; // number of nonzero elements in the upper part of the current row
  254. Index sizel = 0; // number of nonzero elements in the lower part of the current row
  255. ju(ii) = convert_index<StorageIndex>(ii);
  256. u(ii) = 0;
  257. jr(ii) = convert_index<StorageIndex>(ii);
  258. RealScalar rownorm = 0;
  259. typename FactorType::InnerIterator j_it(mat, ii); // Iterate through the current row ii
  260. for (; j_it; ++j_it)
  261. {
  262. Index k = j_it.index();
  263. if (k < ii)
  264. {
  265. // copy the lower part
  266. ju(sizel) = convert_index<StorageIndex>(k);
  267. u(sizel) = j_it.value();
  268. jr(k) = convert_index<StorageIndex>(sizel);
  269. ++sizel;
  270. }
  271. else if (k == ii)
  272. {
  273. u(ii) = j_it.value();
  274. }
  275. else
  276. {
  277. // copy the upper part
  278. Index jpos = ii + sizeu;
  279. ju(jpos) = convert_index<StorageIndex>(k);
  280. u(jpos) = j_it.value();
  281. jr(k) = convert_index<StorageIndex>(jpos);
  282. ++sizeu;
  283. }
  284. rownorm += numext::abs2(j_it.value());
  285. }
  286. // 2 - detect possible zero row
  287. if(rownorm==0)
  288. {
  289. m_info = NumericalIssue;
  290. return;
  291. }
  292. // Take the 2-norm of the current row as a relative tolerance
  293. rownorm = sqrt(rownorm);
  294. // 3 - eliminate the previous nonzero rows
  295. Index jj = 0;
  296. Index len = 0;
  297. while (jj < sizel)
  298. {
  299. // In order to eliminate in the correct order,
  300. // we must select first the smallest column index among ju(jj:sizel)
  301. Index k;
  302. Index minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
  303. k += jj;
  304. if (minrow != ju(jj))
  305. {
  306. // swap the two locations
  307. Index j = ju(jj);
  308. swap(ju(jj), ju(k));
  309. jr(minrow) = convert_index<StorageIndex>(jj);
  310. jr(j) = convert_index<StorageIndex>(k);
  311. swap(u(jj), u(k));
  312. }
  313. // Reset this location
  314. jr(minrow) = -1;
  315. // Start elimination
  316. typename FactorType::InnerIterator ki_it(m_lu, minrow);
  317. while (ki_it && ki_it.index() < minrow) ++ki_it;
  318. eigen_internal_assert(ki_it && ki_it.col()==minrow);
  319. Scalar fact = u(jj) / ki_it.value();
  320. // drop too small elements
  321. if(abs(fact) <= m_droptol)
  322. {
  323. jj++;
  324. continue;
  325. }
  326. // linear combination of the current row ii and the row minrow
  327. ++ki_it;
  328. for (; ki_it; ++ki_it)
  329. {
  330. Scalar prod = fact * ki_it.value();
  331. Index j = ki_it.index();
  332. Index jpos = jr(j);
  333. if (jpos == -1) // fill-in element
  334. {
  335. Index newpos;
  336. if (j >= ii) // dealing with the upper part
  337. {
  338. newpos = ii + sizeu;
  339. sizeu++;
  340. eigen_internal_assert(sizeu<=n);
  341. }
  342. else // dealing with the lower part
  343. {
  344. newpos = sizel;
  345. sizel++;
  346. eigen_internal_assert(sizel<=ii);
  347. }
  348. ju(newpos) = convert_index<StorageIndex>(j);
  349. u(newpos) = -prod;
  350. jr(j) = convert_index<StorageIndex>(newpos);
  351. }
  352. else
  353. u(jpos) -= prod;
  354. }
  355. // store the pivot element
  356. u(len) = fact;
  357. ju(len) = convert_index<StorageIndex>(minrow);
  358. ++len;
  359. jj++;
  360. } // end of the elimination on the row ii
  361. // reset the upper part of the pointer jr to zero
  362. for(Index k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;
  363. // 4 - partially sort and insert the elements in the m_lu matrix
  364. // sort the L-part of the row
  365. sizel = len;
  366. len = (std::min)(sizel, nnzL);
  367. typename Vector::SegmentReturnType ul(u.segment(0, sizel));
  368. typename VectorI::SegmentReturnType jul(ju.segment(0, sizel));
  369. internal::QuickSplit(ul, jul, len);
  370. // store the largest m_fill elements of the L part
  371. m_lu.startVec(ii);
  372. for(Index k = 0; k < len; k++)
  373. m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
  374. // store the diagonal element
  375. // apply a shifting rule to avoid zero pivots (we are doing an incomplete factorization)
  376. if (u(ii) == Scalar(0))
  377. u(ii) = sqrt(m_droptol) * rownorm;
  378. m_lu.insertBackByOuterInnerUnordered(ii, ii) = u(ii);
  379. // sort the U-part of the row
  380. // apply the dropping rule first
  381. len = 0;
  382. for(Index k = 1; k < sizeu; k++)
  383. {
  384. if(abs(u(ii+k)) > m_droptol * rownorm )
  385. {
  386. ++len;
  387. u(ii + len) = u(ii + k);
  388. ju(ii + len) = ju(ii + k);
  389. }
  390. }
  391. sizeu = len + 1; // +1 to take into account the diagonal element
  392. len = (std::min)(sizeu, nnzU);
  393. typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1));
  394. typename VectorI::SegmentReturnType juu(ju.segment(ii+1, sizeu-1));
  395. internal::QuickSplit(uu, juu, len);
  396. // store the largest elements of the U part
  397. for(Index k = ii + 1; k < ii + len; k++)
  398. m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
  399. }
  400. m_lu.finalize();
  401. m_lu.makeCompressed();
  402. m_factorizationIsOk = true;
  403. m_info = Success;
  404. }
  405. } // end namespace Eigen
  406. #endif // EIGEN_INCOMPLETE_LUT_H