StableNorm.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. // This file is part of Eigen, a lightweight C++ template library
  2. // for linear algebra.
  3. //
  4. // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
  5. //
  6. // This Source Code Form is subject to the terms of the Mozilla
  7. // Public License v. 2.0. If a copy of the MPL was not distributed
  8. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
  9. #ifndef EIGEN_STABLENORM_H
  10. #define EIGEN_STABLENORM_H
  11. namespace Eigen {
  12. namespace internal {
  13. template<typename ExpressionType, typename Scalar>
  14. inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
  15. {
  16. Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
  17. if(maxCoeff>scale)
  18. {
  19. ssq = ssq * numext::abs2(scale/maxCoeff);
  20. Scalar tmp = Scalar(1)/maxCoeff;
  21. if(tmp > NumTraits<Scalar>::highest())
  22. {
  23. invScale = NumTraits<Scalar>::highest();
  24. scale = Scalar(1)/invScale;
  25. }
  26. else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
  27. {
  28. invScale = Scalar(1);
  29. scale = maxCoeff;
  30. }
  31. else
  32. {
  33. scale = maxCoeff;
  34. invScale = tmp;
  35. }
  36. }
  37. else if(maxCoeff!=maxCoeff) // we got a NaN
  38. {
  39. scale = maxCoeff;
  40. }
  41. // TODO if the maxCoeff is much much smaller than the current scale,
  42. // then we can neglect this sub vector
  43. if(scale>Scalar(0)) // if scale==0, then bl is 0
  44. ssq += (bl*invScale).squaredNorm();
  45. }
  46. template<typename Derived>
  47. inline typename NumTraits<typename traits<Derived>::Scalar>::Real
  48. blueNorm_impl(const EigenBase<Derived>& _vec)
  49. {
  50. typedef typename Derived::RealScalar RealScalar;
  51. using std::pow;
  52. using std::sqrt;
  53. using std::abs;
  54. const Derived& vec(_vec.derived());
  55. static bool initialized = false;
  56. static RealScalar b1, b2, s1m, s2m, rbig, relerr;
  57. if(!initialized)
  58. {
  59. int ibeta, it, iemin, iemax, iexp;
  60. RealScalar eps;
  61. // This program calculates the machine-dependent constants
  62. // bl, b2, slm, s2m, relerr overfl
  63. // from the "basic" machine-dependent numbers
  64. // nbig, ibeta, it, iemin, iemax, rbig.
  65. // The following define the basic machine-dependent constants.
  66. // For portability, the PORT subprograms "ilmaeh" and "rlmach"
  67. // are used. For any specific computer, each of the assignment
  68. // statements can be replaced
  69. ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
  70. it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
  71. iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
  72. iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
  73. rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
  74. iexp = -((1-iemin)/2);
  75. b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
  76. iexp = (iemax + 1 - it)/2;
  77. b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
  78. iexp = (2-iemin)/2;
  79. s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
  80. iexp = - ((iemax+it)/2);
  81. s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
  82. eps = RealScalar(pow(double(ibeta), 1-it));
  83. relerr = sqrt(eps); // tolerance for neglecting asml
  84. initialized = true;
  85. }
  86. Index n = vec.size();
  87. RealScalar ab2 = b2 / RealScalar(n);
  88. RealScalar asml = RealScalar(0);
  89. RealScalar amed = RealScalar(0);
  90. RealScalar abig = RealScalar(0);
  91. for(typename Derived::InnerIterator it(vec, 0); it; ++it)
  92. {
  93. RealScalar ax = abs(it.value());
  94. if(ax > ab2) abig += numext::abs2(ax*s2m);
  95. else if(ax < b1) asml += numext::abs2(ax*s1m);
  96. else amed += numext::abs2(ax);
  97. }
  98. if(amed!=amed)
  99. return amed; // we got a NaN
  100. if(abig > RealScalar(0))
  101. {
  102. abig = sqrt(abig);
  103. if(abig > rbig) // overflow, or *this contains INF values
  104. return abig; // return INF
  105. if(amed > RealScalar(0))
  106. {
  107. abig = abig/s2m;
  108. amed = sqrt(amed);
  109. }
  110. else
  111. return abig/s2m;
  112. }
  113. else if(asml > RealScalar(0))
  114. {
  115. if (amed > RealScalar(0))
  116. {
  117. abig = sqrt(amed);
  118. amed = sqrt(asml) / s1m;
  119. }
  120. else
  121. return sqrt(asml)/s1m;
  122. }
  123. else
  124. return sqrt(amed);
  125. asml = numext::mini(abig, amed);
  126. abig = numext::maxi(abig, amed);
  127. if(asml <= abig*relerr)
  128. return abig;
  129. else
  130. return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
  131. }
  132. } // end namespace internal
  133. /** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
  134. * This version use a blockwise two passes algorithm:
  135. * 1 - find the absolute largest coefficient \c s
  136. * 2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
  137. *
  138. * For architecture/scalar types supporting vectorization, this version
  139. * is faster than blueNorm(). Otherwise the blueNorm() is much faster.
  140. *
  141. * \sa norm(), blueNorm(), hypotNorm()
  142. */
  143. template<typename Derived>
  144. inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
  145. MatrixBase<Derived>::stableNorm() const
  146. {
  147. using std::sqrt;
  148. using std::abs;
  149. const Index blockSize = 4096;
  150. RealScalar scale(0);
  151. RealScalar invScale(1);
  152. RealScalar ssq(0); // sum of square
  153. typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
  154. typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
  155. const DerivedCopy copy(derived());
  156. enum {
  157. CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
  158. || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
  159. ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
  160. && (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
  161. };
  162. typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
  163. typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
  164. Index n = size();
  165. if(n==1)
  166. return abs(this->coeff(0));
  167. Index bi = internal::first_default_aligned(copy);
  168. if (bi>0)
  169. internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
  170. for (; bi<n; bi+=blockSize)
  171. internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
  172. return scale * sqrt(ssq);
  173. }
  174. /** \returns the \em l2 norm of \c *this using the Blue's algorithm.
  175. * A Portable Fortran Program to Find the Euclidean Norm of a Vector,
  176. * ACM TOMS, Vol 4, Issue 1, 1978.
  177. *
  178. * For architecture/scalar types without vectorization, this version
  179. * is much faster than stableNorm(). Otherwise the stableNorm() is faster.
  180. *
  181. * \sa norm(), stableNorm(), hypotNorm()
  182. */
  183. template<typename Derived>
  184. inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
  185. MatrixBase<Derived>::blueNorm() const
  186. {
  187. return internal::blueNorm_impl(*this);
  188. }
  189. /** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
  190. * This version use a concatenation of hypot() calls, and it is very slow.
  191. *
  192. * \sa norm(), stableNorm()
  193. */
  194. template<typename Derived>
  195. inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
  196. MatrixBase<Derived>::hypotNorm() const
  197. {
  198. return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
  199. }
  200. } // end namespace Eigen
  201. #endif // EIGEN_STABLENORM_H