cuda_basic.cu

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

// workaround issue between gcc >= 4.7 and cuda 5.5
#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
  #undef _GLIBCXX_ATOMIC_BUILTINS
  #undef _GLIBCXX_USE_INT128
#endif

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cuda_basic
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int

#include <math_constants.h>
#include <cuda.h>
#include "main.h"
#include "cuda_common.h"

// Check that dense modules can be properly parsed by nvcc
#include <Eigen/Dense>
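
// Each functor below exposes an EIGEN_DEVICE_FUNC call operator so the same code
// can be compiled for both the host and the device. run_and_compare_to_cuda()
// (provided by cuda_common.h) evaluates a functor on both sides and checks that
// the results agree.
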
// struct Foo{
//   EIGEN_DEVICE_FUNC
//   void operator()(int i, const float* mats, float* vecs) const {
//     using namespace Eigen;
// //     Matrix3f M(data);
// //     Vector3f x(data+9);
// //     Map<Vector3f>(data+9) = M.inverse() * x;
//     Matrix3f M(mats+i/16);
//     Vector3f x(vecs+i*3);
// //     using std::min;
// //     using std::sqrt;
//     Map<Vector3f>(vecs+i*3) << x.minCoeff(), 1, 2;// / x.dot(x);//(M.inverse() * x) / x.x();
//     //x = x*2 + x.y() * x + x * x.maxCoeff() - x / x.sum();
//   }
// };
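
// Coefficient-wise arithmetic on fixed-size vectors/arrays read from the input buffer.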
template<typename T>
struct coeff_wise {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  {
    using namespace Eigen;
    T x1(in+i);
    T x2(in+i+1);
    T x3(in+i+2);
    Map<T> res(out+i*T::MaxSizeAtCompileTime);

    res.array() += (in[0] * x1 + x2).array() * x3.array();
  }
};
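
// Exercise replicate() in its full, column-wise, and row-wise variants, writing each
// replicated block to a distinct slice of the output buffer.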
template<typename T>
struct replicate {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  {
    using namespace Eigen;
    T x1(in+i);
    int step   = x1.size() * 4;
    int stride = 3 * step;

    typedef Map<Array<typename T::Scalar,Dynamic,Dynamic> > MapType;
    MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2);
    MapType(out+i*stride+1*step, x1.rows()*3, x1.cols())   = in[i] * x1.colwise().replicate(3);
    MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3)   = in[i] * x1.rowwise().replicate(3);
  }
};
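
// Exercise full and partial reductions (min, max, sum, prod, norms, column-wise and row-wise variants).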
template<typename T>
struct redux {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  {
    using namespace Eigen;
    int N = 10;
    T x1(in+i);
    out[i*N+0] = x1.minCoeff();
    out[i*N+1] = x1.maxCoeff();
    out[i*N+2] = x1.sum();
    out[i*N+3] = x1.prod();
    out[i*N+4] = x1.matrix().squaredNorm();
    out[i*N+5] = x1.matrix().norm();
    out[i*N+6] = x1.colwise().sum().maxCoeff();
    out[i*N+7] = x1.rowwise().maxCoeff().sum();
    out[i*N+8] = x1.matrix().colwise().squaredNorm().sum();
  }
};
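
// Matrix-matrix and matrix-vector products with a statically sized result type.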
template<typename T1, typename T2>
struct prod_test {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
  {
    using namespace Eigen;
    typedef Matrix<typename T1::Scalar, T1::RowsAtCompileTime, T2::ColsAtCompileTime> T3;
    T1 x1(in+i);
    T2 x2(in+i+1);
    Map<T3> res(out+i*T3::MaxSizeAtCompileTime);
    res += in[i] * x1 * x2;
  }
};
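
// Extract the diagonal of a fixed-size matrix.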
template<typename T1, typename T2>
struct diagonal {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
  {
    using namespace Eigen;
    T1 x1(in+i);
    Map<T2> res(out+i*T2::MaxSizeAtCompileTime);
    res += x1.diagonal();
  }
};
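
// Direct (closed-form) eigenvalue computation on a small self-adjoint matrix built from the random input.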
template<typename T>
struct eigenvalues {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  {
    using namespace Eigen;
    typedef Matrix<typename T::Scalar, T::RowsAtCompileTime, 1> Vec;
    T M(in+i);
    Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
    T A = M*M.adjoint(); // build a self-adjoint matrix from the random input
    SelfAdjointEigenSolver<T> eig;
    eig.computeDirect(A);
    res = eig.eigenvalues();
  }
};
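
// The driver below relies on run_and_compare_to_cuda() from cuda_common.h, which
// evaluates each functor once per index on the host and once inside a CUDA kernel,
// then compares the two output buffers. The device-side dispatch is presumably a
// one-thread-per-index kernel; a minimal sketch of that idea (the actual launch
// code, memory transfers, and grid sizing live in cuda_common.h):
//
//   template<typename Kernel, typename Input, typename Output>
//   __global__ void run_functor_kernel(Kernel ker, int n, const Input* in, Output* out)
//   {
//     int i = threadIdx.x + blockIdx.x * blockDim.x;
//     if (i < n) ker(i, in, out);
//   }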
void test_cuda_basic()
{
  ei_test_init_cuda();

  int nthreads = 100;
  Eigen::VectorXf in, out;

  #ifndef __CUDA_ARCH__
  int data_size = nthreads * 512;
  in.setRandom(data_size);
  out.setRandom(data_size);
  #endif

  CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Vector3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Array44f>(), nthreads, in, out) );

  CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array4f>(),  nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array33f>(), nthreads, in, out) );

  CALL_SUBTEST( run_and_compare_to_cuda(redux<Array4f>(),  nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(redux<Matrix3f>(), nthreads, in, out) );

  CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) );

  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );

  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );
}