convolution.cpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. #include "convolution.hpp"
  2. #include "avx.hpp"
  3. namespace Layer{
  4. ConvolutionLayer::ConvolutionLayer(size_t nf_,size_t ni_,size_t nj_,size_t p_,size_t q_,size_t mf_):Layer(nf_*ni_*nj_,mf_*(ni_-p_+1)*(nj_-q_+1)){
  5. nf=nf_;
  6. ni=ni_;
  7. nj=nj_;
  8. p=p_;
  9. q=q_;
  10. mf=mf_;
  11. mi=ni-p+1;
  12. mj=nj-q+1;
  13. K=init_vector(mf*nf*p*q);
  14. b=init_vector(mf*nf);
  15. nabla_K=init_vector(mf*nf*p*q);
  16. nabla_b=init_vector(mf*nf);
  17. size_t n_vindex1=8*AVX_SIZE(p*q);
  18. vindex1=new v8i[AVX_SIZE(p*q)];
  19. for(size_t i=0;i<AVX_SIZE(p*q);++i){
  20. for(size_t j=0;j<8;++j){
  21. vindex1[i].i[j]=0;
  22. }
  23. }
  24. for(size_t j=0;j<q;++j){
  25. for(size_t i=0;i<p;++i){
  26. size_t ind=j*p+i;
  27. vindex1[ind/8].i[ind%8]=j*nj+i;
  28. }
  29. }
  30. }
  31. ConvolutionLayer::~ConvolutionLayer(){
  32. delete_vector(K);
  33. delete_vector(b);
  34. delete_vector(nabla_K);
  35. delete_vector(nabla_b);
  36. }
  37. void
  38. ConvolutionLayer::init(Real m,Real d){
  39. default_random_engine generator;
  40. normal_distribution<Real> distribution(m,d);
  41. for(size_t i=0;i<mf*nf*i*q;++i){
  42. K[i]=distribution(generator);
  43. }
  44. for(size_t i=0;i<mf*nf;++i){
  45. b[i]=distribution(generator);
  46. }
  47. }
  48. Vector
  49. ConvolutionLayer::feed_forward(Vector x_){
  50. return avx_feed_forward(x_);
  51. x=x_;
  52. for(size_t g=0;g<mf;++g){
  53. for(size_t k=0;k<mi;++k){
  54. for(size_t l=0;l<mj;++l){
  55. Real temp=0;
  56. size_t offset_x=k*nj+l;
  57. for(size_t f=0;f<nf;++f){
  58. for(size_t r=0;r<p;++r){
  59. for(size_t s=0;s<q;++s){
  60. temp+=x[indice3(f,k+r,l+s,ni,nj)]*K[indice4(g,f,r,s,nf,p,q)];
  61. }
  62. }
  63. temp+=b[indice2(g,f,nf)];
  64. }
  65. y[indice3(g,k,l,mi,mj)]=temp;
  66. }
  67. }
  68. }
  69. return y;
  70. }
  71. Vector
  72. ConvolutionLayer::avx_feed_forward(Vector x_){
  73. x=x_;
  74. for(size_t g=0;g<mf;++g){
  75. for(size_t k=0;k<mi;++k){
  76. for(size_t l=0;l<mj;++l){
  77. Real temp=0;
  78. Real temp_avx=0;
  79. for(size_t f=0;f<nf;++f){
  80. size_t n=AVX_SIZE(p*q);
  81. Real* ptrK=&K[(g*nf+f)*p*q];
  82. Real* ptrx=&x[indice3(f,k,l,ni,nj)];
  83. for(size_t t=0;t<n;++t){
  84. cout<<t<<" > "<<endl;
  85. //__m256 x_data=_mm256_i32gather_ps(&x[indice3(f,k,l,ni,nj)],*(__m256i*)(&vindex1[t]),4);
  86. v8f u={_mm256_loadu_ps(ptrK)}; //Read 8 values of K
  87. __m256i vi=*(__m256i*)(&vindex1[t]);
  88. v8f v={_mm256_i32gather_ps(ptrx,vi,4)};
  89. u.avx=_mm256_mul_ps(u.avx,v.avx);
  90. temp_avx+=hadd(u);
  91. ptrK+=8; //There is 8 floats in m256
  92. }
  93. cout<<p<<" "<<q<<endl;
  94. // exit(0);
  95. for(size_t r=0;r<p;++r){
  96. for(size_t s=0;s<q;++s){
  97. temp+=x[indice3(f,k+r,l+s,ni,nj)]*K[indice4(g,f,r,s,nf,p,q)];
  98. }
  99. }
  100. cout<<temp<<" vs "<<temp_avx<<endl;
  101. temp+=b[indice2(g,f,nf)];
  102. temp_avx+=b[indice2(g,f,nf)];
  103. }
  104. cout<<endl;
  105. char c;
  106. //cin>>c;
  107. //exit(0);
  108. y[indice3(g,k,l,mi,mj)]=temp;
  109. }
  110. }
  111. }
  112. return y;
  113. }
  114. void
  115. ConvolutionLayer::init_nabla(){
  116. for(size_t i=0;i<mf*nf;++i){
  117. nabla_b[i]=0;
  118. }
  119. for(size_t i=0;i<mf*nf*p*q;++i){
  120. nabla_K[i]=0;
  121. }
  122. }
  123. Vector
  124. ConvolutionLayer::back_propagation(Vector e){
  125. for(size_t f=0;f<nf;++f){
  126. for(size_t i=0;i<ni;++i){
  127. for(size_t j=0;j<nj;++j){
  128. Real temp=0;
  129. for(size_t g=0;g<mf;++g){
  130. size_t r=(i>=mi-1)?i-mi+1:0;
  131. for(;r<min(i,p);++r){
  132. size_t s=(j>=mj-1)?j-mj+1:0;
  133. for(;s<min(j,q);++s){
  134. temp+=K[indice4(g,f,r,s,nf,p,q)]*e[indice3(g,i-r,j-s,mi,mj)];
  135. }//s
  136. }//r
  137. }//g
  138. d[indice3(f,i,j,ni,nj)]=temp;
  139. }//j
  140. }//i
  141. }//f
  142. //display(delta,nf*ni*nj);
  143. //char a;cin>>a;
  144. //cout<<" - Update nabla_b"<<endl;
  145. //Update nabla_b<<
  146. for(size_t g=0;g<mf;++g){
  147. for(size_t f=0;f<nf;++f){
  148. Real temp=0;
  149. for(size_t k=0;k<mi;++k){
  150. for(size_t l=0;l<mj;++l){
  151. temp+=e[indice3(g,k,l,mi,mj)];
  152. }//l
  153. }//k
  154. nabla_b[indice2(g,f,nf)]+=temp;
  155. }
  156. }
  157. //Update nabla_w
  158. for(size_t g=0;g<mf;++g){
  159. for(size_t f=0;f<nf;++f){
  160. for(size_t r=0;r<p;++r){
  161. for(size_t s=0;s<q;++s){
  162. Real temp=0;
  163. for(size_t k=0;k<mi;++k){
  164. for(size_t l=0;l<mj;++l){
  165. temp+=e[indice3(g,k,l,mi,mj)]*x[indice3(f,k+r,l+s,ni,nj)];
  166. }//l
  167. }//k
  168. nabla_K[indice4(g,f,r,s,nf,p,q)]+=temp;
  169. }
  170. }
  171. }
  172. }
  173. return d;
  174. }
  175. void
  176. ConvolutionLayer::update(Real eta){
  177. //Update b
  178. for(size_t i=0;i<mf*nf;++i){
  179. b[i]-=eta*nabla_b[i];
  180. }
  181. //Update K
  182. for(size_t i=0;i<mf*nf*p*q;++i){
  183. K[i]-=eta*nabla_K[i];
  184. }
  185. }
  186. }