123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196 |
- #include "convolution.hpp"
- #include "avx.hpp"
- namespace Layer{
- ConvolutionLayer::ConvolutionLayer(size_t nf_,size_t ni_,size_t nj_,size_t p_,size_t q_,size_t mf_):Layer(nf_*ni_*nj_,mf_*(ni_-p_+1)*(nj_-q_+1)){
- nf=nf_;
- ni=ni_;
- nj=nj_;
- p=p_;
- q=q_;
- mf=mf_;
- mi=ni-p+1;
- mj=nj-q+1;
- K=init_vector(mf*nf*p*q);
- b=init_vector(mf*nf);
- nabla_K=init_vector(mf*nf*p*q);
- nabla_b=init_vector(mf*nf);
- size_t n_vindex1=8*AVX_SIZE(p*q);
- vindex1=new v8i[AVX_SIZE(p*q)];
- for(size_t i=0;i<AVX_SIZE(p*q);++i){
- for(size_t j=0;j<8;++j){
- vindex1[i].i[j]=0;
- }
- }
- for(size_t j=0;j<q;++j){
- for(size_t i=0;i<p;++i){
- size_t ind=j*p+i;
- vindex1[ind/8].i[ind%8]=j*nj+i;
- }
- }
- }
- ConvolutionLayer::~ConvolutionLayer(){
- delete_vector(K);
- delete_vector(b);
- delete_vector(nabla_K);
- delete_vector(nabla_b);
- }
- void
- ConvolutionLayer::init(Real m,Real d){
- default_random_engine generator;
- normal_distribution<Real> distribution(m,d);
- for(size_t i=0;i<mf*nf*i*q;++i){
- K[i]=distribution(generator);
- }
- for(size_t i=0;i<mf*nf;++i){
- b[i]=distribution(generator);
- }
- }
- Vector
- ConvolutionLayer::feed_forward(Vector x_){
- return avx_feed_forward(x_);
- x=x_;
- for(size_t g=0;g<mf;++g){
- for(size_t k=0;k<mi;++k){
- for(size_t l=0;l<mj;++l){
- Real temp=0;
- size_t offset_x=k*nj+l;
- for(size_t f=0;f<nf;++f){
- for(size_t r=0;r<p;++r){
- for(size_t s=0;s<q;++s){
- temp+=x[indice3(f,k+r,l+s,ni,nj)]*K[indice4(g,f,r,s,nf,p,q)];
- }
- }
- temp+=b[indice2(g,f,nf)];
- }
- y[indice3(g,k,l,mi,mj)]=temp;
- }
- }
- }
- return y;
- }
- Vector
- ConvolutionLayer::avx_feed_forward(Vector x_){
- x=x_;
- for(size_t g=0;g<mf;++g){
- for(size_t k=0;k<mi;++k){
- for(size_t l=0;l<mj;++l){
- Real temp=0;
- Real temp_avx=0;
- for(size_t f=0;f<nf;++f){
- size_t n=AVX_SIZE(p*q);
- Real* ptrK=&K[(g*nf+f)*p*q];
- Real* ptrx=&x[indice3(f,k,l,ni,nj)];
- for(size_t t=0;t<n;++t){
- cout<<t<<" > "<<endl;
- //__m256 x_data=_mm256_i32gather_ps(&x[indice3(f,k,l,ni,nj)],*(__m256i*)(&vindex1[t]),4);
- v8f u={_mm256_loadu_ps(ptrK)}; //Read 8 values of K
- __m256i vi=*(__m256i*)(&vindex1[t]);
- v8f v={_mm256_i32gather_ps(ptrx,vi,4)};
- u.avx=_mm256_mul_ps(u.avx,v.avx);
- temp_avx+=hadd(u);
- ptrK+=8; //There is 8 floats in m256
- }
- cout<<p<<" "<<q<<endl;
- // exit(0);
- for(size_t r=0;r<p;++r){
- for(size_t s=0;s<q;++s){
- temp+=x[indice3(f,k+r,l+s,ni,nj)]*K[indice4(g,f,r,s,nf,p,q)];
- }
- }
- cout<<temp<<" vs "<<temp_avx<<endl;
- temp+=b[indice2(g,f,nf)];
- temp_avx+=b[indice2(g,f,nf)];
- }
- cout<<endl;
- char c;
- //cin>>c;
- //exit(0);
- y[indice3(g,k,l,mi,mj)]=temp;
- }
- }
- }
- return y;
- }
- void
- ConvolutionLayer::init_nabla(){
- for(size_t i=0;i<mf*nf;++i){
- nabla_b[i]=0;
- }
- for(size_t i=0;i<mf*nf*p*q;++i){
- nabla_K[i]=0;
- }
- }
- Vector
- ConvolutionLayer::back_propagation(Vector e){
- for(size_t f=0;f<nf;++f){
- for(size_t i=0;i<ni;++i){
- for(size_t j=0;j<nj;++j){
- Real temp=0;
- for(size_t g=0;g<mf;++g){
- size_t r=(i>=mi-1)?i-mi+1:0;
- for(;r<min(i,p);++r){
- size_t s=(j>=mj-1)?j-mj+1:0;
- for(;s<min(j,q);++s){
- temp+=K[indice4(g,f,r,s,nf,p,q)]*e[indice3(g,i-r,j-s,mi,mj)];
- }//s
- }//r
- }//g
- d[indice3(f,i,j,ni,nj)]=temp;
- }//j
- }//i
- }//f
- //display(delta,nf*ni*nj);
- //char a;cin>>a;
- //cout<<" - Update nabla_b"<<endl;
- //Update nabla_b<<
- for(size_t g=0;g<mf;++g){
- for(size_t f=0;f<nf;++f){
- Real temp=0;
- for(size_t k=0;k<mi;++k){
- for(size_t l=0;l<mj;++l){
- temp+=e[indice3(g,k,l,mi,mj)];
- }//l
- }//k
- nabla_b[indice2(g,f,nf)]+=temp;
- }
- }
- //Update nabla_w
- for(size_t g=0;g<mf;++g){
- for(size_t f=0;f<nf;++f){
- for(size_t r=0;r<p;++r){
- for(size_t s=0;s<q;++s){
- Real temp=0;
- for(size_t k=0;k<mi;++k){
- for(size_t l=0;l<mj;++l){
- temp+=e[indice3(g,k,l,mi,mj)]*x[indice3(f,k+r,l+s,ni,nj)];
- }//l
- }//k
- nabla_K[indice4(g,f,r,s,nf,p,q)]+=temp;
- }
- }
- }
- }
- return d;
- }
- void
- ConvolutionLayer::update(Real eta){
- //Update b
- for(size_t i=0;i<mf*nf;++i){
- b[i]-=eta*nabla_b[i];
- }
- //Update K
- for(size_t i=0;i<mf*nf*p*q;++i){
- K[i]-=eta*nabla_K[i];
- }
- }
- }
|