|
@@ -0,0 +1,206 @@
|
|
|
#include "network.hpp"

#include <cmath>
#include <random>
#include <vector>
|
|
|
+
|
|
|
+void Network::init_normal_distribution(double m,double d){
|
|
|
+ default_random_engine generator;
|
|
|
+ normal_distribution<double> distribution(m,d);
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ Vector& b=biais[l];
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ b.data[i]=distribution(generator);
|
|
|
+ }
|
|
|
+ Matrix& w=weights[l];
|
|
|
+ for(size_t i=0;i<sizes[l]*sizes[l-1];++i){
|
|
|
+ w.get(i)=distribution(generator);
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void Network::init_standard(){
|
|
|
+ default_random_engine generator;
|
|
|
+ normal_distribution<double> distribution(0,1);
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ Vector& b=biais[l];
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ b.data[i]=distribution(generator);
|
|
|
+ }
|
|
|
+ Matrix& w=weights[l];
|
|
|
+ for(size_t i=0;i<sizes[l]*sizes[l-1];++i){
|
|
|
+ normal_distribution<double> distribution2(0,1/sqrt(sizes[l-1]));
|
|
|
+ w.get(i)=distribution2(generator);
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+const Vector&
|
|
|
+Network::feed_forward(const Vector& x){
|
|
|
+ a[0]=x;
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ compute_z(l);
|
|
|
+ compute_a(l);
|
|
|
+ }
|
|
|
+ return a[depth-1];
|
|
|
+}
|
|
|
+
|
|
|
+double
|
|
|
+Network::eval(Dataset* dataset){
|
|
|
+ size_t n=dataset->get_test_size();
|
|
|
+ size_t nb=0;
|
|
|
+ for(size_t i=0;i<n;++i){
|
|
|
+ pair<const Vector&,const Vector&> t=dataset->get_test(i);
|
|
|
+ const Vector& a=feed_forward(t.first);
|
|
|
+ if(a.argmax()==t.second.argmax()) ++nb;
|
|
|
+ }
|
|
|
+ double res=double(nb)/double(n)*100;
|
|
|
+ cout<<"> Res = "<<res<<"%"<<endl;
|
|
|
+ return res;
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::train(Dataset* dataset,size_t nb_epochs,size_t batch_size,double eta){
|
|
|
+ size_t train_size=dataset->get_train_size();
|
|
|
+ size_t nb_batchs=(train_size-1)/batch_size+1;
|
|
|
+ size_t* indices=new size_t[train_size];
|
|
|
+ for(size_t i=0;i<train_size;++i){
|
|
|
+ indices[i]=i;
|
|
|
+ }
|
|
|
+ for(size_t epoch=0;epoch<nb_epochs;++epoch){
|
|
|
+ cout<<"Epoch "<<epoch<<endl;
|
|
|
+ shuffle(indices,train_size);
|
|
|
+ for(size_t batch=0;batch<nb_batchs;++batch){
|
|
|
+ size_t begin=batch*batch_size;
|
|
|
+ size_t end=min(train_size,begin+batch_size);
|
|
|
+ update_batch(dataset,indices,begin,end,eta);
|
|
|
+ }
|
|
|
+ eval(dataset);
|
|
|
+ }
|
|
|
+ delete[] indices;
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::shuffle(size_t* tab,size_t size){
|
|
|
+ default_random_engine generator;
|
|
|
+ uniform_int_distribution<int> distribution(0,size-1);
|
|
|
+ for(size_t k=0;k<size;++k){
|
|
|
+ size_t i=distribution(generator);
|
|
|
+ size_t j=distribution(generator);
|
|
|
+ swap(tab[i],tab[j]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::update_batch(Dataset* dataset,size_t* indices,size_t begin,size_t end,double eta){
|
|
|
+ double batch_size=end-begin;
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ init_nabla_b(l);
|
|
|
+ init_nabla_w(l);
|
|
|
+ }
|
|
|
+ for(size_t i=begin;i<end;++i){
|
|
|
+ pair<const Vector&,const Vector&> data=dataset->get_train(indices[i]);
|
|
|
+ back_propagation(data.first,data.second,eta);
|
|
|
+ }
|
|
|
+ double eta_batch=eta/batch_size;
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ update_b(l,eta_batch);
|
|
|
+ update_w(l,eta_batch);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::back_propagation(const Vector&x, const Vector& y,double eta){
|
|
|
+ a[0]=x;
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ compute_z(l);
|
|
|
+ compute_a(l);
|
|
|
+ }
|
|
|
+ compute_last_delta(y);
|
|
|
+ for(size_t l=depth-2;l>=1;--l){
|
|
|
+ compute_delta(l);
|
|
|
+ }
|
|
|
+ for(size_t l=1;l<depth;++l){
|
|
|
+ update_nabla_b(l);
|
|
|
+ update_nabla_w(l);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void Network::init_nabla_b(size_t l){
|
|
|
+ Vector& V=nabla_b[l];
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ V.data[i]=0;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void Network::init_nabla_w(size_t l){
|
|
|
+ Matrix& M=nabla_w[l];
|
|
|
+ for(size_t i=0;i<sizes[l-1]*sizes[l];++i){
|
|
|
+ M.get(i)=0;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void Network::compute_a(size_t l){
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ a[l].data[i]=sigmoid(z[l].data[i]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void Network::compute_z(size_t l){
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ double temp=biais[l].data[i];
|
|
|
+ for(size_t j=0;j<sizes[l-1];++j){
|
|
|
+ temp+=weights[l].get(i,j)*a[l-1].data[j];
|
|
|
+ }
|
|
|
+ z[l].data[i]=temp;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::compute_last_delta(const Vector& y){
|
|
|
+ size_t L=depth-1;
|
|
|
+ for(size_t i=0;i<sizes[L];++i){
|
|
|
+ delta[L].data[i]=cost_derivative(a[L].data[i],y.data[i])*sigmoid_prime(z[L].data[i]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::compute_delta(size_t l){
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ double temp=0;
|
|
|
+ for(size_t j=0;j<sizes[l+1];++j){
|
|
|
+ temp+=(weights[l+1].get(j,i)*delta[l+1].data[j]);
|
|
|
+ }
|
|
|
+ delta[l].data[i]=temp*sigmoid_prime(z[l].data[i]);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::update_nabla_b(size_t l){
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ nabla_b[l].data[i]+=delta[l].data[i];
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::update_nabla_w(size_t l){
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ for(size_t j=0;j<sizes[l-1];++j){
|
|
|
+ nabla_w[l].get(i,j)+=a[l-1].data[j]*delta[l].data[i];
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::update_b(size_t l,double eta_batch){
|
|
|
+ Vector& U=biais[l];
|
|
|
+ Vector& V=nabla_b[l];
|
|
|
+ for(size_t i=0;i<sizes[l];++i){
|
|
|
+ U.data[i]-=V.data[i]*eta_batch;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+void
|
|
|
+Network::update_w(size_t l,double eta_batch){
|
|
|
+ Matrix& M=weights[l];
|
|
|
+ Matrix& P=nabla_w[l];
|
|
|
+ for(size_t i=0;i<sizes[l-1]*sizes[l];++i){
|
|
|
+ M.get(i)-=P.get(i)*eta_batch;
|
|
|
+ }
|
|
|
+}
|