
First commit

Jean Fromentin, 2 years ago
Current commit
e9d4d7f044
23 changed files with 1053 additions and 0 deletions
  1. Makefile (+2 −0)
  2. dataset.hpp (+43 −0)
  3. debug.hpp (+14 −0)
  4. layers/activation.hpp (+64 −0)
  5. layers/convolution.cpp (+113 −0)
  6. layers/convolution.hpp (+54 −0)
  7. layers/full_connected.cpp (+85 −0)
  8. layers/full_connected.hpp (+44 −0)
  9. layers/layer.hpp (+69 −0)
  10. layers/layers.hpp (+9 −0)
  11. layers/pooling.cpp (+46 −0)
  12. layers/pooling.hpp (+40 −0)
  13. main.cpp (+77 −0)
  14. math.hpp (+13 −0)
  15. mnist/mnist.cpp (+73 −0)
  16. mnist/mnist.hpp (+42 −0)
  17. mnist/t10k-images.idx3-ubyte (binary)
  18. mnist/t10k-labels.idx1-ubyte (binary)
  19. mnist/train-images.idx3-ubyte (binary)
  20. mnist/train-labels.idx1-ubyte (binary)
  21. network.cpp (+126 −0)
  22. network.hpp (+39 −0)
  23. vector.hpp (+100 −0)

+ 2 - 0
Makefile

@@ -0,0 +1,2 @@
+all:
+	g++ -I. *.cpp layers/*.cpp mnist/*.cpp -O3 -g -o ia

+ 43 - 0
dataset.hpp

@@ -0,0 +1,43 @@
+#ifndef DATASET_HPP
+#define DATASET_HPP
+
+#include <cstddef>
+#include <utility> //for std::pair
+#include "vector.hpp"
+
+using namespace std;
+
+class Dataset{
+protected:
+  size_t train_size;
+  size_t test_size;
+  size_t x_size;
+  size_t y_size;
+public:
+  Dataset();
+  size_t get_train_size() const;
+  size_t get_test_size() const;
+  size_t get_y_size() const;
+  virtual pair<Vector,Vector> get_train(const size_t i) const=0;
+  virtual pair<Vector,Vector> get_test(const size_t i) const=0;
+};
+
+inline
+Dataset::Dataset(){
+}
+
+inline size_t
+Dataset::get_train_size() const{
+  return train_size;
+}
+
+inline size_t
+Dataset::get_test_size() const{
+  return test_size;
+}
+
+inline size_t
+Dataset::get_y_size() const{
+  return y_size;
+}
+
+#endif

+ 14 - 0
debug.hpp

@@ -0,0 +1,14 @@
+#ifndef DEBUG_HPP
+#define DEBUG_HPP
+
+#include <iostream>
+#include <cstdlib> //for exit
+using namespace std;
+
+#ifdef DEBUG
+#define assert(cond) if(!(cond)){cout<<"Assertion failed @ "<<__FILE__<<":"<<__LINE__<<endl;exit(1);}
+#else
+#define NDEBUG
+#define assert(cond)
+#endif
+
+#endif

+ 64 - 0
layers/activation.hpp

@@ -0,0 +1,64 @@
+#ifndef LAYER_ACTIVATION_HPP
+#define LAYER_ACTIVATION_HPP
+
+#include "layer.hpp"
+#include "math.hpp"
+
+namespace Layer{
+  enum ActivationMap{Sigmoid};
+
+  template<ActivationMap A> Real activation_map(Real);
+  template<ActivationMap A> Real activation_diff_map(Real);
+
+  template<> Real activation_map<Sigmoid>(Real);
+  template<> Real activation_diff_map<Sigmoid>(Real);
+
+  template<ActivationMap A> class Activation:public Layer{
+  private:
+    using Layer::x_out;
+  public:
+    Activation(size_t n);
+    Vector feed_forward(Vector x);
+    void init_nabla(){};
+    Vector back_propagation(Vector d);
+    void update(Real){};
+  };
+
+  template<ActivationMap A>
+  inline
+  Activation<A>::Activation(size_t n):Layer(n,n){
+  }
+
+  template<ActivationMap A>
+  inline Vector
+  Activation<A>::feed_forward(Vector x){
+    x_in=x;
+    for(size_t i=0;i<n_in;++i){
+      x_out[i]=activation_map<A>(x[i]);
+    }
+    return x_out;
+  }
+
+  template<ActivationMap A>
+  inline Vector
+  Activation<A>::back_propagation(Vector d){
+    for(size_t i=0;i<n_in;++i){
+      delta[i]=activation_diff_map<A>(x_in[i])*d[i];
+    }
+    return delta;
+  }
+
+  template<>
+  inline Real
+  activation_map<Sigmoid>(Real x){
+    return 1.0/(1.0+exp(-x));
+  }
+
+  template<>
+  inline Real
+  activation_diff_map<Sigmoid>(Real x){
+    Real t=activation_map<Sigmoid>(x);
+    return t*(1.0-t);
+  }
+}
+#endif
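
A usage sketch (not part of the commit) of the `Activation` layer on its own, assuming the repository headers above; the sigmoid maps each coordinate independently, so feeding 0 returns 0.5:

```cpp
//Hypothetical standalone test of the sigmoid activation layer.
#include "layers/activation.hpp"

int main(){
  Layer::Activation<Layer::Sigmoid> act(3);
  Vector x=init_vector(3);
  x[0]=-2; x[1]=0; x[2]=2;
  Vector y=act.feed_forward(x); //y[i]=1/(1+exp(-x[i])), so y[1]==0.5
  display(y,3);
  delete_vector(x);
  return 0;
}
```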

+ 113 - 0
layers/convolution.cpp

@@ -0,0 +1,113 @@
+#include "convolution.hpp"
+
+namespace Layer{
+
+  void
+  Convolution::init(Real m,Real d){
+    default_random_engine generator;
+    normal_distribution<Real> distribution(m,d);
+    for(size_t i=0;i<mf*nf*p*q;++i){
+      K[i]=distribution(generator);
+    }
+    for(size_t i=0;i<mf*nf;++i){
+      b[i]=distribution(generator);
+    }
+  }
+
+  Vector
+  Convolution::feed_forward(Vector x){
+    x_in=x;
+    for(size_t g=0;g<mf;++g){
+      for(size_t k=0;k<mi;++k){
+        for(size_t l=0;l<mj;++l){
+          Real temp=0;
+          for(size_t f=0;f<nf;++f){
+            for(size_t r=0;r<p;++r){
+              for(size_t s=0;s<q;++s){
+                temp+=x[indice3(f,k+r,l+s,ni,nj)]*K[indice4(g,f,r,s,nf,p,q)];
+              }
+            }
+            temp+=b[indice2(g,f,nf)];
+          }
+          x_out[indice3(g,k,l,mi,mj)]=temp;
+        }
+      }
+    }
+    return x_out;
+  }
+
+  void
+  Convolution::init_nabla(){
+    for(size_t i=0;i<mf*nf;++i){
+      nabla_b[i]=0;
+    }
+    for(size_t i=0;i<mf*nf*p*q;++i){
+      nabla_K[i]=0;
+    }
+  }
+
+  Vector
+  Convolution::back_propagation(Vector d){
+    for(size_t f=0;f<nf;++f){
+      for(size_t i=0;i<ni;++i){
+        for(size_t j=0;j<nj;++j){
+          Real temp=0;
+          for(size_t g=0;g<mf;++g){
+            size_t r=(i>=mi)?i-mi+1:0;
+            for(;r<min(i+1,p);++r){
+              size_t s=(j>=mj)?j-mj+1:0;
+              for(;s<min(j+1,q);++s){
+                temp+=K[indice4(g,f,r,s,nf,p,q)]*d[indice3(g,i-r,j-s,mi,mj)];
+              }//s
+            }//r
+          }//g
+          delta[indice3(f,i,j,ni,nj)]=temp;
+        }//j
+      }//i
+    }//f
+    //Update nabla_b
+    for(size_t g=0;g<mf;++g){
+      for(size_t f=0;f<nf;++f){
+        Real temp=0;
+        for(size_t k=0;k<mi;++k){
+          for(size_t l=0;l<mj;++l){
+            temp+=d[indice3(g,k,l,mi,mj)];
+          }//l
+        }//k
+        nabla_b[indice2(g,f,nf)]+=temp;
+      }
+    }
+    //Update nabla_w
+    for(size_t g=0;g<mf;++g){
+      for(size_t f=0;f<nf;++f){
+        for(size_t r=0;r<p;++r){
+          for(size_t s=0;s<q;++s){
+            Real temp=0;
+            for(size_t k=0;k<mi;++k){
+              for(size_t l=0;l<mj;++l){
+                temp+=d[indice3(g,k,l,mi,mj)]*x_in[indice3(f,k+r,l+s,ni,nj)];
+              }//l
+            }//k
+            nabla_K[indice4(g,f,r,s,nf,p,q)]+=temp;
+          }
+        }
+      }
+    }
+    return delta;
+  }
+
+  void
+  Convolution::update(Real eta){
+    //Update b
+    for(size_t i=0;i<mf*nf;++i){
+      b[i]-=eta*nabla_b[i];
+    }
+    //Update K
+    for(size_t i=0;i<mf*nf*p*q;++i){
+      K[i]-=eta*nabla_K[i];
+    }
+  }
+}
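
In the notation of the code (`g` indexes output maps, `f` input maps), `feed_forward` computes a valid cross-correlation; note that the bias term `b[g,f]` is accumulated once per input map, matching the `mf*nf`-sized bias vector:

```latex
x^{\mathrm{out}}_{g,k,l}
  = \sum_{f=0}^{n_f-1}\Bigl(b_{g,f}
  + \sum_{r=0}^{p-1}\sum_{s=0}^{q-1}
      x^{\mathrm{in}}_{f,\,k+r,\,l+s}\,K_{g,f,r,s}\Bigr),
\qquad
0\le k<m_i=n_i-p+1,\quad 0\le l<m_j=n_j-q+1.
```

For the MNIST setup in `main.cpp` (28x28 input, 5x5 kernels) this gives 24x24 output maps.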

+ 54 - 0
layers/convolution.hpp

@@ -0,0 +1,54 @@
+#ifndef LAYER_CONVOLUTION_HPP
+#define LAYER_CONVOLUTION_HPP
+
+#include <random>
+#include "layer.hpp"
+
+namespace Layer{
+  /*****************************************
+   * Implementation of a convolutional layer:
+   * nf input maps of size ni x nj are mapped to
+   * mf output maps of size (ni-p+1) x (nj-q+1)
+   * through mf*nf kernels of size p x q.
+   *****************************************/
+  class Convolution:public Layer{
+  public:
+    size_t nf,ni,nj;
+    size_t mf,mi,mj;
+    size_t p,q;
+    Vector K;
+    Vector b;
+    Vector nabla_K;
+    Vector nabla_b;
+  public:
+    Convolution(size_t nf,size_t ni,size_t nj,size_t p,size_t q,size_t mf);
+    ~Convolution();
+    void init(Real m,Real d);
+    Vector feed_forward(Vector x);
+    void init_nabla();
+    Vector back_propagation(Vector d);
+    void update(Real eta);
+  };
+
+  inline
+  Convolution::Convolution(size_t nf_,size_t ni_,size_t nj_,size_t p_,size_t q_,size_t mf_):
+  Layer(nf_*ni_*nj_,mf_*(ni_-p_+1)*(nj_-q_+1)){
+    nf=nf_;
+    ni=ni_;
+    nj=nj_;
+    p=p_;
+    q=q_;
+    mf=mf_;
+    mi=ni-p+1;
+    mj=nj-q+1;
+    K=init_vector(mf*nf*p*q);
+    b=init_vector(mf*nf);
+    nabla_K=init_vector(mf*nf*p*q);
+    nabla_b=init_vector(mf*nf);
+  }
+
+  inline
+  Convolution::~Convolution(){
+    delete_vector(K);
+    delete_vector(b);
+    delete_vector(nabla_K);
+    delete_vector(nabla_b);
+  }
+}
+#endif

+ 85 - 0
layers/full_connected.cpp

@@ -0,0 +1,85 @@
+#include "full_connected.hpp"
+
+using namespace Layer;
+
+void
+FullConnected::init(Real m,Real d){
+  default_random_engine generator;
+  normal_distribution<Real> distribution(m,d);
+  for(size_t i=0;i<n_out;++i){
+    b[i]=distribution(generator);
+  }
+  for(size_t i=0;i<n_out*n_in;++i){
+    w[i]=distribution(generator);
+  }
+}
+
+void
+FullConnected::init_standard(){
+  default_random_engine generator;
+  normal_distribution<Real> distribution(0,1);
+  //Hoisted out of the loop: one N(0,1/sqrt(n_in)) distribution for all weights.
+  normal_distribution<Real> distribution2(0,1/sqrt(Real(n_in)));
+  for(size_t i=0;i<n_out;++i){
+    b[i]=distribution(generator);
+  }
+  for(size_t i=0;i<n_out*n_in;++i){
+    w[i]=distribution2(generator);
+  }
+}
+
+Vector
+FullConnected::feed_forward(Vector x){
+  x_in=x;
+  for(size_t i=0;i<n_out;++i){
+    Real temp=b[i];
+    for(size_t j=0;j<n_in;++j){
+      temp+=w[indice2(i,j,n_in)]*x[j];
+    }
+    x_out[i]=temp;
+  }
+  return x_out;
+}
+
+void
+FullConnected::init_nabla(){
+  for(size_t i=0;i<n_out;++i){
+    nabla_b[i]=0;
+  }
+  for(size_t i=0;i<n_out*n_in;++i){
+    nabla_w[i]=0;
+  }
+}
+
+Vector
+FullConnected::back_propagation(Vector d){
+  for(size_t i=0;i<n_in;++i){
+    Real temp=0;
+    for(size_t j=0;j<n_out;++j){
+      temp+=w[indice2(j,i,n_in)]*d[j];
+    }
+    delta[i]=temp;
+  }
+  //Update nabla_b
+  for(size_t i=0;i<n_out;++i){
+    nabla_b[i]+=d[i];
+  }
+  //Update nabla_w
+  for(size_t i=0;i<n_out;++i){
+    for(size_t j=0;j<n_in;++j){
+      nabla_w[indice2(i,j,n_in)]+=d[i]*x_in[j];
+    }
+  }
+  return delta;
+}
+
+void
+FullConnected::update(Real eta){
+  //Update b
+  for(size_t i=0;i<n_out;++i){
+    b[i]-=eta*nabla_b[i];
+  }
+  //Update w
+  for(size_t i=0;i<n_out*n_in;++i){
+    w[i]-=eta*nabla_w[i];
+  }
+}
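
For reference, `back_propagation` implements the usual dense-layer gradients: the delta returned to the previous layer is the transposed-weight product, and the nabla buffers accumulate per-example gradients over a batch:

```latex
\delta_j=\sum_{i} w_{ij}\,d_i,
\qquad
\nabla b_i \mathrel{+}= d_i,
\qquad
\nabla w_{ij} \mathrel{+}= d_i\,x^{\mathrm{in}}_j,
```

after which `update` applies `b-=eta*nabla_b` and `w-=eta*nabla_w`, with `eta` already divided by the batch size in `Network::update_batch`.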

+ 44 - 0
layers/full_connected.hpp

@@ -0,0 +1,44 @@
+#ifndef LAYER_FULLCONNECTED_HPP
+#define LAYER_FULLCONNECTED_HPP
+
+#include <random>
+#include "layer.hpp"
+
+namespace Layer{
+  class FullConnected:public Layer{
+  public:
+    Vector b;
+    Vector w;
+    Vector nabla_b;
+    Vector nabla_w;
+  public:
+    FullConnected(size_t n_in,size_t n_out);
+    ~FullConnected();
+    void init(Real m,Real d);
+    void init_standard();
+    Vector feed_forward(Vector x);
+    void init_nabla();
+    Vector back_propagation(Vector d);
+    void update(Real eta);
+  };
+
+  inline
+  FullConnected::FullConnected(size_t n_in,size_t n_out):Layer(n_in,n_out){
+    b=init_vector(n_out);
+    w=init_vector(n_out*n_in);
+    nabla_b=init_vector(n_out);
+    nabla_w=init_vector(n_out*n_in);
+  }
+
+  inline
+  FullConnected::~FullConnected(){
+    delete_vector(b);
+    delete_vector(w);
+    delete_vector(nabla_b);
+    delete_vector(nabla_w);
+  }
+
+}
+
+#endif

+ 69 - 0
layers/layer.hpp

@@ -0,0 +1,69 @@
+#ifndef LAYER_HPP
+#define LAYER_HPP
+
+#include "debug.hpp"
+#include "vector.hpp"
+#include <cmath>
+
+namespace Layer{
+  class Layer{
+  public:
+    size_t n_in,n_out;
+    Vector x_in;
+    Vector x_out;
+    Vector delta;
+    string name;
+  public:
+    Layer(size_t n);
+    Layer(size_t n_in,size_t n_out);
+    virtual ~Layer();
+    size_t get_input_size() const;
+    size_t get_output_size() const;
+    Vector get_output() const;
+    virtual Vector feed_forward(Vector x)=0;
+    virtual void init_nabla()=0;
+    virtual Vector back_propagation(Vector d)=0;
+    virtual void update(Real eta)=0;
+  };
+
+
+  inline
+  Layer::Layer(size_t n){
+    n_in=n;
+    n_out=n;
+    x_out=init_vector(n);
+    delta=init_vector(n);
+  }
+
+  inline
+  Layer::Layer(size_t n_in_,size_t n_out_){
+    n_in=n_in_;
+    n_out=n_out_;
+    x_out=init_vector(n_out);
+    delta=init_vector(n_in);
+  }
+
+  inline
+  Layer::~Layer(){
+    delete_vector(x_out);
+    delete_vector(delta);
+  }
+
+  inline size_t
+  Layer::get_input_size() const{
+    return n_in;
+  }
+
+  inline size_t
+  Layer::get_output_size() const{
+    return n_out;
+  }
+
+  inline Vector
+  Layer::get_output() const{
+    return x_out;
+  }
+}
+
+
+#endif

+ 9 - 0
layers/layers.hpp

@@ -0,0 +1,9 @@
+#ifndef LAYERS_HPP
+#define LAYERS_HPP
+
+#include "activation.hpp"
+#include "full_connected.hpp"
+#include "convolution.hpp"
+#include "pooling.hpp"
+
+#endif

+ 46 - 0
layers/pooling.cpp

@@ -0,0 +1,46 @@
+#include "pooling.hpp"
+
+namespace Layer{
+
+  Vector
+  Pooling::feed_forward(Vector x){
+    x_in=x;
+    for(size_t f=0;f<nf;++f){
+      for(size_t k=0;k<mi;++k){
+        for(size_t l=0;l<mj;++l){
+          Real temp=x[indice3(f,p*k,q*l,ni,nj)];
+          for(size_t i=p*k;i<min(p*k+p,ni);++i){
+            for(size_t j=q*l;j<min(q*l+q,nj);++j){
+              temp=max(temp,x[indice3(f,i,j,ni,nj)]);
+            }
+          }
+          x_out[indice3(f,k,l,mi,mj)]=temp;
+        }
+      }
+    }
+    return x_out;
+  }
+
+  Vector
+  Pooling::back_propagation(Vector d){
+    for(size_t f=0;f<nf;++f){
+      for(size_t i=0;i<ni;++i){
+        size_t k=i/p;
+        for(size_t j=0;j<nj;++j){
+          size_t l=j/q;
+          bool is_max=true;
+          Real val=x_in[indice3(f,i,j,ni,nj)];
+          for(size_t i2=k*p;i2<min(k*p+p,ni) and is_max;++i2){
+            for(size_t j2=l*q;j2<min(l*q+q,nj) and is_max;++j2){
+              if(x_in[indice3(f,i2,j2,ni,nj)]>val) is_max=false;
+            }
+          }
+          delta[indice3(f,i,j,ni,nj)]=(is_max)?d[indice3(f,k,l,mi,mj)]:0;
+        }
+      }
+    }
+    return delta;
+  }
+}
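
A usage sketch (not in the commit) of the max-pooling layer, assuming the headers above; a 2x2 pool over one 4x4 map keeps the maximum of each block:

```cpp
//Hypothetical standalone test of 2x2 max pooling on a 4x4 map.
#include "layers/pooling.hpp"

int main(){
  Layer::Pooling pool(1,4,4,2,2); //one 4x4 map -> one 2x2 map
  Vector x=init_vector(16);
  for(size_t i=0;i<16;++i) x[i]=Real(i);
  Vector y=pool.feed_forward(x);  //maxima of the four blocks
  display(y,4);                   //prints [5,7,13,15]
  delete_vector(x);
  return 0;
}
```

On the way back, `back_propagation` routes `d[f,k,l]` to every input cell that attains its block maximum and zero elsewhere, so ties propagate the gradient to all maximal cells.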

+ 40 - 0
layers/pooling.hpp

@@ -0,0 +1,40 @@
+#ifndef LAYER_POOLING_HPP
+#define LAYER_POOLING_HPP
+
+#include "layer.hpp"
+
+namespace Layer{
+
+  class Pooling:public Layer{
+  public:
+    size_t nf;
+    size_t ni;
+    size_t nj;
+    size_t p;
+    size_t q;
+    size_t mi;
+    size_t mj;
+  public:
+    Pooling(size_t nf,size_t ni,size_t nj,size_t p,size_t q);
+    ~Pooling(){};
+    Vector feed_forward(Vector x);
+    void init_nabla(){};
+    Vector back_propagation(Vector d);
+    void update(Real){};
+  };
+
+  inline
+  Pooling::Pooling(size_t nf_,size_t ni_,size_t nj_,size_t p_,size_t q_):
+  Layer(nf_*ni_*nj_,nf_*((ni_+p_-1)/p_)*((nj_+q_-1)/q_)){
+    nf=nf_;
+    ni=ni_;
+    nj=nj_;
+    p=p_;
+    q=q_;
+    mi=(ni+p-1)/p;
+    mj=(nj+q-1)/q;
+  }
+
+}
+
+#endif

+ 77 - 0
main.cpp

@@ -0,0 +1,77 @@
+#include <iomanip>
+#include "layers/layers.hpp"
+#include "network.hpp"
+#include "mnist/mnist.hpp"
+
+
+int main(int argc,char** argv){
+  Network N;
+  size_t nf=4;
+  Layer::Convolution L1(1,28,28,5,5,nf);
+  L1.init(0,1);
+  Layer::Activation<Layer::Sigmoid> L2(nf*24*24);
+  //Layer::Pooling L3(nf,24,24,2,2);
+  Layer::FullConnected L4(nf*24*24,10);
+  L4.init_standard();
+  Layer::Activation<Layer::Sigmoid> L5(10);
+  L1.name="[Convolutional]";
+  L2.name="[Sigmoid of convolutional]";
+  //L3.name="[Pooling]";
+  L4.name="[Full connected]";
+  L5.name="[Sigmoid of full]";
+
+  N.push_layer(&L1);
+  N.push_layer(&L2);
+  //N.push_layer(&L3);
+  N.push_layer(&L4);
+  N.push_layer(&L5);
+  N.is_done();
+  Mnist dataset;
+  N.train(&dataset,10,10,0.1);
+
+
+  //exit(0);
+/*  Network N;
+  size_t nf=4;
+  Layer::Convolution L1(1,28,28,5,5,nf);
+  L1.init(0,1);
+  Layer::Activation<Layer::Sigmoid> L2(nf*24*24);
+  Layer::FullConnected L3(nf*24*24,10);
+  L3.init_standard();
+  Layer::Activation<Layer::Sigmoid> L4(10);
+  L1.name="[Convolutionnal]";
+  L2.name="[Sigmoid of convolutionnal]";
+  L3.name="[Full connected]";
+  L4.name="[Sigmoid of full]";
+
+  N.push_layer(&L1);
+  N.push_layer(&L2);
+  N.push_layer(&L3);
+  N.push_layer(&L4);
+  N.is_done();
+  cout<<"Network out size = "<<N.n_out<<endl;
+  Mnist dataset;
+ N.train(&dataset,20,10,0.1);
+*/
+/*  Network N;
+  size_t n=20;
+  Layer::FullConnected L0(28*28,n);
+  L0.init_standard();
+  Layer::Activation<Layer::Sigmoid> L1(n);
+  Layer::FullConnected L2(n,10);
+  L2.init_standard();
+  Layer::Activation<Layer::Sigmoid> L3(10);
+  L0.name="[Full 0]";
+  L1.name="[Sigmoid 0]";
+  L2.name="[Full 1]";
+  L3.name="[Sigmoid 1]";
+
+  N.push_layer(&L0);
+  N.push_layer(&L1);
+  N.push_layer(&L2);
+  N.push_layer(&L3);
+  N.is_done();
+  cout<<"Network out size = "<<N.n_out<<endl;
+  Mnist dataset;
+  N.train(&dataset,20,10,3.0);*/
+}

+ 13 - 0
math.hpp

@@ -0,0 +1,13 @@
+#ifndef MATH_HPP
+#define MATH_HPP
+
+#include "vector.hpp" //for Real
+
+namespace Math{
+  //Fast approximation: exp(x) ~ (1+x/1024)^1024, computed by ten squarings.
+  inline Real exp(Real x){
+    x=1+x/1024;
+    x *= x; x *= x; x *= x; x *= x;
+    x *= x; x *= x; x *= x; x *= x;
+    x *= x; x *= x;
+    return x;
+  }
+}
+#endif

+ 73 - 0
mnist/mnist.cpp

@@ -0,0 +1,73 @@
+#include "mnist.hpp"
+
+int
+Mnist::reverse_int32(unsigned char* buffer){
+  return (((int)buffer[0])<<24)|(((int)buffer[1])<<16)|(((int)buffer[2])<<8)|((int)buffer[3]);
+}
+
+Mnist::Mnist():Dataset(){
+  x_size=784;
+  y_size=10;
+  x=init_vector(784);
+  y=init_vector(10);
+  train_size=load_labels("mnist/train-labels.idx1-ubyte",&train_labels);
+  size_t temp=load_images("mnist/train-images.idx3-ubyte",&train_images);
+  assert(train_size==temp);
+  test_size=load_labels("mnist/t10k-labels.idx1-ubyte",&test_labels);
+  temp=load_images("mnist/t10k-images.idx3-ubyte",&test_images);
+  assert(test_size==temp);
+}
+
+size_t
+Mnist::load_labels(string filename,unsigned char** dst){
+  ifstream file(filename,ios::in|ios::binary);
+  if(not file.is_open()){
+    cerr<<"[error] Could not open "<<filename<<endl;
+    exit(-1);
+  }
+  unsigned char buffer[4];
+  file.read((char*)buffer,4);
+  assert(reverse_int32(buffer)==2049);
+  file.read((char*)buffer,4);
+  int size;
+  size=reverse_int32(buffer);
+  *dst=new unsigned char[size];
+  file.read((char*)*dst,size);
+  file.close();
+  return size;
+}
+
+size_t
+Mnist::load_images(string filename,unsigned char** dst){
+  ifstream file(filename,ios::in|ios::binary);
+  if(not file.is_open()){
+    cerr<<"[error] Could not open "<<filename<<endl;
+    exit(-1);
+  }
+  unsigned char buffer[4];
+  int size;
+  file.read((char*)buffer,4);
+  assert(reverse_int32(buffer)==2051);
+  file.read((char*)buffer,4);
+  size=reverse_int32(buffer);
+  file.read((char*)buffer,4);
+  assert(reverse_int32(buffer)==28);
+  file.read((char*)buffer,4);
+  assert(reverse_int32(buffer)==28);
+  *dst=new unsigned char[784*size];
+  file.read((char*)*dst,784*size);
+  file.close();
+  return size;
+}
+
+pair<Vector,Vector>
+Mnist::get(const size_t i,const unsigned char* const * labels,const unsigned char* const * images) const{
+  size_t c=(size_t)(*labels)[i];
+  for(size_t j=0;j<10;++j) y[j]=0;
+  y[c]=1;
+  const unsigned char* x_src=&(*images)[784*i];
+  for(size_t j=0;j<784;++j){
+    x[j]=Real(x_src[j])/256.0;
+  }
+  return pair<Vector,Vector>(x,y);
+}
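
The loaders follow the IDX conventions: a big-endian magic number (2049 for label files, 2051 for image files), a big-endian item count, then raw bytes, which is why `reverse_int32` byte-swaps each header word on little-endian hosts. A usage sketch, assuming the four idx files sit under `mnist/` as in this commit:

```cpp
//Hypothetical check: fetch one training pair and inspect it.
#include "mnist/mnist.hpp"

int main(){
  Mnist dataset;
  pair<Vector,Vector> t=dataset.get_train(0);
  display(t.second,10);                        //one-hot label vector
  cout<<"label = "<<argmax(t.second,10)<<endl; //digit class of image 0
  return 0;
}
```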

+ 42 - 0
mnist/mnist.hpp

@@ -0,0 +1,42 @@
+#ifndef MNIST_HPP
+#define MNIST_HPP
+
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <cstdint>
+
+#include "../dataset.hpp"
+
+using namespace std;
+
+class Mnist:public Dataset{
+private:
+  unsigned char* train_labels;
+  unsigned char* test_labels;
+  unsigned char* train_images;
+  unsigned char* test_images;
+  mutable Vector x;
+  mutable Vector y;
+  size_t load_labels(string filename,unsigned char** dst);
+  size_t load_images(string filename,unsigned char** dst);
+  int reverse_int32(unsigned char* buffer);
+  pair<Vector,Vector> get(const size_t i,const unsigned char* const* labels,const unsigned char* const * images) const;
+public:
+  Mnist();
+  pair<Vector,Vector> get_train(const size_t i) const;
+  pair<Vector,Vector> get_test(const size_t i) const;
+};
+
+inline pair<Vector,Vector>
+Mnist::get_train(const size_t i) const{
+  assert(i<train_size);
+  return get(i,&train_labels,&train_images);
+}
+
+inline pair<Vector,Vector>
+Mnist::get_test(const size_t i) const{
+  assert(i<test_size);
+  return get(i,&test_labels,&test_images);
+}
+#endif

Binary
mnist/t10k-images.idx3-ubyte


Binary
mnist/t10k-labels.idx1-ubyte


Binary
mnist/train-images.idx3-ubyte


Binary
mnist/train-labels.idx1-ubyte


+ 126 - 0
network.cpp

@@ -0,0 +1,126 @@
+#include "network.hpp"
+
+Network::Network(){
+  C=Quadratic;
+}
+
+void
+Network::push_layer(Layer::Layer* l){
+  if(layers.empty()){
+    n_in=l->get_input_size();
+    layers.push_back(l);
+    cout<<"In size = "<<n_in<<endl;
+  }
+  else{
+    assert(l->get_input_size()==layers.back()->get_output_size());
+    layers.push_back(l);
+  }
+  n_out=l->get_output_size();
+}
+
+void
+Network::is_done(){
+  last_delta=init_vector(n_out);
+}
+
+Vector
+Network::feed_forward(Vector x_in){
+  Vector x=x_in;
+  for(auto it=layers.begin();it!=layers.end();++it){
+    //cout<<" - Try feed_forward on layer "<<(*it)->name<<endl;
+    x=(*it)->feed_forward(x);
+  }
+  a=x;
+  return a;
+}
+
+Real
+Network::eval(Dataset* dataset){
+  size_t n=dataset->get_test_size();
+  size_t nb=0;
+  for(size_t i=0;i<n;++i){
+    pair<Vector,Vector> t=dataset->get_test(i);
+    Vector a=feed_forward(t.first);
+    if(argmax(a,n_out)==argmax(t.second,n_out)) ++nb;
+  }
+  Real res=Real(nb)/Real(n)*100;
+  cout<<"> Res = "<<res<<"%"<<endl;
+  return res;
+}
+
+void
+Network::shuffle(size_t* tab,size_t size){
+  if(size<2) return;
+  //Seed once: re-creating a default-seeded engine here would produce
+  //the same permutation on every call.
+  static default_random_engine generator(random_device{}());
+  //Fisher-Yates shuffle
+  for(size_t k=size-1;k>0;--k){
+    uniform_int_distribution<size_t> distribution(0,k);
+    swap(tab[k],tab[distribution(generator)]);
+  }
+}
+
+void
+Network::train(Dataset* dataset,size_t nb_epochs,size_t batch_size,Real eta){
+  size_t train_size=dataset->get_train_size();
+  size_t nb_batchs=(train_size-1)/batch_size+1;
+  size_t* indices=new size_t[train_size];
+  for(size_t i=0;i<train_size;++i){
+    indices[i]=i;
+  }
+  for(size_t epoch=0;epoch<nb_epochs;++epoch){
+    cout<<"Epoch "<<epoch<<endl;
+    shuffle(indices,train_size);
+    for(size_t batch=0;batch<nb_batchs;++batch){
+      size_t begin=batch*batch_size;
+      size_t end=min(train_size,begin+batch_size);
+      update_batch(dataset,indices,begin,end,eta);
+    }
+    eval(dataset);
+  }
+  delete[] indices;
+}
+
+void
+Network::update_batch(Dataset* dataset,size_t* indices,size_t begin,size_t end,Real eta){
+  Real batch_size=end-begin;
+  for(auto it=layers.begin();it!=layers.end();++it){
+    (*it)->init_nabla();
+  }
+  for(size_t i=begin;i<end;++i){
+    pair<Vector,Vector> data=dataset->get_train(indices[i]);
+    //cout<<"Call back_propagation on batch data "<<i-begin<<"/"<<batch_size<<endl;
+    back_propagation(data.first,data.second,eta);
+  }
+  Real eta_batch=eta/batch_size;
+  for(auto it=layers.begin();it!=layers.end();++it){
+    (*it)->update(eta_batch);
+  }
+}
+
+void
+Network::back_propagation(Vector x,Vector y,Real eta){
+  Vector z=feed_forward(x);
+  //cout<<" - Feed forward done"<<endl;
+  compute_last_delta(y);
+  Vector delta=last_delta;
+  //cout<<" - Last_delta computed"<<endl;
+  for(auto it=layers.rbegin();it!=layers.rend();++it){
+    //cout<<" - Try back_propagation on layer "<<(*it)->name<<endl;
+    delta=(*it)->back_propagation(delta);
+    //cout<<"    - Done"<<endl;
+  }
+}
+
+void
+Network::compute_last_delta(Vector y){
+  switch(C){
+    case Quadratic:
+    case CrossEntropy:
+      for(size_t i=0;i<n_out;++i){
+        last_delta[i]=a[i]-y[i];
+      }
+      break;
+    default:
+      assert(false);
+      break;
+  }
+}
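
Since the final sigmoid is a separate `Activation` layer, `last_delta` is the gradient of the cost with respect to the network output `a`. The shared `a[i]-y[i]` branch is therefore exact for the quadratic cost; a cross-entropy cost would need its own derivative with respect to `a`:

```latex
C=\tfrac12\lVert a-y\rVert^{2}
  \;\Rightarrow\;
  \frac{\partial C}{\partial a_i}=a_i-y_i,
\qquad
C=-\sum_i\bigl(y_i\ln a_i+(1-y_i)\ln(1-a_i)\bigr)
  \;\Rightarrow\;
  \frac{\partial C}{\partial a_i}=\frac{a_i-y_i}{a_i(1-a_i)}.
```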

+ 39 - 0
network.hpp

@@ -0,0 +1,39 @@
+#ifndef NETWORK_HPP
+#define NETWORK_HPP
+
+#include <random>
+#include <list>
+
+#include "layers/layer.hpp"
+#include "dataset.hpp"
+
+enum CostFunction{CrossEntropy,Quadratic};
+
+class Network{
+public:
+  list<Layer::Layer*> layers;
+  size_t n_in;
+  size_t n_out;
+  Vector a;
+  Vector last_delta;
+  CostFunction C;
+  void compute_last_delta(Vector y);
+protected:
+  void shuffle(size_t* tab,size_t size);
+  void update_batch(Dataset* dataset,size_t* indices,size_t begin,size_t end,Real eta);
+  void back_propagation(Vector x,Vector y,Real eta);
+public:
+  Network();
+  void set_cost(CostFunction);
+  void push_layer(Layer::Layer* l);
+  void is_done();
+  Vector feed_forward(Vector x_in);
+  Real eval(Dataset *dataset);
+  void train(Dataset* dataset,size_t nb_epochs,size_t batch_size,Real eta);
+};
+
+inline void
+Network::set_cost(CostFunction C_){
+  C=C_;
+}
+#endif

+ 100 - 0
vector.hpp

@@ -0,0 +1,100 @@
+#ifndef VECTOR_HPP
+#define VECTOR_HPP
+
+#include <iostream>
+#include "debug.hpp"
+
+using namespace std;
+
+using Real = float;
+
+#ifdef DEBUG
+struct Vector{
+  size_t n;
+  Real* data;
+  Real& operator[](size_t i);
+};
+
+inline Real&
+Vector::operator[](size_t i){
+  assert(i<n);
+  return data[i];
+}
+
+inline Vector
+init_vector(size_t n){
+  Vector v;
+  v.n=n;
+  v.data=new Real[n];
+  return v;
+}
+
+inline void
+delete_vector(Vector v){
+  delete[] v.data;
+}
+
+inline bool
+is_null(Vector v){
+  return v.n==0;
+}
+
+static const Vector NullVector={0,nullptr};
+
+#else
+using Vector=Real*;
+
+inline Vector
+init_vector(size_t n){
+  return new Real[n];
+}
+
+inline void
+delete_vector(Vector v){
+  delete[] v;
+}
+#endif
+
+inline void
+display(Vector v,size_t n){
+  if(n==0){
+    cout<<"[]"<<endl;
+    return;
+  }
+  cout<<'['<<v[0];
+  for(size_t i=1;i<n;++i){
+    cout<<','<<v[i];
+  }
+  cout<<']'<<endl;
+}
+
+inline size_t
+argmax(Vector v,size_t n){
+  assert(n>0);
+  size_t imax=0;
+  Real vmax=v[0];
+  for(size_t i=1;i<n;++i){
+    if(v[i]>vmax){
+      vmax=v[i];
+      imax=i;
+    }
+  }
+  return imax;
+}
+
+inline size_t
+indice2(size_t i,size_t j,size_t nj){
+  return i*nj+j;
+}
+
+inline size_t
+indice3(size_t i,size_t j,size_t k,size_t nj,size_t nk){
+  return (i*nj+j)*nk+k;
+}
+
+inline size_t
+indice4(size_t i,size_t j,size_t k,size_t l,size_t nj,size_t nk,size_t nl){
+  return ((i*nj+j)*nk+k)*nl+l;
+}
+
+#endif
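
The `indiceN` helpers flatten multi-dimensional indices in row-major order, e.g. `indice3(f,i,j,ni,nj)=(f*ni+i)*nj+j`, so a stack of `nf` maps of size `ni x nj` lives in one contiguous buffer. A small sketch, assuming only `vector.hpp`:

```cpp
//Hypothetical demo of row-major flattening with the indiceN helpers.
#include "vector.hpp"

int main(){
  size_t nf=2,ni=3,nj=4;
  Vector v=init_vector(nf*ni*nj);
  for(size_t f=0;f<nf;++f)
    for(size_t i=0;i<ni;++i)
      for(size_t j=0;j<nj;++j)
        v[indice3(f,i,j,ni,nj)]=Real(100*f+10*i+j);
  //indice3(1,2,3,3,4)==(1*3+2)*4+3==23, the last cell
  cout<<v[23]<<endl; //prints 123
  delete_vector(v);
  return 0;
}
```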