Browse Source

First commit

Jean Fromentin 9 months ago
commit
72e07974f1

+ 2 - 0
deep/Makefile

@@ -0,0 +1,2 @@
# Build the neural-network demo: compiles every top-level .cpp plus the
# MNIST loader into a single optimized binary named `ia`.
all:
	g++  *.cpp mnist/mnist.cpp -DNDEBUG -O3 -o ia 

+ 43 - 0
deep/dataset.hpp

@@ -0,0 +1,43 @@
+#ifndef DATASET_HPP
+#define DATASET_HPP
+
+#include <cstddef>
+#include "vector.hpp"
+
+using namespace std;
+
+class Dataset{
+protected:
+  size_t train_size;
+  size_t test_size;
+  size_t x_size;
+  size_t y_size;
+public:
+  Dataset();
+  size_t get_train_size() const;
+  size_t get_test_size() const;
+  size_t get_y_size() const;
+  virtual pair<const Vector&,const Vector&> get_train(const size_t i) const=0;
+  virtual pair<const Vector&,const Vector&> get_test(const size_t i) const=0;
+};
+
// The base constructor leaves all size fields uninitialized —
// NOTE(review): a concrete subclass must assign them before any getter
// is called.
inline
Dataset::Dataset(){
}

// Number of training samples.
inline size_t
Dataset::get_train_size() const{
  return train_size;
}

// Number of test samples.
inline size_t
Dataset::get_test_size() const{
  return test_size;
}

// Dimension of a label vector y (10 for MNIST's one-hot digits).
inline size_t
Dataset::get_y_size() const{
  return y_size;
}
+
+#endif

+ 10 - 0
deep/main.cpp

@@ -0,0 +1,10 @@
+#include "mnist/mnist.hpp"
+#include "network.hpp"
+
// Entry point: trains a fully-connected 784-30-10 network on MNIST.
int main(int argc,char** argv){
  Mnist dataset;
  Network network(28*28,30,10);  // 28x28 input pixels, 30 hidden units, 10 digit classes
  //network.init_normal_distribution(0,1);
  network.init_standard();       // scaled-Gaussian weight initialization
  network.train(&dataset,20,10,3.0);  // 20 epochs, mini-batches of 10, learning rate 3.0
}

+ 13 - 0
deep/math.hpp

@@ -0,0 +1,13 @@
+#ifndef MATH_HPP
+#define MATH_HPP
+
namespace Math{
  // Fast approximation of e^x: computes (1 + x/1024)^1024 with ten
  // successive squarings. Accurate to roughly 0.05% for |x| of a few
  // units; degrades for large |x|.
  inline double exp(double x){
    double r=1.0+x/1024.0;
    for(int k=0;k<10;++k){
      r*=r;
    }
    return r;
  }
}
+#endif

+ 22 - 0
deep/matrix.cpp

@@ -0,0 +1,22 @@
+#include "matrix.hpp"
+
+void
+Matrix::resize(size_t _nr,size_t _nc){
+  assert(_nr>0);
+  assert(_nc>0);
+  if(data!=nullptr) delete[] data;
+  nr=_nr;
+  nc=_nc;
+  data=new double[nc*nr];
+}
+
+void
+Matrix::view() const{
+  for(size_t i=0;i<nr;++i){
+    cout<<'['<<get(i,0);
+    for(size_t j=1;j<nc;++j){
+      cout<<','<<get(i,j);
+    }
+    cout<<endl;
+  }
+}

+ 67 - 0
deep/matrix.hpp

@@ -0,0 +1,67 @@
+#ifndef MATRIX_HPP
+#define MATRIX_HPP
+
+#include "vector.hpp"
+
// Dense row-major matrix of doubles backed by a single owning buffer.
// Value semantics: copy construction/assignment deep-copy the buffer
// (added to fix the double-delete the compiler-generated shallow copies
// would cause, since the destructor frees `data`).
class Matrix{
public:
  size_t nr,nc;    // rows, columns
  double* data;    // owning pointer to nr*nc doubles, row-major; nullptr when empty
  Matrix();
  Matrix(size_t nr,size_t nc);
  Matrix(const Matrix& M);
  Matrix& operator=(const Matrix& M);
  ~Matrix();
  void resize(size_t nr,size_t nc);
  double& get(size_t i);           // flat access, i < nr*nc
  double get(size_t i) const;
  double& get(size_t i,size_t j);  // element at (row i, column j)
  double get(size_t i,size_t j) const;
  void view() const;               // print one "[...]" line per row
};

// Empty matrix, no allocation.
inline
Matrix::Matrix(){
  nr=0;
  nc=0;
  data=nullptr;
}

// Allocates an _nr x _nc buffer; contents are uninitialized.
inline
Matrix::Matrix(size_t _nr,size_t _nc){
  assert(_nr>0);
  assert(_nc>0);
  nr=_nr;
  nc=_nc;
  data=new double[nr*nc];
}

// Deep copy (rule of three: the class owns a raw buffer).
inline
Matrix::Matrix(const Matrix& M){
  nr=M.nr;
  nc=M.nc;
  if(M.data==nullptr){
    data=nullptr;
  }else{
    data=new double[nr*nc];
    memcpy(data,M.data,nr*nc*sizeof(double));
  }
}

// Deep-copy assignment, safe against self-assignment; allocates the new
// buffer before freeing the old one so a failed allocation leaves the
// matrix untouched.
inline Matrix&
Matrix::operator=(const Matrix& M){
  if(this!=&M){
    double* fresh=nullptr;
    if(M.data!=nullptr){
      fresh=new double[M.nr*M.nc];
      memcpy(fresh,M.data,M.nr*M.nc*sizeof(double));
    }
    delete[] data;
    data=fresh;
    nr=M.nr;
    nc=M.nc;
  }
  return *this;
}

inline
Matrix::~Matrix(){
  if(data!=nullptr) delete[] data;
}

inline
double& Matrix::get(size_t i,size_t j){
  assert(i<nr);
  assert(j<nc);
  return data[i*nc+j];
}

inline
double Matrix::get(size_t i,size_t j) const{
  assert(i<nr);
  assert(j<nc);
  return data[i*nc+j];
}

inline
double& Matrix::get(size_t i){
  assert(i<nr*nc);
  return data[i];
}

inline double
Matrix::get(size_t i) const{
  assert(i<nr*nc);
  return data[i];
}
+#endif

+ 73 - 0
deep/mnist/mnist.cpp

@@ -0,0 +1,73 @@
+#include "mnist.hpp"
+
+int
+Mnist::reverse_int32(unsigned char* buffer){
+  return (((int)buffer[0])<<24)|(((int)buffer[1])<<16)|(((int)buffer[2])<<8)|((int)buffer[3]);
+}
+
// Loads the four MNIST files from ./mnist/ (process exits if any is
// missing) and sizes the scratch vectors: x holds 784 pixels, y the
// 10-class one-hot label. Asserts that label and image counts agree.
Mnist::Mnist():Dataset(){
  x_size=784;
  y_size=10;
  x.resize(784);
  y.resize(10);
  train_labels=nullptr;  // (set by load_labels below)
  train_size=load_labels("mnist/train-labels.idx1-ubyte",&train_labels);
  size_t temp=load_images("mnist/train-images.idx3-ubyte",&train_images);
  assert(train_size==temp);
  test_size=load_labels("mnist/t10k-labels.idx1-ubyte",&test_labels);
  temp=load_images("mnist/t10k-images.idx3-ubyte",&test_images);
  assert(test_size==temp);
}
+
// Load an MNIST label file (IDX1 format): verifies the magic number
// 2049, allocates *dst and fills it with one byte (0..9) per label.
// Returns the number of labels; exits the process on open failure.
size_t
Mnist::load_labels(string filename,unsigned char** dst){
  ifstream file(filename,ios::in|ios::binary);
  if(not file.is_open()){
    cerr<<"[error] Could not open "<<filename<<endl;
    exit(-1);
  }
  unsigned char buffer[4];
  file.read((char*)buffer,4);
  assert(reverse_int32(buffer)==2049);  // IDX1 magic for label files
  file.read((char*)buffer,4);
  int size;
  size=reverse_int32(buffer);
  *dst=new unsigned char[size];
  file.read((char*)*dst,size);
  file.close();
  return size;
}
+
// Load an MNIST image file (IDX3 format): verifies the magic number
// 2051 and the 28x28 dimensions, then reads all images as raw bytes
// into *dst (784 bytes per image, row-major). Returns the image count;
// exits the process on open failure.
size_t
Mnist::load_images(string filename,unsigned char** dst){
  ifstream file(filename,ios::in|ios::binary);
  if(not file.is_open()){
    cerr<<"[error] Could not open "<<filename<<endl;
    exit(-1);
  }
  unsigned char buffer[4];
  int size;
  file.read((char*)buffer,4);
  assert(reverse_int32(buffer)==2051);  // IDX3 magic for image files
  file.read((char*)buffer,4);
  size=reverse_int32(buffer);
  file.read((char*)buffer,4);
  assert(reverse_int32(buffer)==28);    // rows
  file.read((char*)buffer,4);
  assert(reverse_int32(buffer)==28);    // columns
  *dst=new unsigned char[784*size];
  file.read((char*)*dst,784*size);
  file.close();
  return size;
}
+
+pair<const Vector&,const Vector&>
+Mnist::get(const size_t i,const unsigned char* const * labels,const unsigned char* const * images) const{
+  size_t c=(size_t)(*labels)[i];
+  for(size_t i=0;i<10;++i) y.data[i]=0;
+  y.data[c]=1;
+  const unsigned char* x_src=&(*images)[784*i];
+  for(size_t i=0;i<784;++i){
+    x.data[i]=double(x_src[i])/256.0;
+  }
+  return pair<const Vector&,const Vector&>(x,y);
+}

+ 43 - 0
deep/mnist/mnist.hpp

@@ -0,0 +1,43 @@
+#ifndef MNIST_HPP
+#define MNIST_HPP
+
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <cstdint>
+#include <cassert>
+
+#include "../dataset.hpp"
+
+using namespace std;
+
// MNIST dataset loader. Owns the raw label/image byte arrays and two
// mutable scratch vectors that are reused by every get_train/get_test
// call, so returned references are invalidated by the next call (and
// the class is not thread-safe).
class Mnist:public Dataset{
private:
  unsigned char* train_labels;  // one byte (0..9) per training sample
  unsigned char* test_labels;
  unsigned char* train_images;  // 784 bytes per image, row-major
  unsigned char* test_images;
  mutable Vector x;  // scratch: last requested input (784 pixels)
  mutable Vector y;  // scratch: last requested one-hot label (size 10)
  size_t load_labels(string filename,unsigned char** dst);
  size_t load_images(string filename,unsigned char** dst);
  int reverse_int32(unsigned char* buffer);
  // Shared implementation of get_train/get_test over either split.
  pair<const Vector&,const Vector&> get(const size_t i,const unsigned char* const* labels,const unsigned char* const * images) const;
public:
  Mnist();
  pair<const Vector&,const Vector&> get_train(const size_t i) const;
  pair<const Vector&,const Vector&> get_test(const size_t i) const;
};
+
// Return the i-th training sample as (x, one-hot y); the references
// stay valid only until the next get_train/get_test call.
inline pair<const Vector&,const Vector&>
Mnist::get_train(const size_t i) const{
  assert(i<train_size);
  return get(i,&train_labels,&train_images);
}

// Same as get_train, but for the test split.
inline pair<const Vector&,const Vector&>
Mnist::get_test(const size_t i) const{
  assert(i<test_size);
  return get(i,&test_labels,&test_images);
}
+#endif

BIN
deep/mnist/t10k-images.idx3-ubyte


BIN
deep/mnist/t10k-labels.idx1-ubyte


BIN
deep/mnist/train-images.idx3-ubyte


BIN
deep/mnist/train-labels.idx1-ubyte


+ 206 - 0
deep/network.cpp

@@ -0,0 +1,206 @@
+#include "network.hpp"
+
+void Network::init_normal_distribution(double m,double d){
+  default_random_engine generator;
+  normal_distribution<double> distribution(m,d);
+  for(size_t l=1;l<depth;++l){
+    Vector& b=biais[l];
+    for(size_t i=0;i<sizes[l];++i){
+      b.data[i]=distribution(generator);
+    }
+    Matrix& w=weights[l];
+    for(size_t i=0;i<sizes[l]*sizes[l-1];++i){
+      w.get(i)=distribution(generator);
+    }
+  }
+}
+
+void Network::init_standard(){
+  default_random_engine generator;
+  normal_distribution<double> distribution(0,1);
+  for(size_t l=1;l<depth;++l){
+    Vector& b=biais[l];
+    for(size_t i=0;i<sizes[l];++i){
+      b.data[i]=distribution(generator);
+    }
+    Matrix& w=weights[l];
+    for(size_t i=0;i<sizes[l]*sizes[l-1];++i){
+      normal_distribution<double> distribution2(0,1/sqrt(sizes[l-1]));
+      w.get(i)=distribution2(generator);
+    }
+  }
+}
+
// Forward pass: copy x into the input activations, then propagate
// through every layer. Returns the output layer's activations — a
// reference into the network's own state, overwritten by the next call.
const Vector&
Network::feed_forward(const Vector& x){
  a[0]=x;
  for(size_t l=1;l<depth;++l){
    compute_z(l);
    compute_a(l);
  }
  return a[depth-1];
}
+
// Classification accuracy on the test split: a prediction counts as
// correct when the argmax of the network output matches the argmax of
// the one-hot target. Prints and returns the accuracy in percent.
double
Network::eval(Dataset* dataset){
  size_t n=dataset->get_test_size();
  size_t nb=0;
  for(size_t i=0;i<n;++i){
    pair<const Vector&,const Vector&> t=dataset->get_test(i);
    const Vector& a=feed_forward(t.first);
    if(a.argmax()==t.second.argmax()) ++nb;
  }
  double res=double(nb)/double(n)*100;
  cout<<"> Res = "<<res<<"%"<<endl;
  return res;
}
+
+void
+Network::train(Dataset* dataset,size_t nb_epochs,size_t batch_size,double eta){
+  size_t train_size=dataset->get_train_size();
+  size_t nb_batchs=(train_size-1)/batch_size+1;
+  size_t* indices=new size_t[train_size];
+  for(size_t i=0;i<train_size;++i){
+    indices[i]=i;
+  }
+  for(size_t epoch=0;epoch<nb_epochs;++epoch){
+    cout<<"Epoch "<<epoch<<endl;
+    shuffle(indices,train_size);
+    for(size_t batch=0;batch<nb_batchs;++batch){
+      size_t begin=batch*batch_size;
+      size_t end=min(train_size,begin+batch_size);
+      update_batch(dataset,indices,begin,end,eta);
+    }
+    eval(dataset);
+  }
+  delete[] indices;
+}
+
+void
+Network::shuffle(size_t* tab,size_t size){
+  default_random_engine generator;
+  uniform_int_distribution<int> distribution(0,size-1);
+  for(size_t k=0;k<size;++k){
+    size_t i=distribution(generator);
+    size_t j=distribution(generator);
+    swap(tab[i],tab[j]);
+  }
+}
+
// Process one mini-batch (samples indices[begin..end)): zero the
// gradient accumulators, accumulate each sample's gradients via
// backpropagation, then take one gradient step scaled by
// eta / batch_size.
void
Network::update_batch(Dataset* dataset,size_t* indices,size_t begin,size_t end,double eta){
  double batch_size=end-begin;
  for(size_t l=1;l<depth;++l){
    init_nabla_b(l);
    init_nabla_w(l);
  }
  for(size_t i=begin;i<end;++i){
    pair<const Vector&,const Vector&> data=dataset->get_train(indices[i]);
    back_propagation(data.first,data.second,eta);
  }
  double eta_batch=eta/batch_size;
  for(size_t l=1;l<depth;++l){
    update_b(l,eta_batch);
    update_w(l,eta_batch);
  }
}
+
// Backpropagation for one sample (x,y): forward pass, output-layer
// delta from the cost derivative, deltas propagated backwards through
// layers depth-2..1, then gradients accumulated into nabla_b/nabla_w.
// NOTE(review): the eta parameter is unused here — the step size is
// applied later in update_batch. Assumes depth >= 2.
void
Network::back_propagation(const Vector&x, const Vector& y,double eta){
  a[0]=x;
  for(size_t l=1;l<depth;++l){
    compute_z(l);
    compute_a(l);
  }
  compute_last_delta(y);
  // Backwards over hidden layers; loop exits when l reaches 0 (size_t
  // stays safe because the decrement from 1 ends the loop).
  for(size_t l=depth-2;l>=1;--l){
    compute_delta(l);
  }
  for(size_t l=1;l<depth;++l){
    update_nabla_b(l);
    update_nabla_w(l);
  }
}
+
+void Network::init_nabla_b(size_t l){
+  Vector& V=nabla_b[l];
+  for(size_t i=0;i<sizes[l];++i){
+    V.data[i]=0;
+  }
+}
+
+void Network::init_nabla_w(size_t l){
+  Matrix& M=nabla_w[l];
+  for(size_t i=0;i<sizes[l-1]*sizes[l];++i){
+    M.get(i)=0;
+  }
+}
+
// a[l] = sigmoid(z[l]), element-wise.
void Network::compute_a(size_t l){
  for(size_t i=0;i<sizes[l];++i){
    a[l].data[i]=sigmoid(z[l].data[i]);
  }
}

// z[l] = weights[l] * a[l-1] + biais[l]: the weighted input of layer l.
void Network::compute_z(size_t l){
  for(size_t i=0;i<sizes[l];++i){
    double temp=biais[l].data[i];
    for(size_t j=0;j<sizes[l-1];++j){
      temp+=weights[l].get(i,j)*a[l-1].data[j];
    }
    z[l].data[i]=temp;
  }
}
+
// Output-layer delta: dC/da * sigmoid'(z), element-wise, for the
// quadratic cost (cost_derivative returns a - y).
void
Network::compute_last_delta(const Vector& y){
  size_t L=depth-1;
  for(size_t i=0;i<sizes[L];++i){
    delta[L].data[i]=cost_derivative(a[L].data[i],y.data[i])*sigmoid_prime(z[L].data[i]);
  }
}

// Propagate the error one layer backwards:
// delta[l] = (weights[l+1]^T * delta[l+1]) * sigmoid'(z[l]), element-wise.
void
Network::compute_delta(size_t l){
  for(size_t i=0;i<sizes[l];++i){
    double temp=0;
    for(size_t j=0;j<sizes[l+1];++j){
      temp+=(weights[l+1].get(j,i)*delta[l+1].data[j]);
    }
    delta[l].data[i]=temp*sigmoid_prime(z[l].data[i]);
  }
}
+
+void
+Network::update_nabla_b(size_t l){
+  for(size_t i=0;i<sizes[l];++i){
+    nabla_b[l].data[i]+=delta[l].data[i];
+  }
+}
+
+void
+Network::update_nabla_w(size_t l){
+  for(size_t i=0;i<sizes[l];++i){
+    for(size_t j=0;j<sizes[l-1];++j){
+      nabla_w[l].get(i,j)+=a[l-1].data[j]*delta[l].data[i];
+    }
+  }
+}
+
+void
+Network::update_b(size_t l,double eta_batch){
+  Vector& U=biais[l];
+  Vector& V=nabla_b[l];
+  for(size_t i=0;i<sizes[l];++i){
+    U.data[i]-=V.data[i]*eta_batch;
+  }
+}
+
+void
+Network::update_w(size_t l,double eta_batch){
+  Matrix& M=weights[l];
+  Matrix& P=nabla_w[l];
+  for(size_t i=0;i<sizes[l-1]*sizes[l];++i){
+    M.get(i)-=P.get(i)*eta_batch;
+  }
+}

+ 117 - 0
deep/network.hpp

@@ -0,0 +1,117 @@
+#ifndef NETWORK_HPP
+#define NETWORK_HPP
+
+#include <iostream>
+#include <vector>
+#include <random>
+#include <algorithm>
+#include "dataset.hpp"
+#include "vector.hpp"
+#include "matrix.hpp"
+
+using namespace std;
+
+class Trainer;
+
+double sigmoid(double x);
+double sigmoid_prime(double x);
+double cost_derivative(double a,double y);
+
+class Network{
+  friend class Trainer;
+protected:
+  size_t depth;
+  vector<size_t> sizes;
+  Matrix* weights;
+  Vector *biais;
+  Vector *a;
+  Vector *z;
+  Vector *nabla_b;
+  Matrix *nabla_w;
+  Vector *delta;
+  void shuffle(size_t* tab,size_t size);
+  void compute_z(size_t l);
+  void compute_a(size_t l);
+  void compute_last_delta(const Vector& y);
+  void compute_delta(size_t l);
+  void init_nabla_b(size_t l);
+  void init_nabla_w(size_t l);
+  void update_nabla_b(size_t l);
+  void update_nabla_w(size_t l);
+  void update_b(size_t l,double eta_batch);
+  void update_w(size_t l,double eta_batch);
+public:
+  template<typename ... Sizes> Network(Sizes ... _sizes);
+  void init_normal_distribution(double m,double d);
+  void init_standard();
+  double* new_output_vector() const;
+  const Vector& feed_forward(const Vector& x);
+  double eval(Dataset* dataset);
+  void train(Dataset* dataset,size_t nb_epochs,size_t batch_size,double eta);
+  void update_batch(Dataset* dataset,size_t* indices,size_t begin,size_t end,double eta);
+  void back_propagation(const Vector& x,const Vector& y,double eta);
+  Vector hack(const Vector& x,const Vector& y,double eta,size_t nb_steps,void (*)(const Vector&));
+};
+
+
+inline double
+sigmoid(double x){
+  return 1.0/(1.0+exp(-x));
+};
+
+inline double
+sigmoid_prime(double x){
+  double t=sigmoid(x);
+  return t*(1.0-t);
+};
+
+template<typename ... Sizes> inline
+Network::Network(Sizes ... _sizes):sizes({(size_t)_sizes ...}){
+  depth=sizes.size();
+
+  // Biais vectors
+  biais=new Vector[depth];
+  for(size_t l=0;l<depth;++l){
+    biais[l].resize(sizes[l]);
+  }
+
+  // Weights vectors
+  weights=new Matrix[depth];
+  for(size_t l=1;l<depth;++l){
+    weights[l].resize(sizes[l],sizes[l-1]);
+  }
+
+  // Activation vectors
+  a=new Vector[depth];
+  for(size_t l=0;l<depth;++l){
+    a[l].resize(sizes[l]);
+  }
+
+  // Activation vectors
+  z=new Vector[depth];
+  for(size_t l=0;l<depth;++l){
+    z[l].resize(sizes[l]);
+  }
+
+  nabla_b=new Vector[depth];
+  for(size_t l=0;l<depth;++l){
+    nabla_b[l].resize(sizes[l]);
+  }
+
+  nabla_w=new Matrix[depth];
+  for(size_t l=1;l<depth;++l){
+    nabla_w[l].resize(sizes[l],sizes[l-1]);
+  }
+
+  delta=new Vector[depth];
+  for(size_t l=0;l<depth;++l){
+    delta[l].resize(sizes[l]);
+  }
+}
+
+inline double
+cost_derivative(double a,double y){
+  return a-y;
+}
+
+#endif

+ 65 - 0
deep/vector.cpp

@@ -0,0 +1,65 @@
+#include "vector.hpp"
+#include "matrix.hpp"
+
+Vector::Vector(const Vector& u){
+  n=u.n;
+  data=new double[n];
+  memcpy(data,u.data,n*sizeof(double));
+}
+
+const Vector&
+Vector::operator=(const Vector& u){
+  assert(n==u.n);
+  memcpy(data,u.data,n*sizeof(double));
+  return *this;
+}
+
+void
+Vector::resize(size_t _n){
+  if(data!=nullptr){
+    delete[] data;
+  }
+  n=_n;
+  data=new double[n];
+}
+
+size_t
+Vector::argmax() const{
+  double m=data[0];
+  size_t res=0;
+  for(size_t i=1;i<n;++i){
+    double t=data[i];
+    if(t>m){
+      m=t;
+      res=i;
+    }
+  }
+  return res;
+}
+
+void
+Vector::softmax(){
+  double s=0;
+  for(size_t i=0;i<n;++i){
+    s+=Math::exp(data[i]);
+  }
+  for(size_t i=0;i<n;++i){
+    data[i]=Math::exp(data[i])/s;
+  }
+}
+
+void
+Vector::clear(){
+  for(size_t i=0;i<n;++i){
+    data[i]=0;
+  }
+}
+
+ostream& operator<<(ostream& os,const Vector& v){
+  if(v.n==0) return os<<"[]";
+  os<<'['<<v.data[0];
+  for(size_t i=1;i<v.n;++i){
+    os<<", "<<v.data[i];
+  }
+  return os<<']';
+}

+ 47 - 0
deep/vector.hpp

@@ -0,0 +1,47 @@
+#ifndef VECTOR_HPP
+#define VECTOR_HPP
+#include <iostream>
+#include <cassert>
+#include <cstring>
+#include "math.hpp"
+
+using namespace std;
+
// Heap-allocated fixed-size vector of doubles with value semantics
// (deep copy on construction and assignment). Members are public: the
// numerical code indexes `data` directly for speed.
class Vector{
public:
  size_t n;       // number of components
  double* data;   // owning pointer to n doubles; nullptr until allocated
  Vector();
  Vector(const Vector& u);
  Vector(size_t n);  // NOTE(review): not explicit — permits implicit size_t->Vector conversion
  ~Vector();
  const Vector& operator=(const Vector& u);
  void resize(size_t s);
  size_t argmax() const;  // index of the largest component
  void softmax();         // in-place softmax via Math::exp
  void clear();           // zero every component
  friend ostream& operator<<(ostream& os,const Vector& v);
};
+
ostream& operator<<(ostream& os,const Vector&);

// Empty vector: no allocation until resize() or assignment.
inline
Vector::Vector(){
  n=0;
  data=nullptr;
}

// Allocates _n components, left uninitialized.
inline
Vector::Vector(size_t _n){
  n=_n;
  data=new double[n];
}

inline
Vector::~Vector(){
  if(data!=nullptr){
    delete[] data;
  }
}
+
+#endif