Jean Fromentin 5 年 前
コミット
8f07986266
5 ファイル変更533 行追加0 行削除
  1. 25 0
      single/Makefile
  2. 74 0
      single/monoid.cpp
  3. 308 0
      single/monoid.hpp
  4. 122 0
      single/treewalk.cpp
  5. 4 0
      single/treewalk.hpp

+ 25 - 0
single/Makefile

@@ -0,0 +1,25 @@
+# CILK_ROOT must contains the GCC/Cilk root directory
+OS     	    = $(shell uname)
+CPPFLAGS    = -DMAX_GENUS=$(MAX_GENUS) #-DNDEBUG
+CXXFLAGS    = -std=c++11 -g -Wall -O3 # -fsanitize=thread # -Winline
+CXX         = g++-7
+TARGET_ARCH = -march=native -mtune=native
+TARGET 	    = wilf_alone
+
+# Pour compiler avec une valeur différente: make MAX_GENUS=35
+DEFAULT_MAX_GENUS=40
+MAX_GENUS=$(DEFAULT_MAX_GENUS)
+
+all: $(TARGET)
+
+monoid.o: monoid.cpp monoid.hpp
+treewalk.o: treewalk.cpp treewalk.hpp monoid.hpp
+wilf_alone: treewalk.o monoid.o
+	$(CXX) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
+
+clean:
+	rm -rf $(TARGET) *.o *~
+
+test: all
+	./treewalk
+

+ 74 - 0
single/monoid.cpp

@@ -0,0 +1,74 @@
+#include <iostream>
+#include "monoid.hpp"
+
+void init_full_N(monoid &m)
+{
+  epi8 block ={1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8};
+  for(auto i=0; i<NBLOCKS; i++){
+    m.blocks[i] = block;
+    block = block + 8;
+  }
+  m.genus = 0;
+  m.conductor = 1;
+  m.min = 1;
+  m.left=1;
+  m.left_primitive=0;
+  m.e=1;
+  m.wilf=0;
+}
+
+void init_ordinary(monoid &O,int m){
+  O.decs[0]=1;
+  for(auto i=1;i<2*m;++i){
+    O.decs[i]=i/m;
+  }
+  for(auto i=0;i<SIZE-2*m;++i){
+    O.decs[2*m+i]=2+i/2;
+  }
+			     
+  O.genus = m-1;
+  O.conductor = m;
+  O.min = m;
+  O.left=1;
+  O.left_primitive=0;
+  O.e=m;
+  O.wilf=0;
+}
+
+void print_monoid(const monoid &m)
+{
+  unsigned int i;
+  std::cout<<"min = "<<m.min<<", cond = "<<m.conductor<<", genus = "<<m.genus<<", decs = ";
+  for (i=0; i<SIZE; i++) std::cout<<((int) m.decs[i])<<' ';
+  std::cout<<std::endl;
+}
+
+void print_monoid_gen(const monoid &m)
+{
+  unsigned int i;
+  std::cout<<"min = "<<m.min<<", cond = "<<m.conductor<<", genus = "<<m.genus<<", gen = ";
+  std::cout<<"< ";
+  for (i=1; i<SIZE; i++){
+    if(m.decs[i]==1){
+      std::cout<<i<<' ';
+    }
+  }
+  std::cout<<'>'<<std::endl;
+}
+
+void output(const monoid& m,fstream& f){
+  f<<"w = "<<m.wilf<<" : < ";
+  for (auto i=1; i<SIZE; i++){
+    if(m.decs[i]==1){
+      f<<i<<' ';
+    }
+  }
+  f<<'>'<<endl;
+}
+
+void print_epi8(epi8 bl)
+{
+  unsigned int i;
+  for (i=0; i<16; i++) std::cout<<((uint8_t*)&bl)[i]<<' ';
+  std::cout<<std::endl;
+}

+ 308 - 0
single/monoid.hpp

@@ -0,0 +1,308 @@
+#ifndef MONOID_HPP
+#define MONOID_HPP
+
+#include <cstdint>
+#include <fstream>
+
+// We cant have those as C++ constant because of the #define used below in
+// remove_generator manual loop unrolling. I don't know if the unrolling is
+// doable using template metaprogamming. I didn't manage to figure out how.
+
+#ifndef MAX_GENUS
+#error "Please define the MAX_GENUS macro"
+#endif
+#define SIZE_BOUND (3*(MAX_GENUS-1))
+#define NBLOCKS ((SIZE_BOUND+15) >> 4)
+#define SIZE (NBLOCKS << 4)
+// const uint_fast8_t MAX_GENUS = 40;
+// const uint_fast8_t SIZE_BOUND = (3*(MAX_GENUS-1));
+// const uint_fast8_t NBLOCKS = ((SIZE_BOUND+15) >> 4);
+// const uint_fast8_t SIZE = (NBLOCKS << 4);
+
+#include <x86intrin.h>
+
+typedef uint8_t epi8 __attribute__ ((vector_size (16)));
+typedef uint8_t dec_numbers[SIZE] __attribute__ ((aligned (16)));
+typedef epi8 dec_blocks[NBLOCKS];
+typedef uint_fast64_t ind_t;  // The type used for array indexes
+
+using namespace std;
+
+struct monoid
+{
+  union {
+    dec_numbers decs;
+    dec_blocks blocks;
+  };
+  // Dont use char as they have to be promoted to 64 bits to do pointer arithmetic.
+  ind_t conductor, min, genus,left_primitive,left,e,wilf;
+};
+
+void init_full_N(monoid &);
+void init_ordinary(monoid& O,int m);
+void print_monoid(const monoid &);
+void print_monoid_gen(const monoid&);
+void print_epi8(epi8);
+void output(const monoid& m,fstream& f);
+inline void copy_blocks(      dec_blocks &__restrict__ dst,
+			const dec_blocks &__restrict__ src);
+inline void remove_generator(monoid &__restrict__ dst,
+		      const monoid &__restrict__ src,
+			     ind_t gen,ind_t pos);
+inline monoid remove_generator(const monoid &src, ind_t gen,ind_t pos);
+
+
+// typedef enum { ALL, CHILDREN } generator_type;
+class ALL {};
+class CHILDREN {};
+
+// template <generator_type T> class generator_iter
+template <class T> class generator_iter
+{
+private:
+
+  const monoid &m;
+  unsigned int mask;   // movemask_epi8 returns a 32 bits values
+  ind_t iblock, gen, bound;
+
+public:
+
+  generator_iter(const monoid &mon);
+  bool move_next();
+  uint8_t count();
+  inline ind_t get_gen() const {return gen; };
+};
+
+
+
+///////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+////////////////   Implementation part here for inlining   ////////////////
+///////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////
+
+/*
+Note: for some reason the code using gcc vector variables is 2-3% faster than
+the code using gcc intrinsic instructions.
+Here are the results of two runs:
+data_mmx =  [9.757, 9.774, 9.757, 9.761, 9.811, 9.819, 9.765, 9.888, 9.774, 9.771]
+data_epi8 = [9.592, 9.535, 9.657, 9.468, 9.460, 9.679, 9.458, 9.461, 9.629, 9.474]
+*/
+
+extern inline epi8 load_unaligned_epi8(const uint8_t *t)
+{ return (epi8) _mm_loadu_si128((__m128i *) (t)); };
+
+extern inline epi8 shuffle_epi8(epi8 b, epi8 sh)    // Require SSE 3
+{ return (epi8) _mm_shuffle_epi8((__m128i) b, (__m128i) sh);}
+
+extern inline int movemask_epi8(epi8 b)
+{ return _mm_movemask_epi8((__m128i) b);};
+
+const epi8 zero   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+const epi8 block1 = zero + 1;
+const uint8_t m1 = 255;
+const epi8 shift16[16] =
+  { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15},
+    {m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14},
+    {m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13},
+    {m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12},
+    {m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11},
+    {m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10},
+    {m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+    {m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7, 8},
+    {m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6, 7},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5, 6},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4, 5},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3, 4},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2, 3},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1, 2},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0, 1},
+    {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1, 0} };
+const epi8 mask16[16] =
+  { {m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1,m1},
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,m1} };
+
+
+template<> inline generator_iter<ALL>::generator_iter(const monoid &mon)
+  : m(mon), bound((mon.conductor+mon.min+15) >> 4)
+{
+  epi8 block;
+  iblock = 0;
+  block = m.blocks[0];
+  mask  = movemask_epi8(block == block1);
+  mask &= 0xFFFE; // 0 is not a generator
+  gen = - 1;
+};
+
+
+template<> inline generator_iter<CHILDREN>::generator_iter(const monoid &mon)
+  : m(mon), bound((mon.conductor+mon.min+15) >> 4)
+{
+  epi8 block;
+  iblock = m.conductor >> 4;
+  block = m.blocks[iblock] & mask16[m.conductor & 0xF];
+  mask  = movemask_epi8(block == block1);
+  gen = (iblock << 4) - 1;
+};
+
+template <class T> inline uint8_t generator_iter<T>::count()
+{
+  uint8_t nbr = _mm_popcnt_u32(mask); // popcnt returns a 8 bits value
+  for (ind_t ib = iblock+1; ib < bound; ib++)
+    nbr += _mm_popcnt_u32(movemask_epi8(m.blocks[ib] == block1));
+  return nbr;
+};
+
+template <class T> inline bool generator_iter<T>::move_next()
+{
+  while (!mask)
+    {
+      iblock++;
+      if (iblock > bound) return false;
+      gen = (iblock << 4) - 1;
+      mask  = movemask_epi8(m.blocks[iblock] == block1);
+    }
+  unsigned char shift = __bsfd (mask) + 1; // Bit Scan Forward
+  gen += shift;
+  mask >>= shift;
+  return true;
+};
+
+
+
+inline __attribute__((always_inline))
+void copy_blocks(dec_blocks &dst, dec_blocks const &src)
+{
+  for (ind_t i=0; i<NBLOCKS; i++) dst[i] = src[i];
+}
+
+
+#include <cassert>
+
+inline __attribute__((always_inline))
+void remove_generator(monoid &__restrict__ dst,
+		      const monoid &__restrict__ src,
+		      ind_t gen,
+		      ind_t pos)
+{
+  ind_t start_block, decal;
+  epi8 block;
+
+  assert(src.decs[gen] == 1);
+
+  ind_t t=gen+1;
+  dst.conductor = t;
+  dst.genus = src.genus + 1;
+  dst.min=src.min;
+  int delta=(src.decs[gen+src.min]==2)?0:-1;
+  dst.e=src.e+delta;
+  assert(dst.e==((gen==src.min)?src.e+1:((src.decs[gen+src.min]==2)?src.e:src.e-1)));
+  ind_t k=gen-src.conductor;
+  assert(dst.conductor==src.conductor+k+1);
+  dst.left=src.left+k;
+  dst.left_primitive=src.left_primitive+pos;
+  //dst.wilf=dst.e*dst.left-dst.conductor;//src.wilf+delta*(src.left+k)-k-1;
+  dst.wilf=src.wilf+delta*(src.left+k)+(src.e-1)*k-1;
+  copy_blocks(dst.blocks, src.blocks);
+
+  start_block = gen >> 4;
+  decal = gen & 0xF;
+  // Shift block by decal uchar
+  block = shuffle_epi8(src.blocks[0], shift16[decal]);
+  dst.blocks[start_block] -= ((block != zero) & block1);
+#if NBLOCKS >= 5
+
+#define CASE_UNROLL(i_loop)			\
+  case i_loop : \
+    dst.blocks[i_loop+1] -= (load_unaligned_epi8(srcblock) != zero) & block1; \
+    srcblock += sizeof(epi8);
+
+  {
+    const uint8_t *srcblock = src.decs + sizeof(epi8) - decal;
+  switch(start_block)
+    {
+      CASE_UNROLL(0);
+#if NBLOCKS > 2
+      CASE_UNROLL(1);
+#endif
+#if NBLOCKS > 3
+      CASE_UNROLL(2);
+#endif
+#if NBLOCKS > 4
+      CASE_UNROLL(3);
+#endif
+#if NBLOCKS > 5
+      CASE_UNROLL(4);
+#endif
+#if NBLOCKS > 6
+      CASE_UNROLL(5);
+#endif
+#if NBLOCKS > 7
+      CASE_UNROLL(6);
+#endif
+#if NBLOCKS > 8
+      CASE_UNROLL(7);
+#endif
+#if NBLOCKS > 9
+      CASE_UNROLL(8);
+#endif
+#if NBLOCKS > 10
+      CASE_UNROLL(9);
+#endif
+#if NBLOCKS > 11
+      CASE_UNROLL(10);
+#endif
+#if NBLOCKS > 12
+      CASE_UNROLL(11);
+#endif
+#if NBLOCKS > 13
+      CASE_UNROLL(12);
+#endif
+#if NBLOCKS > 14
+      CASE_UNROLL(13);
+#endif
+#if NBLOCKS > 15
+      CASE_UNROLL(14);
+#endif
+#if NBLOCKS > 16
+#error "Too many blocks"
+#endif
+    }
+  }
+#else
+#warning "Loop not unrolled"
+
+  for (auto i=start_block+1; i<NBLOCKS; i++)
+    {
+      // The following won't work due to some alignment problem (bug in GCC 4.7.1?)
+      // block = *((epi8*)(src.decs + ((i-start_block)<<4) - decal));
+      block = load_unaligned_epi8(src.decs + ((i-start_block)<<4) - decal);
+      dst.blocks[i] -= ((block != zero) & block1);
+    }
+#endif
+
+  assert(dst.decs[dst.conductor-1] == 0);
+}
+
+inline monoid remove_generator(const monoid &src, ind_t gen,ind_t pos)
+{
+  monoid dst;
+  remove_generator(dst, src, gen,pos);
+  return dst;
+}
+
+#endif

+ 122 - 0
single/treewalk.cpp

@@ -0,0 +1,122 @@
+#include <iostream>
+#include <iomanip>
+#include <chrono>
+#include <cmath>
+#include <cpuid.h>
+#include <fstream>
+#include "treewalk.hpp"
+
+using namespace std;
+using namespace std::chrono;
+
+
+fstream file;
+
+void treat(const monoid& m){
+  int w=m.e*m.left-m.conductor;
+  if(w<=0){
+    if(m.e!=3 and (m.e!=m.min or m.left_primitive!=1)){
+      output(m,file);
+    }
+  }
+}
+
+//static const int mcut=ceil(float(3*(MAX_GENUS+2))/5);
+
+bool cut(const monoid& m){
+  if(3*m.left_primitive>=m.min) return true;
+  return false;
+}
+
+void walk_children(monoid m)
+{
+  monoid data[MAX_GENUS-1], *stack[MAX_GENUS], *current,temp;
+  monoid **stack_pointer = stack + 1;
+
+  for (ind_t i=1; i<MAX_GENUS; i++) stack[i] = &(data[i-1]); // Nathann's trick to avoid copy
+  stack[0] = &m;
+  while (stack_pointer != stack)
+    {
+      --stack_pointer;
+      current = *stack_pointer;
+      if(not cut(*current))
+	{
+	  if (current->genus < MAX_GENUS - 1)
+	    {
+	      auto it = generator_iter<CHILDREN>(*current);
+	      ind_t pos=0;
+	      while (it.move_next())
+		{
+		  // exchange top with top+1
+		  stack_pointer[0] = stack_pointer[1];
+		  remove_generator(**stack_pointer, *current, it.get_gen(),pos++);
+		  treat(**stack_pointer);
+		  stack_pointer++;
+		}
+	      *stack_pointer = current;
+
+	    }
+	  else
+	    {
+	      auto it = generator_iter<CHILDREN>(*current);
+	      ind_t pos=0;
+	      while (it.move_next())
+		{
+		  remove_generator(temp, *current, it.get_gen(),pos++);
+		  treat(temp);
+		}
+	    }
+	}
+    }
+}
+
+
+
+int main(int argc, char **argv)
+{
+  monoid O,S;
+  string nproc = "0";
+
+  if(argc!=3){
+    cerr<<"Usage : "<<argv[0]<<" [m] [k] with k in [1,m-1]"<<endl;
+    exit(-1);
+  }
+  int m=atoi(argv[1]);
+  int k=atoi(argv[2]);
+  if(k<=0 or k>=m){
+    cerr<<"k must be in [1,m-1]"<<endl;
+    exit(-2);
+  }
+  unsigned int ax, bx, cx, dx;
+  if (!__get_cpuid(0x00000001, &ax, &bx, &cx, &dx))
+    {
+      cerr << "Unable to determine the processor type !" << endl;
+      return EXIT_FAILURE;
+    }
+  if (!(cx & bit_SSSE3))
+    {
+      cerr << "This programm require sse3 instructions set !" << endl;
+      return EXIT_FAILURE;
+    }
+  if (!(cx & bit_POPCNT))
+    {
+      cerr << "This programm require popcount instruction !" << endl;
+      return EXIT_FAILURE;
+    }
+
+ 
+  cout << "Testing Wilf's conjecture for numerical semigroups of genus <= "
+       << MAX_GENUS << " which are sons of O_{"<<m<<"}\\{"<<m+k<<"}."<<endl;
+  auto begin = high_resolution_clock::now();
+  init_ordinary(O,m);
+  remove_generator(S,O,m+k,k);
+  string filename="wilf_"+to_string(m)+"_"+to_string(k);
+  file.open(filename.c_str(),ios::out|ios::trunc);
+  walk_children(S);
+  auto end = high_resolution_clock::now();
+  duration<double> ticks = end-begin;
+  cout << " Computation time = " << std::setprecision(4) << ticks.count() << " s."  << endl;
+  file.close();
+  return EXIT_SUCCESS;
+}
+

+ 4 - 0
single/treewalk.hpp

@@ -0,0 +1,4 @@
+#include "monoid.hpp"
+
+void walk_children_stack(monoid m);
+void walk_children(const monoid m);